diff --git a/docker/CI/cli.sh b/docker/CI/cli.sh deleted file mode 100755 index 658324b8f7a..00000000000 --- a/docker/CI/cli.sh +++ /dev/null @@ -1 +0,0 @@ -docker-compose exec server-0 /opt/byconity/bin/clickhouse client --port 52145 --host 127.0.0.1 diff --git a/docker/CI/docker-compose-nexusfs.yml b/docker/CI/docker-compose-nexusfs.yml deleted file mode 100644 index c4dda2deba8..00000000000 --- a/docker/CI/docker-compose-nexusfs.yml +++ /dev/null @@ -1,174 +0,0 @@ -version: "3" - -services: - # After upgrade to docker-compose v2, we could use `include` instead of `extend`. - hdfs-namenode: - extends: - file: ./common/hdfs.yml - service: hdfs-namenode - hdfs-datanode: - extends: - file: ./common/hdfs.yml - service: hdfs-datanode - fdb: - extends: - file: ./common/fdb.yml - service: fdb - my_mysql: - extends: - file: ./common/mysql.yml - service: my_mysql - tso: - image: hub.byted.org/bytehouse/debian.bullseye.fdb.udf:0.1 - command: bash -c "fdbcli -C /config/fdb.cluster --exec \"configure new single ssd\"; tso-server --config-file /config/tso.yml" - depends_on: - - fdb - - hdfs-namenode - volumes: - - ${CNCH_BINARY_PATH}/:/opt/byconity/bin/:ro - - ${CNCH_LIBRARY_PATH}/:/opt/byconity/lib/:ro - - ./nexusfs/:/config/:ro - - ./test_output/tso/:/var/log/byconity/:rw - environment: &env - LD_LIBRARY_PATH: /opt/byconity/lib - PATH: /opt/byconity/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin - ASAN_OPTIONS: - TSAN_OPTIONS: - IS_CI_ENV: 1 - CI_PIPELINE_NAME: CI - cap_add: - - SYS_PTRACE - healthcheck: - test: ["CMD", "curl", "localhost:18845"] - interval: 5s - - server-0: - image: hub.byted.org/bytehouse/debian.bullseye.fdb.udf:0.1 - command: bash -c "(udf-manager --config-file /config/server.yml & clickhouse-server --config-file /config/server.yml)" - depends_on: - tso: - condition: service_healthy - ports: - - "9000:52145" - - "127.0.0.1:8123:21557" - - "127.0.0.1:9004:9004" - environment: - <<: *env - SERVER_ID: server-0 - volumes: - - ${CNCH_BINARY_PATH}/:/opt/byconity/bin/:ro - - ${CNCH_LIBRARY_PATH}/:/opt/byconity/lib/:ro - - ./nexusfs/:/config/:ro - - ./test_output/server-0/:/var/log/byconity/:rw - - ./queries/:/opt/byconity/queries/:ro - cap_add: - - SYS_PTRACE - healthcheck: - test: ["CMD", "curl", "localhost:21557"] - interval: 5s - - server-1: - image: hub.byted.org/bytehouse/debian.bullseye.fdb.udf:0.1 - command: bash -c "(udf-manager --config-file /config/server.yml & clickhouse-server --config-file /config/server.yml)" - depends_on: - tso: - condition: service_healthy - ports: - - "9001:52145" - - "127.0.0.1:8124:21557" - environment: - <<: *env - SERVER_ID: server-1 - volumes: - - ${CNCH_BINARY_PATH}/:/opt/byconity/bin/:ro - - ${CNCH_LIBRARY_PATH}/:/opt/byconity/lib/:ro - - ./nexusfs/:/config/:ro - - ./test_output/server-1/:/var/log/byconity/:rw - - ./queries/:/opt/byconity/queries/:ro - cap_add: - - SYS_PTRACE - healthcheck: - test: ["CMD", "curl", "localhost:52145"] - interval: 5s - - worker-write: - image: hub.byted.org/bytehouse/debian.bullseye.fdb.udf:0.1 - command: bash -c "clickhouse-server --config-file /config/worker.yml" - depends_on: - - server-0 - - server-1 - ports: - - "52149:52145" - environment: - <<: *env - WORKER_GROUP_ID: wg_write - VIRTUAL_WAREHOUSE_ID: vw_write - WORKER_ID: w0 - volumes: - - ${CNCH_BINARY_PATH}/:/opt/byconity/bin/:ro - - ${CNCH_LIBRARY_PATH}/:/opt/byconity/lib/:ro - - ./nexusfs/:/config/:ro - - ./test_output/worker-write/:/var/log/byconity/:rw - - ./queries/:/opt/byconity/queries/:ro - cap_add: - - SYS_PTRACE - 
worker-default: - image: hub.byted.org/bytehouse/debian.bullseye.fdb.udf:0.1 - command: bash -c "(udf-manager --config-file /config/worker.yml & clickhouse-server --config-file /config/worker.yml)" - depends_on: - - server-0 - - server-1 - environment: - <<: *env - WORKER_GROUP_ID: wg_default - VIRTUAL_WAREHOUSE_ID: vw_default - WORKER_ID: r0 - volumes: - - ${CNCH_BINARY_PATH}/:/opt/byconity/bin/:ro - - ${CNCH_LIBRARY_PATH}/:/opt/byconity/lib/:ro - - ./nexusfs/:/config/:ro - - ./test_output/worker-default/:/var/log/byconity/:rw - - ./queries/:/opt/byconity/queries/:ro - cap_add: - - SYS_PTRACE - daemon-manager: - image: hub.byted.org/bytehouse/debian.bullseye.fdb.udf:0.1 - command: bash -c "daemon-manager --config-file ./config/daemon-manager.yml" - depends_on: - server-0: - condition: service_healthy - server-1: - condition: service_healthy - environment: - <<: *env - volumes: - - ${CNCH_BINARY_PATH}/:/opt/byconity/bin/:ro - - ${CNCH_LIBRARY_PATH}/:/opt/byconity/lib/:ro - - ./nexusfs/:/config/:ro - - ./test_output/daemon-manager/:/var/log/byconity/:rw - cap_add: - - SYS_PTRACE - restart: always - - resource-manager: - image: hub.byted.org/bytehouse/debian.bullseye.fdb.udf:0.1 - command: bash -c "resource-manager --config-file /config/resource-manager.yml" - depends_on: - - tso - volumes: - - ${CNCH_BINARY_PATH}/:/opt/byconity/bin/:ro - - ${CNCH_LIBRARY_PATH}/:/opt/byconity/lib/:ro - - ./nexusfs/:/config/:ro - - ./test_output/rm/:/var/log/byconity/:rw - environment: - <<: *env - cap_add: - - SYS_PTRACE - -volumes: - fdb-data: - external: false - hdfs-namenode: - external: false - hdfs-datanode: - external: false diff --git a/docker/CI/multi-servers/server.yml b/docker/CI/multi-servers/server.yml deleted file mode 100644 index 3c5d5ad9480..00000000000 --- a/docker/CI/multi-servers/server.yml +++ /dev/null @@ -1,228 +0,0 @@ -logger: - level: trace - log: /var/log/byconity/out.log - errorlog: /var/log/byconity/err.log - testlog: /var/log/byconity/test.log - size: 1000M - count: 10 - console: true -additional_services: - GIS: 1 - VectorSearch: 1 - FullTextSearch: 1 -http_port: 21557 -rpc_port: 30605 -tcp_port: 52145 -ha_tcp_port: 26247 -exchange_port: 47447 -exchange_status_port: 60611 -interserver_http_port: 30491 -mysql_port: 9004 -listen_host: "0.0.0.0" -prometheus: - endpoint: "/metrics" - port: 0 - metrics: true - events: true - asynchronous_metrics: true - part_metrics: false -cnch_type: server -max_connections: 4096 -keep_alive_timeout: 3 -max_concurrent_queries: 200 -uncompressed_cache_size: 8589934592 -mark_cache_size: 5368709120 -path: /var/byconity/ -tmp_path: /var/byconity/tmp_data/ -users_config: /config/users.yml -default_profile: default -default_database: default -timezone: Europe/Moscow -mlock_executable: false -enable_tenant_systemdb: false -macros: - "-incl": macros - "-optional": true -builtin_dictionaries_reload_interval: 3600 -max_session_timeout: 3600 -default_session_timeout: 60 -dictionaries_config: "*_dictionary.xml" -format_schema_path: /var/byconity/format_schemas/ -perQuery: 1 -storage_configuration: - disks: - hdfs_disk: - path: /user/clickhouse/ - type: bytehdfs - local_disk: - path: /var/byconity/data/ - type: local - policies: - default: - volumes: - hdfs: - default: hdfs_disk - disk: hdfs_disk - local: - default: local_disk - disk: local_disk -cnch_kafka_log: - database: cnch_system - table: cnch_kafka_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -cnch_unique_table_log: - database: cnch_system - table: cnch_unique_table_log - 
flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -cnch_query_log: - database: cnch_system - table: cnch_query_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -query_log: - database: system - table: query_log - flush_interval_milliseconds: 15000 - partition_by: event_date -part_allocation_algorithm: 1 -consistent_hash_ring: - num_replicas: 16 - num_probes: 21 - load_factor: 1.3 -service_discovery: - mode: local - cluster: default - disable_cache: false - cache_timeout: 5 - server: - psm: data.cnch.server - node: - - host: server-0 - hostname: server-0 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - - host: server-1 - hostname: server-1 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - tso: - psm: data.cnch.tso - node: - host: tso-0 - hostname: tso - ports: - port: - - name: PORT0 - value: 18845 - - name: PORT2 - value: 9181 - resource_manager: - psm: data.cnch.resource_manager - node: - host: resource-manager-0 - hostname: resource-manager-0 - ports: - port: - name: PORT0 - value: 28989 - daemon_manager: - psm: data.cnch.daemon_manager - node: - host: daemon-manager-0 - hostname: daemon-manager - ports: - port: - name: PORT0 - value: 17553 - vw_psm: data.cnch.vw - vw: - psm: data.cnch.vw - node: - - host: worker-write-0 - hostname: worker-write - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - vw_name: vw_write - - host: worker-default-0 - hostname: worker-default - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - vw_name: vw_default -catalog: - name_space: default -catalog_service: - type: fdb - fdb: - cluster_file: /config/fdb.cluster -hdfs_addr: hdfs://hdfs-namenode:9000 -udf_path: /var/byconity/data/user_defined -udf_manager_server: - timeout_ms: 20000 - max_retry: 1 -udf_processor: - count: 3 - uds_path: /dev/shm/udf_processor_server - timeout_ms: 10000 - max_retry: 1 -custom_settings_prefixes: SQL_ -restrict_tenanted_users_to_whitelist_settings: false -restrict_tenanted_users_to_privileged_operations: false -sensitive_permission_tenants: 1234 diff --git a/docker/CI/multi-servers/worker.yml b/docker/CI/multi-servers/worker.yml deleted file mode 100644 index 314f12df597..00000000000 --- a/docker/CI/multi-servers/worker.yml +++ /dev/null @@ -1,202 +0,0 @@ -logger: - level: trace - log: /var/log/byconity/out.log - errorlog: /var/log/byconity/err.log - testlog: /var/log/byconity/test.log - size: 1000M - count: 10 -http_port: 21557 -rpc_port: 30605 -tcp_port: 52145 -ha_tcp_port: 26247 -exchange_port: 47447 -exchange_status_port: 60611 -interserver_http_port: 30491 -listen_host: "0.0.0.0" -cnch_type: worker -vw_name: vw_default -max_connections: 4096 -keep_alive_timeout: 3 -max_concurrent_queries: 200 -uncompressed_cache_size: 8589934592 -mark_cache_size: 5368709120 -path: /var/byconity/ -tmp_path: /var/byconity/tmp_data/ 
-users_config: /config/users.yml -default_profile: default -default_database: default -timezone: Europe/Moscow -mlock_executable: false -enable_tenant_systemdb: false -macros: - "-incl": macros - "-optional": true -builtin_dictionaries_reload_interval: 3600 -max_session_timeout: 3600 -default_session_timeout: 60 -dictionaries_config: "*_dictionary.xml" -format_schema_path: /var/byconity/format_schemas/ -perQuery: 1 -storage_configuration: - disks: - hdfs_disk: - path: /user/clickhouse/ - type: bytehdfs - local_disk: - path: /var/byconity/data/ - type: local - policies: - default: - volumes: - hdfs: - default: hdfs_disk - disk: hdfs_disk - local: - default: local_disk - disk: local_disk -hdfs_addr: "hdfs://hdfs-namenode:9000" -cnch_unique_table_log: - database: cnch_system - table: cnch_unique_table_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -query_log: - database: system - table: query_log - flush_interval_milliseconds: 15000 - partition_by: event_date -service_discovery: - mode: local - cluster: default - disable_cache: false - cache_timeout: 5 - server: - psm: data.cnch.server - node: - - host: server-0 - hostname: server-0 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - - host: server-1 - hostname: server-1 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - tso: - psm: data.cnch.tso - node: - host: tso-0 - hostname: tso - ports: - port: - - name: PORT0 - value: 18845 - - name: PORT2 - value: 9181 - resource_manager: - psm: data.cnch.resource_manager - node: - host: resource-manager-0 - hostname: resource-manager-0 - ports: - port: - name: PORT0 - value: 28989 - daemon_manager: - psm: data.cnch.daemon_manager - node: - host: daemon-manager-0 - hostname: daemon-manager - ports: - port: - name: PORT0 - value: 17553 - vw_psm: data.cnch.vw - vw: - psm: data.cnch.vw - node: - - host: worker-write-0 - hostname: worker-write - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - vw_name: vw_write - - host: worker-default-0 - hostname: worker-default - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - vw_name: vw_default -catalog: - name_space: default -catalog_service: - type: fdb - fdb: - cluster_file: /config/fdb.cluster -udf_path: /var/byconity/data/user_defined -udf_manager_server: - timeout_ms: 20000 - max_retry: 1 -udf_processor: - count: 3 - uds_path: /dev/shm/udf_processor_worker - timeout_ms: 10000 - max_retry: 1 -restrict_tenanted_users_to_whitelist_settings: false -restrict_tenanted_users_to_privileged_operations: false -additional_services: - FullTextSearch: true -sensitive_permission_tenants: 1234 diff --git a/docker/CI/multi-workers/server.yml b/docker/CI/multi-workers/server.yml deleted file mode 100644 index 109b3dd933e..00000000000 --- a/docker/CI/multi-workers/server.yml +++ /dev/null @@ -1,252 +0,0 @@ -logger: - level: trace - 
log: /var/log/byconity/out.log - errorlog: /var/log/byconity/err.log - testlog: /var/log/byconity/test.log - size: 1000M - count: 10 -additional_services: - GIS: 1 - VectorSearch: 1 - FullTextSearch: 1 -http_port: 21557 -rpc_port: 30605 -tcp_port: 52145 -ha_tcp_port: 26247 -exchange_port: 47447 -exchange_status_port: 60611 -interserver_http_port: 30491 -mysql_port: 9004 -listen_host: "0.0.0.0" -prometheus: - endpoint: "/metrics" - port: 0 - metrics: true - events: true - asynchronous_metrics: true - part_metrics: false -cnch_type: server -max_connections: 4096 -keep_alive_timeout: 3 -max_concurrent_queries: 200 -uncompressed_cache_size: 8589934592 -mark_cache_size: 5368709120 -path: /var/byconity/ -tmp_path: /var/byconity/tmp_data/ -users_config: /config/users.yml -default_profile: default -default_database: default -timezone: Europe/Moscow -mlock_executable: false -enable_tenant_systemdb: false -macros: - "-incl": macros - "-optional": true -builtin_dictionaries_reload_interval: 3600 -max_session_timeout: 3600 -default_session_timeout: 60 -dictionaries_config: "*_dictionary.xml" -format_schema_path: /var/byconity/format_schemas/ -perQuery: 1 -storage_configuration: - disks: - hdfs_disk: - path: /user/clickhouse/ - type: bytehdfs - local_disk: - path: /var/byconity/data/ - type: local - policies: - default: - volumes: - hdfs: - default: hdfs_disk - disk: hdfs_disk - local: - default: local_disk - disk: local_disk -cnch_kafka_log: - database: cnch_system - table: cnch_kafka_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -cnch_unique_table_log: - database: cnch_system - table: cnch_unique_table_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -cnch_query_log: - database: cnch_system - table: cnch_query_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -query_log: - database: system - table: query_log - flush_interval_milliseconds: 15000 - partition_by: event_date -part_allocation_algorithm: 1 -consistent_hash_ring: - num_replicas: 16 - num_probes: 21 - load_factor: 1.3 -service_discovery: - mode: local - cluster: default - disable_cache: false - cache_timeout: 5 - server: - psm: data.cnch.server - node: - - host: server-0 - hostname: server-0 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - - host: server-1 - hostname: server-1 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - resource_manager: - psm: data.cnch.resource_manager - node: - host: resource-manager-0 - hostname: resource-manager-0 - ports: - port: - name: PORT0 - value: 28989 - daemon_manager: - psm: data.cnch.daemon_manager - node: - host: daemon-manager-0 - hostname: daemon-manager-0 - ports: - port: - name: PORT0 - value: 17553 - tso: - psm: data.cnch.tso - node: - host: tso-0 - hostname: tso-0 - ports: - port: - - name: PORT0 - value: 18845 - - name: PORT2 - value: 9181 - vw_psm: data.cnch.vw - vw: - psm: data.cnch.vw - node: - vw_name: vw_write - host: worker-write - hostname: worker-write - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: 
PORT6 - value: 60611 - node: - vw_name: vw_default - host: worker-default-0 - hostname: worker-default-0 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - node: - vw_name: vw_default - host: worker-default-1 - hostname: worker-default-1 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 -catalog: - name_space: default -catalog_service: - type: fdb - fdb: - cluster_file: /config/fdb.cluster -external_catalog_mgr: - type: fdb - fdb: - cluster_file: /config/fdb/cluster -hdfs_addr: "hdfs://hdfs-namenode:9000" -udf_path: /var/byconity/data/user_defined -udf_manager_server: - timeout_ms: 20000 - max_retry: 1 -udf_processor: - count: 3 - uds_path: /dev/shm/udf_processor_server - timeout_ms: 10000 - max_retry: 1 -custom_settings_prefixes: SQL_ -restrict_tenanted_users_to_whitelist_settings: false -restrict_tenanted_users_to_privileged_operations: false -sensitive_permission_tenants: 1234 diff --git a/docker/CI/nexusfs/conf.d/catalog.yml b/docker/CI/nexusfs/conf.d/catalog.yml deleted file mode 100644 index 7ddd7231874..00000000000 --- a/docker/CI/nexusfs/conf.d/catalog.yml +++ /dev/null @@ -1,6 +0,0 @@ -catalog: - name_space: default -catalog_service: - type: fdb - fdb: - cluster_file: /config/fdb.cluster diff --git a/docker/CI/nexusfs/conf.d/service_discovery.yml b/docker/CI/nexusfs/conf.d/service_discovery.yml deleted file mode 100644 index 7627487161e..00000000000 --- a/docker/CI/nexusfs/conf.d/service_discovery.yml +++ /dev/null @@ -1,115 +0,0 @@ -service_discovery: - mode: local - cluster: default - disable_cache: false - cache_timeout: 5 - server: - psm: data.cnch.server - node: - - host: server-0 - hostname: server-0 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - - host: server-1 - hostname: server-1 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - tso: - psm: data.cnch.tso - node: - host: tso - hostname: tso - ports: - port: - - name: PORT0 - value: 18845 - - name: PORT2 - value: 9181 - resource_manager: - psm: data.cnch.resource_manager - node: - host: resource-manager - hostname: resource-manager - ports: - port: - name: PORT0 - value: 28989 - daemon_manager: - psm: data.cnch.daemon_manager - node: - host: daemon-manager - hostname: daemon-manager - ports: - port: - name: PORT0 - value: 17553 - vw_psm: data.cnch.vw - vw: - psm: data.cnch.vw - node: - - host: worker-write - hostname: worker-write - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - vw_name: vw_write - - host: worker-default - hostname: worker-default - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 
45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - vw_name: vw_default diff --git a/docker/CI/nexusfs/conf.d/storage.yml b/docker/CI/nexusfs/conf.d/storage.yml deleted file mode 100644 index 020132e7b53..00000000000 --- a/docker/CI/nexusfs/conf.d/storage.yml +++ /dev/null @@ -1,18 +0,0 @@ -hdfs_addr: hdfs://hdfs-namenode:9000 -storage_configuration: - disks: - hdfs_disk: - path: /user/clickhouse/ - type: bytehdfs - local_disk: - path: /var/byconity/data/ - type: local - policies: - default: - volumes: - hdfs: - default: hdfs_disk - disk: hdfs_disk - local: - default: local_disk - disk: local_disk diff --git a/docker/CI/nexusfs/daemon-manager.yml b/docker/CI/nexusfs/daemon-manager.yml deleted file mode 100644 index c4cbe3dcbf3..00000000000 --- a/docker/CI/nexusfs/daemon-manager.yml +++ /dev/null @@ -1,63 +0,0 @@ -logger: - level: trace - log: /var/log/byconity/out.log - errorlog: /var/log/byconity/err.log - testlog: /var/log/byconity/test.log - size: 1000M - count: 10 -http_port: 21557 -rpc_port: 30605 -tcp_port: 52145 -ha_tcp_port: 26247 -exchange_port: 47447 -exchange_status_port: 60611 -interserver_http_port: 30491 -listen_host: "0.0.0.0" -cnch_type: server -max_connections: 4096 -keep_alive_timeout: 3 -max_concurrent_queries: 200 -uncompressed_cache_size: 8589934592 -mark_cache_size: 5368709120 -path: /var/byconity/ -tmp_path: /var/byconity/tmp_data/ -users_config: /config/users.yml -default_profile: default -default_database: default -timezone: Europe/Moscow -mlock_executable: false -macros: - "-incl": macros - "-optional": true -builtin_dictionaries_reload_interval: 3600 -max_session_timeout: 3600 -default_session_timeout: 60 -dictionaries_config: "*_dictionary.xml" -format_schema_path: /var/byconity/format_schemas/ -perQuery: 1 -daemon_manager: - port: 17553 - daemon_jobs: - job: - - name: PART_GC - interval: 10000 - disable: 0 - - name: PART_MERGE - interval: 10000 - disable: 0 - - name: CONSUMER - interval: 10000 - disable: 0 - - name: GLOBAL_GC - interval: 5000 - disable: 1 - - name: PART_CLUSTERING - interval: 30000 - disable: 0 - - name: DEDUP_WORKER - interval: 3000 - disable: 0 - # Increasing the frequency of recycling in a test environment - - name: TXN_GC - interval: 3000 - disable: 0 diff --git a/docker/CI/nexusfs/fdb.cluster b/docker/CI/nexusfs/fdb.cluster deleted file mode 100644 index b04f02bc3b5..00000000000 --- a/docker/CI/nexusfs/fdb.cluster +++ /dev/null @@ -1 +0,0 @@ -docker:docker@fdb:4550 diff --git a/docker/CI/nexusfs/resource-manager.yml b/docker/CI/nexusfs/resource-manager.yml deleted file mode 100644 index b53233f1d0f..00000000000 --- a/docker/CI/nexusfs/resource-manager.yml +++ /dev/null @@ -1,29 +0,0 @@ -logger: - level: trace - log: /var/log/byconity/out.log - errorlog: /var/log/byconity/err.log - testlog: /var/log/byconity/test.log - size: 1000M - count: 10 -listen_host: "0.0.0.0" -path: /var/byconity/ -timezone: Europe/Moscow -perQuery: 1 -resource_manager: - port: 28989 - vws: - vw: - - name: vw_default - type: default - num_workers: 1 - worker_groups: - worker_group: - name: wg_default - type: Physical - - name: vw_write - type: write - num_workers: 1 - worker_groups: - worker_group: - name: wg_write - type: Physical diff --git a/docker/CI/nexusfs/server.yml b/docker/CI/nexusfs/server.yml deleted file mode 100644 index f03178bd0e2..00000000000 --- a/docker/CI/nexusfs/server.yml +++ /dev/null @@ -1,105 +0,0 @@ -logger: - level: trace - log: /var/log/byconity/out.log - errorlog: /var/log/byconity/err.log - testlog: 
/var/log/byconity/test.log - size: 1000M - count: 10 - console: true -additional_services: - GIS: 1 - VectorSearch: 1 - FullTextSearch: 1 -http_port: 21557 -rpc_port: 30605 -tcp_port: 52145 -ha_tcp_port: 26247 -exchange_port: 47447 -exchange_status_port: 60611 -interserver_http_port: 30491 -mysql_port: 9004 -listen_host: "0.0.0.0" -prometheus: - endpoint: "/metrics" - port: 0 - metrics: true - events: true - asynchronous_metrics: true - part_metrics: false -cnch_type: server -max_connections: 4096 -keep_alive_timeout: 3 -max_concurrent_queries: 200 -uncompressed_cache_size: 8589934592 -mark_cache_size: 5368709120 -path: /var/byconity/ -tmp_path: /var/byconity/tmp_data/ -users_config: /config/users.yml -default_profile: default -default_database: default -timezone: Europe/Moscow -mlock_executable: false -enable_tenant_systemdb: false -macros: - "-incl": macros - "-optional": true -builtin_dictionaries_reload_interval: 3600 -max_session_timeout: 3600 -default_session_timeout: 60 -dictionaries_config: "*_dictionary.xml" -format_schema_path: /var/byconity/format_schemas/ -perQuery: 1 -nexus_fs: - enable: 1 - use_memory_device: 0 - enable_async_io: 0 - cache_size: 5368709120 - region_size: 4194304 - segment_size: 524288 - enable_memory_buffer: 1 - memory_buffer_size: 1073741824 - clean_regions_pool: 16 - clean_region_threads: 4 - num_in_mem_buffers: 32 - reader_threads: 32 -merge_tree: - reorganize_marks_data_layout: 1 - enable_nexus_fs: 1 -cnch_kafka_log: - database: cnch_system - table: cnch_kafka_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -cnch_unique_table_log: - database: cnch_system - table: cnch_unique_table_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -cnch_query_log: - database: cnch_system - table: cnch_query_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -query_log: - database: system - table: query_log - flush_interval_milliseconds: 15000 - partition_by: event_date -part_allocation_algorithm: 1 -consistent_hash_ring: - num_replicas: 16 - num_probes: 21 - load_factor: 1.3 -udf_path: /var/byconity/data/user_defined -udf_manager_server: - timeout_ms: 20000 - max_retry: 1 -udf_processor: - count: 3 - uds_path: /dev/shm/udf_processor_server - timeout_ms: 10000 - max_retry: 1 -custom_settings_prefixes: SQL_ -restrict_tenanted_users_to_whitelist_settings: false -restrict_tenanted_users_to_privileged_operations: false -sensitive_permission_tenants: 1234 diff --git a/docker/CI/nexusfs/tso.yml b/docker/CI/nexusfs/tso.yml deleted file mode 100644 index 095eb2ebe7e..00000000000 --- a/docker/CI/nexusfs/tso.yml +++ /dev/null @@ -1,22 +0,0 @@ -logger: - level: trace - log: /var/log/byconity/tso.log - errorlog: /var/log/byconity/tso.err.log - testlog: /var/log/byconity/tso.test.log - size: 1000M - count: 10 - console: false -listen_host: "0.0.0.0" -path: /var/byconity/tso -tmp_path: /var/byconity/tmp -tso_service: - type: fdb - fdb: - cluster_file: /config/fdb.cluster - port: 18845 - http: - port: 9181 - receive_timeout: 1800 - send_timeout: 1800 - tso_window_ms: 3000 - tso_get_leader_info_interval_ms: 0 diff --git a/docker/CI/nexusfs/users.yml b/docker/CI/nexusfs/users.yml deleted file mode 100644 index 61e2e5a63d0..00000000000 --- a/docker/CI/nexusfs/users.yml +++ /dev/null @@ -1,38 +0,0 @@ -profiles: - default: - load_balancing: random - log_queries: 1 - max_execution_time: 180 - exchange_timeout_ms: 300000 - enable_nexus_fs: 1 - -users: - default: - networks: - ip: ::/0 - password: "" - profile: default - quota: default - 
access_management: 1 - server: - networks: - ip: ::/0 - password: "" - profile: default - quota: default - probe: - networks: - ip: ::/0 - password: "" - profile: default - quota: default - -quotas: - default: - interval: - duration: 3600 - queries: 0 - errors: 0 - result_rows: 0 - read_rows: 0 - execution_time: 0 \ No newline at end of file diff --git a/docker/CI/nexusfs/worker.yml b/docker/CI/nexusfs/worker.yml deleted file mode 100644 index a97e011eb56..00000000000 --- a/docker/CI/nexusfs/worker.yml +++ /dev/null @@ -1,82 +0,0 @@ -logger: - level: trace - log: /var/log/byconity/out.log - errorlog: /var/log/byconity/err.log - testlog: /var/log/byconity/test.log - size: 1000M - count: 10 -http_port: 21557 -rpc_port: 30605 -tcp_port: 52145 -ha_tcp_port: 26247 -exchange_port: 47447 -exchange_status_port: 60611 -interserver_http_port: 30491 -listen_host: "0.0.0.0" -cnch_type: worker -vw_name: vw_default -max_connections: 4096 -keep_alive_timeout: 3 -max_concurrent_queries: 200 -uncompressed_cache_size: 8589934592 -mark_cache_size: 5368709120 -path: /var/byconity/ -tmp_path: /var/byconity/tmp_data/ -users_config: /config/users.yml -default_profile: default -default_database: default -timezone: Europe/Moscow -mlock_executable: false -enable_tenant_systemdb: false -macros: - "-incl": macros - "-optional": true -builtin_dictionaries_reload_interval: 3600 -max_session_timeout: 3600 -default_session_timeout: 60 -dictionaries_config: "*_dictionary.xml" -format_schema_path: /var/byconity/format_schemas/ -perQuery: 1 -nexus_fs: - enable: 1 - use_memory_device: 0 - enable_async_io: 0 - cache_size: 5368709120 - region_size: 4194304 - segment_size: 524288 - enable_memory_buffer: 1 - memory_buffer_size: 1073741824 - clean_regions_pool: 16 - clean_region_threads: 4 - num_in_mem_buffers: 32 - reader_threads: 32 -merge_tree: - reorganize_marks_data_layout: 1 - enable_nexus_fs: 1 -cnch_unique_table_log: - database: cnch_system - table: cnch_unique_table_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -query_log: - database: system - table: query_log - flush_interval_milliseconds: 15000 - partition_by: event_date -udf_path: /var/byconity/data/user_defined -udf_manager_server: - timeout_ms: 20000 - max_retry: 1 -udf_processor: - count: 3 - uds_path: /dev/shm/udf_processor_worker - timeout_ms: 10000 - max_retry: 1 -restrict_tenanted_users_to_system_tables: false -restrict_tenanted_users_to_whitelist_settings: false -restrict_tenanted_users_to_privileged_operations: false -additional_services: - FullTextSearch: true - VectorSearch: true - GIS: true -sensitive_permission_tenants: 1234 diff --git a/docker/CI/s3/server.yml b/docker/CI/s3/server.yml deleted file mode 100644 index 6d53ac43097..00000000000 --- a/docker/CI/s3/server.yml +++ /dev/null @@ -1,236 +0,0 @@ -# Auto-generated! Please do not modify this file directly. Refer to 'convert-hdfs-configs-to-s3.sh'. 
-logger: - level: trace - log: /var/log/byconity/out.log - errorlog: /var/log/byconity/err.log - testlog: /var/log/byconity/test.log - size: 1000M - count: 10 - console: true -additional_services: - GIS: 1 - VectorSearch: 1 - FullTextSearch: 1 -http_port: 21557 -rpc_port: 30605 -tcp_port: 52145 -ha_tcp_port: 26247 -exchange_port: 47447 -exchange_status_port: 60611 -interserver_http_port: 30491 -mysql_port: 9004 -listen_host: "0.0.0.0" -prometheus: - endpoint: "/metrics" - port: 0 - metrics: true - events: true - asynchronous_metrics: true - part_metrics: false -cnch_type: server -max_connections: 4096 -keep_alive_timeout: 3 -max_concurrent_queries: 200 -uncompressed_cache_size: 8589934592 -mark_cache_size: 5368709120 -path: /var/byconity/ -tmp_path: /var/byconity/tmp_data/ -users_config: /config/users.yml -default_profile: default -default_database: default -timezone: Europe/Moscow -mlock_executable: false -enable_tenant_systemdb: false -macros: - "-incl": macros - "-optional": true -builtin_dictionaries_reload_interval: 3600 -max_session_timeout: 3600 -default_session_timeout: 60 -dictionaries_config: "*_dictionary.xml" -format_schema_path: /var/byconity/format_schemas/ -perQuery: 1 -storage_configuration: - disks: - local_disk: - path: /var/byconity/data/ - type: local - s3_disk: - path: data123/ - type: s3 - endpoint: http://minio:9000 - bucket: cnch - ak_id: minio - ak_secret: minio123 - policies: - default: - volumes: - local: - default: local_disk - disk: local_disk - cnch_default_hdfs: - volumes: - s3: - default: s3_disk - disk: s3_disk - # To avoid break hard-coded test cases. - cnch_default_policy: cnch_default_hdfs -cnch_kafka_log: - database: cnch_system - table: cnch_kafka_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -cnch_unique_table_log: - database: cnch_system - table: cnch_unique_table_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -cnch_query_log: - database: cnch_system - table: cnch_query_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -query_log: - database: system - table: query_log - flush_interval_milliseconds: 15000 - partition_by: event_date -part_allocation_algorithm: 1 -consistent_hash_ring: - num_replicas: 16 - num_probes: 21 - load_factor: 1.3 -service_discovery: - mode: local - cluster: default - disable_cache: false - cache_timeout: 5 - server: - psm: data.cnch.server - node: - - host: server-0 - hostname: server-0 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - - host: server-1 - hostname: server-1 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - tso: - psm: data.cnch.tso - node: - host: tso-0 - hostname: tso - ports: - port: - - name: PORT0 - value: 18845 - - name: PORT2 - value: 9181 - resource_manager: - psm: data.cnch.resource_manager - node: - host: resource-manager-0 - hostname: resource-manager-0 - ports: - port: - name: PORT0 - value: 28989 - daemon_manager: - psm: data.cnch.daemon_manager - node: - host: daemon-manager-0 - hostname: daemon-manager - ports: - port: - name: PORT0 - value: 17553 - vw_psm: data.cnch.vw - vw: - psm: data.cnch.vw - node: - - host: worker-write-0 - hostname: worker-write - ports: - port: 
- - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - vw_name: vw_write - - host: worker-default-0 - hostname: worker-default - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - vw_name: vw_default -catalog: - name_space: default -catalog_service: - type: fdb - fdb: - cluster_file: /config/fdb.cluster -udf_path: /var/byconity/data/user_defined -udf_manager_server: - timeout_ms: 20000 - max_retry: 1 -udf_processor: - count: 3 - uds_path: /dev/shm/udf_processor_server - timeout_ms: 10000 - max_retry: 1 -custom_settings_prefixes: SQL_ -restrict_tenanted_users_to_whitelist_settings: false -restrict_tenanted_users_to_privileged_operations: false -sensitive_permission_tenants: 1234 diff --git a/docker/CI/s3/worker.yml b/docker/CI/s3/worker.yml deleted file mode 100644 index 503691fab5b..00000000000 --- a/docker/CI/s3/worker.yml +++ /dev/null @@ -1,210 +0,0 @@ -# Auto-generated! Please do not modify this file directly. Refer to 'convert-hdfs-configs-to-s3.sh'. -logger: - level: trace - log: /var/log/byconity/out.log - errorlog: /var/log/byconity/err.log - testlog: /var/log/byconity/test.log - size: 1000M - count: 10 -http_port: 21557 -rpc_port: 30605 -tcp_port: 52145 -ha_tcp_port: 26247 -exchange_port: 47447 -exchange_status_port: 60611 -interserver_http_port: 30491 -listen_host: "0.0.0.0" -cnch_type: worker -vw_name: vw_default -max_connections: 4096 -keep_alive_timeout: 3 -max_concurrent_queries: 200 -uncompressed_cache_size: 8589934592 -mark_cache_size: 5368709120 -path: /var/byconity/ -tmp_path: /var/byconity/tmp_data/ -users_config: /config/users.yml -default_profile: default -default_database: default -timezone: Europe/Moscow -mlock_executable: false -enable_tenant_systemdb: false -macros: - "-incl": macros - "-optional": true -builtin_dictionaries_reload_interval: 3600 -max_session_timeout: 3600 -default_session_timeout: 60 -dictionaries_config: "*_dictionary.xml" -format_schema_path: /var/byconity/format_schemas/ -perQuery: 1 -storage_configuration: - disks: - local_disk: - path: /var/byconity/data/ - type: local - s3_disk: - path: data123/ - type: s3 - endpoint: http://minio:9000 - bucket: cnch - ak_id: minio - ak_secret: minio123 - policies: - default: - volumes: - local: - default: local_disk - disk: local_disk - cnch_default_hdfs: - volumes: - s3: - default: s3_disk - disk: s3_disk - # To avoid break hard-coded test cases. 
- cnch_default_policy: cnch_default_hdfs -cnch_unique_table_log: - database: cnch_system - table: cnch_unique_table_log - flush_max_row_count: 10000 - flush_interval_milliseconds: 7500 -query_log: - database: system - table: query_log - flush_interval_milliseconds: 15000 - partition_by: event_date -service_discovery: - mode: local - cluster: default - disable_cache: false - cache_timeout: 5 - server: - psm: data.cnch.server - node: - - host: server-0 - hostname: server-0 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - - host: server-1 - hostname: server-1 - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - tso: - psm: data.cnch.tso - node: - host: tso-0 - hostname: tso - ports: - port: - - name: PORT0 - value: 18845 - - name: PORT2 - value: 9181 - resource_manager: - psm: data.cnch.resource_manager - node: - host: resource-manager-0 - hostname: resource-manager-0 - ports: - port: - name: PORT0 - value: 28989 - daemon_manager: - psm: data.cnch.daemon_manager - node: - host: daemon-manager-0 - hostname: daemon-manager - ports: - port: - name: PORT0 - value: 17553 - vw_psm: data.cnch.vw - vw: - psm: data.cnch.vw - node: - - host: worker-write-0 - hostname: worker-write - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - vw_name: vw_write - - host: worker-default-0 - hostname: worker-default - ports: - port: - - name: PORT2 - value: 21557 - - name: PORT1 - value: 30605 - - name: PORT0 - value: 52145 - - name: PORT4 - value: 27651 - - name: PORT3 - value: 45443 - - name: PORT5 - value: 47447 - - name: PORT6 - value: 60611 - vw_name: vw_default -catalog: - name_space: default -catalog_service: - type: fdb - fdb: - cluster_file: /config/fdb.cluster -udf_path: /var/byconity/data/user_defined -udf_manager_server: - timeout_ms: 20000 - max_retry: 1 -udf_processor: - count: 3 - uds_path: /dev/shm/udf_processor_worker - timeout_ms: 10000 - max_retry: 1 -restrict_tenanted_users_to_whitelist_settings: false -restrict_tenanted_users_to_privileged_operations: false -additional_services: - FullTextSearch: true -sensitive_permission_tenants: 1234 diff --git a/docker/ci-deploy/config/users.yml b/docker/ci-deploy/config/users.yml index 773b115f3f9..2d745378055 100644 --- a/docker/ci-deploy/config/users.yml +++ b/docker/ci-deploy/config/users.yml @@ -4,7 +4,14 @@ profiles: log_queries: 1 max_execution_time: 180 exchange_timeout_ms: 300000 - cnch_max_cached_storage : 50000 + point_lookup: + max_threads: 1 + exchange_source_pipeline_threads: 1 + enable_plan_cache: true + query_worker_fault_tolerance: false + send_cacheable_table_definitions: true + optimize_skip_unused_shards: true + enable_prune_source_plan_segment: true readonly: readonly: 1 diff --git a/docker/docker-compose/byconity-multi-cluster/users.yml b/docker/docker-compose/byconity-multi-cluster/users.yml index 800ccc4b5c9..10f91465665 100644 --- a/docker/docker-compose/byconity-multi-cluster/users.yml +++ b/docker/docker-compose/byconity-multi-cluster/users.yml @@ -4,7 +4,15 @@ profiles: log_queries: 1 
max_execution_time: 180 exchange_timeout_ms: 300000 - + enable_auto_query_forwarding: true + point_lookup: + max_threads: 1 + exchange_source_pipeline_threads: 1 + enable_plan_cache: true + query_worker_fault_tolerance: false + send_cacheable_table_definitions: true + optimize_skip_unused_shards: true + enable_prune_source_plan_segment: true users: default: networks: @@ -35,4 +43,4 @@ quotas: result_rows: 0 read_rows: 0 execution_time: 0 -cnch_config: "/config/cnch-config.yml" \ No newline at end of file +cnch_config: "/config/cnch-config.yml" diff --git a/docker/docker-compose/byconity-multiworkers-cluster/users.yml b/docker/docker-compose/byconity-multiworkers-cluster/users.yml index 800ccc4b5c9..0d24e8d9080 100644 --- a/docker/docker-compose/byconity-multiworkers-cluster/users.yml +++ b/docker/docker-compose/byconity-multiworkers-cluster/users.yml @@ -4,6 +4,14 @@ profiles: log_queries: 1 max_execution_time: 180 exchange_timeout_ms: 300000 + point_lookup: + max_threads: 1 + exchange_source_pipeline_threads: 1 + enable_plan_cache: true + query_worker_fault_tolerance: false + send_cacheable_table_definitions: true + optimize_skip_unused_shards: true + enable_prune_source_plan_segment: true users: default: @@ -35,4 +43,4 @@ quotas: result_rows: 0 read_rows: 0 execution_time: 0 -cnch_config: "/config/cnch-config.yml" \ No newline at end of file +cnch_config: "/config/cnch-config.yml" diff --git a/docker/docker-compose/byconity-simple-cluster/users.yml b/docker/docker-compose/byconity-simple-cluster/users.yml index af9df8a1bc1..d1ec3f918d5 100644 --- a/docker/docker-compose/byconity-simple-cluster/users.yml +++ b/docker/docker-compose/byconity-simple-cluster/users.yml @@ -4,7 +4,14 @@ profiles: log_queries: 1 max_execution_time: 180 exchange_timeout_ms: 300000 - + point_lookup: + max_threads: 1 + exchange_source_pipeline_threads: 1 + enable_plan_cache: true + query_worker_fault_tolerance: false + send_cacheable_table_definitions: true + optimize_skip_unused_shards: true + enable_prune_source_plan_segment: true users: default: networks: diff --git a/docs/en/sql-reference/functions/type-conversion-functions.md b/docs/en/sql-reference/functions/type-conversion-functions.md index 423471290b6..661469e6901 100644 --- a/docs/en/sql-reference/functions/type-conversion-functions.md +++ b/docs/en/sql-reference/functions/type-conversion-functions.md @@ -1337,144 +1337,3 @@ Result: │ 2,"good" │ └───────────────────────────────────────────┘ ``` - -## snowflakeToDateTime {#snowflakeToDateTime} - -extract time from snowflake id as DateTime format. - -**Syntax** - -``` sql -snowflakeToDateTime(value [, time_zone]) -``` - -**Parameters** - -- `value` — `snowflake id`, Int64 value. -- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). - -**Returned value** - -- value converted to the `DateTime` data type. - -**Example** - -Query: - -``` sql -SELECT snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC'); -``` - -Result: - -``` text -┌─snowflakeToDateTime(CAST('1426860702823350272', 'Int64'), 'UTC')─┐ -│ 2021-08-15 10:57:56 │ -└──────────────────────────────────────────────────────────────────┘ -``` - -## snowflakeToDateTime64 {#snowflakeToDateTime64} - -extract time from snowflake id as DateTime64 format. 
- -**Syntax** - -``` sql -snowflakeToDateTime64(value [, time_zone]) -``` - -**Parameters** - -- `value` — `snowflake id`, Int64 value. -- `time_zone` — [Timezone](../../operations/server-configuration-parameters/settings.md#server_configuration_parameters-timezone). The function parses `time_string` according to the timezone. Optional. [String](../../sql-reference/data-types/string.md). - -**Returned value** - -- value converted to the `DateTime64` data type. - -**Example** - -Query: - -``` sql -SELECT snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC'); -``` - -Result: - -``` text -┌─snowflakeToDateTime64(CAST('1426860802823350272', 'Int64'), 'UTC')─┐ -│ 2021-08-15 10:58:19.841 │ -└────────────────────────────────────────────────────────────────────┘ -``` - -## dateTimeToSnowflake {#dateTimeToSnowflake} - -convert DateTime to the first snowflake id at the giving time. - -**Syntax** - -``` sql -dateTimeToSnowflake(value) -``` - -**Parameters** - -- `value` — Date and time. [DateTime](../../sql-reference/data-types/datetime.md). - - -**Returned value** - -- `value` converted to the `Int64` data type as the first snowflake id at that time. - -**Example** - -Query: - -``` sql -SELECT dateTimeToSnowflake(CAST('2021-08-15 18:57:56', 'DateTime')); -``` - -Result: - -``` text -┌─dateTimeToSnowflake(CAST('2021-08-15 18:57:56', 'DateTime'))─┐ -│ 1426860702823350272 │ -└──────────────────────────────────────────────────────────────┘ -``` - - -## dateTime64ToSnowflake {#dateTime64ToSnowflake} - -convert DateTime64 to the first snowflake id at the giving time. - -**Syntax** - -``` sql -dateTime64ToSnowflake(value) -``` - -**Parameters** - -- `value` — Date and time. [DateTime64](../../sql-reference/data-types/datetime64.md). - - -**Returned value** - -- `value` converted to the `Int64` data type as the first snowflake id at that time. - -**Example** - -Query: - -``` sql -SELECT dateTime64ToSnowflake(CAST('2021-08-15 18:57:56.073', 'DateTime64')); -``` - -Result: - -``` text -┌─dateTime64ToSnowflake(CAST('2021-08-15 18:57:56.073', 'DateTime64'))─┐ -│ 1426860703129534464 │ -└──────────────────────────────────────────────────────────────────────┘ -``` \ No newline at end of file diff --git a/docs/en/sql-reference/functions/uuid-functions.md b/docs/en/sql-reference/functions/uuid-functions.md new file mode 100644 index 00000000000..6bed681c896 --- /dev/null +++ b/docs/en/sql-reference/functions/uuid-functions.md @@ -0,0 +1,925 @@ +--- +toc_priority: 53 +toc_title: UUID +--- + +# Functions for Working with UUIDs + +## generateUUIDv4 + +Generates a [version 4](https://tools.ietf.org/html/rfc4122#section-4.4) [UUID](../data-types/uuid.md). + +**Syntax** + +``` sql +generateUUIDv4([expr]) +``` + +**Arguments** + +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. + +**Returned value** + +A value of type UUIDv4. + +**Example** + +First, create a table with a column of type UUID, then insert a generated UUIDv4 into the table. 
+ +``` sql +CREATE TABLE tab (uuid UUID) ENGINE = Memory; + +INSERT INTO tab SELECT generateUUIDv4(); + +SELECT * FROM tab; +``` + +Result: + +```response +┌─────────────────────────────────uuid─┐ +│ f4bf890f-f9dc-4332-ad5c-0c18e73f28e9 │ +└──────────────────────────────────────┘ +``` + +**Example with multiple UUIDs generated per row** + +```sql +SELECT generateUUIDv4(1), generateUUIDv4(2); + +┌─generateUUIDv4(1)────────────────────┬─generateUUIDv4(2)────────────────────┐ +│ 2d49dc6e-ddce-4cd0-afb8-790956df54c1 │ 8abf8c13-7dea-4fdf-af3e-0e18767770e6 │ +└──────────────────────────────────────┴──────────────────────────────────────┘ +``` + +## generateUUIDv7 {#generateUUIDv7} + +Generates a [version 7](https://datatracker.ietf.org/doc/html/draft-peabody-dispatch-new-uuid-format-04) [UUID](../data-types/uuid.md). + +The generated UUID contains the current Unix timestamp in milliseconds (48 bits), followed by version "7" (4 bits), a counter (42 bits) to distinguish UUIDs within a millisecond (including a variant field "2", 2 bits), and a random field (32 bits). +For any given timestamp (unix_ts_ms), the counter starts at a random value and is incremented by 1 for each new UUID until the timestamp changes. +In case the counter overflows, the timestamp field is incremented by 1 and the counter is reset to a random new start value. + +Function `generateUUIDv7` guarantees that the counter field within a timestamp increments monotonically across all function invocations in concurrently running threads and queries. + +``` + 0 1 2 3 + 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| unix_ts_ms | +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| unix_ts_ms | ver | counter_high_bits | +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +|var| counter_low_bits | +├─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┼─┤ +| rand_b | +└─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┴─┘ +``` + +:::note +As of April 2024, version 7 UUIDs are in draft status and their layout may change in the future. +::: + +**Syntax** + +``` sql +generateUUIDv7([expr]) +``` + +**Arguments** + +- `expr` — An arbitrary [expression](../syntax.md#syntax-expressions) used to bypass [common subexpression elimination](../functions/index.md#common-subexpression-elimination) if the function is called multiple times in a query. The value of the expression has no effect on the returned UUID. Optional. + +**Returned value** + +A value of type UUIDv7. + +**Example** + +First, create a table with a column of type UUID, then insert a generated UUIDv7 into the table. + +``` sql +CREATE TABLE tab (uuid UUID) ENGINE = Memory; + +INSERT INTO tab SELECT generateUUIDv7(); + +SELECT * FROM tab; +``` + +Result: + +```response +┌─────────────────────────────────uuid─┐ +│ 018f05af-f4a8-778f-beee-1bedbc95c93b │ +└──────────────────────────────────────┘ +``` + +**Example with multiple UUIDs generated per row** + +```sql +SELECT generateUUIDv7(1), generateUUIDv7(2); + +┌─generateUUIDv7(1)────────────────────┬─generateUUIDv7(2)────────────────────┐ +│ 018f05c9-4ab8-7b86-b64e-c9f03fbd45d1 │ 018f05c9-4ab8-7b86-b64e-c9f12efb7e16 │ +└──────────────────────────────────────┴──────────────────────────────────────┘ +``` + +## empty + +Checks whether the input UUID is empty. + +**Syntax** + +```sql +empty(UUID) +``` + +The UUID is considered empty if it contains all zeros (zero UUID). 
+ +The function also works for [Arrays](array-functions.md#function-empty) and [Strings](string-functions.md#empty). + +**Arguments** + +- `x` — A UUID. [UUID](../data-types/uuid.md). + +**Returned value** + +- Returns `1` for an empty UUID or `0` for a non-empty UUID. [UInt8](../data-types/int-uint.md). + +**Example** + +To generate the UUID value, ClickHouse provides the [generateUUIDv4](#generateuuidv4) function. + +Query: + +```sql +SELECT empty(generateUUIDv4()); +``` + +Result: + +```response +┌─empty(generateUUIDv4())─┐ +│ 0 │ +└─────────────────────────┘ +``` + +## notEmpty + +Checks whether the input UUID is non-empty. + +**Syntax** + +```sql +notEmpty(UUID) +``` + +The UUID is considered empty if it contains all zeros (zero UUID). + +The function also works for [Arrays](array-functions.md#function-notempty) and [Strings](string-functions.md#notempty). + +**Arguments** + +- `x` — A UUID. [UUID](../data-types/uuid.md). + +**Returned value** + +- Returns `1` for a non-empty UUID or `0` for an empty UUID. [UInt8](../data-types/int-uint.md). + +**Example** + +To generate the UUID value, ClickHouse provides the [generateUUIDv4](#generateuuidv4) function. + +Query: + +```sql +SELECT notEmpty(generateUUIDv4()); +``` + +Result: + +```response +┌─notEmpty(generateUUIDv4())─┐ +│ 1 │ +└────────────────────────────┘ +``` + +## toUUID + +Converts a value of type String to a UUID. + +``` sql +toUUID(string) +``` + +**Returned value** + +The UUID type value. + +**Usage example** + +``` sql +SELECT toUUID('61f0c404-5cb3-11e7-907b-a6006ad3dba0') AS uuid +``` + +Result: + +```response +┌─────────────────────────────────uuid─┐ +│ 61f0c404-5cb3-11e7-907b-a6006ad3dba0 │ +└──────────────────────────────────────┘ +``` + +## toUUIDOrDefault + +Converts a string to a UUID, returning the given default UUID if the conversion fails. + +**Syntax** + +``` sql +toUUIDOrDefault(string, default) +``` + +**Arguments** + +- `string` — String of 36 characters or FixedString(36). [String](../syntax.md#string). +- `default` — UUID to be used as the default if the first argument cannot be converted to a UUID type. [UUID](../data-types/uuid.md). + +**Returned value** + +The UUID type value. + +**Usage examples** + +This first example returns the first argument converted to a UUID type as it can be converted: + +``` sql +SELECT toUUIDOrDefault('61f0c404-5cb3-11e7-907b-a6006ad3dba0', cast('59f0c404-5cb3-11e7-907b-a6006ad3dba0' as UUID)); +``` + +Result: + +```response +┌─toUUIDOrDefault('61f0c404-5cb3-11e7-907b-a6006ad3dba0', CAST('59f0c404-5cb3-11e7-907b-a6006ad3dba0', 'UUID'))─┐ +│ 61f0c404-5cb3-11e7-907b-a6006ad3dba0 │ +└───────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +This second example returns the second argument (the provided default UUID) as the first argument cannot be converted to a UUID type: + +```sql +SELECT toUUIDOrDefault('-----61f0c404-5cb3-11e7-907b-a6006ad3dba0', cast('59f0c404-5cb3-11e7-907b-a6006ad3dba0' as UUID)); +``` + +Result: + +```response +┌─toUUIDOrDefault('-----61f0c404-5cb3-11e7-907b-a6006ad3dba0', CAST('59f0c404-5cb3-11e7-907b-a6006ad3dba0', 'UUID'))─┐ +│ 59f0c404-5cb3-11e7-907b-a6006ad3dba0 │ +└────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘ +``` + +## toUUIDOrNull + +Takes an argument of type String and tries to parse it into a UUID. If parsing fails, returns NULL. + +``` sql +toUUIDOrNull(string) +``` + +**Returned value** + +The Nullable(UUID) type value. 
+ +**Usage example** + +``` sql +SELECT toUUIDOrNull('61f0c404-5cb3-11e7-907b-a6006ad3dba0T') AS uuid +``` + +Result: + +```response +┌─uuid─┐ +│ ᴺᵁᴸᴸ │ +└──────┘ +``` + +## toUUIDOrZero + +Takes an argument of type String and tries to parse it into a UUID. If parsing fails, returns the zero UUID. + +``` sql +toUUIDOrZero(string) +``` + +**Returned value** + +The UUID type value. + +**Usage example** + +``` sql +SELECT toUUIDOrZero('61f0c404-5cb3-11e7-907b-a6006ad3dba0T') AS uuid +``` + +Result: + +```response +┌─────────────────────────────────uuid─┐ +│ 00000000-0000-0000-0000-000000000000 │ +└──────────────────────────────────────┘ +``` + +## UUIDStringToNum + +Accepts a `string` containing 36 characters in the format `xxxxxxxx-xxxx-xxxx-xxxx-xxxxxxxxxxxx`, and returns a [FixedString(16)](../data-types/fixedstring.md) as its binary representation, with its format optionally specified by `variant` (`Big-endian` by default). + +**Syntax** + +``` sql +UUIDStringToNum(string[, variant = 1]) +``` + +**Arguments** + +- `string` — A [String](../syntax.md#syntax-string-literal) of 36 characters or [FixedString](../syntax.md#syntax-string-literal). +- `variant` — Integer, representing a variant as specified by [RFC4122](https://datatracker.ietf.org/doc/html/rfc4122#section-4.1.1). 1 = `Big-endian` (default), 2 = `Microsoft`. + +**Returned value** + +FixedString(16) + +**Usage examples** + +``` sql +SELECT + '612f3c40-5d3b-217e-707b-6a546a3d7b29' AS uuid, + UUIDStringToNum(uuid) AS bytes +``` + +Result: + +```response +┌─uuid─────────────────────────────────┬─bytes────────────┐ +│ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ a/<@];!~p{jTj={) │ +└──────────────────────────────────────┴──────────────────┘ +``` + +``` sql +SELECT + '612f3c40-5d3b-217e-707b-6a546a3d7b29' AS uuid, + UUIDStringToNum(uuid, 2) AS bytes +``` + +Result: + +```response +┌─uuid─────────────────────────────────┬─bytes────────────┐ +│ 612f3c40-5d3b-217e-707b-6a546a3d7b29 │ @ clickhouse_applications[] = {"storage-tools", mainEntryStorageTools}, {"storage_tools", mainEntryStorageTools}, #endif +#if ENABLE_CLICKHOUSE_SCHEMA_ADVISOR + {"schema-advisor", mainEntryClickHouseSchemaAdvisor}, +#endif }; diff --git a/programs/schema-advisor/CMakeLists.txt b/programs/schema-advisor/CMakeLists.txt new file mode 100644 index 00000000000..c8327d5e751 --- /dev/null +++ b/programs/schema-advisor/CMakeLists.txt @@ -0,0 +1,29 @@ +set(CLICKHOUSE_SCHEMA_ADVISOR_SOURCES + SchemaAdvisor.cpp + CodecAdvisor.cpp + TypeAdvisor.cpp + IndexAdvisor.cpp + PrewhereAdvisor.cpp + SampleColumnReader.cpp + Statistics.cpp + CompressedStatisticsCollectBuffer.cpp + ColumnUsageExtractor.cpp + MockGlobalContext.cpp + MockEnvironment.cpp +) + +set(CLICKHOUSE_SCHEMA_ADVISOR_LINK + PRIVATE + boost::program_options + clickhouse_functions + clickhouse_aggregate_functions + clickhouse_parsers + dbms + clickhouse_storages_system +) + +if (CLICKHOUSE_SPLIT_BINARY) + list(APPEND CLICKHOUSE_SCHEMA_ADVISOR_LINK $) +endif() + +clickhouse_program_add(schema-advisor) diff --git a/programs/schema-advisor/CodecAdvisor.cpp b/programs/schema-advisor/CodecAdvisor.cpp new file mode 100644 index 00000000000..f0b3bac38be --- /dev/null +++ b/programs/schema-advisor/CodecAdvisor.cpp @@ -0,0 +1,212 @@ +#include "CodecAdvisor.h" +#include "CompressedStatisticsCollectBuffer.h" + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +CodecAdvisor::CodecAdvisor( 
const po::variables_map & options, + const ColumnsDescription & column_descs, + std::string absolute_part_path_, + size_t sample_row_number_, + size_t max_threads_) + : absolute_part_path(std::move(absolute_part_path_)) + , sample_row_number(sample_row_number_) + , max_threads(max_threads_) +{ + parseCodecCandidates(options); + setSamplingColumnFiles(absolute_part_path, column_descs); +} + +void CodecAdvisor::parseCodecCandidates(const po::variables_map & options) +{ + block_size = options["block-size"].as(); + + bool use_lz4hc = options.count("hc"); + bool use_zstd = options.count("zstd"); + std::vector combi_codec; + if (options.count("codec")) + combi_codec = options["codec"].as>(); + + if (!use_lz4hc && !use_zstd && combi_codec.empty()) + throw Exception( + "Missing options, either --hc or --zstd or --codec options is required", ErrorCodes::BAD_ARGUMENTS); + if ((use_lz4hc || use_zstd) && !combi_codec.empty()) + throw Exception( + "Wrong options, codec flags like --zstd and --codec options are mutually exclusive", ErrorCodes::BAD_ARGUMENTS); + if (!combi_codec.empty() && options.count("level")) + throw Exception("Wrong options, --level is not compatible with --codec list", ErrorCodes::BAD_ARGUMENTS); + + std::string method_family; + if (use_lz4hc) + method_family = "LZ4HC"; + else if (use_zstd) + method_family = "ZSTD"; + + std::optional level = std::nullopt; + if (options.count("level")) + level = options["level"].as(); + + CompressionCodecPtr codec; + if (!combi_codec.empty()) + { + ParserCodec codec_parser; + std::string combi_codec_line = boost::algorithm::join(combi_codec, ","); + auto ast = parseQuery(codec_parser, "(" + combi_codec_line + ")", 0, DBMS_DEFAULT_MAX_PARSER_DEPTH); + codec = CompressionCodecFactory::instance().get(ast, nullptr); + } + else + codec = CompressionCodecFactory::instance().get(method_family, level); + + codecs_to_compare.push_back(codec); +} + +/// Select column files to sample and estimate profit +void CodecAdvisor::setSamplingColumnFiles(const std::string & part_path, const ColumnsDescription & column_descs) +{ + Poco::DirectoryIterator end; + for (Poco::DirectoryIterator it(part_path); it != end; ++it) + { + if (it->isFile() && endsWith(it->path(), ".bin")) + { + std::string file_path = it->path(); + std::string file_name = it.name(); + std::string column_name; + if (isMapImplicitKey(file_name) && !isMapBaseFile(file_name)) + column_name = parseMapNameFromImplicitFileName(file_name); + else if (endsWith(it->path(), ".null.bin")) + column_name = unescapeForFileName(file_name.substr(0, file_name.size() - 9)); + else + column_name = unescapeForFileName(file_name.substr(0, file_name.size() - 4)); + + if (column_descs.has(column_name)) + column_files_to_sample.push_back(std::make_shared(file_path, column_name)); + } + } +} + +void CodecAdvisor::execute() +{ + size_t part_row_count; + std::string part_count_path = absolute_part_path + "/count.txt"; + { + ReadBufferFromFile in(part_count_path, METADATA_FILE_BUFFER_SIZE); + readIntText(part_row_count, in); + assertEOF(in); + } + + auto run_estimate_task = [&](const SamplingColumnFilePtr & column_file_to_sample) { + std::string file_path = column_file_to_sample->file_path; + column_file_to_sample->origin_file_size = std::filesystem::file_size(file_path) * sample_row_number / part_row_count; + + CompressedReadBufferFromFile from(std::make_unique(file_path), true, 0, column_file_to_sample->origin_file_size, true); + CompressedStatisticsCollectBuffer to(codecs_to_compare[0], block_size); /// TODO(weiping.qw): 
support comparing multiple codecs after FSST is imported. + copyData(from, to); + + column_file_to_sample->optimized_file_size = to.getCompressedBytes(); + }; + + ExceptionHandler exception_handler; + ///make queue size large enough to hold all tasks. + ThreadPool pool(max_threads, max_threads, 100000); + + for (const auto & file : column_files_to_sample) + { + pool.trySchedule( + createExceptionHandledJob( + [&, column_file_to_sample = file]() { run_estimate_task(column_file_to_sample); } + , exception_handler + ) + ); + } + pool.wait(); + /// throw if exception during collecting compression info. + exception_handler.throwIfException(); +} + +void CodecAdvisor::serializeJson(WriteBuffer & buf, bool verbose) +{ + size_t total_origin_file_size = 0; + size_t total_optimized_file_size = 0; + + std::unordered_map column_origin_file_sizes; + std::unordered_map column_optimized_file_sizes; + for (const auto & file : column_files_to_sample) + { + /// skip column without potential compression profit + if (file->origin_file_size <= file->optimized_file_size) + continue; + + total_origin_file_size += file->origin_file_size; + total_optimized_file_size += file->optimized_file_size; + if (verbose) + { + if (column_origin_file_sizes.find(file->column_name) == column_origin_file_sizes.end()) + { + column_origin_file_sizes.emplace(file->column_name, file->origin_file_size); + column_optimized_file_sizes.emplace(file->column_name, file->optimized_file_size); + } + else + { + column_origin_file_sizes[file->column_name] += file->origin_file_size; + column_optimized_file_sizes[file->column_name] += file->optimized_file_size; + } + } + } + + if (verbose) + { + bool first = true; + writeString("\"columns\":[", buf); + for (const auto & entry : column_origin_file_sizes) + { + if (first) + first = false; + else + writeString(",", buf); + std::string column_name = entry.first; + writeString("{\"name\":\"", buf); + writeString(column_name, buf); + writeString("\",", buf); + size_t column_origin_file_size = entry.second; + size_t column_optimized_file_size = column_optimized_file_sizes[column_name]; + double column_estimated_profit = + (column_origin_file_size == 0 || column_origin_file_size <= column_optimized_file_size) + ? 
0 : (column_origin_file_size - column_optimized_file_size) * 100.0 / column_origin_file_size; + writeString("\"codec\":{\"", buf); + writeString(queryToString(codecs_to_compare[0]->getCodecDesc()), buf); + writeString("\":{\"compression ratio\":", buf); + writeFloatText(column_estimated_profit, buf); + writeString("}}}", buf); + } + writeString("],", buf); + } + + double estimated_profit = (total_origin_file_size - total_optimized_file_size) * 100.0 / total_origin_file_size; + + writeString("\"codec\":{\"compression ratio\":", buf); + writeFloatText(estimated_profit, buf); + writeString("}", buf); +} + +} diff --git a/programs/schema-advisor/CodecAdvisor.h b/programs/schema-advisor/CodecAdvisor.h new file mode 100644 index 00000000000..610e5df12ab --- /dev/null +++ b/programs/schema-advisor/CodecAdvisor.h @@ -0,0 +1,43 @@ +#pragma once + +#include + +#include "SchemaAdvisorHelpers.h" + +#include +#include +#include + +namespace DB +{ + +namespace po = boost::program_options; + +class CodecAdvisor +{ +private: + SamplingColumnFiles column_files_to_sample; + Codecs codecs_to_compare; + const std::string absolute_part_path; + const size_t sample_row_number; + const size_t max_threads; + unsigned block_size; + + void parseCodecCandidates(const po::variables_map & options); + void setSamplingColumnFiles(const std::string & part_path, const ColumnsDescription & column_descs); + +public: + CodecAdvisor( + const po::variables_map & options, + const ColumnsDescription & column_descs, + std::string absolute_part_path, + size_t sample_row_number, + size_t max_threads); + + virtual ~CodecAdvisor() = default; + + void execute(); + void serializeJson(WriteBuffer & buf, bool verbose = false); +}; + +} diff --git a/programs/schema-advisor/ColumnUsageExtractor.cpp b/programs/schema-advisor/ColumnUsageExtractor.cpp new file mode 100644 index 00000000000..f080d09bbc6 --- /dev/null +++ b/programs/schema-advisor/ColumnUsageExtractor.cpp @@ -0,0 +1,223 @@ +#include "ColumnUsageExtractor.h" +#include "SchemaAdvisorHelpers.h" +#include "MockEnvironment.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +namespace +{ + WorkloadQueries buildWorkloadQueriesCollectException(const std::vector & queries, + ContextPtr from_context, + ThreadPool & query_thread_pool, + MessageCollector & collector) + { + WorkloadQueries res(queries.size()); + for (size_t i = 0; i < queries.size(); ++i) + { + query_thread_pool.scheduleOrThrowOnError([&, i] { + setThreadName("BuildQuery"); + try + { + res[i] = WorkloadQuery::build("q" + std::to_string(i), queries[i], from_context); + } + catch (...) 
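+                // A query that fails to build leaves res[i] as nullptr; those empty
+                // slots are compacted away by the std::remove/erase below, so the
+                // returned workload contains only successfully planned queries.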
+ { + std::string msg = "failed to build query " + std::to_string(i) + "\nreason: " + getCurrentExceptionMessage(true) + + "\nsql: " + queries[i] + "\n"; + collector.collect(std::move(msg)); + } + }); + } + query_thread_pool.wait(); + res.erase(std::remove(res.begin(), res.end(), nullptr), res.end()); + return res; + } +} + +ColumnUsages ColumnUsageExtractor::extractColumnUsages(const std::vector & queries) const +{ + ThreadPool query_thread_pool{std::min(max_threads, queries.size())}; + MessageCollector collector; + WorkloadQueries workload_queries = buildWorkloadQueriesCollectException(queries, context, query_thread_pool, collector); + if (queries.empty()) + throw Exception("No valid query has been extracted", ErrorCodes::BAD_ARGUMENTS); + + LOG_DEBUG(getLogger("ColumnUsageExtractor"), "Successfully planed {} / {} queries", workload_queries.size(), queries.size()); + collector.logCollectedError(); + + return buildColumnUsages(workload_queries); +} + +ColumnUsageExtractor::ColumnToScannedUsages ColumnUsageExtractor::extractUsageForLowCardinality(const ColumnUsages & column_usages) const +{ + ColumnToScannedUsages res; + for (const auto & [column, info] : column_usages) + { + if (MockEnvironment::isPrimaryKey(column, context)) + { + LOG_DEBUG(getLogger("ColumnUsageExtractor"), "Column {} skipped because it is a primary key", column.getFullName()); + continue; + } + + auto scanned = info.getUsages(ColumnUsageType::SCANNED, /*only_source_table=*/false); + if (scanned.empty()) + continue; + + res.emplace(column, scanned.size()); + } + + return res; +} + +ColumnUsageExtractor::ColumnToEqualityAndInUsages ColumnUsageExtractor::extractUsageForSkipIndex(const ColumnUsages & column_usages) const +{ + /// if only interested in a specific table, do it here + // std::erase_if(column_usages, [&](const auto & pair) { return pair.first.database != database || pair.first.table != table;}); + + ColumnToEqualityAndInUsages res; + for (const auto & [column, info] : column_usages) + { + if (MockEnvironment::isPrimaryKey(column, context)) + { + LOG_DEBUG(getLogger("ColumnUsageExtractor"), "Column {} skipped because it is a primary key", column.getFullName()); + continue; + } + + size_t arraysetfunc_count = info.getFrequency(ColumnUsageType::ARRAY_SET_FUNCTION, /*only_source_table=*/false); + size_t others_count = info.getFrequency(ColumnUsageType::OTHER_PREDICATE, /*only_source_table=*/false); + + if (arraysetfunc_count) + { + auto arraysetfuncs = info.getUsages(ColumnUsageType::ARRAY_SET_FUNCTION, /*only_source_table=*/false); + size_t total_count = arraysetfuncs.size() + others_count; + /// TODO: Optimize the ColumnToEqualityAndInUsages struct? 
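+            /// For bitmap-index candidates the same tuple layout is reused: slot 0
+            /// holds the ARRAY_SET_FUNCTION usages, slot 1 (equality/IN usages)
+            /// stays empty, and slot 2 is the total predicate count for the column.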
+ res.emplace(column, std::make_tuple(std::move(arraysetfuncs), std::vector{}, total_count)); + + continue; + } + + auto equalities = info.getUsages(ColumnUsageType::EQUALITY_PREDICATE, /*only_source_table=*/false); + auto ins = info.getUsages(ColumnUsageType::IN_PREDICATE, /*only_source_table=*/false); + size_t ranges_count = info.getFrequency(ColumnUsageType::RANGE_PREDICATE, /*only_source_table=*/false); + + size_t total_count = equalities.size() + ins.size() + ranges_count + others_count; + if (total_count == 0) + { + LOG_DEBUG( + getLogger("ColumnUsageExtractor"), + "Column {} skipped, total count: {}", + column.getFullName(), + total_count); + continue; + } + + // Keep the set size threshold limit on + // Remove in lists whose in set size is larger than IN_LIST_SIZE_UPPER_BOUND + std::erase_if(ins, [](const ColumnUsage & usage) { + if (auto func = dynamic_pointer_cast(usage.expression); func && func->name == "in") + if (auto expr_list = dynamic_pointer_cast(func->arguments); expr_list && expr_list->children.size() == 2) + if (auto tuple = dynamic_pointer_cast(expr_list->children[1]); tuple && tuple->name == "tuple") + if (auto tuple_expr = dynamic_pointer_cast(tuple->arguments)) + return tuple_expr->children.size() > IN_LIST_SIZE_UPPER_BOUND; + return true; + }); + + size_t eq_in_count = equalities.size() + ins.size(); + if (eq_in_count == 0) + { + LOG_DEBUG( + getLogger("ColumnUsageExtractor"), + "Column {} skipped, eq & in count: {}, total count: {}", + column.getFullName(), + eq_in_count, + total_count); + continue; + } + + // Temply loosen the restrictions of in+equals proportion + if (eq_in_count * 1.0 < total_count * EQUALITY_AND_IN_PREDICATE_THRESHOLD) + { + LOG_DEBUG( + getLogger("ColumnUsageExtractor"), + "Column {} maybe skipped, eq & in count: {}, total count: {}", + column.getFullName(), + eq_in_count, + total_count); + continue; + } + + LOG_DEBUG( + getLogger("ColumnUsageExtractor"), + "Column {} added, eq & in count: {}, total count: {}", + column.getFullName(), + eq_in_count, + total_count); + + res.emplace(column, std::make_tuple(std::move(equalities), std::move(ins), total_count)); + } + return res; +} + +ColumnUsageExtractor::ColumnToPrewherePredicateUsages ColumnUsageExtractor::extractUsageForPrewhere(const ColumnUsages & column_usages) const +{ + /// if only interested in a specific table, do it here + // std::erase_if(column_usages, [&](const auto & pair) { return pair.first.database != database || pair.first.table != table;}); + + ColumnToPrewherePredicateUsages res; + for (const auto & [column, info] : column_usages) + { + if (MockEnvironment::isPrimaryKey(column, context)) + { + LOG_DEBUG(getLogger("ColumnUsageExtractor"), "Column {} skipped because it is a primary key", column.getFullName()); + continue; + } + + auto equalities = info.getUsages(ColumnUsageType::EQUALITY_PREDICATE, /*only_source_table=*/false); + auto ins = info.getUsages(ColumnUsageType::IN_PREDICATE, /*only_source_table=*/false); + auto ranges = info.getUsages(ColumnUsageType::RANGE_PREDICATE, /*only_source_table=*/false); + auto others = info.getUsages(ColumnUsageType::OTHER_PREDICATE, /*only_source_table=*/false); + + size_t total_count = equalities.size() + ins.size() + ranges.size() + others.size(); + + if (total_count == 0) + continue; + + // Keep the set size threshold limit on + // Remove in lists whose in set size is larger than IN_LIST_SIZE_UPPER_BOUND + std::erase_if(ins, [](const ColumnUsage & usage) { + if (auto func = dynamic_pointer_cast(usage.expression); func && func->name == 
"in") + if (auto expr_list = dynamic_pointer_cast(func->arguments); expr_list && expr_list->children.size() == 2) + if (auto tuple = dynamic_pointer_cast(expr_list->children[1]); tuple && tuple->name == "tuple") + if (auto tuple_expr = dynamic_pointer_cast(tuple->arguments)) + return tuple_expr->children.size() > IN_LIST_SIZE_UPPER_BOUND; + return true; + }); + + res.emplace(column, std::make_tuple(std::move(equalities), std::move(ins), std::move(ranges), std::move(others), total_count)); + } + return res; +} + +} // DB diff --git a/programs/schema-advisor/ColumnUsageExtractor.h b/programs/schema-advisor/ColumnUsageExtractor.h new file mode 100644 index 00000000000..a29633ffca7 --- /dev/null +++ b/programs/schema-advisor/ColumnUsageExtractor.h @@ -0,0 +1,45 @@ +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace DB +{ + +class ColumnUsageExtractor +{ +public: + // EqualityAndInUsages: + // for skip index: equality_usages, in_usages, total_predicates + // for bitmap index: arraysetfunc_usages, {}, total_predicates + using EqualityAndInUsages = std::tuple, std::vector, size_t>; + using ColumnToEqualityAndInUsages = std::unordered_map; + using ColumnToScannedUsages = std::unordered_map; + + // for prewhere: equality_usages, in_usages, range_usages, other_usages, total_predicates + using PrewherePredicateUsages = std::tuple, std::vector, std::vector, std::vector, size_t>; + using ColumnToPrewherePredicateUsages = std::unordered_map; + + + explicit ColumnUsageExtractor(ContextMutablePtr _context, size_t _max_threads): context(_context), max_threads(_max_threads) {} + + ColumnUsages extractColumnUsages(const std::vector & queries) const; + ColumnToEqualityAndInUsages extractUsageForSkipIndex(const ColumnUsages & column_usages) const; + ColumnToPrewherePredicateUsages extractUsageForPrewhere(const ColumnUsages & column_usages) const; + + ColumnToScannedUsages extractUsageForLowCardinality(const ColumnUsages & column_usages) const; + +private: + ContextMutablePtr context; + size_t max_threads; + // which "in" filters are considered interesting + static constexpr size_t IN_LIST_SIZE_UPPER_BOUND = 10; + static constexpr float EQUALITY_AND_IN_PREDICATE_THRESHOLD = 0.5; +}; + +} // DB diff --git a/programs/schema-advisor/CompressedStatisticsCollectBuffer.cpp b/programs/schema-advisor/CompressedStatisticsCollectBuffer.cpp new file mode 100644 index 00000000000..d052fb5ceeb --- /dev/null +++ b/programs/schema-advisor/CompressedStatisticsCollectBuffer.cpp @@ -0,0 +1,54 @@ +#include +#include + +#include +#include + +#include "CompressedStatisticsCollectBuffer.h" +#include + +#include +#include + + +namespace DB +{ + +namespace ErrorCodes +{ +} + +static constexpr auto CHECKSUM_SIZE{sizeof(CityHash_v1_0_2::uint128)}; + +void CompressedStatisticsCollectBuffer::nextImpl() +{ + if (!offset()) + return; + + size_t decompressed_size = offset(); + UInt32 compressed_reserve_size = codec->getCompressedReserveSize(decompressed_size); + compressed_buffer.resize(compressed_reserve_size); + UInt32 compressed_size = codec->compress(working_buffer.begin(), decompressed_size, compressed_buffer.data()); + + // FIXME remove this after fixing msan report in lz4. + // Almost always reproduces on stateless tests, the exact test unknown. 
+ __msan_unpoison(compressed_buffer.data(), compressed_size); + + total_compressed_size += CHECKSUM_SIZE + compressed_size; +} + + +CompressedStatisticsCollectBuffer::CompressedStatisticsCollectBuffer( + CompressionCodecPtr codec_, + size_t buf_size) + : BufferWithOwnMemory(buf_size), codec(std::move(codec_)) +{ +} + +CompressedStatisticsCollectBuffer::~CompressedStatisticsCollectBuffer() +{ + /// FIXME move final flush into the caller + next(); +} + +} diff --git a/programs/schema-advisor/CompressedStatisticsCollectBuffer.h b/programs/schema-advisor/CompressedStatisticsCollectBuffer.h new file mode 100644 index 00000000000..84f0b31c36b --- /dev/null +++ b/programs/schema-advisor/CompressedStatisticsCollectBuffer.h @@ -0,0 +1,53 @@ +#pragma once + +#include + +#include + +#include +#include +#include +#include + + +namespace DB +{ + +class CompressedStatisticsCollectBuffer : public BufferWithOwnMemory +{ +private: + CompressionCodecPtr codec; + PODArray compressed_buffer; + size_t total_compressed_size = 0; + + void nextImpl() override; + +public: + CompressedStatisticsCollectBuffer( + CompressionCodecPtr codec_ = CompressionCodecFactory::instance().getDefaultCodec(), + size_t buf_size = DBMS_DEFAULT_BUFFER_SIZE); + + /// The amount of compressed data + size_t getCompressedBytes() + { + next(); + return total_compressed_size; + } + + /// How many uncompressed bytes were written to the buffer + size_t getUncompressedBytes() + { + return count(); + } + + /// How many bytes are in the buffer (not yet compressed) + size_t getRemainingBytes() + { + nextIfAtEnd(); + return offset(); + } + + ~CompressedStatisticsCollectBuffer() override; +}; + +} diff --git a/programs/schema-advisor/IndexAdvisor.cpp b/programs/schema-advisor/IndexAdvisor.cpp new file mode 100644 index 00000000000..e60f186a989 --- /dev/null +++ b/programs/schema-advisor/IndexAdvisor.cpp @@ -0,0 +1,274 @@ +#include "IndexAdvisor.h" +#include "ColumnUsageExtractor.h" +#include "IO/WriteIntText.h" +#include "SampleColumnReader.h" +#include "Statistics.h" +#include "SchemaAdvisorHelpers.h" +#include "PotentialColumn.h" + +#include +#include +#include + +#include "Common/Exception.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +static constexpr double ADVISOR_HIGH_CARDINALITY_NDV_THRESHOLD = 0.33; + +IndexAdvisor::IndexAdvisor( + MockEnvironment & env_, + const po::variables_map & options_, + size_t sample_row_number_, + size_t max_threads_) + : env(env_) + , options(options_) + , sample_row_number(sample_row_number_) + , max_threads(max_threads_) +{ +} + +// High_Cardinality_Threshold: +// 20w / 65536 = sample_row_number / ndv => ndv ~ 1/3 sample_row_number +// For skip index, ndv > High_Cardinality_Threshold +// For bitmap index, 10 < ndv <= High_Cardinality_Threshold +bool checkColumnCardinality(String column_name, size_t ndv, size_t sample_row_number, PotentialIndexType & type) +{ + bool basic_cardinality = ndv > 10; + bool high_cardinality = ndv > ADVISOR_HIGH_CARDINALITY_NDV_THRESHOLD * sample_row_number; + + auto get_ndv_check_msg = [&]() -> String + { + if (type == PotentialIndexType::BITMAP_INDEX) + { + if (!basic_cardinality) + return fmt::format("Column {} skipped because the ndv ({}) is less than 10", column_name, ndv); + if (high_cardinality) + type = PotentialIndexType::SEGMENT_BITMAP_INDEX; + } + if (type == PotentialIndexType::BLOOM_FILTER) + { + if (!high_cardinality) + return fmt::format("Column {} skipped because of the array ndv({}) / 
sample_rows({}) < threshold({})", column_name, ndv, sample_row_number, ADVISOR_HIGH_CARDINALITY_NDV_THRESHOLD); + } + return ""; + }; + + auto check_ndv_msg = get_ndv_check_msg(); + if (!check_ndv_msg.empty()) + { + LOG_DEBUG(getLogger("ColumnUsageExtractor"), check_ndv_msg); + return false; + } + return true; +}; + +void IndexAdvisor::execute() +{ + auto context = createContext(options, env); + auto queries = loadQueries(options); + + LOG_DEBUG(getLogger("ColumnUsageExtractor"), "++++++++++ begin to executor index advisor ++++++++++"); + + ColumnUsageExtractor extractor(context, max_threads); + auto column_usages = extractor.extractColumnUsages(queries); + auto skip_index_usages = extractor.extractUsageForSkipIndex(column_usages); + + auto make_predicate_info = [&](PredicateInfos & predicate_infos, ColumnUsageType predicate_type, const std::vector & column_usages_) { + PredicateExpressions predicate_expressions; + size_t total_predicate_expression = 0; + for (const auto & equality_usage : column_usages_) + { + auto & count = predicate_expressions[equality_usage.expression]; + ++count; + total_predicate_expression++; + } + predicate_infos.insert({predicate_type, {predicate_expressions, total_predicate_expression}}); + }; + + UniExtract uniq_extract; + for (const auto & index_usage : skip_index_usages) + { + auto column_info = index_usage.first; + + auto storage = MockEnvironment::tryGetLocalTable(column_info.database, column_info.table, context); + if (!storage) + throw Exception(column_info.database + "(" + column_info.table + "): can not find local table.", ErrorCodes::NOT_FOUND_EXPECTED_DATA_PART); + + auto metadata = storage->getInMemoryMetadataCopy(); + auto column_and_type = metadata.getColumns().tryGetColumn(GetColumnsOptions::Kind::AllPhysical, column_info.column); + if (!column_and_type) + continue; + + auto column_type = column_and_type->type; + + bool check_bitmap_index = false; + bool already_bitmap_index = false; + if (isArray(column_type)) + { + if (column_type->isBitmapIndex() || column_type->isSegmentBitmapIndex()) + { + LOG_DEBUG(getLogger("ColumnUsageExtractor"), "Column " + column_info.column + " skipped because has already been a bitmap index column"); + // continue; + already_bitmap_index = true; + } + check_bitmap_index = true; + } + + std::vector data_path_list; + // if (options.count("path")) + // { + // std::string path = options["path"].as(); + // if (!endsWith(path, "/")) + // path.append("/"); + // data_path_list.emplace_back(path); + // } + // else + // { + boost::split(data_path_list, options["data-path-list"].as(), boost::is_any_of(" ,")); + for (auto & i : data_path_list) + { + if (!endsWith(i, "/")) + i = i.append("/"); + } + // } + + std::string absolute_part_path; + try + { + absolute_part_path = selectPartPath(options, data_path_list, storage->getStorageID().getDatabaseName(), storage->getStorageID().getTableName(), sample_row_number); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::NOT_FOUND_EXPECTED_DATA_PART) + { + LOG_DEBUG( + getLogger("ColumnUsageExtractor"), + "Can't find suitable part for table " + column_info.database + "." + column_info.table + + ", maybe because of the total part rows < " + std::to_string(sample_row_number)); + continue; + } + else + throw e; + } + + SampleColumnReader reader(absolute_part_path + "/", 0, sample_row_number); + ColumnPtr column; + try + { + column = reader.readColumn({index_usage.first.column, column_type}); + } + catch (...) 
+ { + // Just skip the column if it can't be read + LOG_DEBUG( + getLogger("ColumnUsageExtractor"), + "Can't read column file " + index_usage.first.column + " from table " + column_info.database + "." + column_info.table + + ", error message: " + + getCurrentExceptionMessage(true)); + continue; + } + + if (check_bitmap_index) + { + size_t ndv = uniq_extract.executeOnColumnArray(column, column_type).get(); + auto bitmap_index_type = already_bitmap_index ? PotentialIndexType::ALREADY_BITMAP_INDEX : PotentialIndexType::BITMAP_INDEX; + if (!checkColumnCardinality(column_info.getFullName(), ndv, sample_row_number, bitmap_index_type)) + continue; + + StatisticInfo statistic_info{ndv, sample_row_number, std::get<2>(index_usage.second)}; + + PredicateInfos predicate_infos; + make_predicate_info(predicate_infos, ColumnUsageType::ARRAY_SET_FUNCTION, std::get<0>(index_usage.second)); + + PotentialColumnInfo potential_column{bitmap_index_type, statistic_info, predicate_infos}; + potential_columns.insert({std::move(column_info), std::move(potential_column)}); + + continue; + } + + // All following: check skip index + size_t ndv = uniq_extract.executeOnColumn(column, column_type).get(); + auto skip_index_type = PotentialIndexType::BLOOM_FILTER; + if (!checkColumnCardinality(column_info.getFullName(), ndv, sample_row_number, skip_index_type)) + continue; + + StatisticInfo statistic_info{ndv, sample_row_number, std::get<2>(index_usage.second)}; + + PredicateInfos predicate_infos; + make_predicate_info(predicate_infos, ColumnUsageType::EQUALITY_PREDICATE, std::get<0>(index_usage.second)); + make_predicate_info(predicate_infos, ColumnUsageType::IN_PREDICATE, std::get<1>(index_usage.second)); + + PotentialColumnInfo potential_column{skip_index_type, statistic_info, predicate_infos}; + potential_columns.insert({std::move(column_info), std::move(potential_column)}); + } + + LOG_DEBUG(getLogger("ColumnUsageExtractor"), "Extracted {} column usages", potential_columns.size()); + for ([[maybe_unused]] auto & [column_info, potential_column] : potential_columns) + { + std::stringstream ss; + ss << column_info.getFullName() << "\tindex_type:" << toString(potential_column.index_type) + << "\tsample_ndv:" << potential_column.statistic_info.sample_ndv + << "\tsample_row_num:" << potential_column.statistic_info.sample_row_num << "\ttarget_expression_cnt:" + << potential_column.predicate_infos[ColumnUsageType::EQUALITY_PREDICATE].total + + potential_column.predicate_infos[ColumnUsageType::IN_PREDICATE].total + + potential_column.predicate_infos[ColumnUsageType::ARRAY_SET_FUNCTION].total + << "\ttotal_expr count:" << potential_column.statistic_info.total_predicates; + + LOG_DEBUG(getLogger("ColumnUsageExtractor"), ss.str()); + } +} + +/// TODO: seperate two indices +void IndexAdvisor::serializeJson(WriteBuffer & buf, bool /* verbose */) +{ + bool first = true; + writeString("\"index\":[", buf); + for (auto & [column_info, potential_column] : potential_columns) + { + if (first) + first = false; + else + writeString(",", buf); + + writeString(R"({"db":")", buf); + writeString(column_info.database, buf); + writeString(R"(","table":")", buf); + writeString(column_info.table, buf); + writeString(R"(","column_name":")", buf); + writeString(column_info.column, buf); + + writeString(R"(","index_type":")", buf); + writeString(toString(potential_column.index_type), buf); + + writeString(R"(","sample_ndv":")", buf); + writeIntText(potential_column.statistic_info.sample_ndv, buf); + writeString(R"(","sample_row_num":")", buf); + 
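+        // The numeric statistics are written between quotes: every value in the
+        // emitted JSON document is serialized as a string.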
writeIntText(potential_column.statistic_info.sample_row_num, buf); + + // The usage type (EQUALITY_PREDICATE + IN_PREDICATE) and (ARRAY_SET_FUNCTION) + // will not appear at the same time, so we can simply add the cnt + size_t target_expression_cnt = potential_column.predicate_infos[ColumnUsageType::EQUALITY_PREDICATE].total + + potential_column.predicate_infos[ColumnUsageType::IN_PREDICATE].total + + potential_column.predicate_infos[ColumnUsageType::ARRAY_SET_FUNCTION].total; + writeString(R"(","target_expression_cnt":")", buf); + writeIntText(target_expression_cnt, buf); + writeString(R"(","total_expression_cnt":")", buf); + writeIntText(potential_column.statistic_info.total_predicates, buf); + writeString("\"}", buf); + } + writeString("]", buf); +} + +} diff --git a/programs/schema-advisor/IndexAdvisor.h b/programs/schema-advisor/IndexAdvisor.h new file mode 100644 index 00000000000..de259fa9a63 --- /dev/null +++ b/programs/schema-advisor/IndexAdvisor.h @@ -0,0 +1,39 @@ +#pragma once + +#include "MockEnvironment.h" +#include "PotentialColumn.h" + +#include + +#include +#include + +namespace DB +{ + +namespace po = boost::program_options; + +class IndexAdvisor +{ +private: + MockEnvironment & env; + po::variables_map options; + const size_t sample_row_number; + const size_t max_threads; + + PotentialColumns potential_columns; + +public: + IndexAdvisor( + MockEnvironment & env_, + const po::variables_map & options_, + size_t sample_row_number, + size_t max_threads); + + virtual ~IndexAdvisor() = default; + + void execute(); + void serializeJson(WriteBuffer & buf, bool verbose = false); +}; + +} diff --git a/programs/schema-advisor/MockEnvironment.cpp b/programs/schema-advisor/MockEnvironment.cpp new file mode 100644 index 00000000000..211c34a7956 --- /dev/null +++ b/programs/schema-advisor/MockEnvironment.cpp @@ -0,0 +1,330 @@ +#include "MockEnvironment.h" +#include "MockGlobalContext.h" +#include "SchemaAdvisorHelpers.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +namespace +{ + std::string readString(const std::string & file_path) + { + std::ifstream fin(file_path); + std::stringstream buffer; + buffer << fin.rdbuf(); + return buffer.str(); + } +} + +MockEnvironment::MockEnvironment(const std::string & path, size_t max_threads) + : session_context(MockGlobalContext::instance().createSessionContext()) + , actual_folder(path) + , mock_folder(std::filesystem::path{"/tmp"} / ("advisor_tool_" + toString(UUIDHelpers::generateV4()))) +{ + session_context->setPath(mock_folder.string() + '/'); + session_context->setMetastorePath((mock_folder / METASTORE).string() + '/'); + + SettingsChanges setting_changes; + setting_changes.emplace_back("max_threads", max_threads); + setting_changes.emplace_back("enable_memory_catalog", true); + session_context->applySettingsChanges(setting_changes); + std::filesystem::remove_all(mock_folder); + std::filesystem::create_directories(mock_folder); + std::filesystem::create_directories(mock_folder / METASTORE); + std::filesystem::create_directories(mock_folder / METADATA); + std::filesystem::create_directories(mock_folder / DATA); + + registerFunctions(); + registerFormats(); + registerStorages(); + 
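+    // Registering the built-in factories lets the DDL replayed from the metadata
+    // folder be parsed and attached in this mock session as in a real server.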
registerAggregateFunctions(); + registerHints(); + registerDisks(); + Statistics::CacheManager::initialize(session_context); + + // make system database + DatabasePtr system_database = DatabaseCatalog::instance().tryGetDatabase(DatabaseCatalog::SYSTEM_DATABASE, session_context); + if (!system_database) + { + system_database = std::make_shared(DatabaseCatalog::SYSTEM_DATABASE, session_context); + DatabaseCatalog::instance().attachDatabase(DatabaseCatalog::SYSTEM_DATABASE, system_database); + attachSystemTablesLocal(*system_database); + } +} + +MockEnvironment::~MockEnvironment() +{ + for (const auto & [name, database] : DatabaseCatalog::instance().getDatabases(session_context)) + { + for (auto it = database->getTablesIterator(session_context); it->isValid(); it->next()) + { + database->dropTable(session_context, it->name(), /*no_delay=*/true); + } + database->drop(session_context); + } + + std::filesystem::remove_all(mock_folder); +} + +std::vector MockEnvironment::listDatabases() +{ + std::vector res; + auto meta_path = actual_folder / METADATA; + if (!std::filesystem::exists(meta_path)) + throw Exception("cannot find metadata", ErrorCodes::CANNOT_OPEN_FILE); + for (const auto & file : std::filesystem::directory_iterator{meta_path}) + { + const std::filesystem::path & fullname = file.path(); + if (fullname.extension() == ".sql") + res.emplace_back(fullname.stem().string()); + } + return res; +} + +std::vector MockEnvironment::listTables(const std::string & database) +{ + std::vector res; + auto meta_path = actual_folder / METADATA / database; + if (!std::filesystem::exists(meta_path)) + throw Exception("cannot find metadata", ErrorCodes::CANNOT_OPEN_FILE); + for (const auto & file : std::filesystem::directory_iterator{meta_path}) + { + const std::filesystem::path & fullname = file.path(); + if (fullname.extension() == ".sql") + res.emplace_back(fullname.stem().string()); + } + return res; +} + +bool MockEnvironment::containsDatabase(const std::string & database) +{ + return std::filesystem::exists(actual_folder / METADATA / (database + ".sql")); +} + +bool MockEnvironment::containsTable(const std::string & database, const std::string & table) +{ + return std::filesystem::exists(actual_folder / METADATA / database / (table + ".sql")); +} + +std::string MockEnvironment::getCreateDatabaseSql(const std::string & database) +{ + if (!containsDatabase(database)) + throw Exception("cannot find database " + database, ErrorCodes::CANNOT_OPEN_FILE); + return readString(actual_folder / METADATA / (database + ".sql")); +} + +std::string MockEnvironment::getCreateTableSql(const std::string & database, const std::string & table) +{ + if (!containsTable(database, table)) + throw Exception("cannot find table " + database + "." 
+ table, ErrorCodes::CANNOT_OPEN_FILE); + return readString(actual_folder / METADATA / database / (table + ".sql")); +} + +ColumnsDescription MockEnvironment::getColumnsDescription(const std::string & database, const std::string & table) +{ + std::string create_table = getCreateTableSql(database, table); + ContextMutablePtr context = createQueryContext(); + auto ast = parse(create_table, context)->as(); + return InterpreterCreateQuery::getColumnsDescription(*ast.columns_list->columns, context, ast.attach, false); +} + +ContextMutablePtr MockEnvironment::createQueryContext() +{ + ContextMutablePtr query_context = Context::createCopy(session_context); + query_context->createPlanNodeIdAllocator(); + query_context->createSymbolAllocator(); + query_context->makeQueryContext(); + return query_context; +} + +ASTPtr MockEnvironment::parse(std::string_view sql, ContextPtr query_context) +{ + const char * begin = sql.data(); + const char * end = begin + sql.size(); + ParserQuery parser(end, ParserSettings::valueOf(query_context->getSettingsRef())); + return parseQuery( + parser, begin, end, "", + query_context->getSettingsRef().max_query_size, + query_context->getSettingsRef().max_parser_depth); +} + +QueryPlanPtr MockEnvironment::plan(std::string_view sql, ContextMutablePtr query_context) +{ + ASTPtr ast = parse(sql, query_context); + ast = QueryRewriter().rewrite(ast, query_context); + AnalysisPtr analysis = QueryAnalyzer::analyze(ast, query_context); + QueryPlanPtr query_plan = QueryPlanner().plan(ast, *analysis, query_context); + return query_plan; +} + +void MockEnvironment::execute(const std::string & sql, ContextMutablePtr query_context) +{ + executeQuery(sql, query_context, /*internal=*/true); +} + +void MockEnvironment::createMockDatabase(const std::string & database) +{ + if (DatabaseCatalog::instance().isDatabaseExist(database, session_context)) + return; + ContextMutablePtr query_context = createQueryContext(); + std::string sql = getCreateDatabaseSql(database); + // the sql is "attach _ ..." in metadata, we revert it + auto ast = dynamic_pointer_cast(parse(sql, query_context)); + if (!ast) + throw Exception("failed to create database " + database + ", invalid sql: " + sql, ErrorCodes::BAD_ARGUMENTS); + ast->attach = false; + ast->database = database; + ast->uuid = UUIDHelpers::Nil; + // there are some problems with destructing an Atomic database, so we force to memory + if (ast->storage && ast->storage->engine) + ast->storage->engine->name = "Memory"; + ast->cluster = ""; + execute(serializeAST(*ast), query_context); +} + +void MockEnvironment::createMockTable(const std::string & database, const std::string & table) +{ + createMockDatabase(database); + if (DatabaseCatalog::instance().getDatabase(database, session_context)->isTableExist(table, session_context)) + return; + ContextMutablePtr query_context = createQueryContext(); + SettingsChanges setting_changes; + setting_changes.emplace_back("enable_constraint_check", false); + setting_changes.emplace_back("allow_nullable_key", true); + query_context->applySettingsChanges(setting_changes); + + std::string sql = getCreateTableSql(database, table); + // the sql is "attach _ ..." in metadata, we revert it + auto ast = dynamic_pointer_cast(parse(sql, query_context)); + if (!ast) + throw Exception("failed to create table " + database + "." 
+ table + ", invalid sql: " + sql, ErrorCodes::BAD_ARGUMENTS); + ast->attach = false; + ast->database = database; + ast->table = table; + ast->uuid = UUIDHelpers::Nil; + ast->cluster = ""; + + if (ast->storage && ast->storage->engine) + { + auto engine_name = ast->storage->engine->name; + if (engine_name == "Distributed") + ast->storage->engine->arguments->children[0] = std::make_shared(MockGlobalContext::ADVISOR_SHARD); + else if (engine_name.starts_with("Ha")) + { + // HaUniqueMergeTree, HaMergeTree require zookeeper + engine_name = engine_name.substr(2, engine_name.length()); + ASTPtr mock_engine = makeASTFunction(engine_name); + ast->storage->set(ast->storage->engine, mock_engine); + } + + if (engine_name == "MergeTree") + { + ASTSetQuery * settings = ast->storage->settings; + if (!settings) + ast->storage->set(settings, std::make_shared()); + settings->is_standalone = false; + settings->changes.emplace_back("enable_metastore", false); + } + + if (engine_name == "UniqueMergeTree") + { + ASTSetQuery * settings = ast->storage->settings; + if (!settings) + ast->storage->set(settings, std::make_shared()); + settings->is_standalone = false; + settings->changes.emplace_back("part_writer_flag", true); + settings->changes.emplace_back("enable_metastore", false); + } + } + + std::string create_sql = serializeAST(*ast); + try + { + execute(std::move(create_sql), query_context); + } + catch (...) + { + LOG_ERROR(getLogger("MockEnvironment"), "Create table {} failed: {}", table, getCurrentExceptionMessage(true)); + } +} + +bool MockEnvironment::isPrimaryKey(const QualifiedColumnName & column, ContextPtr context) +{ + StoragePtr table = tryGetLocalTable(column.database, column.table, context); + + if (!table) + return false; + + auto metadata = table->getInMemoryMetadataCopy(); + std::optional primary_key = std::nullopt; + if (metadata.isPrimaryKeyDefined()) + primary_key = metadata.getPrimaryKey(); + // From CH: By default the primary key is the same as the sorting key (which is specified by the ORDER BY clause). + // Thus in most cases it is unnecessary to specify a separate PRIMARY KEY clause. 
+ else if (auto merge_tree = dynamic_pointer_cast(table); merge_tree && metadata.isSortingKeyDefined()) + primary_key = metadata.getSortingKey(); + + if (!primary_key) + return false; + + const auto & primary_key_columns = primary_key.value().expression->getRequiredColumns(); + return std::find(primary_key_columns.begin(), primary_key_columns.end(), column.column) != primary_key_columns.end(); +} + +StoragePtr MockEnvironment::tryGetLocalTable(const std::string & database_name, const std::string & table_name, ContextPtr context) +{ + StoragePtr table; + + if (DatabasePtr database = DatabaseCatalog::instance().tryGetDatabase(database_name, context)) + table = database->tryGetTable(table_name, context); + + if (auto distributed = dynamic_pointer_cast(table)) + if (auto remote_database = DatabaseCatalog::instance().tryGetDatabase(distributed->getRemoteDatabaseName(), context)) + if (auto remote_table = remote_database->tryGetTable(distributed->getRemoteTableName(), context)) + table = remote_table; + + return table; +} + + + +} // DB diff --git a/programs/schema-advisor/MockEnvironment.h b/programs/schema-advisor/MockEnvironment.h new file mode 100644 index 00000000000..71f89614b58 --- /dev/null +++ b/programs/schema-advisor/MockEnvironment.h @@ -0,0 +1,63 @@ +#pragma once + +#include "Interpreters/StorageID.h" +#include "MockGlobalContext.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +class MockEnvironment +{ +public: + explicit MockEnvironment(const std::string & path, size_t max_threads); + MockEnvironment(const MockEnvironment &other) = delete; + ~MockEnvironment(); + + // list the available databases or tables under the give path + std::vector listDatabases(); + std::vector listTables(const std::string & database); + bool containsDatabase(const std::string & database); + bool containsTable(const std::string & database, const std::string & table); + + // get the create-table/database sql + std::string getCreateDatabaseSql(const std::string & database); + std::string getCreateTableSql(const std::string & database, const std::string & table); + ColumnsDescription getColumnsDescription(const std::string & database, const std::string & table); + + // mock the query execution environment + ContextMutablePtr createQueryContext(); + ASTPtr parse(std::string_view sql, ContextPtr query_context); + QueryPlanPtr plan(std::string_view sql, ContextMutablePtr query_context); // no optimize + void execute(const std::string & sql, ContextMutablePtr query_context); // supposed to execute ddl only + + void createMockDatabase(const std::string & database); + void createMockTable(const std::string & database, const std::string & table); + + static bool isPrimaryKey(const QualifiedColumnName & column, ContextPtr context); + static StoragePtr tryGetLocalTable(const std::string & database_name, const std::string & table_name, ContextPtr context); + +private: + ContextMutablePtr session_context; + const std::filesystem::path actual_folder; + const std::filesystem::path mock_folder; + static constexpr const char * METADATA = "metadata"; + static constexpr const char * METASTORE = "metastore"; + static constexpr const char * DATA = "data"; +}; + + +} // DB diff --git a/programs/schema-advisor/MockGlobalContext.cpp b/programs/schema-advisor/MockGlobalContext.cpp new file mode 100644 index 00000000000..aadc30e5b1a --- /dev/null +++ b/programs/schema-advisor/MockGlobalContext.cpp @@ -0,0 +1,68 @@ +#include 
"MockGlobalContext.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ +ContextMutablePtr MockGlobalContext::createSessionContext() +{ + ContextMutablePtr session_context = Context::createCopy(context); + session_context->makeSessionContext(); + return session_context; +} + +MockGlobalContext::MockGlobalContext() +{ + shared_context = Context::createShared(); + context = Context::createGlobal(shared_context.get()); + context->makeGlobalContext(); + ConfigurationPtr configuration(new Poco::Util::XMLConfiguration(MockGlobalContext::mockConfig())); + context->setConfig(configuration); +} + +XMLDocumentPtr MockGlobalContext::mockConfig() +{ + XMLDocumentPtr document = new Poco::XML::Document(); + Poco::AutoPtr yandex = document->createElement("yandex"); + Poco::AutoPtr remote_servers = document->createElement("remote_servers"); + Poco::AutoPtr advisor_shard = document->createElement(ADVISOR_SHARD); + Poco::AutoPtr shard = document->createElement("shard"); + Poco::AutoPtr replica = document->createElement("replica"); + + Poco::AutoPtr host = document->createElement("host"); + Poco::AutoPtr host_text = document->createTextNode("localhost"); + host->appendChild(host_text); + replica->appendChild(host); + + Poco::AutoPtr port = document->createElement("port"); + Poco::AutoPtr port_text = document->createTextNode("9000"); + port->appendChild(port_text); + replica->appendChild(port); + + Poco::AutoPtr exchange_port = document->createElement("exchange_port"); + Poco::AutoPtr exchange_port_text = document->createTextNode("9300"); + exchange_port->appendChild(exchange_port_text); + replica->appendChild(exchange_port); + + Poco::AutoPtr exchange_status_port = document->createElement("exchange_status_port"); + Poco::AutoPtr exchange_status_port_text = document->createTextNode("9400"); + exchange_status_port->appendChild(exchange_status_port_text); + replica->appendChild(exchange_status_port); + + shard->appendChild(replica); + advisor_shard->appendChild(shard); + remote_servers->appendChild(advisor_shard); + yandex->appendChild(remote_servers); + document->appendChild(yandex); + + return document; +} + +} diff --git a/programs/schema-advisor/MockGlobalContext.h b/programs/schema-advisor/MockGlobalContext.h new file mode 100644 index 00000000000..e04ff33aacf --- /dev/null +++ b/programs/schema-advisor/MockGlobalContext.h @@ -0,0 +1,30 @@ +#pragma once + +#include +#include +#include + +namespace DB +{ +class MockGlobalContext +{ +public: + static constexpr const char * ADVISOR_SHARD = "advisor_shard"; + + static MockGlobalContext & instance() + { + static MockGlobalContext mock_context; + return mock_context; + } + + ContextMutablePtr createSessionContext(); + +private: + explicit MockGlobalContext(); + static XMLDocumentPtr mockConfig(); + + SharedContextHolder shared_context; + ContextMutablePtr context; +}; + +} diff --git a/programs/schema-advisor/PotentialColumn.h b/programs/schema-advisor/PotentialColumn.h new file mode 100644 index 00000000000..a89ba79cc33 --- /dev/null +++ b/programs/schema-advisor/PotentialColumn.h @@ -0,0 +1,101 @@ +#pragma once + +#include +#include + + +namespace DB +{ + +/** + * StatisticInfo: + * - sample_ndv: the ndv for a column in the particular part + * - total_rows: the total number of rows for the particular part + * - total_predicates: The total number of predicates involving the specified column, including in and equals and others + */ +struct StatisticInfo +{ + size_t sample_ndv{}; + size_t sample_row_num{}; + 
size_t total_predicates{}; +}; + +/** + * PredicateInfo: + * - PredicateExpressions: predicate_ast_expression - count + * - total: The total number of occurrences of a ColumnUsageType + */ +using PredicateExpressions = std::unordered_map; +struct PredicateInfo +{ + PredicateExpressions expressions; + size_t total{}; + + PredicateInfo() = default; + PredicateInfo(PredicateExpressions & expressions_, size_t total_): expressions(std::move(expressions_)), total(total_) {} +}; +using PredicateInfos = std::unordered_map; + +enum class PotentialIndexType +{ + BLOOM_FILTER, // just support bloom_filter for skip index + BITMAP_INDEX, + SEGMENT_BITMAP_INDEX, // For high cordinality + ALREADY_BITMAP_INDEX, // used in test, to find the column already has bitmap index +}; + +inline std::string toString(PotentialIndexType indexType) +{ + switch (indexType) + { + case PotentialIndexType::BLOOM_FILTER: + return "BLOOM_FILTER"; + case PotentialIndexType::BITMAP_INDEX: + return "BITMAP_INDEX"; + case PotentialIndexType::SEGMENT_BITMAP_INDEX: + return "SEGMENT_BITMAP_INDEX"; + case PotentialIndexType::ALREADY_BITMAP_INDEX: + return "ALREADY_BITMAP_INDEX"; + default: + return "Unknown"; + } +} + +struct PotentialColumnInfo +{ + PotentialIndexType index_type; + StatisticInfo statistic_info; + PredicateInfos predicate_infos; +}; +using PotentialColumns = std::unordered_map; + +using PotentialPrewhereColumns = std::unordered_map, QualifiedColumnNameHash>; + +struct IndexOverhead +{ + size_t hashs; + size_t bits_per_rows; + size_t uncompressed_index_size; +}; + +using IndexSelectors = std::vector; +struct IndexEffect +{ + IndexSelectors index_selectors; + size_t total_expressions; +}; + +struct PotentialIndex +{ + QualifiedColumnName column; + PotentialIndexType index_type; + Float32 false_positive_rate; + + IndexOverhead index_overhead; + IndexEffect index_effect; + + StatisticInfo statistic_info; + PredicateInfos predicate_infos; +}; + +} diff --git a/programs/schema-advisor/PrewhereAdvisor.cpp b/programs/schema-advisor/PrewhereAdvisor.cpp new file mode 100644 index 00000000000..468fa7ea156 --- /dev/null +++ b/programs/schema-advisor/PrewhereAdvisor.cpp @@ -0,0 +1,238 @@ +#include "PrewhereAdvisor.h" +#include "ColumnUsageExtractor.h" +#include "Columns/IColumn.h" +#include +#include +#include "Core/Field.h" +#include "IO/WriteIntText.h" +#include "QueryPlan/PlanSerDerHelper.h" +#include "SampleColumnReader.h" +#include "Statistics.h" +#include "SchemaAdvisorHelpers.h" +#include "PotentialColumn.h" + +#include +#include +#include +#include +#include + +#include "Common/Exception.h" +#include "common/types.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +PrewhereAdvisor::PrewhereAdvisor( + MockEnvironment & env_, + const po::variables_map & options_, + size_t sample_row_number_, + size_t max_threads_, + Float64 mark_filter_threshold_, + Float64 top_3_mark_filter_threshold_) + : env(env_) + , options(options_) + , sample_row_number(sample_row_number_) + , max_threads(max_threads_) + , mark_filter_threshold(mark_filter_threshold_) + , top_3_mark_filter_threshold(top_3_mark_filter_threshold_) + , sample_mark_number(static_cast(sample_row_number_) / 8192) + , column_size_threshold(128 * sample_row_number_) +{ +} + +Float64 PrewhereAdvisor::calcMarkFilterRatio(const Field & field) const +{ + const auto & array_field = DB::safeGet(field); + + size_t total_marks = 0; + for (const auto & tuple_field : array_field) + { + total_marks += 
DB::safeGet(DB::safeGet(tuple_field)[1]); + } + + Float64 avg_mark_occurrence_rate = static_cast(total_marks) / array_field.size(); + + return avg_mark_occurrence_rate / sample_mark_number; +} + +std::pair PrewhereAdvisor::calcMarkFilterRatio(const ColumnPtr & column) const +{ + const auto * array_column = typeid_cast(column.get()); + const auto * tuple_column = typeid_cast((array_column->getDataPtr()).get()); + const auto * mark_count_column = typeid_cast(tuple_column->getColumns()[1].get()); + + size_t total_marks = 0; + std::priority_queue, std::greater> top_3_mark_pq; + for (size_t i = 0; i < mark_count_column->size(); i++) + { + auto current_mark = mark_count_column->get64(i); + total_marks += current_mark; + + if (top_3_mark_pq.size() < 3) + { + top_3_mark_pq.push(current_mark); + } + else if (current_mark > top_3_mark_pq.top()) + { + top_3_mark_pq.pop(); + top_3_mark_pq.push(current_mark); + } + } + size_t queue_size = top_3_mark_pq.size(); + + size_t top_3_mark_sum = 0; + while(!top_3_mark_pq.empty()) + { + top_3_mark_sum += top_3_mark_pq.top(); + top_3_mark_pq.pop(); + } + + Float64 avg_mark_occurrence_rate = static_cast(total_marks) / mark_count_column->size(); + Float64 top_3_mark_occurrence_rate = static_cast(top_3_mark_sum) / queue_size; + + return std::make_pair(avg_mark_occurrence_rate / sample_mark_number, top_3_mark_occurrence_rate / sample_mark_number); +} + +void PrewhereAdvisor::execute() +{ + auto context = createContext(options, env); + auto queries = loadQueries(options); + + LOG_DEBUG(getLogger("PrewhereAdvisor"), "++++++++++ begin to executor prewhere advisor ++++++++++"); + + ColumnUsageExtractor extractor(context, max_threads); + auto column_usages = extractor.extractColumnUsages(queries); + auto prewhere_usages = extractor.extractUsageForPrewhere(column_usages); + + LOG_DEBUG(getLogger("PrewhereAdvisor"), "Extracted {} prewhere_usages usages for prewhere", prewhere_usages.size()); + + CountByGranularity count_by_granularity; + + for (const auto & prewhere_usage : prewhere_usages) + { + auto column_info = prewhere_usage.first; + + auto storage = MockEnvironment::tryGetLocalTable(column_info.database, column_info.table, context); + if (!storage) + throw Exception(column_info.database + "(" + column_info.table + "): can not find local table.", ErrorCodes::NOT_FOUND_EXPECTED_DATA_PART); + + auto metadata = storage->getInMemoryMetadataCopy(); + auto column_and_type = metadata.getColumns().tryGetColumn(GetColumnsOptions::Kind::AllPhysical, column_info.column); + if (!column_and_type) + continue; + + auto column_type = column_and_type->type; + + if (isArray(column_type)) + continue; + + std::vector data_path_list; + + boost::split(data_path_list, options["data-path-list"].as(), boost::is_any_of(" ,")); + for (auto & path : data_path_list) + { + if (!endsWith(path, "/")) + path = path.append("/"); + } + + std::string absolute_part_path; + try + { + absolute_part_path = selectPartPath(options, data_path_list, storage->getStorageID().getDatabaseName(), storage->getStorageID().getTableName(), sample_row_number); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::NOT_FOUND_EXPECTED_DATA_PART) + { + LOG_DEBUG( + getLogger("PrewhereAdvisor"), + "Can't find suitable part for table " + column_info.database + "." 
+ column_info.table + + ", maybe because of the total part rows < " + std::to_string(sample_row_number)); + continue; + } + else + throw e; + } + + SampleColumnReader reader(absolute_part_path + "/", 0, sample_row_number); + ColumnPtr column; + try + { + column = reader.readColumn({prewhere_usage.first.column, column_type}); + } + catch (...) + { + // Just skip the column if it can't be read + LOG_DEBUG( + getLogger("PrewhereAdvisor"), + "Can't read column file " + prewhere_usage.first.column + " from table " + column_info.database + "." + column_info.table + + ", error message: " + + getCurrentExceptionMessage(true)); + continue; + } + + std::pair mark_filter_pair; + try + { + mark_filter_pair = calcMarkFilterRatio(count_by_granularity.executeOnColumn(column, column_type)); + } + catch (Exception & e) + { + if (e.code() == ErrorCodes::BAD_ARGUMENTS) + { + LOG_DEBUG( + getLogger("PrewhereAdvisor"), "Error while calculate mark filter ratio, error message: " + e.message()); + continue; + } + else + throw e; + } + + LOG_DEBUG(getLogger("PrewhereAdvisor"), "Column {} mark filter ratio is {}, top-3 mark filter ratio is {}, column size is {} MB", column_info.column, mark_filter_pair.first, mark_filter_pair.second, column->byteSize()/1000000); + + if (((mark_filter_pair.first <= mark_filter_threshold && mark_filter_pair.second <= top_3_mark_filter_threshold) || (mark_filter_pair.first < 0.1 && mark_filter_pair.second < 0.76)) && column->byteSize() < column_size_threshold) + potential_columns.insert({std::move(column_info), mark_filter_pair}); + } +} + +/// TODO: seperate two indices +void PrewhereAdvisor::serializeJson(WriteBuffer & buf, bool /* verbose */) +{ + bool first = true; + writeString("\"prewhere\":[", buf); + for (auto & [column_info, mark_filter_ratio] : potential_columns) + { + if (first) + first = false; + else + writeString(",", buf); + + writeString(R"({"db":")", buf); + writeString(column_info.database, buf); + writeString(R"(","table":")", buf); + writeString(column_info.table, buf); + writeString(R"(","column_name":")", buf); + writeString(column_info.column, buf); + + writeString(R"(","mark_filter_ratio":")", buf); + writeString(toString(mark_filter_ratio.first), buf); + writeString(R"(","top_3_mark_filter_ratio":")", buf); + writeString(toString(mark_filter_ratio.second), buf); + + writeString("\"}", buf); + } + writeString("]", buf); +} + +} diff --git a/programs/schema-advisor/PrewhereAdvisor.h b/programs/schema-advisor/PrewhereAdvisor.h new file mode 100644 index 00000000000..48e3d2cf67e --- /dev/null +++ b/programs/schema-advisor/PrewhereAdvisor.h @@ -0,0 +1,48 @@ +#pragma once + +#include "MockEnvironment.h" +#include "PotentialColumn.h" + +#include + +#include +#include + +namespace DB +{ + +namespace po = boost::program_options; + +class PrewhereAdvisor +{ +private: + MockEnvironment & env; + po::variables_map options; + const size_t sample_row_number; + const size_t max_threads; + const Float64 mark_filter_threshold; + const Float64 top_3_mark_filter_threshold; + const Float64 sample_mark_number; + // We do not pushdown the column if the field size > 128 bytes + const size_t column_size_threshold; + + PotentialPrewhereColumns potential_columns; + + Float64 calcMarkFilterRatio(const Field & field) const; + std::pair calcMarkFilterRatio(const ColumnPtr & column) const; +public: + PrewhereAdvisor( + MockEnvironment & env_, + const po::variables_map & options_, + size_t sample_row_number_, + size_t max_threads_, + Float64 mark_filter_threshold_, + Float64 
diff --git a/programs/schema-advisor/PrewhereAdvisor.h b/programs/schema-advisor/PrewhereAdvisor.h
new file mode 100644
index 00000000000..48e3d2cf67e
--- /dev/null
+++ b/programs/schema-advisor/PrewhereAdvisor.h
@@ -0,0 +1,48 @@
+#pragma once
+
+#include "MockEnvironment.h"
+#include "PotentialColumn.h"
+
+#include <boost/program_options.hpp>
+
+#include
+#include
+
+namespace DB
+{
+
+namespace po = boost::program_options;
+
+class PrewhereAdvisor
+{
+private:
+    MockEnvironment & env;
+    po::variables_map options;
+    const size_t sample_row_number;
+    const size_t max_threads;
+    const Float64 mark_filter_threshold;
+    const Float64 top_3_mark_filter_threshold;
+    const Float64 sample_mark_number;
+    /// We do not push down the column if the field size > 128 bytes.
+    const size_t column_size_threshold;
+
+    PotentialPrewhereColumns potential_columns;
+
+    Float64 calcMarkFilterRatio(const Field & field) const;
+    std::pair<Float64, Float64> calcMarkFilterRatio(const ColumnPtr & column) const;
+
+public:
+    PrewhereAdvisor(
+        MockEnvironment & env_,
+        const po::variables_map & options_,
+        size_t sample_row_number_,
+        size_t max_threads_,
+        Float64 mark_filter_threshold_,
+        Float64 top_3_mark_filter_threshold_);
+
+    virtual ~PrewhereAdvisor() = default;
+
+    void execute();
+    void serializeJson(WriteBuffer & buf, bool verbose = false);
+};
+
+}
diff --git a/programs/schema-advisor/SampleColumnReader.cpp b/programs/schema-advisor/SampleColumnReader.cpp
new file mode 100644
index 00000000000..a5cd4eedbbb
--- /dev/null
+++ b/programs/schema-advisor/SampleColumnReader.cpp
@@ -0,0 +1,341 @@
+#include "SampleColumnReader.h"
+
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+namespace ErrorCodes
+{
+    extern const int ARGUMENT_OUT_OF_BOUND;
+    extern const int BAD_ARGUMENTS;
+    extern const int CANNOT_READ_ALL_DATA;
+    extern const int CANNOT_SEEK_THROUGH_FILE;
+    extern const int CORRUPTED_DATA;
+    extern const int LOGICAL_ERROR;
+    extern const int NO_FILE_IN_DATA_PART;
+    extern const int UNKNOWN_PART_TYPE;
+}
+
+SampleColumnIndexGranularityInfo::SampleColumnIndexGranularityInfo(const String & path_to_part)
+{
+    auto mrk_ext = getMarksExtensionFromFilesystem(path_to_part);
+    if (!mrk_ext)
+        throw Exception("Can't find any marks file in part " + path_to_part, ErrorCodes::UNKNOWN_PART_TYPE);
+
+    if (*mrk_ext == getNonAdaptiveMrkExtension())
+    {
+        is_adaptive = false;
+        part_type = MergeTreeDataPartType::WIDE;
+        marks_file_extension = *mrk_ext;
+    }
+    else if (*mrk_ext == getAdaptiveMrkExtension(MergeTreeDataPartType::WIDE))
+    {
+        is_adaptive = true;
+        part_type = MergeTreeDataPartType::WIDE;
+        marks_file_extension = *mrk_ext;
+    }
+    else
+    {
+        throw Exception("Can't determine part type, because of unsupported mark extension " + *mrk_ext, ErrorCodes::UNKNOWN_PART_TYPE);
+    }
+}
+
+std::optional<std::string> SampleColumnIndexGranularityInfo::getMarksExtensionFromFilesystem(const String & path_to_part)
+{
+    if (std::filesystem::exists(path_to_part))
+    {
+        Poco::DirectoryIterator end;
+        for (Poco::DirectoryIterator it(path_to_part); it != end; ++it)
+        {
+            const auto & ext = std::filesystem::path(it->path()).extension();
+            if (ext == getNonAdaptiveMrkExtension()
+                || ext == getAdaptiveMrkExtension(MergeTreeDataPartType::WIDE)
+                || ext == getAdaptiveMrkExtension(MergeTreeDataPartType::COMPACT))
+                return ext;
+        }
+    }
+    return {};
+}
+
+std::string SampleColumnIndexGranularityInfo::getAdaptiveMrkExtension(MergeTreeDataPartType part_type_)
+{
+    if (part_type_ == MergeTreeDataPartType::WIDE)
+        return ".mrk2";
+    else if (part_type_ == MergeTreeDataPartType::COMPACT)
+        return ".mrk3";
+    else if (part_type_ == MergeTreeDataPartType::IN_MEMORY)
+        return "";
+    else
+        throw Exception("Unknown part type", ErrorCodes::UNKNOWN_PART_TYPE);
+}
+
+size_t SampleColumnIndexGranularityInfo::getMarkSizeInBytes() const
+{
+    if (part_type == MergeTreeDataPartType::WIDE)
+        return is_adaptive ?
getAdaptiveMrkSizeWide() : getNonAdaptiveMrkSizeWide(); + else + throw Exception("Unsupported type: " + part_type.toString(), ErrorCodes::UNKNOWN_PART_TYPE); +} + +size_t SampleColumnIndexGranularityInfo::getMarksCount(const String & path_prefix) const +{ + std::string marks_file_path = getMarksFilePath(path_prefix); + if (!std::filesystem::exists(marks_file_path)) + throw Exception("Marks file '" + marks_file_path + "' doesn't exist", ErrorCodes::NO_FILE_IN_DATA_PART); + + size_t marks_file_size = std::filesystem::file_size(marks_file_path); + return marks_file_size / getMarkSizeInBytes(); +} + +size_t SampleColumnIndexGranularityInfo::getMarksTotalSizeInBytes(const String & path_prefix) const +{ + std::string marks_file_path = getMarksFilePath(path_prefix); + if (!std::filesystem::exists(marks_file_path)) + throw Exception("Marks file '" + marks_file_path + "' doesn't exist", ErrorCodes::NO_FILE_IN_DATA_PART); + + return std::filesystem::file_size(marks_file_path); +} + +SampleColumnMarksLoader::SampleColumnMarksLoader( + const String & path_prefix_, + const String & stream_name_, + size_t marks_count_, + const SampleColumnIndexGranularityInfo & index_granularity_info_, + off_t mark_file_offset_, + size_t mark_file_size_) + : mrk_path(index_granularity_info_.getMarksFilePath(path_prefix_)) + , stream_name(stream_name_) + , marks_count(marks_count_) + , mark_file_offset(mark_file_offset_) + , mark_file_size(mark_file_size_) + , index_granularity_info(index_granularity_info_) {} + +const MarkInCompressedFile & SampleColumnMarksLoader::getMark(size_t row_index) +{ + if (!marks) + loadMarks(); + + return (*marks)[row_index]; +} + +SampleColumnMarksLoader::MarksPtr SampleColumnMarksLoader::loadMarksImpl() +{ + /// Memory for marks must not be accounted as memory usage for query, because they are stored in shared cache. + MemoryTrackerBlockerInThread temporarily_disable_memory_tracker; + + size_t mark_size = index_granularity_info.getMarkSizeInBytes(); + size_t expected_file_size = mark_size * marks_count; + + if (expected_file_size != mark_file_size) + throw Exception( + "Bad size of marks file '" + mrk_path + "' for stream '" + stream_name + "': " + std::to_string(mark_file_size) + ", must be: " + std::to_string(expected_file_size), + ErrorCodes::CORRUPTED_DATA); + + auto res = std::make_shared(marks_count); + + if (!index_granularity_info.is_adaptive) + { + /// Read directly to marks. 
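+        /// Mark file layout: a non-adaptive .mrk entry is two UInt64s
+        /// (offset_in_compressed_file, offset_in_decompressed_block), so the whole
+        /// file can be read into the marks array with a single readStrict below.
+        /// Adaptive .mrk2 entries carry a third UInt64 (rows in the granule), which
+        /// the else-branch skips with seek(sizeof(size_t), SEEK_CUR) after each mark.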
+        auto buffer = std::make_unique<ReadBufferFromFile>(mrk_path);
+        if (buffer->seek(mark_file_offset, SEEK_SET) != mark_file_offset)
+            throw Exception("Cannot seek to mark file " + mrk_path + " for stream " + stream_name, ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
+
+        if (buffer->eof() || buffer->buffer().size() != mark_file_size)
+            throw Exception("Cannot read all marks from file " + mrk_path + ", eof: " + std::to_string(buffer->eof())
+                + ", buffer size: " + std::to_string(buffer->buffer().size()) + ", file size: " + std::to_string(mark_file_size), ErrorCodes::CANNOT_READ_ALL_DATA);
+
+        buffer->readStrict(reinterpret_cast<char *>(res->data()), mark_file_size);
+    }
+    else
+    {
+        auto buffer = std::make_unique<ReadBufferFromFile>(mrk_path);
+        if (buffer->seek(mark_file_offset, SEEK_SET) != mark_file_offset)
+            throw Exception("Cannot seek to mark file " + mrk_path + " for stream " + stream_name, ErrorCodes::CANNOT_SEEK_THROUGH_FILE);
+
+        size_t i = 0;
+        off_t limit_offset_in_file = mark_file_offset + mark_file_size;
+        while (buffer->getPosition() < limit_offset_in_file)
+        {
+            res->read(*buffer, i, 1);
+            buffer->seek(sizeof(size_t), SEEK_CUR);
+            ++i;
+        }
+
+        if (i * mark_size != mark_file_size)
+            throw Exception("Cannot read all marks from file " + mrk_path, ErrorCodes::CANNOT_READ_ALL_DATA);
+    }
+    res->protect();
+    return res;
+}
+
+void SampleColumnMarksLoader::loadMarks()
+{
+    String mrk_name = index_granularity_info.getMarksFilePath(stream_name);
+    marks = loadMarksImpl();
+
+    if (!marks)
+        throw Exception("Failed to load marks: " + mrk_name + " from path: " + mrk_path, ErrorCodes::LOGICAL_ERROR);
+}
+
+SampleColumnReaderStream::SampleColumnReaderStream(
+    const String & path_prefix_, const String & stream_name_, const String & data_file_extension_,
+    const SampleColumnIndexGranularityInfo * index_granularity_info_,
+    size_t max_rows_to_read_)
+    : path_prefix(path_prefix_)
+    , max_rows_to_read(max_rows_to_read_)
+    , marks_loader(
+        path_prefix_,
+        stream_name_,
+        index_granularity_info_->getMarksCount(path_prefix_),
+        *index_granularity_info_,
+        mark_file_offset,
+        index_granularity_info_->getMarksTotalSizeInBytes(path_prefix_))
+{
+    std::string data_file_path = path_prefix_ + data_file_extension_;
+    /// Initialize the objects that shall be used to perform read operations.
+    auto buffer = std::make_unique<CompressedReadBufferFromFile>(
+        std::make_unique<ReadBufferFromFile>(data_file_path),
+        /* allow_different_codecs = */ true,
+        data_file_offset,
+        std::filesystem::file_size(data_file_path),
+        /* is_limit = */ true);
+
+    /* if (!settings.checksum_on_read) */
+    buffer->disableChecksumming();
+
+    non_cached_buffer = std::move(buffer);
+    data_buffer = non_cached_buffer.get();
+}
+
+void SampleColumnReaderStream::seekToMark(size_t index)
+{
+    MarkInCompressedFile mark = marks_loader.getMark(index);
+
+    try
+    {
+        non_cached_buffer->seek(mark.offset_in_compressed_file + data_file_offset, mark.offset_in_decompressed_block);
+    }
+    catch (Exception & e)
+    {
+        /// Better diagnostics.
+        if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND)
+            e.addMessage("(while seeking to mark " + toString(index)
+                + " of column " + path_prefix + "; offsets are: "
+                + toString(mark.offset_in_compressed_file + data_file_offset) + " "
+                + toString(mark.offset_in_decompressed_block) + ")");
+
+        throw;
+    }
+}
+
+void SampleColumnReaderStream::seekToStart()
+{
+    try
+    {
+        non_cached_buffer->seek(data_file_offset, 0);
+    }
+    catch (Exception & e)
+    {
+        /// Better diagnostics.
+ if (e.code() == ErrorCodes::ARGUMENT_OUT_OF_BOUND) + e.addMessage("(while seeking to start of column " + path_prefix + ")"); + + throw; + } +} + +SampleColumnReader::SampleColumnReader( + std::string path_to_part_, size_t from_mark_, size_t max_rows_to_read_) + : path_to_part(std::move(path_to_part_)) + , from_mark(from_mark_) + , max_rows_to_read(max_rows_to_read_) {} + +ReadBuffer * SampleColumnReader::getStream( + [[maybe_unused]] bool stream_for_prefix, + const ISerialization::SubstreamPath & substream_path, + const NameAndTypePair & name_and_type, + size_t from_mark_) +{ + String stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); + + auto it = streams.find(stream_name); + if (it == streams.end()) + return nullptr; + + SampleColumnReaderStream & stream = *it->second; + + if (stream_for_prefix) + stream.seekToStart(); + else + stream.seekToMark(from_mark_); + + return stream.data_buffer; +} + + +ColumnPtr SampleColumnReader::readColumn(const NameAndTypePair & name_and_type) +{ + SampleColumnIndexGranularityInfo index_granularity_info(path_to_part); + + ISerialization::StreamCallback callback = [&](const ISerialization::SubstreamPath & substream_path) { + String stream_name = ISerialization::getFileNameForStream(name_and_type, substream_path); + + if (streams.count(stream_name)) + return; +/* + auto check_validity + = [&](String & stream_name_) -> bool { return data_part->getChecksums()->files.count(stream_name_ + DATA_FILE_EXTENSION); }; + + // If data file is missing then we will not try to open it. + // It is necessary since it allows to add new column to structure of the table without creating new files for old parts. + // + if ((!name_and_type.type->isKVMap() && !check_validity(stream_name)) + || (name_and_type.type->isKVMap() && !tryConvertToValidKVStreamName(stream_name, check_validity))) + return; +*/ + std::string path_prefix = path_to_part + stream_name; + streams.emplace( + stream_name, + std::make_unique( + path_prefix, + stream_name, + DATA_FILE_EXTENSION, + &index_granularity_info, + max_rows_to_read + )); + }; + + auto serialization = name_and_type.type->getDefaultSerialization(); + serialization->enumerateStreams(callback); + + ColumnPtr column = name_and_type.type->createColumn(); + //double & avg_value_size_hint = avg_value_size_hints[name_and_type.name]; + ISerialization::DeserializeBinaryBulkSettings deserialize_settings; + // deserialize_settings.avg_value_size_hint = avg_value_size_hint; + + const auto & name = name_and_type.name; + + if (deserialize_binary_bulk_state_map.count(name) == 0) + { + deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) + { + return getStream(true, substream_path, name_and_type, from_mark); + }; + serialization->deserializeBinaryBulkStatePrefix(deserialize_settings, deserialize_binary_bulk_state_map[name]); + } + + deserialize_settings.getter = [&](const ISerialization::SubstreamPath & substream_path) + { + return getStream(false, substream_path, name_and_type, from_mark); + }; + deserialize_settings.continuous_reading = 0; + auto & deserialize_state = deserialize_binary_bulk_state_map[name]; + + serialization->deserializeBinaryBulkWithMultipleStreams(column, max_rows_to_read, deserialize_settings, deserialize_state, nullptr); + return column; +} + +} diff --git a/programs/schema-advisor/SampleColumnReader.h b/programs/schema-advisor/SampleColumnReader.h new file mode 100644 index 00000000000..ecb6126f830 --- /dev/null +++ b/programs/schema-advisor/SampleColumnReader.h 
@@ -0,0 +1,133 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +struct SampleColumnIndexGranularityInfo +{ +public: + /// Marks file extension '.mrk' or '.mrk2' + String marks_file_extension; + + /// Is stride in rows between marks non fixed? + bool is_adaptive = false; + + SampleColumnIndexGranularityInfo(const String & path_to_part); + + String getMarksFilePath(const String & path_prefix) const + { + return path_prefix + marks_file_extension; + } + + size_t getMarkSizeInBytes() const; + size_t getMarksCount(const String & path_prefix) const; + size_t getMarksTotalSizeInBytes(const String & path_prefix) const; + +private: + MergeTreeDataPartType part_type; + std::optional getMarksExtensionFromFilesystem(const String & path_to_part); + std::string getAdaptiveMrkExtension(MergeTreeDataPartType part_type); +}; + +class SampleColumnMarksLoader +{ +public: + using MarksPtr = std::shared_ptr; + + SampleColumnMarksLoader( + const String & path_prefix_, + const String & stream_name_, + size_t marks_count_, + const SampleColumnIndexGranularityInfo & index_granularity_info_, + off_t mark_file_offset_, + size_t mark_file_size_); + + const MarkInCompressedFile & getMark(size_t row_index); + + bool initialized() const { return marks != nullptr; } + +private: + String mrk_path; + String stream_name; // for compacted map + size_t marks_count; + + off_t mark_file_offset; + size_t mark_file_size; + + SampleColumnIndexGranularityInfo index_granularity_info; + + MarksPtr marks; + + void loadMarks(); + MarksPtr loadMarksImpl(); +}; + +class SampleColumnReaderStream +{ +public: + SampleColumnReaderStream( + const String & path_prefix_, const String & stream_name_, const String & data_file_extension_, + const SampleColumnIndexGranularityInfo * index_granularity_info_, + size_t max_rows_to_read_); + + virtual ~SampleColumnReaderStream() = default; + + void seekToMark(size_t index); + + void seekToStart(); + + ReadBuffer * data_buffer; + +private: + std::string path_prefix; + off_t data_file_offset = 0; + off_t mark_file_offset = 0; + [[maybe_unused]] size_t max_rows_to_read; + + std::unique_ptr non_cached_buffer; + + SampleColumnMarksLoader marks_loader; +}; + +using SampleFileStreams = std::map>; +using DeserializeBinaryBulkStateMap = std::map; + +class SampleColumnReader +{ +private: + const std::string path_to_part; + size_t from_mark; + size_t max_rows_to_read; + + SampleFileStreams streams; + + /// Stores states for IDataType::deserializeBinaryBulk + DeserializeBinaryBulkStateMap deserialize_binary_bulk_state_map; + +public: + SampleColumnReader( + std::string path_to_part_, + size_t from_mark_, + size_t max_rows_to_read_); + + virtual ~SampleColumnReader() = default; + + ReadBuffer * getStream( + bool stream_for_prefix, + const ISerialization::SubstreamPath & substream_path, + const NameAndTypePair & name_and_type, + size_t from_mark); + + ColumnPtr readColumn(const NameAndTypePair & name_and_type); +}; + +} diff --git a/programs/schema-advisor/SchemaAdvisor.cpp b/programs/schema-advisor/SchemaAdvisor.cpp new file mode 100644 index 00000000000..2dda2ab4e5b --- /dev/null +++ b/programs/schema-advisor/SchemaAdvisor.cpp @@ -0,0 +1,243 @@ +#include "Advisor/Advisor.h" +#include "CodecAdvisor.h" +#include "ColumnUsageExtractor.h" +#include "IndexAdvisor.h" +#include "MockEnvironment.h" +#include "SchemaAdvisorHelpers.h" +#include "TypeAdvisor.h" +#include "PrewhereAdvisor.h" + +#include +#include +#include +#include +#include + +#include +#include 
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+}
+
+static constexpr size_t DEFAULT_SAMPLE_ROW_NUMBER = 1000000;
+static constexpr size_t DEFAULT_MAX_THREADS = 8;
+static constexpr Float64 MARK_FILTER_THRESHOLD = 0.35;
+static constexpr Float64 TOP_3_MARK_FILTER_THRESHOLD = 0.65;
+
+}
+
+int mainEntryClickHouseSchemaAdvisor(int argc, char ** argv)
+{
+    using namespace DB;
+    namespace po = boost::program_options;
+
+    po::options_description desc = createOptionsDescription("Allowed options", getTerminalWidth());
+    desc.add_options()("help,h", "produce help message")
+        /// mandatory
+        ("db", po::value<std::string>()->value_name("DATABASE"), "db name")
+        ("table", po::value<std::string>()->value_name("TABLE"), "table name")
+        ("mode", po::value<std::string>()->value_name("MODE"),
+         "mode: type, codec, type-codec, skip-index, projection, materialized-view, order-by-key, sharding-key")
+        ("path", po::value<std::string>()->value_name("PATH"), "main path")
+        ("meta-path", po::value<std::string>()->value_name("META PATH"), "meta path")
+        ("data-path-list", po::value<std::string>()->value_name("DATA PATH LIST"), "data path list, format: path1,path2")
+        ("settings", po::value<std::string>()->default_value(""), "set settings, format: key1=value1,key2=value2")
+        ("log-level", po::value<std::string>()->default_value(""),
+         "log level: trace, debug, information, notice, warning, error. Disabled if empty.")
+        /// optional
+        ("part", po::value<std::string>()->value_name("PART"), "sample part name")
+        ("max-threads", po::value<size_t>()->default_value(DEFAULT_MAX_THREADS), "max threads for schema advisor")
+        ("sample-number", po::value<size_t>()->default_value(DEFAULT_SAMPLE_ROW_NUMBER), "sample row number")
+        ("verbose", "print column compression gain ratio")
+        /// codec
+        ("fsst", "codec mode: use FSST instead of LZ4")
+        ("zstd", "codec mode: use ZSTD instead of LZ4")
+        ("level", po::value<int>(), "codec mode: compression level for codecs specified via flags")
+        ("codec", po::value<std::vector<std::string>>()->multitoken(), "codec mode: use codecs combination instead of LZ4")
+        ("hc", "codec mode: use LZ4HC instead of LZ4")
+        ("none", "codec mode: use no compression instead of LZ4")
+        ("block-size,b", po::value<unsigned>()->default_value(DBMS_DEFAULT_BUFFER_SIZE), "codec mode: compress in blocks of specified size")
+        /// skip-index
+        ("query-file", po::value<std::string>()->value_name("QUERIES"), "absolute path to the query file, one query per line by default")
+        ("query-file-delimiter", po::value<std::string>()->value_name("DELIMITER"), "query file delimiter, default is newline")
+        /// tos
+        ("tos-ak", po::value<std::string>()->value_name("TOS AK"), "tos access key")
+        ("vetos-endpoint", po::value<std::string>()->value_name("VETOS ENDPOINT"), "ve tos endpoint")
+        ("vetos-region", po::value<std::string>()->value_name("VETOS REGION"), "ve tos region")
+        ("vetos-ak", po::value<std::string>()->value_name("VETOS AK"), "ve tos access key")
+        ("vetos-sk", po::value<std::string>()->value_name("VETOS SK"), "ve tos secret key")
+        /// prewhere
+        ("mark_filter_threshold", po::value<Float64>()->default_value(MARK_FILTER_THRESHOLD), "threshold for mark filter ratio")
+        ("top_3_mark_filter_threshold", po::value<Float64>()->default_value(TOP_3_MARK_FILTER_THRESHOLD), "threshold for top-3 mark filter ratio")
+        ("low-cardinality", "recommend low-cardinality only in type advisor")
+        ("scanned_count_threshold_for_lc", po::value<Float64>()->default_value(0.035), "recommend low-cardinality only if scanned count > scanned_count_threshold_for_lc * query count")
+        ("cardinality_ratio_threshold_for_lc", po::value<Float64>()->default_value(0.05), "recommend low-cardinality only if cardinality < sample_row_number * cardinality_ratio_threshold_for_lc");
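For orientation, a prewhere analysis run might be invoked like this, assuming the tool is wired up as the schema-advisor subcommand of the multi-call clickhouse binary (all paths and names below are illustrative):

    clickhouse schema-advisor --db my_db --mode prewhere \
        --meta-path /data0/byconity/ --data-path-list /data0/byconity/,/data1/byconity/ \
        --query-file /tmp/workload.sql --sample-number 1000000 --log-level debug > advice.json

When the threshold options are omitted, they fall back to MARK_FILTER_THRESHOLD (0.35) and TOP_3_MARK_FILTER_THRESHOLD (0.65) defined above.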
+
+    WriteBufferFromOwnString str_buf;
+    std::unique_ptr<WriteBufferFromFileDescriptor> stdout_buf = std::make_unique<WriteBufferFromFileDescriptor>(STDOUT_FILENO);
+    bool verbose = false;
+
+    try
+    {
+        po::variables_map options;
+        po::store(po::command_line_parser(argc, argv).options(desc).run(), options);
+
+        if (options.count("help"))
+        {
+            std::cout << "Usage: " << argv[0] << " [options] < INPUT > OUTPUT" << std::endl;
+            std::cout << "Usage: " << argv[0] << " [options] INPUT OUTPUT" << std::endl;
+            std::cout << desc << std::endl;
+            return 0;
+        }
+
+        if (!options.count("db") || !options.count("mode"))
+            throw Exception("Missing option, 'db' or 'mode' is missing", ErrorCodes::BAD_ARGUMENTS);
+
+        // if (options.count("path") == options.count("meta-path") || options.count("meta-path") != options.count("data-path-list"))
+        //     throw Exception("Missing option, either a single 'path' argument or both meta path and data path list arguments are allowed", ErrorCodes::BAD_ARGUMENTS);
+
+        std::string db_name = options["db"].as<std::string>();
+        std::string advisor_mode = options["mode"].as<std::string>();
+
+        std::string meta_path;
+        std::vector<std::string> data_path_list;
+        // if (options.count("path"))
+        // {
+        //     std::string path = options["path"].as<std::string>();
+        //     if (!endsWith(path, "/"))
+        //         path.append("/");
+        //     meta_path = path;
+        //     data_path_list.emplace_back(path);
+        // }
+        // else
+        // {
+        meta_path = options["meta-path"].as<std::string>();
+        if (!endsWith(meta_path, "/"))
+            meta_path.append("/");
+        boost::split(data_path_list, options["data-path-list"].as<std::string>(), boost::is_any_of(" ,"));
+        for (auto & path : data_path_list)
+        {
+            if (!endsWith(path, "/"))
+                path = path.append("/");
+        }
+        // }
+
+        size_t sample_row_number = options["sample-number"].as<size_t>();
+        size_t max_threads = options["max-threads"].as<size_t>();
+        Float64 mark_filter_threshold = options["mark_filter_threshold"].as<Float64>();
+        Float64 top_3_mark_filter_threshold = options["top_3_mark_filter_threshold"].as<Float64>();
+        verbose = options.count("verbose");
+
+        if (auto log_level = options["log-level"].as<std::string>(); !log_level.empty())
+            setupLogging(log_level);
+
+        /// Prepare the mock environment.
+        MockEnvironment env(meta_path, max_threads);
+
+        if (advisor_mode == "codec")
+        {
+            if (!options.count("table"))
+                throw Exception("Missing option, 'table' is missing", ErrorCodes::BAD_ARGUMENTS);
+
+            std::string table_name = options["table"].as<std::string>();
+            std::string absolute_part_path = selectPartPath(options, data_path_list, db_name, table_name, sample_row_number);
+            serializeJsonPrefix(str_buf, db_name, table_name, absolute_part_path, verbose);
+            ColumnsDescription columns = env.getColumnsDescription(db_name, table_name);
+
+            CodecAdvisor codec_advisor(options, columns, absolute_part_path, sample_row_number, max_threads);
+            codec_advisor.execute();
+            codec_advisor.serializeJson(str_buf, verbose);
+            serializeJsonSuffix(str_buf);
+        }
+        else if (advisor_mode == "type")
+        {
+            if (!options.count("table"))
+                throw Exception("Missing option, 'table' is missing", ErrorCodes::BAD_ARGUMENTS);
+
+            auto lc_only = options.count("low-cardinality");
+            auto scanned_count_threshold_for_lc = options["scanned_count_threshold_for_lc"].as<Float64>();
+            auto cardinality_ratio_threshold_for_lc = options["cardinality_ratio_threshold_for_lc"].as<Float64>();
+
+            std::string table_name = options["table"].as<std::string>();
+            std::string absolute_part_path = selectPartPath(options, data_path_list, db_name, table_name, sample_row_number);
+            serializeJsonPrefix(str_buf, db_name, table_name, absolute_part_path, verbose);
+            ColumnsDescription columns = env.getColumnsDescription(db_name, table_name);
+
+            TypeAdvisor
type_advisor(env, options, columns, absolute_part_path, sample_row_number, max_threads, lc_only, scanned_count_threshold_for_lc, cardinality_ratio_threshold_for_lc); + type_advisor.execute(); + type_advisor.serializeJson(str_buf, verbose); + serializeJsonSuffix(str_buf); + } + else if (advisor_mode == "skip-index") // currently extracts all usages for the database + { + serializeJsonPrefixWithDB(str_buf, db_name); + IndexAdvisor index_advisor(env, options, sample_row_number, max_threads); + index_advisor.execute(); + index_advisor.serializeJson(str_buf, verbose); + serializeJsonSuffix(str_buf); + } + else if (advisor_mode == "prewhere") // currently extracts all usages for the database + { + serializeJsonPrefixWithDB(str_buf, db_name); + PrewhereAdvisor prewhere_advisor(env, options, sample_row_number, max_threads, mark_filter_threshold, top_3_mark_filter_threshold); + prewhere_advisor.execute(); + prewhere_advisor.serializeJson(str_buf, verbose); + serializeJsonSuffix(str_buf); + } + else if (advisor_mode == "materialized-view") + { + Advisor advisor{ASTAdviseQuery::AdvisorType::MATERIALIZED_VIEW}; + WorkloadAdvises advises = advisor.analyze(loadQueries(options), createContext(options, env)); + serializeJson("materialized-view", "ddl", db_name, advises, str_buf, verbose); + } + else if (advisor_mode == "projection") + { + Advisor advisor{ASTAdviseQuery::AdvisorType::PROJECTION}; + WorkloadAdvises advises = advisor.analyze(loadQueries(options), createContext(options, env)); + serializeJson("projection", "ddl", db_name, advises, str_buf, verbose); + } + else if (advisor_mode == "order-by-key") + { + Advisor advisor{ASTAdviseQuery::AdvisorType::ORDER_BY}; + WorkloadAdvises advises = advisor.analyze(loadQueries(options), createContext(options, env)); + serializeJson(advisor_mode, "candidate", db_name, advises, str_buf, verbose); + } + else if (advisor_mode == "cluster-key") + { + Advisor advisor{ASTAdviseQuery::AdvisorType::CLUSTER_BY}; + WorkloadAdvises advises = advisor.analyze(loadQueries(options), createContext(options, env)); + serializeJson(advisor_mode, "candidate", db_name, advises, str_buf, verbose); + } + else if (advisor_mode == "column-usage") + { + Advisor advisor{ASTAdviseQuery::AdvisorType::COLUMN_USAGE}; + WorkloadAdvises advises = advisor.analyze(loadQueries(options), createContext(options, env)); + serializeJson(advisor_mode, "usage", db_name, advises, str_buf, verbose); + } + else + { + throw Exception("Unsupported advisor mode: " + advisor_mode, ErrorCodes::BAD_ARGUMENTS); + } + } + catch (...) 
+ { + serializeException(*stdout_buf, getCurrentExceptionMessage(verbose)); + return getCurrentExceptionCode(); + } + writeString(str_buf.str(), *stdout_buf); + + return 0; +} diff --git a/programs/schema-advisor/SchemaAdvisorHelpers.h b/programs/schema-advisor/SchemaAdvisorHelpers.h new file mode 100644 index 00000000000..02546c075a0 --- /dev/null +++ b/programs/schema-advisor/SchemaAdvisorHelpers.h @@ -0,0 +1,418 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "MockEnvironment.h" + +namespace DB +{ +namespace ErrorCodes +{ + extern const int NOT_FOUND_EXPECTED_DATA_PART; + extern const int NO_FILE_IN_DATA_PART; +} + +namespace po = boost::program_options; + +[[maybe_unused]] static void setupLogging(const std::string & log_level) +{ + Poco::AutoPtr channel(new Poco::ConsoleChannel); + Poco::AutoPtr formatter(new Poco::PatternFormatter); + formatter->setProperty("pattern", "%L%Y-%m-%d %H:%M:%S.%i <%p> %s: %t"); + Poco::AutoPtr formatting_channel(new Poco::FormattingChannel(formatter, channel)); + Poco::Logger::root().setChannel(formatting_channel); + Poco::Logger::root().setLevel(log_level); +} + +namespace ErrorCodes +{ + extern const int BAD_ARGUMENTS; + extern const int NOT_FOUND_EXPECTED_DATA_PART; + extern const int NETWORK_ERROR; +} + +namespace po = boost::program_options; + +static constexpr size_t METADATA_FILE_BUFFER_SIZE = 32768; +static constexpr size_t DEFAULT_MAX_SAMPLE_CANDIDATE_NUM = 20; +static constexpr auto DEFAULT_TOS_PSM = "toutiao.tos.tosapi"; + +struct SamplingColumnFile +{ + SamplingColumnFile(std::string file_path_, std::string column_name_) + : file_path(std::move(file_path_)), column_name(std::move(column_name_)) + { + } + + std::string file_path; + std::string column_name; + size_t origin_file_size = 0; + size_t optimized_file_size = 0; +}; + +using SamplingColumnFilePtr = std::shared_ptr; +using SamplingColumnFiles = std::vector; + +// a thread-safe implementation +class MessageCollector +{ +public: + void collect(std::string && msg) + { + std::lock_guard lock(mutex); + messages.emplace_back(std::move(msg)); + } + + void logCollectedError() + { + for (const auto & msg : messages) + LOG_ERROR(getLogger("MessageCollector"), "{}", msg); + messages.clear(); + } + +private: + std::vector messages; + bthread::Mutex mutex; +}; + +static std::string readSqlFile(String source_uri, [[maybe_unused]]const po::variables_map & options) +{ + // std::string uri_prefix = source_uri.substr(0, source_uri.find_last_of('/')); + // Poco::URI uri(uri_prefix); + // const String& scheme = uri.getScheme(); + + + // if (scheme == "tos") // tos on cloud, url like "tos://bucket/key" + // { + // if (!options.count("tos-ak")) + // throw Exception("Option tos-ak is missing for tos uri", ErrorCodes::BAD_ARGUMENTS); + // std::string tos_ak = options["tos-ak"].as(); + + // Poco::URI tos_uri(source_uri); + // auto host = tos_uri.getHost(); + // auto port = tos_uri.getPort(); + // std::string tos_psm = DEFAULT_TOS_PSM; + // std::string tos_server; + + // if (host.empty() || port == 0) + // { + // auto tos_servers = ServiceDiscovery::lookup(DEFAULT_TOS_PSM, std::pair("cluster", "default")); + // if (tos_servers.empty()) + // throw Exception("Can not find tos servers with PSM: " + tos_psm, ErrorCodes::NETWORK_ERROR); + // auto 
generator = std::mt19937(std::random_device{}()); // mt19937 engine + // std::uniform_int_distribution distribution(0, tos_servers.size() - 1); + // tos_server = tos_servers.at(distribution(generator)); + // } + // else + // { + // tos_server = normalizeHost(host) + ":" + toString(port); + // } + + // ConnectionTimeouts timeouts( + // {DEFAULT_HTTP_READ_BUFFER_CONNECTION_TIMEOUT, 0}, + // {DEFAULT_HTTP_READ_BUFFER_TIMEOUT, 0}, + // {DEFAULT_HTTP_READ_BUFFER_TIMEOUT, 0}); + + // std::string tos_http_uri_str = fmt::format( + // "http://{}{}?timeout={}s", tos_server, tos_uri.getPath(), DBMS_DEFAULT_CONNECT_TIMEOUT_SEC); + // Poco::URI tos_http_uri = Poco::URI(tos_http_uri_str); + // HTTPSessionPtr session = makeHTTPSession(tos_http_uri, timeouts); + + // Poco::Net::HTTPRequest request{Poco::Net::HTTPRequest::HTTP_GET, tos_http_uri.getPathAndQuery(), Poco::Net::HTTPRequest::HTTP_1_1}; + // request.set("X-Tos-Access", tos_ak); + // request.setHost(tos_http_uri.getHost()); + // request.setChunkedTransferEncoding(false); + + // session->sendRequest(request); + // Poco::Net::HTTPResponse response; + // std::istream * response_body = receiveResponse(*session, request, response, false); + // Poco::StreamCopier::copyToString(*response_body, res); + // } + // #if USE_VE_TOS + // else if (scheme == "vetos") // tos on volcano engine, url like "vetos://bucket/key" + // { + // Poco::URI vetos_uri(source_uri); + // vetos_uri.getPath(); + // if(vetos_uri.getPath().empty() || vetos_uri.getHost().empty()) + // { + // throw Exception("Invalid ve-tos path.", ErrorCodes::LOGICAL_ERROR); + // } + // const String& bucket = vetos_uri.getHost(); + // size_t size = vetos_uri.getPath().size(); + // String key = vetos_uri.getPath().substr(1, size - 1); + // if (!options.count("vetos-endpoint")) + // throw Exception("Option vetos-endpoint is missing for ve tos uri", ErrorCodes::BAD_ARGUMENTS); + // if (!options.count("vetos-region")) + // throw Exception("Option vetos-region is missing for ve tos uri", ErrorCodes::BAD_ARGUMENTS); + // if (!options.count("vetos-ak")) + // throw Exception("Option vetos-ak is missing for ve tos uri", ErrorCodes::BAD_ARGUMENTS); + // if (!options.count("vetos-sk")) + // throw Exception("Option vetos-sk is missing for ve tos uri", ErrorCodes::BAD_ARGUMENTS); + // std::string ve_tos_endpoint = options["vetos-endpoint"].as(); + // std::string ve_tos_region = options["vetos-region"].as(); + // std::string ve_tos_ak = options["vetos-ak"].as(); + // std::string ve_tos_sk = options["vetos-sk"].as(); + + // std::unique_ptr read_buf = + // std::make_unique(ve_tos_endpoint, ve_tos_region, ve_tos_ak, ve_tos_sk, bucket, key); + + // readStringUntilEOF(res, *read_buf); + // } + // #endif // USE_VE_TOS + // else // absolute file path on local file system + // { + + std::string res; + + std::ifstream fin(source_uri); + std::stringstream buffer; + buffer << fin.rdbuf(); + res = buffer.str(); + // } + + return res; +} + + +/// Select the target part according to specific rule if 'part' option is not specified +[[maybe_unused]] static std::string selectPartPath(const po::variables_map & options, const std::vector & data_path_list, const std::string & db_name, const std::string & table_name, size_t sample_row_number) +{ + for (const auto & path : data_path_list) + { + if (options.count("part")) + { + std::string part = options["part"].as(); + if (endsWith(part, "/")) + part.pop_back(); + return path + "metadata/" + escapeForFileName(db_name) + "/" + escapeForFileName(table_name) + "/" + part; + } + + 
std::string table_data_path = path + "data/" + escapeForFileName(db_name) + "/" + escapeForFileName(table_name) + "/"; + if (!std::filesystem::exists(table_data_path)) + continue; + + std::multimap parts_by_timestamp; + Poco::DirectoryIterator end; + for (Poco::DirectoryIterator it(table_data_path); it != end; ++it) + { + if (it->isDirectory() + && it.name() != "detached" + && it.name() != "log" + && it.name() != "catalog.db" + && it.name() != "manifest" + && !startsWith(it.name(), "tmp-fetch") + && !startsWith(it.name(), "tmp_") + && !startsWith(it.name(), "delete_tmp")) + { + size_t part_row_count; + std::string part_count_path = it->path() + "/count.txt"; + { + ReadBufferFromFile in(part_count_path, METADATA_FILE_BUFFER_SIZE); + readIntText(part_row_count, in); + assertEOF(in); + } + if (part_row_count >= sample_row_number) + { + parts_by_timestamp.emplace(it->getLastModified().epochTime(), it->path()); + if (parts_by_timestamp.size() > DEFAULT_MAX_SAMPLE_CANDIDATE_NUM) + break; + } + } + } + if (!parts_by_timestamp.empty()) + return parts_by_timestamp.begin()->second; + } + + throw Exception(db_name + "(" + table_name + "): failed to find qualified sample part.", ErrorCodes::NOT_FOUND_EXPECTED_DATA_PART); +} + +/// Generate output prefix in JSON format +[[maybe_unused]] static void serializeJsonPrefix(WriteBuffer & buf, std::string db_name, std::string table_name, std::string absolute_part_path, bool verbose) +{ + writeString(R"({"recommendation":{"db":")", buf); + writeString(db_name, buf); + writeString("\",", buf); + writeString(R"("table":")", buf); + writeString(table_name, buf); + writeString("\",", buf); + if (verbose) + { + writeString(R"("part selected":")", buf); + writeString(absolute_part_path, buf); + writeString("\",", buf); + } +} + +[[maybe_unused]] static void serializeJsonPrefixWithDB(WriteBuffer & buf, std::string db_name) +{ + writeString(R"({"recommendation":{"db":")", buf); + writeString(db_name, buf); + writeString("\",", buf); +} + +/// Generate output suffix in JSON format +[[maybe_unused]] static void serializeJsonSuffix(WriteBuffer & buf) +{ + writeString("}}", buf); +} + +/// Generate exception in JSON format +[[maybe_unused]] static void serializeException(WriteBuffer & buf, std::string error_msg) +{ + writeString(R"({"exception":")", buf); + writeString(error_msg, buf); + writeString("\"}", buf); +} + +[[maybe_unused]] static std::vector loadQueries(po::variables_map & options) +{ + std::string query_file = options["query-file"].as(); + + std::vector splits; + if (Poco::toLower(query_file).ends_with(".json")) + { + PlanReproducer reproducer{query_file, nullptr}; + for (const auto & name : reproducer.getQueries()->getNames()) + splits.emplace_back(reproducer.getQuery(name).query); + return splits; + } + + std::string query_content = readSqlFile(query_file, options); + std::string delimiter = "\n"; + if (options.count("query-file-delimiter")) + delimiter = options["query-file-delimiter"].as(); + + size_t last = 0; + size_t next; + while ((next = query_content.find(delimiter, last)) != std::string::npos) + { + auto query = query_content.substr(last, next - last); + boost::replace_all(query, "\\r", "\r"); + boost::replace_all(query, "\\n", "\n"); + boost::replace_all(query, "\\t", "\t"); + boost::replace_all(query, "\\\"", "\""); + boost::replace_all(query, "\\'", "'"); + splits.push_back(query); + last = next + 1; + } + if (splits.empty()) + throw Poco::Exception("'" + query_file + "' is empty?"); + return splits; +} + +[[maybe_unused]] static 
ContextMutablePtr createContext(po::variables_map & options, MockEnvironment & env)
+{
+    if (options["db"].empty())
+        throw Exception("argument db is required", ErrorCodes::BAD_ARGUMENTS);
+
+    std::string db_name = options["db"].as<std::string>();
+    std::vector<std::string> db_list;
+    boost::algorithm::split(db_list, db_name, boost::is_any_of(","), boost::token_compress_on);
+
+    if (db_list.empty())
+        throw Exception("argument db is required", ErrorCodes::BAD_ARGUMENTS);
+
+    for (const auto & db : db_list)
+    {
+        env.createMockDatabase(db);
+        // todo: currently we create all tables in the db
+        for (const auto & table : env.listTables(db))
+            env.createMockTable(db, table);
+    }
+
+    auto context = env.createQueryContext();
+    context->setCurrentDatabase(db_list[0]);
+
+    std::string settings = options["settings"].as<std::string>();
+    if (!settings.empty())
+    {
+        ParserSetQuery parser{true};
+        ASTPtr ast = parseQuery(parser, settings, 0, 0);
+        context->applySettingsChanges(ast->as<ASTSetQuery>()->changes);
+    }
+
+    return context;
+}
+
+[[maybe_unused]] static void serializeJson(const std::string & advise_type, const String & advise_name, const String & db, const WorkloadAdvises & advises, WriteBuffer & buf, bool)
+{
+    Poco::JSON::Array advises_array;
+    for (const auto & advise : advises)
+    {
+        Poco::JSON::Object advise_object;
+        advise_object.set("db", advise->getTable().database);
+        advise_object.set("table", advise->getTable().table);
+        if (advise->getColumnName().has_value())
+            advise_object.set("column", advise->getColumnName().value());
+
+        if (!advise->getCandidates().empty())
+        {
+            Poco::JSON::Array candidates;
+            for (const auto & item : advise->getCandidates())
+            {
+                Poco::JSON::Object candidate_object;
+                candidate_object.set(advise_name, item.first);
+                candidate_object.set("benefit", item.second);
+                candidates.add(candidate_object);
+            }
+            advise_object.set("candidates", candidates);
+        }
+        else
+        {
+            advise_object.set(advise_name, advise->getOptimizedValue());
+            advise_object.set("benefit", advise->getBenefit());
+        }
+
+        if (!advise->getRelatedQueries().empty())
+        {
+            Poco::JSON::Array related_queries;
+            for (const auto & query : advise->getRelatedQueries())
+                related_queries.add(query);
+            advise_object.set("relatedQueries", related_queries);
+        }
+
+        advises_array.add(advise_object);
+    }
+
+    Poco::JSON::Object advises_object;
+    advises_object.set(advise_type, advises_array);
+
+    Poco::JSON::Object recommendation_object;
+    recommendation_object.set("db", db);
+    recommendation_object.set(advise_type, advises_object);
+
+    Poco::JSON::Object res;
+    res.set("recommendation", recommendation_object);
+    std::ostringstream oss;
+    Poco::JSON::Stringifier::condense(res, oss);
+    writeString(oss.str(), buf);
+}
+}
diff --git a/programs/schema-advisor/Statistics.cpp b/programs/schema-advisor/Statistics.cpp
new file mode 100644
index 00000000000..3581bb17d61
--- /dev/null
+++ b/programs/schema-advisor/Statistics.cpp
@@ -0,0 +1,453 @@
+#include "Statistics.h"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include "Columns/IColumn.h"
+
+namespace DB
+{
+namespace ErrorCodes
+{
+    extern const int CANNOT_ALLOCATE_MEMORY;
+    extern const int BAD_ARGUMENTS;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
+template <template <typename> typename Data, template <typename> typename DataForVariadic, bool is_able_to_parallelize_merge>
+AggregateFunctionPtr
+createAggregateFunctionUniq(const std::string & name, const DataTypes & argument_types, const Array &
params, const Settings *) +{ + assertNoParameters(name, params); + + if (argument_types.empty()) + throw Exception("Incorrect number of arguments for aggregate function " + name, + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + /// We use exact hash function if the user wants it; + /// or if the arguments are not contiguous in memory, because only exact hash function have support for this case. + /// bool use_exact_hash_function = is_exact || !isAllArgumentsContiguousInMemory(argument_types); + + if (argument_types.size() == 1) + { + const IDataType & argument_type = *argument_types[0]; + + AggregateFunctionPtr res(createWithNumericType(*argument_types[0], argument_types)); + + WhichDataType which(argument_type); + if (res) + return res; + else if (which.isDate()) + return std::make_shared>>(argument_types); + else if (which.isDate32()) + return std::make_shared>>(argument_types); + else if (which.isDateTime()) + return std::make_shared>>(argument_types); + else if (which.isStringOrFixedString()) + return std::make_shared>>(argument_types); + else if (which.isUUID()) + return std::make_shared>>(argument_types); + else if (which.isTuple()) + { + /* + if (use_exact_hash_function) + return std::make_shared>>(argument_types); + else + return std::make_shared>>(argument_types); + */ + throw Exception("Unsupported tuple data type for uniqExtract", ErrorCodes::BAD_ARGUMENTS); + } + } + + /* "Variadic" method also works as a fallback generic case for single argument. + if (use_exact_hash_function) + return std::make_shared>>(argument_types); + else + return std::make_shared>>(argument_types); + */ + throw Exception("Unsupported arguments size " + std::to_string(argument_types.size()), ErrorCodes::BAD_ARGUMENTS); +} + +DataTypes transformArguments(const DataTypes & arguments) +{ + size_t size = arguments.size(); + DataTypes res(size); + for (size_t i = 0; i < size; ++i) + res[i] = removeNullable(arguments[i]); + return res; +} + +Field UniExtract::executeOnColumn(const ColumnPtr & column, const DataTypePtr & type) +{ + String name = "uniqExact"; + DataTypes argument_types(1); + argument_types[0] = type; + Array parameters; + + AggregateFunctionPtr nested_function = + createAggregateFunctionUniq + (name, transformArguments(argument_types), parameters, nullptr); + AggregateFunctionPtr aggregate_function = type->isNullable() + ? std::make_shared>(nested_function, argument_types, parameters) + : nested_function; + + size_t total_size_of_aggregate_states = 0; /// The total size of the row from the aggregate functions. + // add info to track alignment requirement + // If there are states whose alignment are v1, ..vn, align_aggregate_states will be max(v1, ... vn) + size_t align_aggregate_states = 1; + total_size_of_aggregate_states = aggregate_function->sizeOfData(); + align_aggregate_states = std::max(align_aggregate_states, aggregate_function->alignOfData()); + + std::shared_ptr aggregates_pool = std::make_shared(); /// The pool that is currently used for allocation. + AggregateDataPtr place = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + try + { + /** An exception may occur if there is a shortage of memory. + * In order that then everything is properly destroyed, we "roll back" some of the created states. + * The code is not very convenient. + */ + aggregate_function->create(place); + } + catch (...) 
+ { + aggregate_function->destroy(place); + throw Exception("Cannot allocate memory", ErrorCodes::CANNOT_ALLOCATE_MEMORY); + } + size_t rows = column->size(); + ColumnRawPtrs column_ptrs; + column_ptrs.emplace_back(column.get()); + const IColumn ** batch_arguments = column_ptrs.data(); + + aggregate_function->addBatchSinglePlace(rows, place, batch_arguments, nullptr); + + DataTypePtr result_type = std::make_shared(); + ColumnPtr result_column = result_type->createColumn(); + MutableColumnPtr mutable_column = result_column->assumeMutable(); + aggregate_function->insertResultInto(place, *mutable_column, nullptr); + return (*result_column)[0]; +} + +Field UniExtract::executeOnColumnArray(const ColumnPtr & column, const DataTypePtr & type) +{ + if (!isArray(type)) + return 0; + + const auto * array_type = checkAndGetDataType(type.get()); + const auto& nested_type = array_type->getNestedType(); + + String inner_func_name = "uniqExact"; + String combinator_suffix = "Array"; + + DataTypes nested_argument_types{nested_type}; + DataTypes argument_types{type}; + Array parameters; + + // For inner func uniqExact + AggregateFunctionPtr nested_function = + createAggregateFunctionUniq + (inner_func_name, transformArguments(nested_argument_types), parameters, nullptr); + AggregateFunctionPtr uniq_exact_function = type->isNullable() + ? std::make_shared>(nested_function, nested_argument_types, parameters) + : nested_function; + + // For combinator -Array + AggregateFunctionCombinatorPtr array_combinator = AggregateFunctionCombinatorFactory::instance().tryFindSuffix(combinator_suffix); + AggregateFunctionPtr uniq_exact_array_function = array_combinator->transformAggregateFunction( + uniq_exact_function, {}, argument_types, parameters); + + size_t total_size_of_aggregate_states = 0; + size_t align_aggregate_states = 1; + total_size_of_aggregate_states = uniq_exact_array_function->sizeOfData(); + align_aggregate_states = std::max(align_aggregate_states, uniq_exact_array_function->alignOfData()); + + std::shared_ptr aggregates_pool = std::make_shared(); + AggregateDataPtr place = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + try + { + uniq_exact_array_function->create(place); + } + catch (...) 
+ { + uniq_exact_array_function->destroy(place); + throw Exception("Cannot allocate memory", ErrorCodes::CANNOT_ALLOCATE_MEMORY); + } + size_t rows = column->size(); + ColumnRawPtrs column_ptrs; + column_ptrs.emplace_back(column.get()); + const IColumn ** batch_arguments = column_ptrs.data(); + + uniq_exact_array_function->addBatchSinglePlace(rows, place, batch_arguments, nullptr); + + DataTypePtr result_type = std::make_shared(); + ColumnPtr result_column = result_type->createColumn(); + MutableColumnPtr mutable_column = result_column->assumeMutable(); + uniq_exact_array_function->insertResultInto(place, *mutable_column, nullptr); + + return (*result_column)[0]; +} + +AggregateFunctionPtr createAggregateFunctionMin( + const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) +{ + return AggregateFunctionPtr(createAggregateFunctionSingleValue(name, argument_types, parameters, settings)); +} + +Field Min::executeOnColumn(const ColumnPtr & column, const DataTypePtr & type) +{ + String name = "min"; + DataTypes argument_types(1); + argument_types[0] = type; + Array parameters; + + AggregateFunctionPtr nested_function = createAggregateFunctionMin(name, transformArguments(argument_types), parameters, nullptr); + AggregateFunctionPtr aggregate_function = type->isNullable() + ? std::make_shared>(nested_function, argument_types, parameters) + : nested_function; + + size_t total_size_of_aggregate_states = 0; /// The total size of the row from the aggregate functions. + // add info to track alignment requirement + // If there are states whose alignment are v1, ..vn, align_aggregate_states will be max(v1, ... vn) + size_t align_aggregate_states = 1; + total_size_of_aggregate_states = aggregate_function->sizeOfData(); + align_aggregate_states = std::max(align_aggregate_states, aggregate_function->alignOfData()); + + std::shared_ptr aggregates_pool = std::make_shared(); /// The pool that is currently used for allocation. + AggregateDataPtr place = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + try + { + /** An exception may occur if there is a shortage of memory. + * In order that then everything is properly destroyed, we "roll back" some of the created states. + * The code is not very convenient. + */ + aggregate_function->create(place); + } + catch (...) 
+ { + aggregate_function->destroy(place); + throw Exception("Cannot allocate memory", ErrorCodes::CANNOT_ALLOCATE_MEMORY); + } + size_t rows = column->size(); + ColumnRawPtrs column_ptrs; + column_ptrs.emplace_back(column.get()); + const IColumn ** batch_arguments = column_ptrs.data(); + + aggregate_function->addBatchSinglePlace(rows, place, batch_arguments, nullptr); + + ColumnPtr result_column = type->createColumn(); + MutableColumnPtr mutable_column = result_column->assumeMutable(); + aggregate_function->insertResultInto(place, *mutable_column, nullptr); + return (*result_column)[0]; +} + +AggregateFunctionPtr createAggregateFunctionMax( + const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings * settings) +{ + return AggregateFunctionPtr(createAggregateFunctionSingleValue(name, argument_types, parameters, settings)); +} + +Field Max::executeOnColumn(const ColumnPtr & column, const DataTypePtr & type) +{ + String name = "max"; + DataTypes argument_types(1); + argument_types[0] = type; + Array parameters; + + AggregateFunctionPtr nested_function = createAggregateFunctionMax(name, transformArguments(argument_types), parameters, nullptr); + AggregateFunctionPtr aggregate_function = type->isNullable() + ? std::make_shared>(nested_function, argument_types, parameters) + : nested_function; + + size_t total_size_of_aggregate_states = 0; /// The total size of the row from the aggregate functions. + // add info to track alignment requirement + // If there are states whose alignment are v1, ..vn, align_aggregate_states will be max(v1, ... vn) + size_t align_aggregate_states = 1; + total_size_of_aggregate_states = aggregate_function->sizeOfData(); + align_aggregate_states = std::max(align_aggregate_states, aggregate_function->alignOfData()); + + std::shared_ptr aggregates_pool = std::make_shared(); /// The pool that is currently used for allocation. + AggregateDataPtr place = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + try + { + /** An exception may occur if there is a shortage of memory. + * In order that then everything is properly destroyed, we "roll back" some of the created states. + * The code is not very convenient. + */ + aggregate_function->create(place); + } + catch (...) 
+    {
+        aggregate_function->destroy(place);
+        throw Exception("Cannot allocate memory", ErrorCodes::CANNOT_ALLOCATE_MEMORY);
+    }
+
+    size_t rows = column->size();
+    ColumnRawPtrs column_ptrs;
+    column_ptrs.emplace_back(column.get());
+    const IColumn ** batch_arguments = column_ptrs.data();
+
+    aggregate_function->addBatchSinglePlace(rows, place, batch_arguments, nullptr);
+
+    ColumnPtr result_column = type->createColumn();
+    MutableColumnPtr mutable_column = result_column->assumeMutable();
+    aggregate_function->insertResultInto(place, *mutable_column, nullptr);
+    return (*result_column)[0];
+}
+
+AggregateFunctionPtr createAggregateFunctionCountByGranularity(
+    const std::string & name, const DataTypes & argument_types, const Array & params, const Settings *)
+{
+    if (argument_types.size() != 1)
+        throw Exception("Incorrect number of arguments for aggregate function " + name, ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
+
+    const IDataType & argument_type = *argument_types[0];
+    WhichDataType which(argument_type);
+
+    if (which.isNothing() || which.isArray() || which.isFunction() || which.isAggregateFunction() || which.isMap() || which.isBitmap64()
+        || which.isSet() || which.isTuple() || which.isInterval() || which.isDecimal() || which.isInt128() || which.isUInt128() || which.isDateOrDateTime())
+    {
+        throw Exception(
+            "argument of " + name
+                + " can not be "
+                  "(Nothing, Array, Function, "
+                  "AggregateFunction, Map, Bitmap64, "
+                  "Set, Tuple, Interval, "
+                  "Decimal, Int128, UInt128, DateOrDateTime)",
+            ErrorCodes::BAD_ARGUMENTS);
+    }
+    else if (which.isStringOrFixedString())
+    {
+        return std::make_shared<AggregateFunctionCountByGranularity<String>>(argument_types, params);
+    }
+    else if (which.isInt8())
+    {
+        return std::make_shared<AggregateFunctionCountByGranularity<Int8>>(argument_types, params);
+    }
+    else if (which.isUInt8() || which.isEnum8())
+    {
+        return std::make_shared<AggregateFunctionCountByGranularity<UInt8>>(argument_types, params);
+    }
+    else if (which.isInt16())
+    {
+        return std::make_shared<AggregateFunctionCountByGranularity<Int16>>(argument_types, params);
+    }
+    else if (which.isUInt16() || which.isEnum16())
+    {
+        return std::make_shared<AggregateFunctionCountByGranularity<UInt16>>(argument_types, params);
+    }
+    else if (which.isInt32())
+    {
+        return std::make_shared<AggregateFunctionCountByGranularity<Int32>>(argument_types, params);
+    }
+    else if (which.isUInt32() || which.isDateTime())
+    {
+        return std::make_shared<AggregateFunctionCountByGranularity<UInt32>>(argument_types, params);
+    }
+    else if (which.isInt64())
+    {
+        return std::make_shared<AggregateFunctionCountByGranularity<Int64>>(argument_types, params);
+    }
+    else if (which.isUInt64())
+    {
+        return std::make_shared<AggregateFunctionCountByGranularity<UInt64>>(argument_types, params);
+    }
+    // TODO can't support Int128 for now
+    // else if (which.isInt128())
+    // {
+    //     return std::make_shared<AggregateFunctionCountByGranularity<Int128>>(argument_types, params);
+    // }
+    else if (which.isUInt128())
+    {
+        return std::make_shared<AggregateFunctionCountByGranularity<UInt128>>(argument_types, params);
+    }
+    else if (which.isFloat32())
+    {
+        return std::make_shared<AggregateFunctionCountByGranularity<Float32>>(argument_types, params);
+    }
+    else if (which.isFloat64())
+    {
+        return std::make_shared<AggregateFunctionCountByGranularity<Float64>>(argument_types, params);
+    }
+    // TODO can't support Decimal for now
+    // else if (which.isDecimal32())
+    // {
+    //     return std::make_shared>(argument_types, params);
+    // }
+    // else if (which.isDecimal64() || which.isDateTime64())
+    // {
+    //     return std::make_shared>(argument_types, params);
+    // }
+    // else if (which.isDecimal128())
+    // {
+    //     return std::make_shared>(argument_types, params);
+    // }
+    else
+    {
+        return std::make_shared>(argument_types, params);
+    }
+
+    __builtin_unreachable();
+}
+
+ColumnPtr CountByGranularity::executeOnColumn(const ColumnPtr & column, const DataTypePtr & type)
+{
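+    /// Manual aggregation over a single sampled column:
+    ///   1. build the countByGranularity aggregate function for the column type,
+    ///   2. allocate its state in an Arena and create() it,
+    ///   3. feed every row with addBatchSinglePlace,
+    ///   4. materialize the state into a result column with insertResultInto.
+    /// PrewhereAdvisor::calcMarkFilterRatio then reads the per-value mark counts
+    /// from the second element of each tuple in the returned array column.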
+ String name = "countByGranularity"; + DataTypes argument_types(1); + argument_types[0] = recursiveRemoveLowCardinality(type); + Array parameters; + + AggregateFunctionPtr nested_function = createAggregateFunctionCountByGranularity(name, transformArguments(argument_types), parameters, nullptr); + AggregateFunctionPtr aggregate_function = argument_types[0]->isNullable() ? std::make_shared>(nested_function, argument_types, parameters) + : nested_function; + + size_t total_size_of_aggregate_states = 0; /// The total size of the row from the aggregate functions. + // add info to track alignment requirement + // If there are states whose alignment are v1, ..vn, align_aggregate_states will be max(v1, ... vn) + size_t align_aggregate_states = 1; + total_size_of_aggregate_states = aggregate_function->sizeOfData(); + align_aggregate_states = std::max(align_aggregate_states, aggregate_function->alignOfData()); + + std::shared_ptr aggregates_pool = std::make_shared(); /// The pool that is currently used for allocation. + AggregateDataPtr place = aggregates_pool->alignedAlloc(total_size_of_aggregate_states, align_aggregate_states); + try + { + /** An exception may occur if there is a shortage of memory. + * In order that then everything is properly destroyed, we "roll back" some of the created states. + * The code is not very convenient. + */ + aggregate_function->create(place); + } + catch (...) + { + aggregate_function->destroy(place); + throw Exception("Cannot allocate memory", ErrorCodes::CANNOT_ALLOCATE_MEMORY); + } + size_t rows = column->size(); + ColumnRawPtrs column_ptrs; + column_ptrs.emplace_back(recursiveRemoveLowCardinality(column).get()); + const IColumn ** batch_arguments = column_ptrs.data(); + + aggregate_function->addBatchSinglePlace(rows, place, batch_arguments, nullptr); + + ColumnPtr result_column = nested_function->getReturnType()->createColumn(); + MutableColumnPtr mutable_column = result_column->assumeMutable(); + aggregate_function->insertResultInto(place, *mutable_column, nullptr); + + return result_column; +} + +} diff --git a/programs/schema-advisor/Statistics.h b/programs/schema-advisor/Statistics.h new file mode 100644 index 00000000000..70ee858bd08 --- /dev/null +++ b/programs/schema-advisor/Statistics.h @@ -0,0 +1,33 @@ +#pragma once + +#include +#include + +namespace DB +{ +class UniExtract +{ +public: + Field executeOnColumn(const ColumnPtr & column, const DataTypePtr & type); + Field executeOnColumnArray(const ColumnPtr & column, const DataTypePtr & type); +}; + +class Min +{ +public: + Field executeOnColumn(const ColumnPtr & column, const DataTypePtr & type); +}; + +class Max +{ +public: + Field executeOnColumn(const ColumnPtr & column, const DataTypePtr & type); +}; + +class CountByGranularity +{ +public: + ColumnPtr executeOnColumn(const ColumnPtr & column, const DataTypePtr & type); +}; + +} diff --git a/programs/schema-advisor/TypeAdvisor.cpp b/programs/schema-advisor/TypeAdvisor.cpp new file mode 100644 index 00000000000..004425a9a0a --- /dev/null +++ b/programs/schema-advisor/TypeAdvisor.cpp @@ -0,0 +1,237 @@ +#include "TypeAdvisor.h" +#include "ColumnUsageExtractor.h" +#include "Core/Types.h" +#include "SampleColumnReader.h" +#include "Statistics.h" + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace DB +{ + +TypeAdvisor::TypeAdvisor( + MockEnvironment & env_, + const po::variables_map & options_, + const 
diff --git a/programs/schema-advisor/TypeAdvisor.cpp b/programs/schema-advisor/TypeAdvisor.cpp
new file mode 100644
index 00000000000..004425a9a0a
--- /dev/null
+++ b/programs/schema-advisor/TypeAdvisor.cpp
@@ -0,0 +1,237 @@
+#include "TypeAdvisor.h"
+#include "ColumnUsageExtractor.h"
+#include "Core/Types.h"
+#include "SampleColumnReader.h"
+#include "Statistics.h"
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace DB
+{
+
+TypeAdvisor::TypeAdvisor(
+    MockEnvironment & env_,
+    const po::variables_map & options_,
+    const ColumnsDescription & column_descs_,
+    std::string absolute_part_path_,
+    size_t sample_row_number_,
+    size_t max_threads_,
+    bool lc_only_,
+    Float64 scanned_count_threshold_for_lc_,
+    Float64 cardinality_ratio_threshold_for_lc_)
+    : env(env_)
+    , options(options_)
+    , column_descs(column_descs_)
+    , absolute_part_path(absolute_part_path_ + "/")
+    , sample_row_number(sample_row_number_)
+    , max_threads(max_threads_)
+    , lc_only(lc_only_)
+    , scanned_count_threshold_for_lc(scanned_count_threshold_for_lc_)
+    , cardinality_ratio_threshold_for_lc(cardinality_ratio_threshold_for_lc_)
+{
+    parseCodecCandidates();
+}
+
+void TypeAdvisor::parseCodecCandidates()
+{
+}
+
+DataTypePtr decayDataType(DataTypePtr type)
+{
+    if (type->isNullable())
+        return dynamic_cast<const DataTypeNullable *>(type.get())->getNestedType();
+    return type;
+}
+
+TypeAdvisor::TypeRecommendation buildTypeRecommendation(std::string column_name, std::string origin_type, bool is_type_nullable, std::string optimized_type)
+{
+    return {column_name, is_type_nullable ? "Nullable(" + origin_type + ")" : origin_type, is_type_nullable ? "Nullable(" + optimized_type + ")" : optimized_type};
+}
+
+void TypeAdvisor::adviseLowCardinality()
+{
+    auto context = createContext(options, env);
+    auto queries = loadQueries(options);
+
+    ColumnUsageExtractor extractor(context, max_threads);
+    auto column_usages = extractor.extractColumnUsages(queries);
+    auto type_usages = extractor.extractUsageForLowCardinality(column_usages);
+
+    LOG_DEBUG(getLogger("TypeAdvisor"), "Extracted {} candidate columns (thresholds: {}, {})", type_usages.size(), scanned_count_threshold_for_lc, cardinality_ratio_threshold_for_lc);
+
+    UniExtract uniq_extract;
+    for (const auto & type_usage : type_usages)
+    {
+        if (type_usage.second < queries.size() * scanned_count_threshold_for_lc)
+        {
+            LOG_DEBUG(getLogger("TypeAdvisor"), "Do not recommend LowCardinality for column {}: scanned count is {}", type_usage.first.column, type_usage.second);
+            continue;
+        }
+
+        auto column_info = type_usage.first;
+        if (isMapImplicitKey(column_info.column))
+            continue;
+
+        auto storage = MockEnvironment::tryGetLocalTable(column_info.database, column_info.table, context);
+        if (!storage)
+            throw Exception(column_info.database + "(" + column_info.table + "): can not find local table.", ErrorCodes::NOT_FOUND_EXPECTED_DATA_PART);
+
+        auto metadata = storage->getInMemoryMetadataCopy();
+        auto column_and_type = metadata.getColumns().tryGetColumn(GetColumnsOptions::Kind::AllPhysical, column_info.column);
+        if (!column_and_type)
+            continue;
+
+        auto column_type = column_and_type->type;
+        if (column_type->getTypeId() == TypeIndex::LowCardinality || !isString(decayDataType(column_type)))
+            continue;
+
+        SampleColumnReader reader(absolute_part_path + "/", 0, sample_row_number);
+        ColumnPtr column;
+        try
+        {
+            column = reader.readColumn({type_usage.first.column, column_type});
+        }
+        catch (...)
+        {
+            /// Just skip the column if it can't be read.
+            LOG_DEBUG(
+                getLogger("TypeAdvisor"),
+                "Can't read column file " + type_usage.first.column + " from table " + column_info.database + "."
+                    + column_info.table + ", error message: " + getCurrentExceptionMessage(true));
+            continue;
+        }
+
+        // Check the sampled NDV against the cardinality-ratio threshold.
+        size_t ndv = uniq_extract.executeOnColumn(column, column_type).get();
+
+        if (ndv > sample_row_number * cardinality_ratio_threshold_for_lc)
+        {
+            LOG_DEBUG(getLogger("TypeAdvisor"), "Do not recommend LowCardinality for column {}, scanned count is {}, ndv is {}", type_usage.first.column, type_usage.second, ndv);
+            continue;
+        }
+
+        LOG_DEBUG(getLogger("TypeAdvisor"), "Recommend LowCardinality for column {}, scanned count is {}, ndv is {}", type_usage.first.column, type_usage.second, ndv);
+
+        type_recommendations.push_back({column_and_type->name
+            , column_and_type->type->isNullable() ? "Nullable(String)" : "String"
+            , column_and_type->type->isNullable() ? "LowCardinality(Nullable(String))" : "LowCardinality(String)"});
+    }
+}
+
+void TypeAdvisor::execute()
+{
+    if (lc_only)
+        return adviseLowCardinality();
+
+    UniExtract uniqExtractFunc;
+    Max maxFunc;
+    Min minFunc;
+    SampleColumnReader reader(absolute_part_path, 0, sample_row_number);
+    for (const NameAndTypePair & name_and_type : column_descs.getOrdinary())
+    {
+        auto decayed_type = decayDataType(name_and_type.type);
+
+        bool is_string = decayed_type->getTypeId() == TypeIndex::String;
+        bool is_float_64 = decayed_type->getTypeId() == TypeIndex::Float64;
+        bool is_unsigned_integer = decayed_type->isValueRepresentedByUnsignedInteger() && decayed_type->isSummable();
+        bool is_integer = decayed_type->isValueRepresentedByInteger() && decayed_type->isSummable();
+
+        if (is_string)
+        {
+            ColumnPtr column = reader.readColumn(name_and_type);
+            auto ndv = uniqExtractFunc.executeOnColumn(column, name_and_type.type).get();
+            if (ndv < ADVISOR_LOW_CARDINALITY_NDV_THRESHOLD)
+                type_recommendations.push_back({name_and_type.name
+                    , name_and_type.type->isNullable() ? "Nullable(String)" : "String"
+                    , name_and_type.type->isNullable() ? "LowCardinality(Nullable(String))" : "LowCardinality(String)"});
+        }
+        else if (is_float_64)
+        {
+            ColumnPtr column = reader.readColumn(name_and_type);
+            auto max = maxFunc.executeOnColumn(column, name_and_type.type).get();
+            auto min = minFunc.executeOnColumn(column, name_and_type.type).get();
+            if (min >= std::numeric_limits::min() && max <= std::numeric_limits::max())
+                type_recommendations.push_back({name_and_type.name
+                    , name_and_type.type->isNullable() ? "Nullable(Float64)" : "Float64"
+                    , name_and_type.type->isNullable() ?
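// A caveat worth noting on the Float64 -> Float32 range check above: for floating-point
// types, std::numeric_limits<Float32>::min() is the smallest positive normal value, not the
// most negative representable one, so a conservative standalone version of the bounds test
// would be (sketch, not the patch's code):
//
//     bool fitsInFloat32(Float64 lo, Float64 hi)
//     {
//         return lo >= std::numeric_limits<Float32>::lowest()
//             && hi <= std::numeric_limits<Float32>::max();
//     }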
"Nullable(Float32)" : "Float32"}); + } + else if (is_unsigned_integer) + { + if (decayed_type->getTypeId() == TypeIndex::UInt8) /// skip UInt8 + continue; + + ColumnPtr column = reader.readColumn(name_and_type); + auto max = maxFunc.executeOnColumn(column, name_and_type.type).get(); + if (max <= std::numeric_limits::max()) + type_recommendations.push_back(buildTypeRecommendation(name_and_type.name, decayed_type->getName(), name_and_type.type->isNullable(), "UInt8")); + else if (max <= std::numeric_limits::max()) + type_recommendations.push_back(buildTypeRecommendation(name_and_type.name, decayed_type->getName(), name_and_type.type->isNullable(), "UInt16")); + else if (max <= std::numeric_limits::max()) + type_recommendations.push_back(buildTypeRecommendation(name_and_type.name, decayed_type->getName(), name_and_type.type->isNullable(), "UInt32")); + } + else if (is_integer) + { + if (decayed_type->getTypeId() == TypeIndex::Int8) /// skip Int8 + continue; + + ColumnPtr column = reader.readColumn(name_and_type); + auto max = maxFunc.executeOnColumn(column, name_and_type.type).get(); + auto min = minFunc.executeOnColumn(column, name_and_type.type).get(); + if (min >= std::numeric_limits::min() && max <= std::numeric_limits::max()) + type_recommendations.push_back(buildTypeRecommendation(name_and_type.name, decayed_type->getName(), name_and_type.type->isNullable(), "Int8")); + else if (min >= std::numeric_limits::min() && max <= std::numeric_limits::max()) + type_recommendations.push_back(buildTypeRecommendation(name_and_type.name, decayed_type->getName(), name_and_type.type->isNullable(), "Int16")); + else if (min >= std::numeric_limits::min() && max <= std::numeric_limits::max()) + type_recommendations.push_back(buildTypeRecommendation(name_and_type.name, decayed_type->getName(), name_and_type.type->isNullable(), "Int32")); + } + /// TODO(weiping.qw): add more rules + } +} + +void TypeAdvisor::serializeJson(WriteBuffer & buf, [[maybe_unused]] bool verbose) +{ + bool first = true; + writeString("\"type\":[", buf); + for (const auto & entry : type_recommendations) + { + if (first) + first = false; + else + writeString(",", buf); + std::string column_name = entry.column_name; + writeString("{\"name\":\"", buf); + writeString(column_name, buf); + writeString("\",", buf); + std::string column_origin_type = entry.origin_type; + std::string column_optimized_type = entry.optimized_type; + writeString("\"origin\":\"", buf); + writeString(column_origin_type, buf); + writeString("\",", buf); + writeString("\"optimized\":\"", buf); + writeString(column_optimized_type, buf); + writeString("\"}", buf); + } + writeString("]", buf); +} + +} diff --git a/programs/schema-advisor/TypeAdvisor.h b/programs/schema-advisor/TypeAdvisor.h new file mode 100644 index 00000000000..88e10d60b3b --- /dev/null +++ b/programs/schema-advisor/TypeAdvisor.h @@ -0,0 +1,71 @@ +#pragma once + +#include + +#include "SchemaAdvisorHelpers.h" + +#include +#include +#include + +namespace DB +{ + +namespace po = boost::program_options; + +class TypeAdvisor +{ +public: + struct TypeRecommendation + { + TypeRecommendation( + std::string column_name_, + std::string origin_type_, + std::string optimized_type_ + ) : column_name(column_name_) + , origin_type(origin_type_) + , optimized_type(optimized_type_) {} + + std::string column_name; + std::string origin_type; + std::string optimized_type; + }; + +private: + static constexpr const size_t ADVISOR_LOW_CARDINALITY_NDV_THRESHOLD = 65535; + + MockEnvironment & env; + po::variables_map 
options; + const ColumnsDescription column_descs; + Codecs codecs_to_compare; + std::string absolute_part_path; + const size_t sample_row_number; + [[maybe_unused]] const size_t max_threads; + std::vector type_recommendations; + const bool lc_only; + const Float64 scanned_count_threshold_for_lc; + const Float64 cardinality_ratio_threshold_for_lc; + + void parseCodecCandidates(); + + void adviseLowCardinality(); + +public: + TypeAdvisor( + MockEnvironment & env_, + const po::variables_map & options_, + const ColumnsDescription & column_descs_, + std::string absolute_part_path_, + size_t sample_row_number_, + size_t max_threads_, + bool lc_only_, + Float64 scanned_count_threshold_for_lc_, + Float64 cardinality_ratio_threshold_for_lc_); + + virtual ~TypeAdvisor() = default; + + void execute(); + void serializeJson(WriteBuffer & buf, bool verbose = false); +}; + +} diff --git a/programs/schema-advisor/clickhouse-schema-advisor.cpp b/programs/schema-advisor/clickhouse-schema-advisor.cpp new file mode 100644 index 00000000000..556fd3a1bdf --- /dev/null +++ b/programs/schema-advisor/clickhouse-schema-advisor.cpp @@ -0,0 +1,6 @@ +int mainEntryClickHouseSchemaAdvisor(int argc, char ** argv); + +int main(int argc_, char ** argv_) +{ + return mainEntryClickHouseSchemaAdvisor(argc_, argv_); +} diff --git a/programs/server/Server.cpp b/programs/server/Server.cpp index 849af98506b..c01e8dddeca 100644 --- a/programs/server/Server.cpp +++ b/programs/server/Server.cpp @@ -1183,6 +1183,17 @@ int Server::main(const std::vector & /*args*/) global_context->setVWCustomizedSettings(std::make_shared(config)); } + if (global_context->getIsRestrictSettingsToWhitelist()) + { + auto setting_names = getMultipleValuesFromConfig(*config, "tenant_whitelist_settings", "name"); + std::unordered_set setting_names_set; + for (auto& setting : setting_names) + { + setting_names_set.emplace(setting); + } + global_context->setExtraRestrictSettingsToWhitelist(std::move(setting_names_set)); + } + if (auto catalog = global_context->tryGetCnchCatalog()) catalog->loadFromConfig("catalog_service", *config); }, diff --git a/src/Advisor/Advisor.cpp b/src/Advisor/Advisor.cpp index b04696ff8f1..50e15e47da5 100644 --- a/src/Advisor/Advisor.cpp +++ b/src/Advisor/Advisor.cpp @@ -1,8 +1,10 @@ #include #include -#include +#include +#include #include +#include #include #include #include @@ -26,19 +28,25 @@ WorkloadAdvisors Advisor::getAdvisors(ASTAdviseQuery::AdvisorType type) { case ASTAdviseQuery::AdvisorType::ALL: return { - std::make_shared(), + std::make_shared(), std::make_shared(), + std::make_shared(), std::make_shared(MaterializedViewAdvisor::OutputType::PROJECTION, true, true), - std::make_shared(MaterializedViewAdvisor::OutputType::MATERIALIZED_VIEW, true, false)}; + std::make_shared(MaterializedViewAdvisor::OutputType::MATERIALIZED_VIEW, true, true)}; + case ASTAdviseQuery::AdvisorType::ORDER_BY: - return {std::make_shared()}; - case ASTAdviseQuery::AdvisorType::DISTRIBUTED_BY: + return {std::make_shared()}; + case ASTAdviseQuery::AdvisorType::CLUSTER_BY: return {std::make_shared()}; + case ASTAdviseQuery::AdvisorType::DATA_TYPE: + return {std::make_shared()}; case ASTAdviseQuery::AdvisorType::MATERIALIZED_VIEW: - return {std::make_shared(MaterializedViewAdvisor::OutputType::MATERIALIZED_VIEW, true, false)}; + return {std::make_shared(MaterializedViewAdvisor::OutputType::MATERIALIZED_VIEW, true, true)}; case ASTAdviseQuery::AdvisorType::PROJECTION: return {std::make_shared(MaterializedViewAdvisor::OutputType::PROJECTION, true, 
true)}; - } + case ASTAdviseQuery::AdvisorType::COLUMN_USAGE: + return {std::make_shared()}; + } } WorkloadAdvises Advisor::analyze(const std::vector & queries_, ContextPtr context_) diff --git a/src/Advisor/Advisor.h b/src/Advisor/Advisor.h index 184dd8c0a2e..2172906ef00 100644 --- a/src/Advisor/Advisor.h +++ b/src/Advisor/Advisor.h @@ -21,7 +21,7 @@ class Advisor } WorkloadAdvises analyze(const std::vector & queries, ContextPtr context); - private: +private: static WorkloadAdvisors getAdvisors(ASTAdviseQuery::AdvisorType type); ASTAdviseQuery::AdvisorType type; diff --git a/src/Advisor/AdvisorContext.cpp b/src/Advisor/AdvisorContext.cpp index 67faebe3188..32c4d1e56aa 100644 --- a/src/Advisor/AdvisorContext.cpp +++ b/src/Advisor/AdvisorContext.cpp @@ -19,7 +19,7 @@ AdvisorContext AdvisorContext::buildFrom(ContextMutablePtr session_context, Work ColumnUsages column_usages = buildColumnUsages(queries); SignatureUsages signature_usages = buildSignatureUsages(queries, session_context); -std::unordered_map query_id_to_query; + std::unordered_map query_id_to_query; for (const auto & query : queries) query_id_to_query[query->getQueryId()] = query; @@ -27,7 +27,7 @@ std::unordered_map query_id_to_query; session_context, tables, queries, -std::move(query_id_to_query), + std::move(query_id_to_query), query_thread_pool, std::move(column_usages), std::move(signature_usages)); diff --git a/src/Advisor/AdvisorContext.h b/src/Advisor/AdvisorContext.h index 6fe854acd8b..c5c0c2332d1 100644 --- a/src/Advisor/AdvisorContext.h +++ b/src/Advisor/AdvisorContext.h @@ -19,7 +19,7 @@ class AdvisorContext ContextMutablePtr session_context; WorkloadTables & tables; WorkloadQueries & queries; -std::unordered_map query_id_to_query; + std::unordered_map query_id_to_query; ThreadPool & query_thread_pool; const ColumnUsages column_usages; const SignatureUsages signature_usages; @@ -34,16 +34,16 @@ std::unordered_map query_id_to_query; private: AdvisorContext( ContextMutablePtr _session_context, - WorkloadTables & _tables, - WorkloadQueries & _queries, -std::unordered_map _query_id_to_query, - ThreadPool & _query_thread_pool, - ColumnUsages _column_usages, - SignatureUsages _signature_usages) + WorkloadTables & _tables, + WorkloadQueries & _queries, + std::unordered_map _query_id_to_query, + ThreadPool & _query_thread_pool, + ColumnUsages _column_usages, + SignatureUsages _signature_usages) : session_context(_session_context) , tables(_tables) , queries(_queries) -, query_id_to_query(std::move(_query_id_to_query)) + , query_id_to_query(std::move(_query_id_to_query)) , query_thread_pool(_query_thread_pool) , column_usages(std::move(_column_usages)) , signature_usages(std::move(_signature_usages)) diff --git a/src/Advisor/ColumnUsage.cpp b/src/Advisor/ColumnUsage.cpp index 9cb58a98221..5799879d4e6 100644 --- a/src/Advisor/ColumnUsage.cpp +++ b/src/Advisor/ColumnUsage.cpp @@ -3,10 +3,12 @@ #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -52,13 +54,23 @@ namespace return function.name == "in" && function.arguments && function.arguments->children.size() == 2; } - std::optional> extractPredicateUsage(ConstASTPtr expression) + ASTPtr unwarpMonotonicFunction(ASTPtr expr) { - auto fun = dynamic_pointer_cast(expression); + if (auto * function = expr->as()) + { + if (function->arguments->children.size() == 1) + return unwarpMonotonicFunction(function->arguments->children[0]); + } + return expr; + }; + + std::optional> extractPredicateUsage(ConstASTPtr predicate) + { + 
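// Illustration of the unwrapping above: with unwarpMonotonicFunction applied to the
// left-hand side, a predicate such as
//
//     toDate(event_time) >= '2024-01-01'
//
// is attributed to the underlying column `event_time` -- any chain of single-argument
// (assumed-monotonic) wrappers is peeled off recursively before the identifier lookup
// below. The column name here is illustrative only.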
auto fun = dynamic_pointer_cast(predicate); if (!fun || !fun->arguments || fun->arguments->children.size() != 2) return std::nullopt; - auto identifier = dynamic_pointer_cast(fun->arguments->children[0]); - if (!identifier) + auto left = unwarpMonotonicFunction(fun->arguments->children[0]); + auto identifier = dynamic_pointer_cast(left); if (!identifier) return std::nullopt; const std::string & symbol = identifier->name(); @@ -119,6 +131,9 @@ class ColumnUsageVisitor : public PlanNodeVisitor void visitAggregatingNode(AggregatingNode & node, ColumnUsages & column_usages) override; void visitCTERefNode(CTERefNode & node, ColumnUsages & column_usages) override; + void extractFilterUsages(ConstASTPtr expr, PlanNodePtr, ColumnUsages & column_usages); + void extractArraySetFunctions(ConstASTPtr expression, const PlanNodePtr & node, ColumnUsages & column_usages); + private: std::unordered_map symbol_to_table_column_map; std::unordered_set visited_ctes; @@ -154,6 +169,19 @@ size_t ColumnUsageInfo::getFrequency(ColumnUsageType type, bool only_source_tabl return freq; } +std::unordered_map ColumnUsageInfo::getFrequencies(bool only_source_table) const { + std::unordered_map res; + for (const auto & item : usages_only_source_table) { + res[item.first] += 1; + } + if (!only_source_table) { + for (const auto & item : usages_non_source_table) { + res[item.first] += 1; + } + } + return res; +} + std::vector ColumnUsageInfo::getUsages(ColumnUsageType type, bool only_source_table) const { std::vector res{}; @@ -195,11 +223,34 @@ void ColumnUsageVisitor::visitTableScanNode(TableScanNode & node, ColumnUsages & auto table_step = dynamic_pointer_cast(node.getStep()); const StorageID & storage_id = table_step->getStorageID(); + std::unordered_map table_columns; + + for (const auto & column_name : table_step->getRequiredColumns()) + { + QualifiedColumnName column{storage_id.getDatabaseName(), storage_id.getTableName(), column_name}; + table_columns.insert_or_assign(column_name, ColumnNameWithSourceTableFlag{column, true}); + } + + // extract usages + symbol_to_table_column_map.swap(table_columns); + for (const auto & column_name : table_step->getRequiredColumns()) + addUsage(column_usages, column_name, ColumnUsageType::SCANNED, node.shared_from_this()); + + if (table_step->getPrewhere()) + extractFilterUsages(table_step->getPrewhere(), node.shared_from_this(), column_usages); + + // for (auto [output, expr] : table_step->getIndexExpressions()) + // extractFilterUsages(expr, node.shared_from_this(), column_usages); + + for (auto [output, expr] : table_step->getInlineExpressions()) + extractFilterUsages(expr, node.shared_from_this(), column_usages); + + symbol_to_table_column_map.swap(table_columns); + for (const auto & [column_name, alias] : table_step->getColumnAlias()) { QualifiedColumnName column{storage_id.getDatabaseName(), storage_id.getTableName(), column_name}; - symbol_to_table_column_map.emplace(alias, ColumnNameWithSourceTableFlag{column, true}); - addUsage(column_usages, alias, ColumnUsageType::SCANNED, node.shared_from_this()); + symbol_to_table_column_map.insert_or_assign(alias, ColumnNameWithSourceTableFlag{column, true}); } } @@ -207,12 +258,44 @@ void ColumnUsageVisitor::visitFilterNode(FilterNode & node, ColumnUsages & colum { processChildren(node, column_usages); auto filter_step = dynamic_pointer_cast(node.getStep()); - for (const ConstASTPtr & expression : PredicateUtils::extractConjuncts(filter_step->getFilter())) + extractFilterUsages(filter_step->getFilter(), node.shared_from_this(), 
column_usages); +} + +void ColumnUsageVisitor::extractFilterUsages(ConstASTPtr expr, PlanNodePtr node, ColumnUsages & column_usages) +{ + for (const auto & expression : PredicateUtils::extractConjuncts(expr)) { auto usage_opt = extractPredicateUsage(expression); if (usage_opt.has_value()) - addUsage(column_usages, usage_opt.value().first, usage_opt.value().second, node.shared_from_this(), expression); + addUsage(column_usages, usage_opt.value().first, usage_opt.value().second, node, expression); + else + { + auto names = SymbolsExtractor::extract(expression); + for (const auto & name : names) + { + addUsage(column_usages, name, ColumnUsageType::OTHER_PREDICATE, node, expression); + } + } + } + extractArraySetFunctions(expr, node, column_usages); +} + +void ColumnUsageVisitor::extractArraySetFunctions(ConstASTPtr expression, const PlanNodePtr & node, ColumnUsages & column_usages) +{ + auto function = dynamic_pointer_cast(expression); + if (const auto * func = expression->as()) + { + if (!func->arguments || func->arguments->children.empty()) return; + auto * ident = func->arguments->children[0]->as(); + if (ident && BitmapIndexHelper::isArraySetFunctions(func->name)) + { + addUsage(column_usages, ident->name(), ColumnUsageType::ARRAY_SET_FUNCTION, node, expression); + return; + } } + + for (const auto & child : expression->children) + extractArraySetFunctions(child, node, column_usages); } void ColumnUsageVisitor::visitJoinNode(JoinNode & node, ColumnUsages & column_usages) @@ -256,8 +339,10 @@ void ColumnUsageVisitor::visitProjectionNode(ProjectionNode & node, ColumnUsages { auto it = symbol_to_table_column_map.find(identifier->name()); if (it != symbol_to_table_column_map.end()) - symbol_to_table_column_map.emplace(out_symbol, it->second); + symbol_to_table_column_map.insert_or_assign(out_symbol, it->second); } + + extractArraySetFunctions(in_ast, node.shared_from_this(), column_usages); } } @@ -286,4 +371,27 @@ void ColumnUsageVisitor::visitCTERefNode(CTERefNode & node, ColumnUsages & colum VisitorUtil::accept(cte_info.getCTEs().at(cte_id), *this, column_usages); } +String toString(ColumnUsageType type) +{ + switch (type) { + case ColumnUsageType::SCANNED: + return "Scanned"; + case ColumnUsageType::EQUI_JOIN: + return "EquiJoin"; + case ColumnUsageType::NON_EQUI_JOIN: + return "NonEquiJoin"; + case ColumnUsageType::GROUP_BY: + return "GroupBy"; + case ColumnUsageType::EQUALITY_PREDICATE: + return "EqualityPredicate"; + case ColumnUsageType::IN_PREDICATE: + return "InPredicate"; + case ColumnUsageType::RANGE_PREDICATE: + return "RangePredicate"; + case ColumnUsageType::ARRAY_SET_FUNCTION: + return "ArraySetFunction"; + case ColumnUsageType::OTHER_PREDICATE: + return "OtherPredicate"; + } +} } diff --git a/src/Advisor/ColumnUsage.h b/src/Advisor/ColumnUsage.h index 3c3e670febc..27f097a53d0 100644 --- a/src/Advisor/ColumnUsage.h +++ b/src/Advisor/ColumnUsage.h @@ -23,9 +23,12 @@ enum class ColumnUsageType EQUALITY_PREDICATE, // columns in "= literal" filters IN_PREDICATE, // columns in "in list" filters RANGE_PREDICATE, // columns in "> literal" or "< literal" filters + ARRAY_SET_FUNCTION, // columns in "has" or "arraySetCheck" OTHER_PREDICATE, // columns in "column ???" 
filters }; +String toString(ColumnUsageType type); + struct ColumnUsage { ColumnUsageType type; @@ -42,6 +45,7 @@ class ColumnUsageInfo void update(ColumnUsage usage, bool is_source_table); size_t getFrequency(ColumnUsageType type, bool only_source_table = false) const; + std::unordered_map getFrequencies(bool only_source_table = false) const; std::vector getUsages(ColumnUsageType type, bool only_source_table = false) const; private: diff --git a/src/Advisor/Rules/ColumnUsageAdvise.cpp b/src/Advisor/Rules/ColumnUsageAdvise.cpp new file mode 100644 index 00000000000..3fcc62a3441 --- /dev/null +++ b/src/Advisor/Rules/ColumnUsageAdvise.cpp @@ -0,0 +1,73 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace DB +{ + +class ColumnUsageAdvise : public IWorkloadAdvise +{ +public: + ColumnUsageAdvise(QualifiedTableName table_, String column_, std::vector> candidates_) + : table(std::move(table_)), column(std::move(column_)), candidates(std::move(candidates_)) + { + } + + String apply(WorkloadTables &) override { return "not implement"; } + + QualifiedTableName getTable() override { return table; } + std::optional getColumnName() override { return column; } + String getAdviseType() override { return "Column Usage"; } + String getOriginalValue() override { return ""; } + String getOptimizedValue() override { return ""; } + double getBenefit() override { return 0; } + std::vector> getCandidates() override { return candidates; } + +private: + QualifiedTableName table; + String column; + std::vector> candidates; +}; + +WorkloadAdvises ColumnUsageAdvisor::analyze(AdvisorContext & context) const +{ + std::map> column_usage_by_table; + for (const auto & [qualified_column, metrics] : context.column_usages) + { + for (const auto & [type, count] : metrics.getFrequencies(true)) + { + column_usage_by_table[qualified_column][toString(type)] += count; + } + } + + WorkloadAdvises res; + for (const auto & [table, column_freq] : column_usage_by_table) + { + std::vector> sorted_freq{column_freq.begin(), column_freq.end()}; + std::sort(sorted_freq.begin(), sorted_freq.end(), [](const auto & p1, const auto & p2) { + // enforce unique ordering + if (p1.second == p2.second) + return p1.first < p2.first; + return p1.second < p2.second; + }); + + res.emplace_back(std::make_shared(table.getQualifiedTable(), table.column, sorted_freq)); + } + return res; +} + +} diff --git a/src/Advisor/Rules/ColumnUsageAdvise.h b/src/Advisor/Rules/ColumnUsageAdvise.h new file mode 100644 index 00000000000..18159501406 --- /dev/null +++ b/src/Advisor/Rules/ColumnUsageAdvise.h @@ -0,0 +1,22 @@ +#pragma once + +#include +#include +#include +#include +#include + +namespace DB +{ + +class ColumnUsageAdvisor : public IWorkloadAdvisor +{ +public: + String getName() const override { return "ColumnUsageAdvisor"; } + WorkloadAdvises analyze(AdvisorContext & context) const override; + +private: + // Poco::Logger * log = getLogger("OrderByKeyAdvisor"); +}; + +} diff --git a/src/Advisor/Rules/DataTypeAdvise.cpp b/src/Advisor/Rules/DataTypeAdvise.cpp new file mode 100644 index 00000000000..d14512374e5 --- /dev/null +++ b/src/Advisor/Rules/DataTypeAdvise.cpp @@ -0,0 +1,174 @@ +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "Interpreters/StorageID.h" +#include +#include +#include + +#include + +namespace DB +{ + +namespace ErrorCodes +{ + extern const int 
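// The sort in ColumnUsageAdvisor::analyze above orders usage types by ascending frequency
// and breaks ties on the type name, which keeps candidate lists deterministic across runs.
// The comparator boils down to (standalone sketch):
//
//     std::sort(freq.begin(), freq.end(), [](const auto & a, const auto & b) {
//         return a.second == b.second ? a.first < b.first : a.second < b.second;
//     });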
LOGICAL_ERROR; + extern const int UNKNOWN_TABLE; +} + +WorkloadAdvises DataTypeAdvisor::analyze(AdvisorContext & context) const +{ + WorkloadAdvises res; + for (auto & [table_name, workload_table] : context.tables.getTables()) + { + auto basic_stats = workload_table->getStats().getBasicStats(); + if (!basic_stats.get()) + throw Exception("Empty statistics when analyzing data types for table " + table_name.getFullName(), ErrorCodes::LOGICAL_ERROR); + + auto storage = DatabaseCatalog::instance().getTable(StorageID{table_name.database, table_name.table}, context.session_context); + auto columns = storage->getInMemoryMetadataPtr()->getColumns().getAll(); + auto extended_stats + = workload_table->getStats().collectExtendedStats(context.session_context, table_name.database, table_name.table, columns); + + if (!extended_stats.get()) + throw Exception("Empty extended statistics when analyzing data types for table " + table_name.getFullName(), ErrorCodes::LOGICAL_ERROR); + + const auto local_table = workload_table->getTablePtr(); + if (!dynamic_cast(local_table.get())) + throw Exception("Table " + table_name.getFullName() + " is not merge tree table", ErrorCodes::UNKNOWN_TABLE); + + UInt64 row_count = basic_stats->getRowCount(); + auto & table_stats = basic_stats->getSymbolStatistics(); + + for (auto & [column_name, symbol_stats] : table_stats) + { + if (symbol_stats->getNullsCount() == row_count) /// all nulls + continue; + + const auto & column_type = local_table->getInMemoryMetadataPtr()->getColumns().getPhysical(column_name).type; + auto decayed_type = Statistics::decayDataType(column_type); + + bool is_string = decayed_type->getTypeId() == TypeIndex::String || decayed_type->getTypeId() == TypeIndex::FixedString; + bool is_unsigned_integer = decayed_type->isValueRepresentedByUnsignedInteger() && decayed_type->isSummable(); + bool is_integer = decayed_type->isValueRepresentedByInteger() && decayed_type->isSummable(); + + String optimized_type; + if ((is_string && string_type_advisor->checkAndApply(local_table, symbol_stats, extended_stats->at(column_name), row_count, optimized_type)) + || (is_unsigned_integer && integer_type_advisor->checkAndApply(local_table, symbol_stats, decayed_type, true, optimized_type)) + || (is_integer && integer_type_advisor->checkAndApply(local_table, symbol_stats, decayed_type, false, optimized_type))) + { + res.emplace_back(std::make_shared(table_name, column_name, column_type->getName(), optimized_type)); + } + } + } + return res; +} + +bool DataTypeAdvisor::StringTypeAdvisor::checkAndApply(const StoragePtr & local_table, const SymbolStatisticsPtr & symbol_stats, WorkloadExtendedStat & extended_symbol_stats, UInt64 row_count, String & optimized_type) +{ + const auto & nulls_count = symbol_stats->getNullsCount(); + + /// check date + const Field & count_to_date = extended_symbol_stats[WorkloadExtendedStatsType::COUNT_TO_DATE_OR_NULL]; + bool all_date = !count_to_date.isNull() ? count_to_date.get() + nulls_count == row_count : false; + if (all_date) + { + optimized_type = nulls_count > 0 ? "Nullable(Date)" : "Date"; + return true; + } + + /// check date time + const Field & count_to_date_time = extended_symbol_stats[WorkloadExtendedStatsType::COUNT_TO_DATE_TIME_OR_NULL]; + bool all_date_time = !count_to_date_time.isNull() ? count_to_date_time.get() + nulls_count == row_count : false; + if (all_date_time) + { + optimized_type = nulls_count > 0 ? 
"Nullable(DateTime)" : "DateTime"; + return true; + } + + /// check uint32 + const Field & count_to_uint32 = extended_symbol_stats[WorkloadExtendedStatsType::COUNT_TO_UINT32_OR_NULL]; + bool all_unsigned_integer = !count_to_uint32.isNull() ? count_to_uint32.get() + nulls_count == row_count : false; + if (all_unsigned_integer) + { + optimized_type = nulls_count > 0 ? "Nullable(UInt32)" : "UInt32"; + return true; + } + + /// check float32 + const Field & count_to_float32 = extended_symbol_stats[WorkloadExtendedStatsType::COUNT_TO_FLOAT32_OR_NULL]; + bool all_float32 = !count_to_float32.isNull() ? count_to_float32.get() + nulls_count == row_count : false; + if (all_float32) + { + optimized_type = nulls_count > 0 ? "Nullable(Float32)" : "Float32"; + return true; + } + + /// check (global) low cardinality + const auto & ndv = symbol_stats->getNdv(); + const auto * merge_tree_storage = dynamic_cast(local_table.get()); + bool can_be_inside_low_cardinality = ndv < merge_tree_storage->getSettings()->low_cardinality_ndv_threshold && ndv + nulls_count != row_count; + if (can_be_inside_low_cardinality) + { + String nested_type = nulls_count > 0 ? "Nullable(String)" : "String"; + optimized_type = "LowCardinality(" + nested_type + ")"; + return true; + } + + /// check fixed string + const auto & avg_len = symbol_stats->getAvg(); + bool is_fixed_size = false; /// TODO + if (is_fixed_size) + { + optimized_type = nulls_count > 0 ? "Nullable(FixedString("+ toString(avg_len) +"))" : "FixedString(" + toString(avg_len) + ")"; + return true; + } + + return false; +} + +bool DataTypeAdvisor::IntegerTypeAdvisor::checkAndApply( + [[maybe_unused]] const StoragePtr & local_table, + const SymbolStatisticsPtr & symbol_stats, + const DataTypePtr & decayed_original_type, + bool is_unsigned_type, + String & optimized_type) +{ + const auto & nulls_count = symbol_stats->getNullsCount(); + const auto & max = symbol_stats->getMax(); + + DataTypePtr new_type = nullptr; + if (is_unsigned_type) + { + if (max <= std::numeric_limits::max()) new_type = std::make_shared(); + else if (max <= std::numeric_limits::max()) new_type = std::make_shared(); + else if (max <= std::numeric_limits::max()) new_type = std::make_shared(); + } + else + { + if (max <= std::numeric_limits::max()) new_type = std::make_shared(); + else if (max <= std::numeric_limits::max()) new_type = std::make_shared(); + else if (max <= std::numeric_limits::max()) new_type = std::make_shared(); + } + + if (new_type && new_type->getTypeId() < decayed_original_type->getTypeId()) + { + optimized_type = nulls_count > 0 ? 
"Nullable(" + new_type->getName() + ")" : new_type->getName(); + return true; + } + + return false; +} + +} diff --git a/src/Advisor/Rules/DataTypeAdvise.h b/src/Advisor/Rules/DataTypeAdvise.h new file mode 100644 index 00000000000..6fa686f90ce --- /dev/null +++ b/src/Advisor/Rules/DataTypeAdvise.h @@ -0,0 +1,77 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace DB +{ + +class DataTypeAdvisor : public IWorkloadAdvisor +{ +public: + DataTypeAdvisor() + { + string_type_advisor = std::unique_ptr(); + integer_type_advisor = std::unique_ptr(); + } + String getName() const override { return "DataTypeAdvisor"; } + WorkloadAdvises analyze(AdvisorContext & context) const override; + +private: + class StringTypeAdvisor + { + public: + bool checkAndApply(const StoragePtr & local_table, const SymbolStatisticsPtr & symbol_stats, WorkloadExtendedStat & extended_symbol_stats, UInt64 row_count, String & optimized_type); + }; + + class IntegerTypeAdvisor + { + public: + bool checkAndApply(const StoragePtr & local_table, const SymbolStatisticsPtr & symbol_stats, const DataTypePtr & decayed_original_type, bool is_unsigned_type, String & optimized_type); + }; + + std::unique_ptr string_type_advisor; + std::unique_ptr integer_type_advisor; +}; + +class DataTypeAdvise : public IWorkloadAdvise +{ +public: + DataTypeAdvise( + const QualifiedTableName & table_, const String & column_name_, const String & original_type_, const String & new_type_) + : table(table_), column_name(column_name_), original_type(original_type_), new_type(new_type_) + { + } + + String apply([[maybe_unused]] WorkloadTables & tables) override + { + /// TODO: modify ddl + return ""; + } + + QualifiedTableName getTable() override { return table; } + std::optional getColumnName() override { return {column_name}; } + String getAdviseType() override { return "Data Type"; } + String getOriginalValue() override { return original_type; } + String getOptimizedValue() override { return new_type; } + +private: + QualifiedTableName table; + String column_name; + String original_type; + String new_type; + + const LoggerPtr log = getLogger("DataTypeAdvise"); +}; + +} diff --git a/src/Advisor/Rules/ClusterKeyAdvise.cpp b/src/Advisor/Rules/OrderByKeyAdvise.cpp similarity index 51% rename from src/Advisor/Rules/ClusterKeyAdvise.cpp rename to src/Advisor/Rules/OrderByKeyAdvise.cpp index c5f7c68cc86..fbad81bdc0e 100644 --- a/src/Advisor/Rules/ClusterKeyAdvise.cpp +++ b/src/Advisor/Rules/OrderByKeyAdvise.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -13,20 +13,28 @@ #include #include #include +#include +#include namespace DB { -class ClusterKeyAdvise : public IWorkloadAdvise +class OrderByKeyAdvise : public IWorkloadAdvise { public: - ClusterKeyAdvise(QualifiedTableName table_, - ASTPtr original_order_by_, - String original_column_, - String new_column_, - double benefit_) - : table(std::move(table_)), original_order_by(original_order_by_) - , original_column(std::move(original_column_)), new_column(std::move(new_column_)), benefit(benefit_) + OrderByKeyAdvise( + QualifiedTableName table_, + ASTPtr original_order_by_, + String original_column_, + String new_column_, + double benefit_, + std::vector> candidates_) + : table(std::move(table_)) + , original_order_by(original_order_by_) + , original_column(std::move(original_column_)) + , new_column(std::move(new_column_)) + , benefit(benefit_) + , candidates(std::move(candidates_)) { } @@ -52,6 +60,7 @@ class 
ClusterKeyAdvise : public IWorkloadAdvise String getOriginalValue() override { return original_column; } String getOptimizedValue() override { return new_column; } double getBenefit() override { return benefit; } + std::vector> getCandidates() override { return candidates; } private: QualifiedTableName table; @@ -59,15 +68,18 @@ class ClusterKeyAdvise : public IWorkloadAdvise String original_column; String new_column; double benefit; + std::vector> candidates; }; -WorkloadAdvises ClusterKeyAdvisor::analyze(AdvisorContext & context) const +WorkloadAdvises OrderByKeyAdvisor::analyze(AdvisorContext & context) const { - std::unordered_map> column_usage_by_table; + std::unordered_map> column_usage_by_table; for (const auto & [qualified_column, metrics] : context.column_usages) { auto predicate_freq = metrics.getFrequency(ColumnUsageType::EQUALITY_PREDICATE, /*only_source_table=*/true) - + metrics.getFrequency(ColumnUsageType::RANGE_PREDICATE, /*only_source_table=*/true); + + metrics.getFrequency(ColumnUsageType::IN_PREDICATE, /*only_source_table=*/true) + + metrics.getFrequency(ColumnUsageType::RANGE_PREDICATE, /*only_source_table=*/true) + + metrics.getFrequency(ColumnUsageType::EQUI_JOIN, /*only_source_table=*/true) /* runtime_filter*/; if (predicate_freq > 0 && isValidColumn(qualified_column, context)) column_usage_by_table[qualified_column.getQualifiedTable()][qualified_column.column] += predicate_freq; } @@ -75,31 +87,33 @@ WorkloadAdvises ClusterKeyAdvisor::analyze(AdvisorContext & context) const WorkloadAdvises res{}; for (const auto & [table, column_freq] : column_usage_by_table) { - auto max_column_freq = *std::max_element(column_freq.begin(), column_freq.end(), - [](const auto & p1, const auto & p2) { - // enforce unique ordering - if (p1.second == p2.second) - return p1.first < p2.first; - return p1.second < p2.second; - }); + std::vector> sorted_freq{column_freq.begin(), column_freq.end()}; + std::sort(sorted_freq.begin(), sorted_freq.end(), [](const auto & p1, const auto & p2) { + // enforce unique ordering + if (p1.second == p2.second) + return p1.first > p2.first; + return p1.second > p2.second; + }); + if (sorted_freq.size() > 3) + sorted_freq.resize(3); auto optimized_table = context.tables.tryGetTable(table); auto order_by = optimized_table ? optimized_table->getOrderBy() : nullptr; auto original_column = (order_by) ? 
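// Unlike ColumnUsageAdvisor, the comparator above sorts candidates by *descending*
// predicate frequency (ties broken deterministically by name) and then truncates to the
// top three, so sorted_freq[0] below is the single recommended ORDER BY key while the
// truncated list is still surfaced through getCandidates().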
serializeAST(*order_by) : String{}; - res.emplace_back(std::make_shared(table, order_by, original_column, max_column_freq.first, max_column_freq.second)); + res.emplace_back( + std::make_shared(table, order_by, original_column, sorted_freq[0].first, sorted_freq[0].second, sorted_freq)); } return res; } -bool ClusterKeyAdvisor::isValidColumn(const QualifiedColumnName & column, AdvisorContext & context) const +bool OrderByKeyAdvisor::isValidColumn(const QualifiedColumnName & /*column*/, AdvisorContext & /*context*/) const { - auto column_type = context.getColumnType(column); - if (!column_type || !column_type->isComparable()) // sharding key only accepts integers - { - LOG_DEBUG(log, "Column {}.{}.{} is not a valid order by key, because it is not comparable", - column.database, column.table, column.column); - return false; - } + // auto column_type = context.getColumnType(column); + // if (!column_type || !column_type->isValueRepresentedByInteger()) // sharding key only accepts integers + // { + // LOG_DEBUG(log, "Column {}.{}.{} is not a valid sharding key, because it is not an integer type", column.database, column.table, column.column); + // return false; + // } return true; } diff --git a/src/Advisor/Rules/ClusterKeyAdvise.h b/src/Advisor/Rules/OrderByKeyAdvise.h similarity index 80% rename from src/Advisor/Rules/ClusterKeyAdvise.h rename to src/Advisor/Rules/OrderByKeyAdvise.h index 7a95002b19d..b27a488edb8 100644 --- a/src/Advisor/Rules/ClusterKeyAdvise.h +++ b/src/Advisor/Rules/OrderByKeyAdvise.h @@ -10,10 +10,10 @@ namespace DB { -class ClusterKeyAdvisor : public IWorkloadAdvisor +class OrderByKeyAdvisor : public IWorkloadAdvisor { public: - String getName() const override { return "ClusterKeyAdvisor"; } + String getName() const override { return "OrderByKeyAdvisor"; } WorkloadAdvises analyze(AdvisorContext & context) const override; private: diff --git a/src/Advisor/Rules/WorkloadAdvisor.h b/src/Advisor/Rules/WorkloadAdvisor.h index 9fb4bd44ffa..79a87bbc15f 100644 --- a/src/Advisor/Rules/WorkloadAdvisor.h +++ b/src/Advisor/Rules/WorkloadAdvisor.h @@ -78,7 +78,11 @@ class IWorkloadAdvise virtual String getOriginalValue() = 0; virtual String getOptimizedValue() = 0; virtual double getBenefit() { return 0.0; } - virtual std::vector getRelatedQueries() { return {}; } + virtual std::vector> getCandidates() { return {}; } + virtual std::vector getRelatedQueries() + { + return {}; + } }; } diff --git a/src/Advisor/SignatureUsage.cpp b/src/Advisor/SignatureUsage.cpp index fde4000cdb7..788f9ce2042 100644 --- a/src/Advisor/SignatureUsage.cpp +++ b/src/Advisor/SignatureUsage.cpp @@ -17,7 +17,7 @@ SignatureUsages buildSignatureUsages(const WorkloadQueries & queries, ContextPtr SignatureUsages signature_usages; for (const auto & query : queries) { - const auto & plan = query->getPlan(); + const auto & plan = query->getPlanBeforeCascades(); PlanSignatureProvider provider(plan->getCTEInfo(), context); auto plan_signatures = provider.computeSignatures(plan->getPlanNode()); for (const auto & [plan_node, signature] : plan_signatures) diff --git a/src/Advisor/WorkloadQuery.cpp b/src/Advisor/WorkloadQuery.cpp index bd161345b42..54f947edc1d 100644 --- a/src/Advisor/WorkloadQuery.cpp +++ b/src/Advisor/WorkloadQuery.cpp @@ -92,12 +92,23 @@ WorkloadQueryPtr WorkloadQuery::build(const std::string & query_id, const std::s context->applySettingsChanges( {DB::SettingChange("enable_sharding_optimize", "true"), // for colocated join DB::SettingChange("enable_runtime_filter", "false"), // for calculating 
signature - DB::SettingChange("enable_optimzier", "true")}); + DB::SettingChange("enable_optimzier", "true"), + DB::SettingChange("cte_mode", "INLINED")}); // for materialized view context->createPlanNodeIdAllocator(); context->createSymbolAllocator(); context->createOptimizerMetrics(); context->makeQueryContext(); + if (context->getSettingsRef().print_graphviz) + { + std::stringstream path; + path << context->getSettingsRef().graphviz_path.toString(); + path << "/" << query_id << ".sql"; + std::ofstream out(path.str()); + out << query; + out.close(); + } + // parse and plan const char * begin = query.data(); const char * end = begin + query.size(); @@ -120,6 +131,7 @@ WorkloadQueryPtr WorkloadQuery::build(const std::string & query_id, const std::s CardinalityEstimator::estimate(*query_plan, context); PlanCostMap costs = calculateCost(*query_plan, *context); + return std::make_unique( context, query_id, query, std::move(query_plan), std::move(plan_before_cascades), std::move(query_tables), std::move(costs)); } @@ -140,11 +152,10 @@ WorkloadQueries WorkloadQuery::build(const std::vector & queries, c { WorkloadQueryPtr workload_query = build("q" + std::to_string(i), query, from_context); res[i] = std::move(workload_query); - } catch (Exception & e) + } catch (...) { - LOG_WARNING(getLogger("WorkloadQuery"), - "failed to build query, reason: {}, sql: {}", - e.message(), query); + LOG_WARNING(getLogger("WorkloadQuery"),"failed to build query, reason: {}, sql: {}", + getCurrentExceptionMessage(true), query); } }); } @@ -156,7 +167,7 @@ WorkloadQueries WorkloadQuery::build(const std::vector & queries, c double WorkloadQuery::getOptimalCost(const TableLayout & table_layout) { -if (!root_group) + if (!root_group) { cascades_context = std::make_shared( query_context, @@ -180,6 +191,7 @@ if (!root_group) GroupId root_group_id = root_group->getGroupId(); CascadesOptimizer::optimize(root_group_id, *cascades_context, required_property); auto res = cascades_context->getMemo().getGroupById(root_group_id)->getBestExpression(required_property)->getCost(); + GraphvizPrinter::printMemo(cascades_context->getMemo(), root_group_id, query_context, "CascadesOptimizer-Memo-Graph"); return res; } diff --git a/src/Advisor/WorkloadQuery.h b/src/Advisor/WorkloadQuery.h index 71ea5a91a8c..88d8a7bb7e9 100644 --- a/src/Advisor/WorkloadQuery.h +++ b/src/Advisor/WorkloadQuery.h @@ -49,6 +49,7 @@ class WorkloadQuery : private boost::noncopyable } const std::string & getSQL() const { return sql; } const QueryPlanPtr & getPlan() const { return plan; } + const QueryPlanPtr & getPlanBeforeCascades() const { return plan_before_cascades; } const PlanCostMap & getCosts() const { return costs; } /* diff --git a/src/Advisor/WorkloadTable.h b/src/Advisor/WorkloadTable.h index 61137442608..13f306afbe3 100644 --- a/src/Advisor/WorkloadTable.h +++ b/src/Advisor/WorkloadTable.h @@ -48,6 +48,8 @@ class WorkloadTable { } + StoragePtr getTablePtr() const { return storage; } + ASTPtr getDDL() const { return create_table_ddl; } bool isOptimized() const { return optimized; } diff --git a/src/Advisor/WorkloadTableStats.h b/src/Advisor/WorkloadTableStats.h index 52baba82480..e02ccdd8f1e 100644 --- a/src/Advisor/WorkloadTableStats.h +++ b/src/Advisor/WorkloadTableStats.h @@ -37,8 +37,7 @@ class WorkloadTableStats const String & table_name, const NamesAndTypesList & columns); - PlanNodeStatisticsPtr basic_stats; - WorkloadExtendedStatsPtr extended_stats; + PlanNodeStatisticsPtr getBasicStats() { return basic_stats; } private: explicit 
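// With basic_stats/extended_stats moved behind getBasicStats() here, callers such as
// DataTypeAdvisor::analyze above go through the accessor instead of touching the cached
// statistics fields directly.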
WorkloadTableStats(PlanNodeStatisticsPtr basic_stats_)
@@ -47,6 +46,9 @@ class WorkloadTableStats
     {
     }
 
+    PlanNodeStatisticsPtr basic_stats;
+    WorkloadExtendedStatsPtr extended_stats;
+
     static const char * getStatsAggregation(const WorkloadExtendedStatsType & type)
     {
         switch (type)
diff --git a/src/Advisor/tests/gtest_column_usage.cpp b/src/Advisor/tests/gtest_column_usage.cpp
index e900eb31ca9..04b21868c9e 100644
--- a/src/Advisor/tests/gtest_column_usage.cpp
+++ b/src/Advisor/tests/gtest_column_usage.cpp
@@ -19,14 +19,16 @@ class ColumnUsageTest : public ::testing::Test
             " deptno UInt32 not null,"
             " name Nullable(String),"
             " salary Nullable(Float64),"
-            " commission Nullable(UInt32)"
-            ") ENGINE=CnchMergeTree() order by empid;");
+            " commission Nullable(UInt32),"
+            " history Array(UInt32)"
+            ") ENGINE=Memory();");
         tester->execute("CREATE TABLE IF NOT EXISTS depts("
             " deptno UInt32 not null,"
             " name Nullable(String)"
-            ") ENGINE=CnchMergeTree() order by deptno;");
+            ") ENGINE=Memory();");
     }
+    static void TearDownTestCase() { tester.reset(); }
 
     ColumnUsages buildColumnUsagesFromSQL(std::initializer_list sql_list)
     {
@@ -72,7 +74,7 @@ TEST_F(ColumnUsageTest, testSelect)
                                                     "select empid from emps"});
     auto select_usages = getColumnFrequencies(column_usages, ColumnUsageType::SCANNED, true);
     auto empid_column = QualifiedColumnName{tester->getDatabaseName(), "emps", "empid"};
-    EXPECT_EQ(select_usages.size(), 5);
+    EXPECT_EQ(select_usages.size(), 6);
     ASSERT_TRUE(select_usages.contains(empid_column));
     EXPECT_EQ(select_usages[empid_column], 2);
 }
@@ -106,9 +108,9 @@ TEST_F(ColumnUsageTest, testNestedJoin)
 
 TEST_F(ColumnUsageTest, testNestedJoinCountAll)
 {
-    tester->execute("CREATE TABLE IF NOT EXISTS A(a UInt32 not null, b UInt32 not null) ENGINE=CnchMergeTree() order by tuple();");
-    tester->execute("CREATE TABLE IF NOT EXISTS B(b UInt32 not null, c UInt32 not null) ENGINE=CnchMergeTree() order by tuple();");
-    tester->execute("CREATE TABLE IF NOT EXISTS C(c UInt32 not null, d UInt32 not null) ENGINE=CnchMergeTree() order by tuple();");
+    tester->execute("CREATE TABLE IF NOT EXISTS A(a UInt32 not null, b UInt32 not null) ENGINE=Memory();");
+    tester->execute("CREATE TABLE IF NOT EXISTS B(b UInt32 not null, c UInt32 not null) ENGINE=Memory();");
+    tester->execute("CREATE TABLE IF NOT EXISTS C(c UInt32 not null, d UInt32 not null) ENGINE=Memory();");
 
     auto column_usages = buildColumnUsagesFromSQL({"select * from A, B, C where A.b = B.b and B.c = C.c"});
 
@@ -161,4 +163,27 @@ TEST_F(ColumnUsageTest, testInFilter)
     EXPECT_EQ(in_usages[empid_column], 1);
 }
 
+TEST_F(ColumnUsageTest, testArraySetFunction)
+{
+    auto column_usages = buildColumnUsagesFromSQL({"select if(arraySetCheck(history, (9000)), 'hit', 'miss') from emps "
+                                                   "where has(history, 9000) and arraySetCheck(history, (9000)) = 1"});
+    auto history_column = QualifiedColumnName{tester->getDatabaseName(), "emps", "history"};
+
+    auto array_set_usages = getColumnFrequencies(column_usages, ColumnUsageType::ARRAY_SET_FUNCTION, true);
+    ASSERT_TRUE(array_set_usages.contains(history_column));
+    EXPECT_GE(array_set_usages[history_column], 2);
+}
+
+TEST_F(ColumnUsageTest, testPrewhere)
+{
+    auto column_usages = buildColumnUsagesFromSQL({"select empid from emps "
+                                                   "prewhere arraySetCheck(history, (9000)) = 1 where empid in (1,2,3)"});
+    auto history_column = QualifiedColumnName{tester->getDatabaseName(), "emps", "history"};
+
+    auto array_set_usages = getColumnFrequencies(column_usages, ColumnUsageType::ARRAY_SET_FUNCTION, true);
+
ASSERT_TRUE(array_set_usages.contains(history_column)); + EXPECT_GE(array_set_usages[history_column], 1); +} + + } // namespace DB diff --git a/src/Advisor/tests/gtest_cluster_key.cpp b/src/Advisor/tests/gtest_order_by_key.cpp similarity index 73% rename from src/Advisor/tests/gtest_cluster_key.cpp rename to src/Advisor/tests/gtest_order_by_key.cpp index 228fe81c40b..bada3439f80 100644 --- a/src/Advisor/tests/gtest_cluster_key.cpp +++ b/src/Advisor/tests/gtest_order_by_key.cpp @@ -1,4 +1,4 @@ -#include +#include #include #include @@ -11,7 +11,7 @@ using namespace DB; -class ClusterKeyTest : public ::testing::Test +class OrderByKeyTest : public ::testing::Test { public: static void SetUpTestSuite() @@ -23,24 +23,24 @@ class ClusterKeyTest : public ::testing::Test " name Nullable(String)," " salary Nullable(Float64)," " commission Nullable(UInt32)" - ") ENGINE=CnchMergeTree() order by empid;"); + ") ENGINE=Memory();"); tester->execute("CREATE TABLE IF NOT EXISTS depts(" " deptno UInt32 not null," " name Nullable(String)" - ") ENGINE=CnchMergeTree() order by deptno;"); + ") ENGINE=Memory();"); } static void TearDownTestCase() { tester.reset(); } - + static std::shared_ptr tester; }; -std::shared_ptr ClusterKeyTest::tester; +std::shared_ptr OrderByKeyTest::tester; -TEST_F(ClusterKeyTest, testSimple) +TEST_F(OrderByKeyTest, testSimple) { auto context = tester->createQueryContext(); std::vector sqls( @@ -49,17 +49,17 @@ TEST_F(ClusterKeyTest, testSimple) WorkloadQueries queries = WorkloadQuery::build(sqls, context, query_thread_pool); WorkloadTables tables(context); AdvisorContext advisor_context = AdvisorContext::buildFrom(context, tables, queries, query_thread_pool); - auto advise = ClusterKeyAdvisor().analyze(advisor_context); + auto advise = OrderByKeyAdvisor().analyze(advisor_context); EXPECT_EQ(advise.size(), 1); QualifiedTableName emps{tester->getDatabaseName(), "emps"}; EXPECT_EQ(advise[0]->getTable(), emps); EXPECT_EQ(advise[0]->getOptimizedValue(), "empid"); } -TEST_F(ClusterKeyTest, testUpdateOrderBy) +TEST_F(OrderByKeyTest, testUpdateOrderBy) { std::string database = tester->getDatabaseName(); - std::string create_table_ddl = "CREATE TABLE IF NOT EXISTS " + database + std::string table_ddl = "CREATE TABLE IF NOT EXISTS " + database + ".emps(" " empid UInt32 not null," " deptno UInt32 not null," @@ -70,13 +70,14 @@ TEST_F(ClusterKeyTest, testUpdateOrderBy) "order by deptno;"; auto query_context = tester->createQueryContext(); - auto create_ast = tester->parse(create_table_ddl, query_context); + query_context->applySettingsChanges({DB::SettingChange("dialect_type", "CLICKHOUSE")}); + auto create_ast = tester->parse(table_ddl, query_context); WorkloadTable table(nullptr, create_ast, WorkloadTableStats::build(query_context, tester->getDatabaseName(), "emps")); table.updateOrderBy(std::make_shared("empid")); - std::string optimal_ddl = serializeAST(*table.getDDL()); - std::cout << optimal_ddl << std::endl; - EXPECT_TRUE(optimal_ddl.find("ORDER BY deptno") == std::string::npos); - EXPECT_TRUE(optimal_ddl.find("ORDER BY empid") != std::string::npos); + std::string local_ddl = serializeAST(*table.getDDL()); + std::cout << local_ddl << std::endl; + EXPECT_TRUE(local_ddl.find("ORDER BY deptno") == std::string::npos); + EXPECT_TRUE(local_ddl.find("ORDER BY empid") != std::string::npos); } diff --git a/src/AggregateFunctions/AggregateBitmapExpressionCommon.h b/src/AggregateFunctions/AggregateBitmapExpressionCommon.h index d3e5a34195a..2b51f1ca21f 100644 --- 
a/src/AggregateFunctions/AggregateBitmapExpressionCommon.h
+++ b/src/AggregateFunctions/AggregateBitmapExpressionCommon.h
@@ -18,12 +18,16 @@
 #include
 #include
+#include
 #include
 #include
 #include
+#include
 #include
+#include
+#include
 
 namespace DB
@@ -33,16 +37,140 @@ namespace ErrorCodes
 {
     extern const int LOGICAL_ERROR;
     extern const int ILLEGAL_TYPE_OF_ARGUMENT;
     extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+    extern const int BAD_ARGUMENTS;
 }
 
-template || std::is_same_v > >
-struct AggregateFunctionBitMapData
+#define AGGREGATE_INTEGRAL_BITMAP_KEY_INTERNAL_START (std::numeric_limits::min()+10000)
+#define AGGREGATE_STRING_BITMAP_KEY_INTERNAL_PREFIX "BITMAP*AGG*KEY"
+
+template
+inline T trans_global_index_to_bitmap_key(int arg)
+{
+    return AGGREGATE_INTEGRAL_BITMAP_KEY_INTERNAL_START+arg;
+}
+
+template<>
+inline String trans_global_index_to_bitmap_key(int arg)
+{
+    return AGGREGATE_STRING_BITMAP_KEY_INTERNAL_PREFIX+toString(arg);
+}
+
+template
+inline String trans_global_index_to_bitmap_string_key(int arg)
+{
+    return toString(AGGREGATE_INTEGRAL_BITMAP_KEY_INTERNAL_START+arg);
+}
+
+template<>
+inline String trans_global_index_to_bitmap_string_key(int arg)
+{
+    return AGGREGATE_STRING_BITMAP_KEY_INTERNAL_PREFIX+toString(arg);
+}
+
+template
+inline bool check_is_internal_bitmap_key(T key)
+{
+    return (static_cast(key) < AGGREGATE_INTEGRAL_BITMAP_KEY_INTERNAL_START);
+}
+
+template<>
+inline bool check_is_internal_bitmap_key(String key)
+{
+    return 0 == key.compare(0, strlen(AGGREGATE_STRING_BITMAP_KEY_INTERNAL_PREFIX), AGGREGATE_STRING_BITMAP_KEY_INTERNAL_PREFIX);
+}
+
+inline bool existBitengineExpressionKeyword(const String& str)
+{
+    if (str.empty())
+        return false;
+
+    for (const char & keyword : BITENGINE_EXPRESSION_KEYWORDS)
+    {
+        if (str.find(keyword) != std::string::npos)
+        {
+            return true;
+        }
+    }
+
+    if (!str.empty() && str[0] == BITENGINE_SPECIAL_KEYWORD)
+    {
+        throw Exception("Input tag starting with '_' is not allowed. Your tag: " + str
+            , ErrorCodes::BAD_ARGUMENTS);
+    }
+
+    return false;
+}
+
+template
+void checkIntegerExpression(const String & expression)
+{
+    auto is_integer = [&]() {
+        try
+        {
+            std::size_t pos;
+            [[maybe_unused]] UInt64 result = std::stoull(expression, &pos);
+
+            return pos == expression.size();
+        }
+        catch (...)
+        {
+            return false;
+        }
+    };
+
+    if constexpr (std::is_integral_v)
+    {
+        if (is_integer())
+        {
+            return;
+        }
+
+        if (expression.find('-') != expression.npos)
+        {
+            throw Exception(
+                "The tag (or bitmap key): " + expression + " has character '-', "
+                + "did you mean to calculate the difference set? If so, you should use '~', but not '-'.",
+                ErrorCodes::BAD_ARGUMENTS);
+        }
+        else if (expression.find("～") != expression.npos)
+        {
+            throw Exception(
+                "The tag (or bitmap key): " + expression + " has Chinese character '～', "
+                + "did you mean to calculate the difference set? If so, you should use English character '~'.",
+                ErrorCodes::BAD_ARGUMENTS);
+        }
+        else if (expression.find("，") != expression.npos)
+        {
+            throw Exception(
+                "The tag (or bitmap key): " + expression + " has Chinese character '，', "
+                + "did you mean to calculate the union set? If so, you should use English character ','.",
+                ErrorCodes::BAD_ARGUMENTS);
+        }
+        else if (expression.find("｜") != expression.npos)
+        {
+            throw Exception(
+                "The tag (or bitmap key): " + expression + " has Chinese character '｜', "
+                + "did you mean to calculate the union set? 
If so, you should use English character '|'.", + ErrorCodes::BAD_ARGUMENTS); + } + else + { + throw Exception( + "The tag (or bitmap key): " + expression + " is illegal, " + "and it should be an integer.", ErrorCodes::BAD_ARGUMENTS); + } + } +} + +template || std::is_same_v) && (std::is_same_v || std::is_same_v) > > +struct AggregateFunctionBitMapDataImpl { - std::unordered_map bitmap_map; + std::unordered_map bitmap_map; bool is_finished = false; - AggregateFunctionBitMapData() = default; + AggregateFunctionBitMapDataImpl() = default; - void add(const T key, const BitMap64 & bitmap) + void add(const T key, const BitmapType & bitmap) { auto it = bitmap_map.find(key); if (it == bitmap_map.end()) { @@ -52,9 +180,9 @@ struct AggregateFunctionBitMapData } } - void merge(AggregateFunctionBitMapData && rhs) + void merge(AggregateFunctionBitMapDataImpl && rhs) { - std::unordered_map & rhs_map = rhs.bitmap_map; + std::unordered_map & rhs_map = rhs.bitmap_map; for (auto it = rhs_map.begin(); it != rhs_map.end(); ++it) { auto jt = bitmap_map.find(it->first); @@ -68,9 +196,9 @@ struct AggregateFunctionBitMapData is_finished = false; } - bool empty() { return bitmap_map.empty(); } + bool empty() const { return bitmap_map.empty(); } - UInt64 getCardinality(const T key) + UInt64 getCardinality(const T & key) { auto it = bitmap_map.find(key); if (it != bitmap_map.end()) @@ -108,7 +236,7 @@ struct AggregateFunctionBitMapData readVarUInt(bytes_size, buf); PODArray buffer(bytes_size); buf.readStrict(buffer.data(), bytes_size); - BitMap64 bitmap = BitMap64::readSafe(buffer.data(), bytes_size); + BitmapType bitmap = BitmapType::readSafe(buffer.data(), bytes_size); bitmap_map.emplace(key, std::move(bitmap)); } @@ -116,6 +244,11 @@ struct AggregateFunctionBitMapData } }; + +template || std::is_same_v > > +using AggregateFunctionBitMapData = AggregateFunctionBitMapDataImpl; + + template || std::is_same_v > > struct BitMapExpressionNode { @@ -130,7 +263,7 @@ struct BitMapExpressionNode BitMapExpressionNode(String && left_, String && op_, String && right_, const T res_, bool replicated_ = false) : left(std::move(left_)), op(std::move(op_)), right(std::move(right_)), res(res_), replicated(replicated_) {} - String toString() + String toString() const { std::ostringstream oss; oss << left << " " << op << " " << right << " = " << res << " REPLICATED: " << replicated << "\n"; @@ -138,8 +271,10 @@ struct BitMapExpressionNode } }; -template > > -struct BitMapExpressionAnalyzer +template || std::is_same_v) && (std::is_same_v || std::is_same_v) > > +struct BitMapExpressionAnalyzerImpl { using BitMapExpressions = std::vector>; String original_expression; @@ -149,14 +284,14 @@ struct BitMapExpressionAnalyzer bool only_or = true; NameSet or_expressions; - BitMapExpressionAnalyzer(const String & expression) + explicit BitMapExpressionAnalyzerImpl(const String & expression) : original_expression(expression) { analyze(); } - BitMapExpressionAnalyzer() = default; + BitMapExpressionAnalyzerImpl() = default; - void subExpression(std::stack & expression_stack, T & global_index, String & right) + void subExpression(std::stack & expression_stack, int & global_index, String & right) { while (!expression_stack.empty() && (expression_stack.top() == "&" || expression_stack.top() == "|" @@ -172,19 +307,19 @@ struct BitMapExpressionAnalyzer String operation = expression_stack.top(); expression_stack.pop(); if (expression_stack.empty()) - throw Exception("Invalid expression " + operation + " for BitMap: " + original_expression, 
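// For context: in these analyzers '&' is intersection, '|' and ',' are union, '~' is
// difference, and parentheses group subexpressions (the error messages above describe the
// same operators). Intermediate results are stored under reserved internal keys counted
// down from the sentinel via trans_global_index_to_bitmap_key, so they can never collide
// with user-supplied tags. Worked example (illustrative):
//
//     "1 & (2 | 3) ~ 4"
//       2 | 3      -> internal key k(-1)
//       1 & k(-1)  -> internal key k(-2)
//       k(-2) ~ 4  -> internal key k(-3), which becomes the final_key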
ErrorCodes::LOGICAL_ERROR); + throw Exception("Invalid expression " + operation + " for BitMap: " + original_expression, ErrorCodes::BAD_ARGUMENTS); String left = expression_stack.top(); expression_stack.pop(); // Optimize the case which right is equal to left. // If the operation is "~", add a no-exists result to expression_stack so that we can get an empty bitmap if (right == left && operation == "~") { - T res = global_index--; - right = toString(res); + int res = global_index--; + right = trans_global_index_to_bitmap_string_key(res); } else if (right != left) { - T res = global_index--; + int res = global_index--; bool replicated = false; auto left_it = replicated_keys.find(left); auto right_it = replicated_keys.find(right); @@ -198,8 +333,9 @@ struct BitMapExpressionAnalyzer if (right_it->second > 1) replicated = true; } - expression_actions.emplace_back(std::move(left), std::move(operation), std::move(right), res, replicated); - right = toString(res); + expression_actions.emplace_back( + std::move(left), std::move(operation), std::move(right), trans_global_index_to_bitmap_key(res), replicated); + right = trans_global_index_to_bitmap_string_key(res); } } } @@ -217,6 +353,10 @@ struct BitMapExpressionAnalyzer || expression[i] == ')' || expression[i] == '#' || expression[i] == '~' || expression[i] == ' ') { if (number_index != expression_size) { String number = expression.substr(number_index, (i - number_index)); + if constexpr (std::is_integral_v) + { + checkIntegerExpression(number); + } replicated_keys[number] += 1; or_expressions.insert(number); expression_vector.push_back(std::move(number)); @@ -260,35 +400,35 @@ struct BitMapExpressionAnalyzer if (only_or) { - final_key = -1; + final_key = trans_global_index_to_bitmap_key(-1); return; } - T global_index = -1; + int global_index = -1; - for (size_t i = 0; i < expression_vector.size(); i++) + for (const auto & i : expression_vector) { - if (expression_vector[i] == "(" || expression_vector[i] == "&" - || expression_vector[i] == "|" || expression_vector[i] == "," - || expression_vector[i] == "~") + if (i == "(" || i == "&" + || i == "|" || i == "," + || i == "~") { - expression_stack.push(expression_vector[i]); + expression_stack.push(i); } - else if (expression_vector[i] == ")") + else if (i == ")") { if (expression_stack.empty()) - throw Exception("Invalid expression " + expression_vector[i] + " for BitMap: " + original_expression, ErrorCodes::LOGICAL_ERROR); + throw Exception("Invalid expression " + i + " for BitMap: " + original_expression, ErrorCodes::BAD_ARGUMENTS); String number = expression_stack.top(); expression_stack.pop(); if (expression_stack.empty()) - throw Exception("Invalid expression " + number + " for BitMap: " + original_expression, ErrorCodes::LOGICAL_ERROR); + throw Exception("Invalid expression " + number + " for BitMap: " + original_expression, ErrorCodes::BAD_ARGUMENTS); expression_stack.pop(); subExpression(expression_stack, global_index, number); expression_stack.push(number); } else { - String right = expression_vector[i]; + String right = i; // If there are replicated number, we cannot use some optimization strategy to execute expression subExpression(expression_stack, global_index, right); expression_stack.push(right); @@ -300,11 +440,11 @@ struct BitMapExpressionAnalyzer std::istringstream iss(res); iss >> final_key; } else { - throw Exception("Invalid expression for BitMap: " + original_expression, ErrorCodes::LOGICAL_ERROR); + throw Exception("Invalid expression for BitMap: " + original_expression, 
ErrorCodes::BAD_ARGUMENTS); } } - void executeExpressionImpl(String left, String operation, String right, T res, bool replicated, const AggregateFunctionBitMapData& data) const + void executeExpressionImpl(String left, String operation, String right, T res, bool replicated, const AggregateFunctionBitMapDataImpl& data) const { std::istringstream iss(left); T left_key; @@ -313,12 +453,12 @@ struct BitMapExpressionAnalyzer T right_key; iss2 >> right_key; - auto& bitmap_map = const_cast&>(data.bitmap_map); + auto& bitmap_map = const_cast&>(data.bitmap_map); auto left_iter = bitmap_map.find(left_key); auto right_iter = bitmap_map.find(right_key); if (left_iter == bitmap_map.end()) { - BitMap64 temp_bitmap; + BitmapType temp_bitmap; auto res_pair = bitmap_map.emplace(left_key, std::move(temp_bitmap)); if (res_pair.second) left_iter = res_pair.first; @@ -326,7 +466,7 @@ struct BitMapExpressionAnalyzer throw Exception("Existing empty BitMap64 when inserting empty BitMap64", ErrorCodes::LOGICAL_ERROR); } if (right_iter == bitmap_map.end()) { - BitMap64 temp_bitmap; + BitmapType temp_bitmap; auto res_pair = bitmap_map.emplace(right_key, std::move(temp_bitmap)); if (res_pair.second) right_iter = res_pair.first; @@ -371,7 +511,7 @@ struct BitMapExpressionAnalyzer } } - void executeExpressionOnlyOr(const AggregateFunctionBitMapData & data) const + void executeExpressionOnlyOr(const AggregateFunctionBitMapDataImpl & data) const { std::set key_set; for (const auto & expression : or_expressions) @@ -382,14 +522,14 @@ struct BitMapExpressionAnalyzer key_set.insert(key); } - auto& bitmap_map = const_cast&>(data.bitmap_map); + auto& bitmap_map = const_cast&>(data.bitmap_map); if (key_set.size() == 1) { T key = *key_set.begin(); auto it = bitmap_map.find(key); if (it == bitmap_map.end()) { - BitMap64 temp_bitmap; + BitmapType temp_bitmap; auto res_pair = bitmap_map.emplace(key, std::move(temp_bitmap)); if (res_pair.second) it = res_pair.first; @@ -402,34 +542,50 @@ struct BitMapExpressionAnalyzer return; } - std::map> roaring_map; - for (const auto & key: key_set) + if constexpr(std::is_same_v) { - auto it = bitmap_map.find(key); - if (it == bitmap_map.end()) - continue; - std::map & inner_roaring = const_cast &>(it->second.getRoarings()); - for (auto jt = inner_roaring.begin(); jt != inner_roaring.end(); ++jt) + std::map> roaring_map; + for (const auto & key: key_set) { - if (roaring_map.find(jt->first) == roaring_map.end()) - roaring_map.emplace(jt->first, std::vector()); - roaring_map[jt->first].emplace_back(&jt->second); + auto it = bitmap_map.find(key); + if (it == bitmap_map.end()) + continue; + std::map & inner_roaring = const_cast &>(it->second.getRoarings()); + for (auto & jt : inner_roaring) + { + if (roaring_map.find(jt.first) == roaring_map.end()) + roaring_map.emplace(jt.first, std::vector()); + roaring_map[jt.first].emplace_back(&jt.second); + } } - } - BitMap64 res_roaring; + BitMap64 res_roaring; - for (auto it = roaring_map.begin(); it != roaring_map.end(); ++it) + for (auto & it : roaring_map) + { + roaring::Roaring result = roaring::Roaring::fastunion(it.second.size(), static_cast(&(*(it.second.begin())))); + const_cast &>(res_roaring.getRoarings()).emplace(it.first, std::move(result)); + } + + bitmap_map[final_key] = std::move(res_roaring); + } + else if constexpr (std::is_same_v) { - roaring::Roaring result - = roaring::Roaring::fastunion(it->second.size(), static_cast(&(*(it->second.begin())))); - const_cast &>(res_roaring.getRoarings()).emplace(it->first, std::move(result)); + 
std::vector vec; + for (const auto & key: key_set) + { + auto it = bitmap_map.find(key); + if (it == bitmap_map.end()) + continue; + vec.emplace_back(reinterpret_cast(&(it->second))); + } + roaring::Roaring res_roaring = roaring::Roaring::fastunion(vec.size(), &(*(vec.begin()))); + bitmap_map[final_key].loadBitmap(std::move(res_roaring)); } - bitmap_map[final_key] = std::move(res_roaring); } - void executeExpression(const AggregateFunctionBitMapData & data) const + void executeExpression(const AggregateFunctionBitMapDataImpl & data) const { if (only_or) { @@ -444,20 +600,23 @@ struct BitMapExpressionAnalyzer } } }; +template || std::is_same_v > > +using BitMapExpressionAnalyzer = BitMapExpressionAnalyzerImpl; + template || std::is_same_v > > struct BitMapExpressionMultiAnalyzer { using BitMapExpressions = std::vector>; std::vector original_expressions; - T global_index = -1; + int global_index = -1; std::vector final_keys; std::vector expression_only_ors; std::vector expression_actions_vector; std::unordered_map replicated_keys; std::vector or_expressions; - BitMapExpressionMultiAnalyzer(const std::vector & expressions) + explicit BitMapExpressionMultiAnalyzer(const std::vector & expressions) : original_expressions(expressions) { analyze(); @@ -481,20 +640,19 @@ struct BitMapExpressionMultiAnalyzer String operation = expression_stack.top(); expression_stack.pop(); if (expression_stack.empty()) - throw Exception("Invalid expression " + operation + " for BitMap: " + original_expressions[index], ErrorCodes::LOGICAL_ERROR); + throw Exception("Invalid expression " + operation + " for BitMap: " + original_expressions[index], ErrorCodes::BAD_ARGUMENTS); String left = expression_stack.top(); expression_stack.pop(); // Optimize the case which right is equal to left. 
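This self-difference shortcut appears in every analyzer variant: X ~ X is always the empty set, so rather than materializing an empty bitmap the analyzer hands out a fresh temporary key that no map entry will ever match, and later lookups treat the missing key as empty. A minimal standalone sketch of the idea (the names here are illustrative, not the patch's API):

    #include <string>

    // Sketch: fold "X ~ X" to a brand-new negative key. Temporary keys count
    // downward, so they can never collide with real, user-visible bitmap keys.
    std::string reduceSelfDifference(const std::string & left, const std::string & op,
                                     const std::string & right, int & next_temp_key)
    {
        if (op == "~" && left == right)
            return std::to_string(next_temp_key--); // a key with no bitmap behaves as the empty set
        return right; // no shortcut applies; the caller records a real action node
    }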
// If the operation is "~", add a no-exists result to expression_stack so that we can get an empty bitmap if (right == left && operation == "~") { - T res = global_index--; + int res = global_index--; right = toString(res); } else if (right != left) { - T res = global_index--; - + T res = trans_global_index_to_bitmap_key(global_index--); expression_actions_vector[index].emplace_back(std::move(left), std::move(operation), std::move(right), res, false); right = toString(res); } @@ -566,45 +724,60 @@ struct BitMapExpressionMultiAnalyzer // replace number with final key if (number.size() > 1 && number[0] == '_') { - auto res_index = std::stoi(number.substr(1)); - if (res_index <= 0 || res_index > static_cast(index)) + size_t res_index{0}; + + try { - throw Exception("Invalid expression " + number + " for BitMap: " + original_expression, ErrorCodes::LOGICAL_ERROR); + res_index = std::stoi(number.substr(1)); + } + catch (std::exception &e) + { + throw Exception("Bad cast number to position: " + number + ", reason: " + String(e.what()), + ErrorCodes::BAD_ARGUMENTS); + } + + if (res_index <= 0 || res_index > index) + { + throw Exception("Invalid expression " + number + " for BitMap: " + original_expression, ErrorCodes::BAD_ARGUMENTS); } number = toString(final_keys[res_index - 1]); } + if constexpr (std::is_integral_v) + { + checkIntegerExpression(number); + } or_expression.insert(number); - expression_vector.push_back(std::move(number)); + expression_vector.emplace_back(std::move(number)); number_index = expression_size; } switch (expression[i]) { case '(': - expression_vector.push_back("("); + expression_vector.emplace_back("("); break; case '&': { - expression_vector.push_back("&"); + expression_vector.emplace_back("&"); only_or = false; break; } case '|': - expression_vector.push_back("|"); + expression_vector.emplace_back("|"); break; case ')': - expression_vector.push_back(")"); + expression_vector.emplace_back(")"); break; case ',': - expression_vector.push_back(","); + expression_vector.emplace_back(","); break; case '~': { - expression_vector.push_back("~"); + expression_vector.emplace_back("~"); only_or = false; break; } case '#': - expression_vector.push_back("#"); + expression_vector.emplace_back("#"); break; } } else { @@ -619,33 +792,34 @@ struct BitMapExpressionMultiAnalyzer expression_only_ors.emplace_back(only_or); if (only_or) { - final_keys.emplace_back(global_index--); + T current_key = trans_global_index_to_bitmap_key(global_index--); + final_keys.emplace_back(current_key); return; } - for (size_t i = 0; i < expression_vector.size(); i++) + for (const auto & i : expression_vector) { - if (expression_vector[i] == "(" || expression_vector[i] == "&" - || expression_vector[i] == "|" || expression_vector[i] == "," - || expression_vector[i] == "~") + if (i == "(" || i == "&" + || i == "|" || i == "," + || i == "~") { - expression_stack.push(expression_vector[i]); + expression_stack.push(i); } - else if (expression_vector[i] == ")") + else if (i == ")") { if (expression_stack.empty()) - throw Exception("Invalid expression " + expression_vector[i] + " for BitMap: " + original_expression, ErrorCodes::LOGICAL_ERROR); + throw Exception("Invalid expression " + i + " for BitMap: " + original_expression, ErrorCodes::BAD_ARGUMENTS); String number = expression_stack.top(); expression_stack.pop(); if (expression_stack.empty()) - throw Exception("Invalid expression " + number + " for BitMap: " + original_expression, ErrorCodes::LOGICAL_ERROR); + throw Exception("Invalid expression " + number + 
" for BitMap: " + original_expression, ErrorCodes::BAD_ARGUMENTS); expression_stack.pop(); subExpression(expression_stack, number, index); expression_stack.push(number); } else { - String right = expression_vector[i]; + String right = i; // If there are replicated number, we cannot use some optimization strategy to execute expression subExpression(expression_stack, right, index); expression_stack.push(right); @@ -659,7 +833,7 @@ struct BitMapExpressionMultiAnalyzer iss >> temp_final_key; final_keys.emplace_back(temp_final_key); } else { - throw Exception("Invalid expression for BitMap: " + original_expression, ErrorCodes::LOGICAL_ERROR); + throw Exception("Invalid expression for BitMap: " + original_expression, ErrorCodes::BAD_ARGUMENTS); } } @@ -740,7 +914,7 @@ struct BitMapExpressionMultiAnalyzer iss >> key; if (toString(key) != expression) throw Exception("expression is not fully parsed! parsed: " + toString(key) + ", but your input: " + expression - + ", please check function name and expression type", ErrorCodes::LOGICAL_ERROR); + + ", please check function name and expression type", ErrorCodes::BAD_ARGUMENTS); key_set.insert(key); } @@ -774,28 +948,27 @@ struct BitMapExpressionMultiAnalyzer return; } - std::map> roaring_map; + std::map> roaring_map; for (const auto & key: key_set) { auto it = bitmap_map.find(key); if (it == bitmap_map.end()) continue; std::map & inner_roaring = const_cast &>(it->second.getRoarings()); - for (auto jt = inner_roaring.begin(); jt != inner_roaring.end(); ++jt) + for (auto & jt : inner_roaring) { - if (roaring_map.find(jt->first) == roaring_map.end()) - roaring_map.emplace(jt->first, std::vector()); - roaring_map[jt->first].emplace_back(&jt->second); + if (roaring_map.find(jt.first) == roaring_map.end()) + roaring_map.emplace(jt.first, std::vector()); + roaring_map[jt.first].emplace_back(&jt.second); } } BitMap64 res_roaring; - for (auto it = roaring_map.begin(); it != roaring_map.end(); ++it) + for (auto & it : roaring_map) { - roaring::Roaring result - = roaring::Roaring::fastunion(it->second.size(), static_cast(&(*(it->second.begin())))); - const_cast &>(res_roaring.getRoarings()).emplace(it->first, std::move(result)); + roaring::Roaring result = roaring::Roaring::fastunion(it.second.size(), static_cast(&(*(it.second.begin())))); + const_cast &>(res_roaring.getRoarings()).emplace(it.first, std::move(result)); } bitmap_map[final_keys[index]] = std::move(res_roaring); @@ -817,4 +990,405 @@ struct BitMapExpressionMultiAnalyzer } }; + +struct BitMapExpressionWithDateMultiAnalyzer +{ + using BitMapExpressions = std::vector>; + // original_expressions is a list extracted from AggFunction's first parameter. + // for SQL below: + // * Select BitmapMultiCountWithDateV2('conjunct1','conjunct2')(p_date, tag, uid) From ... 
* + // The original_expressions is ['conjunct1', 'conjunct2'] + std::vector original_expressions; + Int64 global_index = -1; + std::unordered_set keys_without_date; + std::vector final_keys; + std::vector expression_only_ors; + std::vector expression_actions_vector; + std::unordered_map replicated_keys; + std::vector or_expressions; + std::unordered_set interested_tokens; + + explicit BitMapExpressionWithDateMultiAnalyzer(const std::vector & expressions) + : original_expressions(expressions) + { + analyze(); + } + + BitMapExpressionWithDateMultiAnalyzer() = default; + + void subExpression(std::stack & expression_stack, String & right, size_t index) + { + while (!expression_stack.empty() && + (expression_stack.top() == "&" || expression_stack.top() == "|" + || expression_stack.top() == "," || expression_stack.top() == "~" || right == "#")) + { + if (right == "#") + { + right = expression_stack.top(); + expression_stack.pop(); + } + if (expression_stack.empty()) + break; + String operation = expression_stack.top(); + expression_stack.pop(); + if (expression_stack.empty()) + throw Exception("Invalid expression " + operation + " for BitMap: " + original_expressions[index], ErrorCodes::BAD_ARGUMENTS); + String left = expression_stack.top(); + expression_stack.pop(); + // Optimize the case which right is equal to left. + // If the operation is "~", add a no-exists result to expression_stack so that we can get an empty bitmap + if (right == left && operation == "~") + { + Int64 res = global_index--; + right = std::to_string(res); + } + else + { + Int64 res = global_index--; + + expression_actions_vector[index].emplace_back(std::move(left), std::move(operation), std::move(right), std::to_string(res), false); + right = std::to_string(res); + } + } + } + + void analyze() + { + for (size_t i = 0; i < original_expressions.size(); i++) + analyzeExpression(original_expressions[i], i); + for (auto & expression_actions: expression_actions_vector) + { + for (BitMapExpressionNode & expression_action: expression_actions) + { + replicated_keys[expression_action.left] += 1; + replicated_keys[expression_action.right] += 1; + } + } + for (const NameSet& or_expression: or_expressions) + { + for (const String& or_expression_item: or_expression) + { + replicated_keys[or_expression_item] += 1; + } + } + + for (auto & expression_actions: expression_actions_vector) + { + for (BitMapExpressionNode & expression_action: expression_actions) + { + auto left_it = replicated_keys.find(expression_action.left); + do { + if (left_it == replicated_keys.end()) + break; + if (left_it->second <= 1) + break; + + expression_action.replicated = true; + } while(false); + } + } + } + void analyzeExpression(String& original_expression, size_t index) + { + bool only_or = true; + NameSet or_expression; + String expression = original_expression + "#"; + std::stack expression_stack; + size_t expression_size = expression.size(); + std::vector expression_vector; + size_t number_index = expression_size; + for (size_t i = 0; i < expression_size; i++) + { + if (expression[i] == '(' || expression[i] == '&' || expression[i] == '|' || expression[i] == ',' + || expression[i] == ')' || expression[i] == '#' || expression[i] == '~' || expression[i] == ' ') { + if (number_index != expression_size) { + String number = expression.substr(number_index, (i - number_index)); + // check mistakable characters + if (number == "-") + { + throw Exception( + "The tag (or bitmap key): " + number + " has character '-', " + + "you mean to calculate the difference set? 
If so, you should use '~', but not '-'.",
+                        ErrorCodes::BAD_ARGUMENTS);
+                }
+                else if (number.find("～") != number.npos)
+                {
+                    throw Exception(
+                        "The tag (or bitmap key): " + number + " has Chinese character '～', "
+                        + "did you mean to calculate the difference set? If so, you should use English character '~'.",
+                        ErrorCodes::BAD_ARGUMENTS);
+                }
+                else if (number.find("，") != number.npos)
+                {
+                    throw Exception(
+                        "The tag (or bitmap key): " + number + " has Chinese character '，', "
+                        + "did you mean to calculate the union set? If so, you should use English character ','.",
+                        ErrorCodes::BAD_ARGUMENTS);
+                }
+                else if (number.find("｜") != number.npos)
+                {
+                    throw Exception(
+                        "The tag (or bitmap key): " + number + " has Chinese character '｜', "
+                        + "did you mean to calculate the union set? If so, you should use English character '|'.",
+                        ErrorCodes::BAD_ARGUMENTS);
+                }
+                // replace number with final key
+                if (number.size() > 1 && number[0] == '_')
+                {
+                    size_t res_index{0};
+
+                    try {
+                        res_index = std::stoi(number.substr(1));
+                    } catch (std::exception &e) {
+                        throw Exception("Bad cast number to position: " + number + ", reason: " + String(e.what()),
+                            ErrorCodes::BAD_ARGUMENTS);
+                    }
+
+                    if (res_index <= 0 || res_index > index)
+                    {
+                        throw Exception("Invalid expression " + number + " for BitMap: " + original_expression, ErrorCodes::BAD_ARGUMENTS);
+                    }
+                    number = final_keys[res_index - 1];
+                }
+                else if (!number.empty()) {
+                    if (number.find('_') == std::string::npos)
+                    {
+                        keys_without_date.emplace(number);
+                    }
+                    else
+                    {
+                        // The expression has '_' in it, so it can be a key_date pair.
+                        if ('_' != number[0]){
+                            // We don't want those _1,_2,... pairs
+                            interested_tokens.emplace(number);
+                        }
+                    }
+                }
+
+                or_expression.insert(number);
+                expression_vector.push_back(std::move(number));
+                number_index = expression_size;
+            }
+            switch (expression[i]) {
+                case '(':
+                    expression_vector.push_back("(");
+                    break;
+                case '&':
+                {
+                    expression_vector.push_back("&");
+                    only_or = false;
+                    break;
+                }
+                case '|':
+                    expression_vector.push_back("|");
+                    break;
+                case ')':
+                    expression_vector.push_back(")");
+                    break;
+                case ',':
+                    expression_vector.push_back(",");
+                    break;
+                case '~':
+                {
+                    expression_vector.push_back("~");
+                    only_or = false;
+                    break;
+                }
+                case '#':
+                    expression_vector.push_back("#");
+                    break;
+            }
+        } else {
+            if (number_index == expression_size) {
+                number_index = i;
+            }
+        }
+    }
+    BitMapExpressions expressions;
+    expression_actions_vector.emplace_back(std::move(expressions));
+    or_expressions.emplace_back(std::move(or_expression));
+    expression_only_ors.emplace_back(only_or);
+    if (only_or)
+    {
+        final_keys.emplace_back(std::to_string(global_index--));
+        return;
+    }
+
+    for (const auto & i : expression_vector)
+    {
+        if (i == "(" || i == "&"
+            || i == "|" || i == ","
+            || i == "~")
+        {
+            expression_stack.push(i);
+        }
+        else if (i == ")")
+        {
+            if (expression_stack.empty())
+                throw Exception("Invalid expression " + i + " for BitMap: " + original_expression, ErrorCodes::BAD_ARGUMENTS);
+            String number = expression_stack.top();
+            expression_stack.pop();
+            if (expression_stack.empty())
+                throw Exception("Invalid expression " + number + " for BitMap: " + original_expression, ErrorCodes::BAD_ARGUMENTS);
+            expression_stack.pop();
+            subExpression(expression_stack, number, index);
+            expression_stack.push(number);
+        }
+        else
+        {
+            String right = i;
+            // If there are replicated numbers, we cannot use certain optimization strategies to execute the expression
+            subExpression(expression_stack, right, index);
+ 
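The token loop above is a compact stack evaluator: operators are pushed, and as soon as an operand lands on top of an operator the triple below it is folded into a synthetic result key. A self-contained sketch of the same reduction over plain string tokens, assuming well-formed input, left-to-right evaluation without precedence (as the patch does), and ignoring parentheses:

    #include <stack>
    #include <string>
    #include <vector>

    // Sketch: linearize "1 & 2 | 3" into actions (1 & 2 -> "-1"), ("-1" | 3 -> "-2").
    std::string evaluateTokens(const std::vector<std::string> & tokens)
    {
        std::stack<std::string> st;
        int temp = -1;
        for (const auto & tok : tokens)
        {
            if (tok == "&" || tok == "|" || tok == "~" || tok == ",")
            {
                st.push(tok);
                continue;
            }
            std::string right = tok;
            while (!st.empty() && (st.top() == "&" || st.top() == "|" || st.top() == "~" || st.top() == ","))
            {
                st.pop();                       // the operator (an action node would be recorded here)
                st.pop();                       // its left operand
                right = std::to_string(temp--); // synthetic key standing in for the folded result
            }
            st.push(right);
        }
        return st.top(); // the key under which the final bitmap will be stored
    }

Evaluating eagerly like this keeps the stack at most a few entries deep, which is why the analyzers never need a full expression tree.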
expression_stack.push(right); + } + } + + if (expression_stack.size() == 1) { + const String & res = expression_stack.top(); + final_keys.emplace_back(res); + } else { + throw Exception("Invalid expression for BitMap: " + original_expression, ErrorCodes::BAD_ARGUMENTS); + } + } + + static void executeExpressionImpl(String left_key, String operation, String right_key, String res, bool replicated, const AggregateFunctionBitMapData& data) + { + auto& bitmap_map = const_cast&>(data.bitmap_map); + auto left_iter = bitmap_map.find(left_key); + auto right_iter = bitmap_map.find(right_key); + + if (left_iter == bitmap_map.end()) { + BitMap64 temp_bitmap; + auto res_pair = bitmap_map.emplace(left_key, std::move(temp_bitmap)); + if (res_pair.second) + left_iter = res_pair.first; + else + throw Exception("Existing empty BitMap64 when inserting empty BitMap64", ErrorCodes::LOGICAL_ERROR); + } + if (right_iter == bitmap_map.end()) { + BitMap64 temp_bitmap; + auto res_pair = bitmap_map.emplace(right_key, std::move(temp_bitmap)); + if (res_pair.second) + right_iter = res_pair.first; + else + throw Exception("Existing empty BitMap64 when inserting empty BitMap64", ErrorCodes::LOGICAL_ERROR); + } + if (!replicated) + { + if (operation == "|" || operation == ",") { + left_iter->second |= right_iter->second; + auto left_item = bitmap_map.extract(left_iter->first); + left_item.key() = res; + bitmap_map.insert(std::move(left_item)); + } + else if (operation == "&") { + left_iter->second &= right_iter->second; + auto left_item = bitmap_map.extract(left_iter->first); + left_item.key() = res; + bitmap_map.insert(std::move(left_item)); + } + else if (operation == "~") { + left_iter->second -= right_iter->second; + auto left_item = bitmap_map.extract(left_iter->first); + left_item.key() = res; + bitmap_map.insert(std::move(left_item)); + } + } + else + { + if (operation == "|" || operation == ",") { + bitmap_map[res] = left_iter->second; + bitmap_map[res] |= right_iter->second; + } + else if (operation == "&") { + bitmap_map[res] = left_iter->second; + bitmap_map[res] &= right_iter->second; + } + else if (operation == "~") { + bitmap_map[res] = left_iter->second; + bitmap_map[res] -= right_iter->second; + } + } + } + + void executeExpressionOnlyOr(const AggregateFunctionBitMapData & data, size_t index) const + { + std::set key_set; + for (const auto & expression : or_expressions[index]) + { + key_set.insert(expression); + } + + auto& bitmap_map = const_cast&>(data.bitmap_map); + + if (key_set.size() == 1) + { + String key = *key_set.begin(); + auto it = bitmap_map.find(key); + if (it == bitmap_map.end()) { + BitMap64 temp_bitmap; + auto res_pair = bitmap_map.emplace(key, std::move(temp_bitmap)); + if (res_pair.second) + it = res_pair.first; + else + throw Exception("Existing empty BitMap64 when inserting empty BitMap64", ErrorCodes::LOGICAL_ERROR); + } + auto or_it = replicated_keys.find(key); + if (or_it == replicated_keys.end()) + return; + else if (or_it->second > 1) + { + bitmap_map[final_keys[index]] = it->second; + } + else + { + auto it_final_item = bitmap_map.extract(it->first); + it_final_item.key() = final_keys[index]; + bitmap_map.insert(std::move(it_final_item)); + } + return; + } + + std::map> roaring_map; + for (const auto & key: key_set) + { + auto it = bitmap_map.find(key); + if (it == bitmap_map.end()) + continue; + std::map & inner_roaring = const_cast &>(it->second.getRoarings()); + for (auto & jt : inner_roaring) + { + if (roaring_map.find(jt.first) == roaring_map.end()) + 
roaring_map.emplace(jt.first, std::vector<const roaring::Roaring *>());
+                roaring_map[jt.first].emplace_back(&jt.second);
+            }
+        }
+
+        BitMap64 res_roaring;
+
+        for (auto & it : roaring_map)
+        {
+            roaring::Roaring result = roaring::Roaring::fastunion(it.second.size(), &(*(it.second.begin())));
+            const_cast<std::map<UInt32, roaring::Roaring> &>(res_roaring.getRoarings()).emplace(it.first, std::move(result));
+        }
+
+        bitmap_map[final_keys[index]] = std::move(res_roaring);
+    }
+
+    void executeExpression(const AggregateFunctionBitMapData & data, size_t index) const
+    {
+        if (expression_only_ors[index])
+        {
+            executeExpressionOnlyOr(data, index);
+        }
+        else
+        {
+            for (const auto & action : expression_actions_vector[index])
+            {
+                executeExpressionImpl(action.left, action.op, action.right, action.res, action.replicated, data);
+            }
+        }
+    }
+};
+
 }
diff --git a/src/AggregateFunctions/AggregateFunctionBitMapJoin.cpp b/src/AggregateFunctions/AggregateFunctionBitMapJoin.cpp
index 698fbfdca2e..fb06d214a1d 100644
--- a/src/AggregateFunctions/AggregateFunctionBitMapJoin.cpp
+++ b/src/AggregateFunctions/AggregateFunctionBitMapJoin.cpp
@@ -22,6 +22,12 @@
 namespace DB
 {
+    namespace ErrorCodes
+    {
+        extern const int BAD_ARGUMENTS;
+        extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    }
+
     /// Expected format is 'P.I' or 'I', where P means the position
     /// and I means the index of the argument
     PositionIndexPair parsePositionAndIndex(String & input)
@@ -52,15 +58,15 @@ namespace DB
         for (size_t i = 0; i < arr.size(); ++i)
         {
             if (arr.at(i).safeGet<String>().empty())
-                throw Exception("AggregateFunction " + name + ": empty string in parameter is invalid", ErrorCodes::LOGICAL_ERROR);
+                throw Exception("AggregateFunction " + name + ": empty string in parameter is invalid", ErrorCodes::BAD_ARGUMENTS);

             UInt64 pos = 0, idx = 0;
             std::tie(pos, idx) = parsePositionAndIndex(arr.at(i).safeGet<String>());
             if (pos == 0 || ((pos^0xFF) && pos > union_num+1))
             {
-                throw Exception("AggregateFunction " + name + ": wrong value of keys postion identifier, which starts from 1", ErrorCodes::LOGICAL_ERROR);
+                throw Exception("AggregateFunction " + name + ": wrong value of keys position identifier, which starts from 1", ErrorCodes::BAD_ARGUMENTS);
             }
             if (idx < 3 || idx > argument_num)
-                throw Exception("AggregateFunction " + name + ": wrong value of key index, which starts from 3", ErrorCodes::LOGICAL_ERROR);
+                throw Exception("AggregateFunction " + name + ": wrong value of key index, which starts from 3", ErrorCodes::BAD_ARGUMENTS);

             to.emplace_back(pos, idx);
         }
     }
@@ -70,7 +76,7 @@ namespace DB
         {
             UInt64 idx = arr.at(i).safeGet<UInt64>();
             if (idx < 3 || idx > argument_num)
-                throw Exception("AggregateFunction " + name + ": wrong value of key index", ErrorCodes::LOGICAL_ERROR);
+                throw Exception("AggregateFunction " + name + ": wrong value of key index", ErrorCodes::BAD_ARGUMENTS);

             to.emplace_back(0xFF, idx);
         }
     }
@@ -93,11 +99,11 @@ namespace
     /// 6 params are: (union_num, [join_keys], [group_by_keys], bitmap_op, join_type, thread_number, 0), the last 0 means the result is cardinality
     /// 7 params are: (union_num, [join_keys], [group_by_keys], bitmap_op, join_type, thread_number, result_type) result_type: 0->cardinality, 1->raw bitmap
     if (parameters.size() != 3 && parameters.size() != 5 && parameters.size() != 6 && parameters.size() != 7)
-        throw Exception("AggregateFunction " + name + " needs 3, 5, 6 or 7 parameters", ErrorCodes::LOGICAL_ERROR);
+        throw Exception("AggregateFunction " + name + " needs 3, 5, 6 or 7 parameters", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

     UInt64 union_num = parameters[0].safeGet<UInt64>();
     if (union_num != 
1) - throw Exception("AggregateFunction " + name + " can only support one JOIN now, set 1 please", ErrorCodes::LOGICAL_ERROR); + throw Exception("AggregateFunction " + name + " can only support one JOIN now, set 1 please", ErrorCodes::BAD_ARGUMENTS); Array join_arr = parameters[1].safeGet(); Array group_by_arr = parameters[2].safeGet(); @@ -110,7 +116,7 @@ namespace keys_set.emplace(jk.second); } if (keys_set.size() != join_keys_idx.size()) - throw Exception("AggregateFunction " + name + ": duplicated join key index, only one is ok", ErrorCodes::LOGICAL_ERROR); + throw Exception("AggregateFunction " + name + ": duplicated join key index, only one is ok", ErrorCodes::BAD_ARGUMENTS); getParameterOfPositionAndIndex(group_by_arr, name, union_num, argument_types.size(), group_by_keys_idx); @@ -125,12 +131,12 @@ namespace if (group_by_keys_idx[i] == group_by_keys_idx[j] || (group_by_keys_idx[i].second == group_by_keys_idx[j].second && (group_by_keys_idx[i].first == 0xFF || group_by_keys_idx[j].first == 0xFF))) - throw Exception("AggregateFunction " + name + ": duplicated group by index", ErrorCodes::LOGICAL_ERROR); + throw Exception("AggregateFunction " + name + ": duplicated group by index", ErrorCodes::BAD_ARGUMENTS); } } String logic_str, join_str; - if (parameters.size() == 5 || parameters.size() == 6) + if (parameters.size() >= 5) { logic_str = parameters[3].safeGet(); join_str = parameters[4].safeGet(); @@ -140,16 +146,16 @@ namespace if (!logic_op.isValid()) throw Exception( "AggregateFunction " + name + " only support logic operation: AND, OR, XOR, besides empty string is also ok", - DB::ErrorCodes::LOGICAL_ERROR); + DB::ErrorCodes::BAD_ARGUMENTS); JoinOperation join_op(join_str); if (!join_op.isValid()) throw Exception( "AggregateFunction " + name + " only support join type: INNER, LEFT. 
And empty string means INNER JOIN", - DB::ErrorCodes::LOGICAL_ERROR); + DB::ErrorCodes::BAD_ARGUMENTS); UInt64 thread_num = 32; - if (parameters.size() == 6) + if (parameters.size() >= 6) { thread_num = parameters[5].safeGet(); } @@ -160,19 +166,19 @@ namespace result_type = parameters[6].safeGet(); } if (result_type != 0 && result_type != 1) - throw Exception("AggregateFunction " + name + " only support result_type: 0, 1", ErrorCodes::LOGICAL_ERROR); + throw Exception("AggregateFunction " + name + " only support result_type: 0, 1", ErrorCodes::BAD_ARGUMENTS); if (!WhichDataType(argument_types[0]).isUInt8()) - throw Exception("AggregateFunction " + name + " needs Int type for its first argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " needs Int type for its first argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (!isBitmap64(argument_types[1])) throw Exception( - "AggregateFunction " + name + " needs BitMap64 type for its second argument", ErrorCodes::NOT_IMPLEMENTED); + "AggregateFunction " + name + " needs BitMap64 type for its second argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); for (size_t i = 2; i < argument_types.size(); ++i) { if (!isString(argument_types[i])) - throw Exception("AggregateFunction " + name + " needs String type", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " needs String type", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } return std::make_shared(argument_types, union_num, join_keys_idx, group_by_keys_idx, logic_op, join_op, thread_num, result_type); diff --git a/src/AggregateFunctions/AggregateFunctionBitMapJoin.h b/src/AggregateFunctions/AggregateFunctionBitMapJoin.h index 645ba0dc8af..06e98ca0285 100644 --- a/src/AggregateFunctions/AggregateFunctionBitMapJoin.h +++ b/src/AggregateFunctions/AggregateFunctionBitMapJoin.h @@ -45,7 +45,6 @@ namespace DB { namespace ErrorCodes { -extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH; } @@ -62,34 +61,34 @@ enum LogicOperationType struct LogicOperation { - LogicOperation() : logicOp(LogicOperationType::NONE) {} - LogicOperation(String operation) + LogicOperation() : logic_op(LogicOperationType::NONE) {} + explicit LogicOperation(String operation) { std::transform(operation.begin(), operation.end(), operation.begin(), ::toupper); if (operation == "NONE" || operation.empty()) - logicOp = LogicOperationType::NONE; + logic_op = LogicOperationType::NONE; else if (operation == "AND") - logicOp = LogicOperationType::AND; + logic_op = LogicOperationType::AND; else if (operation == "OR") - logicOp = LogicOperationType::OR; + logic_op = LogicOperationType::OR; else if (operation == "XOR") - logicOp = LogicOperationType::XOR; + logic_op = LogicOperationType::XOR; else if (operation == "ANDNOT") - logicOp = LogicOperationType::ANDNOT; + logic_op = LogicOperationType::ANDNOT; else if (operation == "RANDNOT" || operation == "REVERSEANDNOT") - logicOp = LogicOperationType::REVERSEANDNOT; + logic_op = LogicOperationType::REVERSEANDNOT; else - logicOp = LogicOperationType::UNDEFINED; + logic_op = LogicOperationType::UNDEFINED; } LogicOperation(const LogicOperation & rhs) { - this->logicOp = rhs.logicOp; + this->logic_op = rhs.logic_op; } - bool isValid() { return logicOp < LogicOperationType::UNDEFINED; } + bool isValid() const { return logic_op < LogicOperationType::UNDEFINED; } - LogicOperationType logicOp; + LogicOperationType logic_op; }; enum JoinType @@ -101,37 +100,39 @@ enum JoinType struct JoinOperation { - JoinOperation() : 
joinOp(JoinType::INNER) {} - JoinOperation(String operation) + JoinOperation() : join_op(JoinType::INNER) {} + explicit JoinOperation(String operation) { std::transform(operation.begin(), operation.end(), operation.begin(), ::toupper); if (operation.empty() || operation == "INNER") - joinOp = JoinType::INNER; + join_op = JoinType::INNER; else if (operation == "LEFT") - joinOp = JoinType::LEFT; + join_op = JoinType::LEFT; else - joinOp = JoinType::INVALID; + join_op = JoinType::INVALID; } - bool isValid() { return joinOp < JoinType::INVALID; } + bool isValid() const { return join_op < JoinType::INVALID; } - JoinType joinOp; + JoinType join_op; }; using JoinKeys = Strings; using GroupByKeys = Strings; using Position = UInt8; using BitMapPtr = std::shared_ptr; -using JoinTuple = std::tuple; +using JoinKeysPtr = std::shared_ptr; +using GroupByKeysPtr = std::shared_ptr; +using JoinTuple = std::tuple; using JoinTuplePtr = std::shared_ptr; using JoinTuplePtrs = std::vector; using PositionIndexPair = std::pair; -void writeStrings(const Strings & data, WriteBuffer & buf) +void writeStrings(const std::shared_ptr & data, WriteBuffer & buf) { - size_t size = data.size(); + size_t size = data->size(); writeVarUInt(size, buf); - for (auto & key : data) + for (auto & key : *data) writeString(key.data(), key.size(), buf); } @@ -151,21 +152,24 @@ void readStrings(Strings & data, ReadBuffer & buf) // The key used to hash the join keys or group by keys struct StringsMapKey { - Strings keys; + std::shared_ptr keys; StringsMapKey() = default; - StringsMapKey(String & key_) : keys{key_} {} - StringsMapKey(Strings && keys_) : keys(std::move(keys_)) {} - StringsMapKey(const Strings && keys_) : keys(std::move(keys_)) {} + explicit StringsMapKey(String & key_) + { + Strings strs{ key_ }; + keys = std::make_shared(std::move(strs)); + } + explicit StringsMapKey(std::shared_ptr && keyPtr) : keys(std::move(keyPtr)) {} bool operator==(const StringsMapKey & rhs) const { - if (keys.size() != rhs.keys.size()) + if (keys->size() != rhs.keys->size()) return false; - for (size_t i = 0; i < keys.size(); ++i) + for (size_t i = 0; i < keys->size(); ++i) { - if (keys.at(i) != rhs.keys.at(i)) + if (keys->at(i) != rhs.keys->at(i)) return false; } return true; @@ -176,12 +180,12 @@ struct HashStringsMapKey { size_t operator()(const StringsMapKey & one) const { - if (one.keys.empty()) + if (one.keys->empty()) return std::hash()(""); - size_t res = std::hash()(one.keys.at(0)); - for (size_t i = 1; i < one.keys.size(); ++i) - res ^= std::hash()(one.keys.at(i)) >> i; + size_t res = std::hash()(one.keys->at(0)); + for (size_t i = 1; i < one.keys->size(); ++i) + res ^= std::hash()(one.keys->at(i)) >> i; return res; } @@ -225,14 +229,14 @@ class KVBigLock { // Here is no lock, we just do this in a single thread void getAllKeyValueByResultType(ColumnTuple & tuple_in_array, size_t result_type) { - for (auto it = m_map.begin(); it != m_map.end(); ++it) + for (auto & it : m_map) { - BitMapPtr bitmap_ptr = std::get<2>(*(it->second.at(0))); - size_t key_size = it->first.keys.size(); + BitMapPtr bitmap_ptr = std::get<2>(*(it.second.at(0))); + size_t key_size = it.first.keys->size(); for (size_t i = 0; i < key_size; ++i) { auto & column_group_by = static_cast(tuple_in_array.getColumn(i)); - column_group_by.insert(it->first.keys.at(i)); + column_group_by.insert(it.first.keys->at(i)); } if (result_type == 0) { @@ -256,42 +260,47 @@ class KVBigLock { class KVSharded { public: - KVSharded(size_t num_shard) : m_mask(num_shard - 1), m_shards(num_shard) 
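The constructor's (num_shard & m_mask) != 0 test below is the standard power-of-two check: only when num_shard is 2^k does num_shard & (num_shard - 1) come out zero, which in turn lets the shard lookup use hash & mask instead of a modulo. A minimal sketch of that selection logic, with locking and storage omitted:

    #include <functional>
    #include <stdexcept>
    #include <string>

    // Sketch: power-of-two sharding. For a nonzero num_shard == 2^k,
    // (num_shard & mask) == 0 holds iff exactly one bit is set, and
    // hash & mask is then equivalent to hash % num_shard.
    struct ShardPicker
    {
        size_t mask;

        explicit ShardPicker(size_t num_shard) : mask(num_shard - 1)
        {
            if ((num_shard & mask) != 0)
                throw std::invalid_argument("num_shard should be a power of two");
        }

        size_t shardOf(const std::string & key) const
        {
            return std::hash<std::string>()(key) & mask;
        }
    };

The mask trick matters here because every put/get hashes the full key vector, so shaving a division off the hot path is cheap insurance.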
+ explicit KVSharded(size_t num_shard) : m_mask(num_shard - 1), m_shards(num_shard) { if ((num_shard & m_mask) != 0) - throw Exception("num_shard should be a power of two", ErrorCodes::LOGICAL_ERROR); + throw Exception("num_shard should be a power of two", ErrorCodes::BAD_ARGUMENTS); } KVSharded(KVSharded && rhs) : m_mask(std::move(rhs.m_mask)), m_shards(std::move(rhs.m_shards)) {} - void operator=(KVSharded && rhs) + KVSharded& operator=(KVSharded && rhs) { - m_shards = std::move(rhs.m_shards); + if (this != &rhs) // Optional: Check for self-assignment + { + m_mask = std::move(rhs.m_mask); + m_shards = std::move(rhs.m_shards); + } + return *this; } void put(const StringsMapKey & key, const JoinTuplePtrs & value) { - get_shard(key).emplaceKVOrAddValue(std::move(key), std::move(value)); + getShard(key).emplaceKVOrAddValue(std::move(key), std::move(value)); } std::optional get(const StringsMapKey & key) { - return get_shard(key).get(key); + return getShard(key).get(key); } /// It's used in insertIntoResult function, by a single thread void writeResultOfKeyAndValue(ColumnTuple & tuple_in_array, size_t result_type) { - for (auto it = m_shards.begin(); it != m_shards.end(); ++it) + for (auto & m_shard : m_shards) { - it->getAllKeyValueByResultType(tuple_in_array, result_type); + m_shard.getAllKeyValueByResultType(tuple_in_array, result_type); } } private: - const size_t m_mask; + size_t m_mask; std::vector m_shards; - KVBigLock & get_shard(const StringsMapKey & key) + KVBigLock & getShard(const StringsMapKey & key) { HashStringsMapKey hash_fn; size_t h = hash_fn(key); @@ -300,31 +309,37 @@ class KVSharded }; /// It's used to accommodate user input data, and data is grouped by join keys -struct PositionTuples +struct JoinPositionTuples { Position position; HashedStringsKeyTuples tuples; // The key used here is join key - PositionTuples() = default; - PositionTuples(Position pos) : position(pos) {} - PositionTuples(const PositionTuples & rhs) : position(rhs.position), tuples(rhs.tuples) {} - PositionTuples(PositionTuples && rhs) : position(rhs.position), tuples(std::move(rhs.tuples)) {} - PositionTuples(Position && pos, StringsMapKey && join_keys, JoinTuplePtr && val) + JoinPositionTuples() = default; + explicit JoinPositionTuples(Position pos) : position(pos) {} + JoinPositionTuples(const JoinPositionTuples & rhs) = default; + JoinPositionTuples(JoinPositionTuples && rhs) : position(rhs.position), tuples(std::move(rhs.tuples)) {} + JoinPositionTuples(Position && pos, StringsMapKey && join_keys, JoinTuplePtr && val) : position(std::move(pos)), tuples{{std::move(join_keys), JoinTuplePtrs{val}}} {} - void operator=(const PositionTuples & rhs) + JoinPositionTuples& operator=(const JoinPositionTuples & rhs) { - this->position = rhs.position; - this->tuples = rhs.tuples; + if (this != &rhs) { // Check for self-assignment + this->position = rhs.position; + this->tuples = rhs.tuples; + } + return *this; } - void operator=(const PositionTuples && rhs) + JoinPositionTuples& operator=(JoinPositionTuples && rhs) { - this->position = std::move(rhs.position); - this->tuples = std::move(rhs.tuples); + if (this != &rhs) { // Check for self-assignment + this->position = std::move(rhs.position); + this->tuples = std::move(rhs.tuples); + } + return *this; } - void emplace_back(StringsMapKey && join_key, JoinTuplePtrs && value) + void emplaceBack(StringsMapKey && join_key, JoinTuplePtrs && value) { auto it = this->tuples.find(join_key); if (it == this->tuples.end()) @@ -337,16 +352,16 @@ struct PositionTuples 
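emplaceBack below is the usual append-or-insert pattern for a key-to-vector map: one lookup, create the vector on a miss, splice with move iterators on a hit. A generic sketch of the same pattern, assuming plain standard-library types rather than the patch's aliases:

    #include <iterator>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // Sketch: append values under a key, moving rather than copying the payload.
    template <typename V>
    void appendOrInsert(std::unordered_map<std::string, std::vector<V>> & map,
                        std::string key, std::vector<V> && values)
    {
        auto it = map.find(key);
        if (it == map.end())
            map.emplace(std::move(key), std::move(values));
        else
            it->second.insert(it->second.end(),
                              std::make_move_iterator(values.begin()),
                              std::make_move_iterator(values.end()));
    }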
std::make_move_iterator(value.end())); } - void emplace_back(StringsMapKey && join_key, JoinTuplePtr && value) + void emplaceBack(StringsMapKey && join_key, JoinTuplePtr && value) { - this->emplace_back(std::move(join_key), JoinTuplePtrs{value}); + this->emplaceBack(std::move(join_key), JoinTuplePtrs{value}); } - void insert(PositionTuples && rhs) + void insert(JoinPositionTuples && rhs) { - for (auto rt = rhs.tuples.begin(); rt != rhs.tuples.end(); ++rt) + for (auto & tuple : rhs.tuples) { - this->emplace_back(std::move(const_cast(rt->first)), std::move(rt->second)); + this->emplaceBack(std::move(const_cast(tuple.first)), std::move(tuple.second)); } } @@ -355,22 +370,21 @@ struct PositionTuples writeVarUInt(position, buf); size_t map_size = tuples.size(); writeVarUInt(map_size, buf); - - for (auto it = tuples.begin(); it != tuples.end(); ++it) + for (const auto & tuple : tuples) { - writeStrings(it->first.keys, buf); + writeStrings(tuple.first.keys, buf); - size_t tuples_num = it->second.size(); + size_t tuples_num = tuple.second.size(); writeVarUInt(tuples_num, buf); - for (auto jt = it->second.begin(); jt != it->second.end(); ++jt) + for (const auto & jt : tuple.second) { - JoinKeys join_key; - GroupByKeys group_by; + JoinKeysPtr join_key_ptr; + GroupByKeysPtr group_by_ptr; BitMapPtr bitmap_ptr; - std::tie(join_key, group_by, bitmap_ptr) = *(*jt); + std::tie(join_key_ptr, group_by_ptr, bitmap_ptr) = *jt; - writeStrings(join_key, buf); - writeStrings(group_by, buf); + writeStrings(const_cast(join_key_ptr), buf); + writeStrings(const_cast(group_by_ptr), buf); size_t bytes_size = (*bitmap_ptr).getSizeInBytes(); writeVarUInt(bytes_size, buf); @@ -415,14 +429,18 @@ struct PositionTuples buf.readStrict(buffer.data(), bytes_size); BitMap64 bitmap = BitMap64::readSafe(buffer.data(), bytes_size); - tmp_tuple = std::make_tuple(std::move(join_key), - std::move(group_by), - std::make_shared(bitmap)); + JoinKeysPtr join_key_ptr = make_shared(join_key); + GroupByKeysPtr group_by_ptr = make_shared(group_by); + + tmp_tuple = std::make_tuple(std::move(join_key_ptr), + std::move(group_by_ptr), + std::make_shared(bitmap)); tuples_ptrs.emplace_back(std::make_shared(tmp_tuple)); } - this->emplace_back(StringsMapKey(std::move(key)), std::move(tuples_ptrs)); + std::shared_ptr key_ptr = std::make_shared(std::move(key)); + this->emplaceBack(StringsMapKey(std::move(key_ptr)), std::move(tuples_ptrs)); } } }; @@ -431,22 +449,23 @@ struct AggregateFunctionBitMapJoinData { AggregateFunctionBitMapJoinData() = default; - std::vector join_tuples_by_position; + std::vector join_tuples_by_position; - void add(const Position & pos, const BitMapPtr bitmap_ptr, const JoinKeys & join_keys, GroupByKeys & group_bys, size_t union_num) + void add(const Position & pos, const BitMapPtr bitmap_ptr, JoinKeysPtr & join_keys, GroupByKeysPtr & group_bys, size_t union_num) { if (pos > union_num+1) throw Exception("AggregateFunction BitMapJoin: Wrong position value. 
Position starts from 1 and ends with join_num+1 ", - DB::ErrorCodes::LOGICAL_ERROR); + DB::ErrorCodes::BAD_ARGUMENTS); - StringsMapKey key(std::move(join_keys)); + JoinKeysPtr cpy(join_keys); + StringsMapKey key(std::move(cpy)); JoinTuplePtr tuple_ptr{std::make_shared(std::make_tuple(std::move(join_keys), std::move(group_bys), std::move(bitmap_ptr)))}; for (auto & pos_tuples : join_tuples_by_position) // Position value is in a small range, just compare one by one { if (pos-1 == pos_tuples.position) // position starts from 0, but pos from user starts from 1 { - pos_tuples.emplace_back(std::move(key), std::move(tuple_ptr)); + pos_tuples.emplaceBack(std::move(key), std::move(tuple_ptr)); return; } } @@ -457,7 +476,7 @@ struct AggregateFunctionBitMapJoinData void merge (const AggregateFunctionBitMapJoinData & rhs) { auto & lhs_tuples_by_position = this->join_tuples_by_position; - auto & rhs_tuples_by_position = const_cast &>(rhs.join_tuples_by_position); + auto & rhs_tuples_by_position = const_cast &>(rhs.join_tuples_by_position); if (rhs_tuples_by_position.empty()) return; @@ -468,20 +487,20 @@ struct AggregateFunctionBitMapJoinData } // Position value is in a small range, just compare one by one - for (auto rt = rhs_tuples_by_position.begin(); rt != rhs_tuples_by_position.end(); ++rt) + for (auto & rt : rhs_tuples_by_position) { bool pos_exists = false; - for (auto lt = lhs_tuples_by_position.begin(); lt != lhs_tuples_by_position.end(); ++lt) + for (auto & lt : lhs_tuples_by_position) { - if (lt->position == rt->position) + if (lt.position == rt.position) { - lt->insert(std::move(*rt)); + lt.insert(std::move(rt)); pos_exists = true; } } if (!pos_exists) { - lhs_tuples_by_position.emplace_back(std::move(*rt)); + lhs_tuples_by_position.emplace_back(std::move(rt)); } } } @@ -490,10 +509,9 @@ struct AggregateFunctionBitMapJoinData { size_t position_num = join_tuples_by_position.size(); writeVarUInt(position_num, buf); - for (auto it = join_tuples_by_position.begin(); - it != join_tuples_by_position.end(); ++it) + for (const auto & it : join_tuples_by_position) { - it->serialize(buf); + it.serialize(buf); } } @@ -504,7 +522,7 @@ struct AggregateFunctionBitMapJoinData for (size_t i = 0; i < position_num; ++i) { - PositionTuples pos_tuple; + JoinPositionTuples pos_tuple; pos_tuple.deserialize(buf); join_tuples_by_position.emplace_back(std::move(pos_tuple)); } @@ -563,12 +581,15 @@ class AggregateFunctionBitMapJoin final : public IAggregateFunctionDataHelper(pos) && columns_str.at(pi.second - 3) == "#-1#") - throw Exception("The column you identified for group by is invalid, where data is '#-1#'", ErrorCodes::LOGICAL_ERROR); + throw Exception("The column you identified for group by is invalid, where data is '#-1#'", ErrorCodes::BAD_ARGUMENTS); group_by_keys.emplace_back(columns_str.at(pi.second - 3)); } - this->data(place).add(pos, bitmap_ptr, join_keys, group_by_keys, union_num); + auto join_keys_ptr = make_shared(join_keys); + auto group_by_keys_ptr = make_shared(group_by_keys); + + this->data(place).add(pos, bitmap_ptr, join_keys_ptr, group_by_keys_ptr, union_num); } void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr __restrict rhs, Arena *) const override @@ -588,13 +609,12 @@ class AggregateFunctionBitMapJoin final : public IAggregateFunctionDataHelper &>(this->data(place).join_tuples_by_position); + auto & this_join_tuples = const_cast &>(this->data(place).join_tuples_by_position); if (this_join_tuples.size() < 2) return; - // throw Exception("AggregateFunction " + getName() + 
": at least one position has no data actually", ErrorCodes::LOGICAL_ERROR); sort(this_join_tuples.begin(), this_join_tuples.end(), - [](const PositionTuples & left, const PositionTuples & right) -> bool { + [](const JoinPositionTuples & left, const JoinPositionTuples & right) -> bool { return left.position < right.position; }); @@ -619,31 +639,34 @@ class AggregateFunctionBitMapJoin final : public IAggregateFunctionDataHelperfirst; - auto & left = gt->second; // left JoinTuplePtrs + auto & key = gt.first; + auto & left = gt.second; // left JoinTuplePtrs + + if (left.empty()) + continue; auto rjt = rhs_data.find(key); if (rjt == rhs_data.end()) // key is not matched { - switch (join_operation.joinOp) + switch (join_operation.join_op) { case JoinType::INNER : // INNER JOIN continue; case JoinType::LEFT : // ALL LEFT JOIN { - for (auto it = left.begin(); it != left.end(); ++it) + for (auto & it : left) { - Strings group_by_keys = std::get<1>(*(*it)); - result.put(StringsMapKey(std::move(group_by_keys)), {*it}); + auto group_by_keys = std::get<1>(*it); + result.put(StringsMapKey(std::move(group_by_keys)), {it}); } } continue; @@ -653,39 +676,39 @@ class AggregateFunctionBitMapJoin final : public IAggregateFunctionDataHelpersecond; // right JoinTuplePtrs - for (auto lt = left.begin(); lt != left.end(); ++lt) + for (auto & lt : left) { - for (auto rt = right.cbegin(); rt != right.cend(); ++rt) + for (const auto & rt : right) { - Strings join_keys; - Strings lt_group_bys, rt_group_bys; + JoinKeysPtr join_keys_ptr; + GroupByKeysPtr lt_group_bys, rt_group_bys; BitMapPtr lt_bitmap_ptr, rt_bitmap_ptr; - std::tie(join_keys, lt_group_bys, lt_bitmap_ptr) = *(*lt); - std::tie(std::ignore, rt_group_bys, rt_bitmap_ptr) = *(*rt); + std::tie(join_keys_ptr, lt_group_bys, lt_bitmap_ptr) = *lt; + std::tie(std::ignore, rt_group_bys, rt_bitmap_ptr) = *rt; Strings group_bys; for (size_t i = 0; i < group_by_keys_idx.size(); ++i) { if (group_by_keys_idx[i].first == 0xFF) // If no position identifier { - if (lt_group_bys.at(i) != "#-1#") // left subquery has a group by key - group_bys.emplace_back(std::move(lt_group_bys.at(i))); + if (lt_group_bys->at(i) != "#-1#") // left subquery has a group by key + group_bys.emplace_back(std::move(lt_group_bys->at(i))); else - group_bys.emplace_back(std::move(rt_group_bys.at(i))); + group_bys.emplace_back(std::move(rt_group_bys->at(i))); } else { if (group_by_keys_idx[i].first == 1) - group_bys.emplace_back(std::move(lt_group_bys.at(i))); + group_bys.emplace_back(std::move(lt_group_bys->at(i))); else if (group_by_keys_idx[i].first == 2) - group_bys.emplace_back(std::move(rt_group_bys.at(i))); + group_bys.emplace_back(std::move(rt_group_bys->at(i))); } } BitMap64 bitmap(*lt_bitmap_ptr); - switch (logic_operation.logicOp) + switch (logic_operation.logic_op) { case DB::LogicOperationType::NONE : { @@ -712,10 +735,12 @@ class AggregateFunctionBitMapJoin final : public IAggregateFunctionDataHelper(std::move(bitmap)))}; + auto group_by_ptr = make_shared(group_bys); - result.put(std::move(StringsMapKey(std::move(group_bys))), + JoinTuple tmp_tuple{std::make_tuple(join_keys_ptr, group_by_ptr, + std::make_shared(std::move(bitmap)))}; + + result.put(std::move(StringsMapKey(std::move(group_by_ptr))), std::move(JoinTuplePtrs{std::make_shared(tmp_tuple)})); } } @@ -723,34 +748,44 @@ class AggregateFunctionBitMapJoin final : public IAggregateFunctionDataHelper threadPool = std::make_unique(thread_num_); + std::unique_ptr thread_pool = std::make_unique(thread_num_); for (size_t i = 0; i < 
thread_num_; ++i)
 {
-     auto joinAndFunc = std::bind(runJoin, i);
-     threadPool->scheduleOrThrowOnError(joinAndFunc);
+     auto join_and_func = [i, &run_join]() { run_join(i); };
+     thread_pool->scheduleOrThrowOnError(join_and_func);
 }
-     threadPool->wait();
+     thread_pool->wait();
 }

-    KVSharded doJoinWithLogicOperation(std::vector<PositionTuples> & this_join_tuples) const
+    KVSharded doJoinWithLogicOperation(std::vector<JoinPositionTuples> & this_join_tuples) const
 {
     HashedStringsKeyTuples & left_join_tuples = this_join_tuples.at(0).tuples;
     HashedStringsKeyTuples & right_join_tuples = this_join_tuples.at(1).tuples;

     // split the map into several vectors
-    std::vector<Pairs> pair_vector_buckets(thread_num);
+    std::vector<Pairs> pair_vector_buckets;
     size_t idx = 0;
-    for (auto key_tuple_it = left_join_tuples.begin(); key_tuple_it != left_join_tuples.end(); ++key_tuple_it)
+    auto key_tuple_it = left_join_tuples.begin();
+    for (; key_tuple_it != left_join_tuples.end(); ++key_tuple_it)
+    {
+        Pairs p{{key_tuple_it->first, key_tuple_it->second}};
+        pair_vector_buckets.emplace_back(p);
+        ++idx;
+    }
+
+    /// process the remaining data
+    for (; key_tuple_it != left_join_tuples.end(); ++key_tuple_it)
     {
-        pair_vector_buckets.at(idx % thread_num).emplace_back(std::move(*key_tuple_it));
-        left_join_tuples.erase(key_tuple_it);
-        idx++;
+        pair_vector_buckets.at(idx % thread_num).emplace_back(key_tuple_it->first, key_tuple_it->second);
+        ++idx;
     }
+    left_join_tuples.clear();

     KVSharded result(128);
-    joinMultiThreads(result, pair_vector_buckets, right_join_tuples, thread_num);
+    size_t actual_thread_num = std::min(thread_num, pair_vector_buckets.size());
+    joinMultiThreads(result, pair_vector_buckets, right_join_tuples, actual_thread_num);

     return result;
 }
diff --git a/src/AggregateFunctions/AggregateFunctionBitMapJoinAndCard.cpp b/src/AggregateFunctions/AggregateFunctionBitMapJoinAndCard.cpp
index c3fa425e78a..7ffdca95438 100644
--- a/src/AggregateFunctions/AggregateFunctionBitMapJoinAndCard.cpp
+++ b/src/AggregateFunctions/AggregateFunctionBitMapJoinAndCard.cpp
@@ -21,19 +21,26 @@
 namespace DB
 {
+namespace ErrorCodes
+{
+    extern const int BAD_ARGUMENTS;
+    extern const int ILLEGAL_TYPE_OF_ARGUMENT;
+    extern const int NUMBER_OF_ARGUMENTS_DOESNT_MATCH;
+}
+
 namespace
 {
 AggregateFunctionPtr createAggregateFunctionBitMapJoinAndCard(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *)
 {
     if (argument_types.size() < 4)
-        throw Exception("AggregateFunction " + name + " needs at least four arguments", ErrorCodes::NOT_IMPLEMENTED);
+        throw Exception("AggregateFunction " + name + " needs at least four arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

     Int32 union_num = 0;
     UInt64 thread_num = 0;
     UInt64 limit_bitmap_number = 0;
-    if (parameters.size() == 0)
-        throw Exception("AggregateFunction " + name + " needs two parameters (join_num, thread_num)", ErrorCodes::NOT_IMPLEMENTED);
+    if (parameters.empty())
+        throw Exception("AggregateFunction " + name + " needs two parameters (join_num, thread_num)", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
     else
     {
         union_num = static_cast<Int32>(parameters[0].safeGet<UInt64>());
@@ -44,7 +51,7 @@ AggregateFunctionPtr createAggregateFunctionBitMapJoinAndCard(const std::string
 }
     if (union_num == 0 || union_num > 8) // a continuous 8-way join is meaningless; 1 join is mostly used.
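The idx % thread_num indexing above is the round-robin way to deal hash-map entries across a fixed number of worker buckets, so each thread receives a near-equal share of the left side before the pool is sized to min(thread_num, buckets). A standalone sketch of that distribution, independent of the patch's types:

    #include <cstddef>
    #include <unordered_map>
    #include <utility>
    #include <vector>

    // Sketch: deal (key, value) pairs round-robin into num_buckets buckets so
    // each worker thread receives a near-equal share of the map.
    template <typename K, typename V>
    std::vector<std::vector<std::pair<K, V>>>
    bucketize(const std::unordered_map<K, V> & input, size_t num_buckets)
    {
        if (num_buckets == 0)
            num_buckets = 1; // guard against a zero-thread configuration
        std::vector<std::vector<std::pair<K, V>>> buckets(num_buckets);
        size_t idx = 0;
        for (const auto & kv : input)
            buckets[idx++ % num_buckets].emplace_back(kv.first, kv.second);
        return buckets;
    }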
- throw Exception("AggregateFunction " + name + " join_number is in range [1,8]", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " join_number is in range [1,8]", ErrorCodes::BAD_ARGUMENTS); if (thread_num == 0) thread_num = 16; if (thread_num > 48) // Several Storage-C machine only have 48 cores, besides 48 threads is large enough @@ -53,23 +60,23 @@ AggregateFunctionPtr createAggregateFunctionBitMapJoinAndCard(const std::string limit_bitmap_number = 100000000; // 100 million if (!isBitmap64(argument_types[0])) - throw Exception("AggregateFunction " + name + " needs BitMap64 type for its first argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " needs BitMap64 type for its first argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (!WhichDataType(argument_types[1]).isUInt8()) - throw Exception("AggregateFunction " + name + " needs Int type for its second argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " needs Int type for its second argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (!WhichDataType(argument_types[2]).isInt32()) - throw Exception("AggregateFunction " + name + " needs Int32 type for its third argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " needs Int32 type for its third argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - DataTypePtr attr_val_type = argument_types[3]; + const DataTypePtr& attr_val_type = argument_types[3]; if (!isString(*attr_val_type)) - throw Exception("AggregateFunction " + name + " needs String type for its fourth argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " needs String type for its fourth argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); for (size_t i = 4; i < argument_types.size(); ++i) { if (!isString(argument_types[i])) - throw Exception("AggregateFunction " + name + " needs String type for args...", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " needs String type for args...", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } return std::make_shared(argument_types, union_num, thread_num, limit_bitmap_number); @@ -78,13 +85,13 @@ AggregateFunctionPtr createAggregateFunctionBitMapJoinAndCard(const std::string AggregateFunctionPtr createAggregateFunctionBitMapJoinAndCard2(const std::string & name, const DataTypes & argument_types, const Array & parameters, const Settings *) { if (argument_types.size() < 4) - throw Exception("AggregateFunction " + name + " needs at least four arguments", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " needs at least four arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); Int32 union_num = 0; UInt64 thread_num = 0; UInt64 limit_bitmap_number = 0; - if (parameters.size() == 0) - throw Exception("AggregateFunction " + name + " needs two parameters (join_num, thread_num)", ErrorCodes::NOT_IMPLEMENTED); + if (parameters.empty()) + throw Exception("AggregateFunction " + name + " needs two parameters (join_num, thread_num)", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); else { union_num = static_cast(parameters[0].safeGet()); @@ -95,7 +102,7 @@ AggregateFunctionPtr createAggregateFunctionBitMapJoinAndCard2(const std::string } if (union_num == 0 || union_num > 8) // a continuos 8 join is meaningless, 1 join is mostly used. 
- throw Exception("AggregateFunction " + name + " join_number is in range [1,8]", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " join_number is in range [1,8]", ErrorCodes::BAD_ARGUMENTS); if (thread_num == 0) thread_num = 16; if (thread_num > 48) // Several Storage-C machine only have 48 cores, and 48 threads is large enough @@ -104,23 +111,23 @@ AggregateFunctionPtr createAggregateFunctionBitMapJoinAndCard2(const std::string limit_bitmap_number = 100000000; // 100 million if (!isBitmap64(argument_types[0])) - throw Exception("AggregateFunction " + name + " needs BitMap64 type for its first argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " needs BitMap64 type for its first argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (!WhichDataType(argument_types[1]).isUInt8()) - throw Exception("AggregateFunction " + name + " needs Int type for its second argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " needs Int type for its second argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); if (!WhichDataType(argument_types[2]).isInt32()) - throw Exception("AggregateFunction " + name + " needs Int32 type for its third argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " needs Int32 type for its third argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - DataTypePtr attr_val_type = argument_types[3]; + const DataTypePtr& attr_val_type = argument_types[3]; if (!isString(*attr_val_type)) - throw Exception("AggregateFunction " + name + " needs String type for its fourth argument", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " needs String type for its fourth argument", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); for (size_t i = 4; i < argument_types.size(); ++i) { if (!isString(argument_types[i])) - throw Exception("AggregateFunction " + name + " needs String type for args...", ErrorCodes::NOT_IMPLEMENTED); + throw Exception("AggregateFunction " + name + " needs String type for args...", ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); } return std::make_shared(argument_types, union_num, thread_num, limit_bitmap_number); diff --git a/src/AggregateFunctions/AggregateFunctionBitMapJoinAndCard.h b/src/AggregateFunctions/AggregateFunctionBitMapJoinAndCard.h index 9f74cc911ed..0a6fb63869e 100644 --- a/src/AggregateFunctions/AggregateFunctionBitMapJoinAndCard.h +++ b/src/AggregateFunctions/AggregateFunctionBitMapJoinAndCard.h @@ -52,7 +52,6 @@ namespace DB namespace ErrorCodes { - extern const int LOGICAL_ERROR; extern const int NUMBER_OF_ARGUMENTS_DOES_NOT_MATCH; extern const int TOO_MANY_ROWS; } @@ -68,7 +67,7 @@ struct PositionTuples JoinTuplePtrs tuples; PositionTuples() = default; - PositionTuples(Int32 pos_):position(pos_) {} + explicit PositionTuples(Int32 pos_):position(pos_) {} PositionTuples(Int32 pos_, JoinTuplePtrs && tuples_) : position(pos_), tuples(std::move(tuples_)) {} void addTuple(const JoinTuple & tup) @@ -89,7 +88,7 @@ struct JoinTupleMapKey DB::String attr_val; DB::Strings args; - JoinTupleMapKey() { } + JoinTupleMapKey() = default; JoinTupleMapKey(const Int32 pos_, const DB::String & attr_val_, const DB::Strings & args_) : pos(pos_), attr_val(attr_val_), args(args_) { } bool operator==(const JoinTupleMapKey & rhs) const @@ -104,7 +103,7 @@ struct HashJoinTupleMapKey { size_t res = std::hash()(key.pos); res ^= std::hash()(key.attr_val); - for (auto a : key.args) + for (const auto& a : key.args) { res ^= std::hash()(a); } @@ 
@@ -121,7 +120,7 @@ struct AggregateFunctionBitMapJoinAndCardData
     void add(const BitMapPtr & bitmap_ptr, const Int32 & pos, const JoinKey & join_key, const String & attr_val, const Strings & args, Int32 union_num)
     {
         if (pos <= 0 || pos > union_num+1)
-            throw Exception("AggregateFunction BitMapJoinAndCard: Wrong position value. Position starts from 1 and ends with union_num+1 ", DB::ErrorCodes::LOGICAL_ERROR);
+            throw Exception("AggregateFunction BitMapJoinAndCard: Wrong position value. Position starts from 1 and ends with union_num+1 ", DB::ErrorCodes::BAD_ARGUMENTS);

         Strings attr_vals(union_num+1);
         attr_vals[pos-1] = attr_val;
@@ -140,15 +139,15 @@ struct AggregateFunctionBitMapJoinAndCardData
     void merge(const AggregateFunctionBitMapJoinAndCardData & rhs)
     {
-        for (auto rt = rhs.join_tuple_map.begin(); rt != rhs.join_tuple_map.end(); ++rt)
+        for (const auto & rt : rhs.join_tuple_map)
         {
-            auto it = join_tuple_map.find(rt->first);
+            auto it = join_tuple_map.find(rt.first);
             if (it == join_tuple_map.end())
-                join_tuple_map.emplace(std::move(rt->first), std::move(rt->second));
+                join_tuple_map.emplace(std::move(rt.first), std::move(rt.second));
             else
             {
-                *std::get<0>((it->second)) |= *std::get<0>((rt->second));
+                *std::get<0>((it->second)) |= *std::get<0>((rt.second));
             }
         }
     }
@@ -157,14 +156,14 @@
     {
         size_t map_size = join_tuple_map.size();
         writeVarUInt(map_size, buf);
-        for (auto it = join_tuple_map.begin(); it != join_tuple_map.end(); ++it)
+        for (const auto & it : join_tuple_map)
         {
             BitMapPtr bitmap_ptr;
             Int32 pos;
             JoinKey joinkey;
             Strings attr_vals;
             Strings args;
-            std::tie(bitmap_ptr, pos, joinkey, attr_vals, args) = it->second;
+            std::tie(bitmap_ptr, pos, joinkey, attr_vals, args) = it.second;

             size_t bytes_size = (*bitmap_ptr).getSizeInBytes();
             writeVarUInt(bytes_size, buf);
@@ -176,13 +175,13 @@
             writeVarInt(joinkey, buf);

             writeVarUInt(attr_vals.size(), buf);
-            for (auto str : attr_vals)
+            for (const auto& str : attr_vals)
             {
                 writeString(str, buf);
             }

             writeVarUInt((args).size(), buf);
-            for (auto a : args)
+            for (const auto& a : args)
             {
                 writeString(a, buf);
             }
@@ -256,7 +255,7 @@ class AggregateFunctionBitMapJoinAndCard final : public IAggregateFunctionDataHe
         auto bitmap_ptr = std::make_shared<BitMap64>(std::move(const_cast<BitMap64 &>(bitmap)));

         const auto & col_position = static_cast<const ColumnUInt8 &>(*columns[1]);
-        const Int32 & positionInUnion = static_cast<Int32>(col_position.getElement(row_num));
+        const Int32 & position_in_union = static_cast<Int32>(col_position.getElement(row_num));

         const auto & col_joinkey = static_cast<const ColumnInt32 &>(*columns[2]);
         const JoinKey & join_key = col_joinkey.getElement(row_num);
@@ -271,7 +270,7 @@ class AggregateFunctionBitMapJoinAndCard final : public IAggregateFunctionDataHe
             args.emplace_back(col_arg.getDataAt(row_num).toString());
         }

-        this->data(place).add(bitmap_ptr, positionInUnion, join_key, attr_val, args, union_num);
+        this->data(place).add(bitmap_ptr, position_in_union, join_key, attr_val, args, union_num);
     }

     void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr __restrict rhs, Arena *) const override
@@ -304,20 +303,20 @@ class AggregateFunctionBitMapJoinAndCard final : public IAggregateFunctionDataHe
     void insertResultInto(AggregateDataPtr __restrict place, IColumn & to, Arena *) const override
     {
         auto & tuples_map = this->data(place).join_tuple_map;
-        std::vector<PositionTuples> tuplesByPosition;
+        std::vector<PositionTuples> tuples_by_position;

         for (size_t i = 0; i < union_num + 1; ++i)
         {
-            tuplesByPosition.emplace_back(i, JoinTuplePtrs());
+            tuples_by_position.emplace_back(i, JoinTuplePtrs());
         }

         //partition all input tuples by position
-        for (auto p = tuples_map.begin(); p != tuples_map.end(); ++p)
+        for (auto & p : tuples_map)
         {
-            Int32 pos = p->first.pos;
-            tuplesByPosition.at(pos-1).addTuple(p->second);
+            Int32 pos = p.first.pos;
+            tuples_by_position.at(pos-1).addTuple(p.second);
         }

-        const auto res = calcJoin(tuplesByPosition);
+        const auto res = calcJoin(tuples_by_position);

         auto & col = static_cast<ColumnArray &>(to);
         auto &col_offsets = static_cast<ColumnArray::ColumnOffsets &>(col.getOffsetsColumn());
@@ -329,16 +328,16 @@ class AggregateFunctionBitMapJoinAndCard final : public IAggregateFunctionDataHe

         size_t args_num = arguments_num - 4;

-        for (auto & p : res)
+        for (const auto & p : res)
         {
-            for (auto rt = p.begin(); rt != p.end(); ++rt)
+            for (const auto & rt : p)
             {
                 UInt64 bitmap_cardinality;
                 JoinKey joinkey;
                 Strings attr_vals;
                 Strings args;
-                std::tie(bitmap_cardinality, std::ignore, joinkey, attr_vals, args) = std::move(*rt);
+                std::tie(bitmap_cardinality, std::ignore, joinkey, attr_vals, args) = std::move(rt);

                 col_bitmap_card.insert(bitmap_cardinality);
                 col_joinkey.insert(joinkey);
@@ -358,24 +357,24 @@ class AggregateFunctionBitMapJoinAndCard final : public IAggregateFunctionDataHe
     }

 private:
-    std::vector<std::vector<JoinTuple>>
-    calcJoinMultiThreads(std::shared_ptr<std::vector<JoinTuplePtrs>> & res_ptr, const std::shared_ptr<PositionTuples> & rhs, size_t thread_num_, const bool is_last_join) const
+    static std::vector<std::vector<JoinTuple>>
+    calcJoinMultiThreads(std::shared_ptr<std::vector<JoinTuplePtrs>> & res_ptr, const std::shared_ptr<PositionTuples> & rhs, size_t thread_num_, const bool is_last_join)
     {
         std::vector<JoinTuplePtrs> intermediate_tuples_bucktes(thread_num_, JoinTuplePtrs()); // It store the intermediate JOIN result, and it's used for next JOIN
         std::vector<std::vector<JoinTuple>> res_tuples_buckets(thread_num_, std::vector<JoinTuple>()); // It store the final result of the last JOIN

         ThreadGroupStatusPtr thread_group = CurrentThread::getGroup();
-        auto runJoinAndCard = [&] (size_t index)
+        auto run_join_and_card = [&] (size_t index)
         {
-            setThreadName("bitmapJoinAndCard");
+            setThreadName("JoinAndCard");
             CurrentThread::attachToIfDetached(thread_group);
             JoinTuplePtrs tuples_tmp;
             std::vector<JoinTuple> res_tuples_in_a_thread;
             auto & left = res_ptr->at(index);

-            for (auto rt = rhs->tuples.begin(); rt != rhs->tuples.end(); ++rt)
+            for (auto & rt : rhs->tuples)
             {
-                for (auto lt = left.begin(); lt != left.end(); ++lt)
+                for (auto & lt : left)
                 {
                     BitMapPtr bitmap_ptr, rt_bitmap_ptr;
                     Int32 pos, rt_pos;
@@ -383,8 +382,8 @@ class AggregateFunctionBitMapJoinAndCard final : public IAggregateFunctionDataHe
                     Strings attr_vals, rt_attr_vals;
                     Strings args, rt_args;

-                    std::tie(bitmap_ptr, pos, joinkey, attr_vals, args) = *(*lt);
-                    std::tie(rt_bitmap_ptr, rt_pos, std::ignore, rt_attr_vals, rt_args) = *(*rt);
+                    std::tie(bitmap_ptr, pos, joinkey, attr_vals, args) = *lt;
+                    std::tie(rt_bitmap_ptr, rt_pos, std::ignore, rt_attr_vals, rt_args) = *rt;

                     BitMap64 bitmap(*bitmap_ptr);
                     bitmap &= *rt_bitmap_ptr;
@@ -416,15 +415,15 @@ class AggregateFunctionBitMapJoinAndCard final : public IAggregateFunctionDataHe
             res_tuples_buckets[index] = std::move(res_tuples_in_a_thread);
         };

-        std::unique_ptr<ThreadPool> threadPool = std::make_unique<ThreadPool>(thread_num_);
+        std::unique_ptr<ThreadPool> thread_pool = std::make_unique<ThreadPool>(thread_num_);

         for (size_t i = 0; i < thread_num_; ++i)
         {
-            auto joinAndCardFunc = std::bind(runJoinAndCard, i);
-            threadPool->scheduleOrThrowOnError(joinAndCardFunc);
+            auto join_and_card_func = [&run_join_and_card, i]() { run_join_and_card(i); };
+            thread_pool->scheduleOrThrowOnError(join_and_card_func);
         }
-        threadPool->wait();
+        thread_pool->wait();

         res_ptr = std::make_shared<std::vector<JoinTuplePtrs>>(std::move(intermediate_tuples_bucktes)); // For intermediate JOIN, a empty object returned,
@@ -436,7 +435,7 @@ class AggregateFunctionBitMapJoinAndCard final : public IAggregateFunctionDataHe
     {
         //partition the entire position tuples into several parts
         if (position_tuples.empty())
-            throw Exception("BitMapJoinAndCard::calcJoin: empty input data!", DB::ErrorCodes::LOGICAL_ERROR);
+            throw Exception("BitMapJoinAndCard::calcJoin: empty input data!", DB::ErrorCodes::BAD_ARGUMENTS);

         // look up for the largest parts
         size_t max_size = 0;
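
A note on the std::bind-to-lambda rewrite in this file: std::bind(runJoinAndCard, i) already copied the index, so the change is behavior-preserving; the lambda just makes the capture semantics explicit (worker by reference, loop index by value, so each scheduled task keeps its own bucket index). A minimal, self-contained sketch of the pattern — plain std::thread stands in for the codebase's ThreadPool here, and run_task is an illustrative name:

    #include <thread>
    #include <vector>
    #include <cstdio>

    int main()
    {
        const size_t thread_num = 4;
        std::vector<int> buckets(thread_num, 0);

        // Worker body, captured by reference below (like run_join_and_card).
        auto run_task = [&buckets](size_t index) { buckets[index] = static_cast<int>(index) * 10; };

        std::vector<std::thread> pool;
        for (size_t i = 0; i < thread_num; ++i)
            // Capture the loop index BY VALUE: each task must keep its own i,
            // which is what the lambda replacing std::bind guarantees.
            pool.emplace_back([&run_task, i]() { run_task(i); });

        for (auto & t : pool)
            t.join();

        for (size_t i = 0; i < thread_num; ++i)
            std::printf("bucket %zu = %d\n", i, buckets[i]);
        return 0;
    }

Capturing i by reference instead would race with the loop increment; value capture is the safe default for per-task indices.
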
diff --git a/src/AggregateFunctions/AggregateFunctionBitMapJoinAndCard2.h b/src/AggregateFunctions/AggregateFunctionBitMapJoinAndCard2.h
index 96b87d0d8e7..3f0620ad8d6 100644
--- a/src/AggregateFunctions/AggregateFunctionBitMapJoinAndCard2.h
+++ b/src/AggregateFunctions/AggregateFunctionBitMapJoinAndCard2.h
@@ -19,6 +19,7 @@
 #include
 #include
+#include "Common/formatIPv6.h"
 #include
 #include
 #include
@@ -50,7 +51,6 @@ namespace DB
 namespace ErrorCodes
 {
-    extern const int LOGICAL_ERROR;
     extern const int NUMBER_OF_ARGUMENTS_DOES_NOT_MATCH;
 }
@@ -65,7 +65,7 @@ struct AggregateFunctionBitMapJoinAndCard2Data
     void add(const BitMapPtr & bitmap_ptr, const Int32 & pos, const JoinKey & join_key, const String & attr_val, const Strings & args, Int32 union_num)
     {
         if (pos <= 0 || pos > union_num+1)
-            throw Exception("AggregateFunction BitMapJoinAndCard2: Wrong position value. Position starts from 1 and ends with join_num+1, please check", DB::ErrorCodes::LOGICAL_ERROR);
+            throw Exception("AggregateFunction BitMapJoinAndCard2: Wrong position value. Position starts from 1 and ends with join_num+1, please check", DB::ErrorCodes::BAD_ARGUMENTS);

         Strings attr_vals(union_num+1);
         attr_vals[pos-1] = attr_val;
@@ -84,14 +84,14 @@ struct AggregateFunctionBitMapJoinAndCard2Data
         size_t input_tuples_size = input_tuples.size();
         writeVarUInt(input_tuples_size, buf);
-        for (auto it = input_tuples.begin(); it != input_tuples.end(); ++it)
+        for (const auto & input_tuple : input_tuples)
         {
             BitMapPtr bitmap_ptr;
             Int32 pos;
             JoinKey joinkey;
             Strings attr_vals;
             Strings args;
-            std::tie(bitmap_ptr, pos, joinkey, attr_vals, args) = *it;
+            std::tie(bitmap_ptr, pos, joinkey, attr_vals, args) = input_tuple;

             size_t bytes_size = (*bitmap_ptr).getSizeInBytes();
             writeVarUInt(bytes_size, buf);
@@ -103,13 +103,13 @@ struct AggregateFunctionBitMapJoinAndCard2Data
             writeVarInt(joinkey, buf);

             writeVarUInt(attr_vals.size(), buf);
-            for (auto str: attr_vals)
+            for (const auto& str: attr_vals)
             {
                 writeString(str, buf);
             }

             writeVarUInt((args).size(), buf);
-            for (auto a: args)
+            for (const auto& a: args)
             {
                 writeString(a, buf);
             }
@@ -183,7 +183,7 @@ class AggregateFunctionBitMapJoinAndCard2 final : public IAggregateFunctionDataH
         auto bitmap_ptr = std::make_shared<BitMap64>(std::move(const_cast<BitMap64 &>(bitmap)));

         const auto & col_position = static_cast<const ColumnUInt8 &>(*columns[1]);
-        const Int32 & positionInUnion = static_cast<Int32>(col_position.getElement(row_num));
+        const Int32 & position_in_union = static_cast<Int32>(col_position.getElement(row_num));

         const auto & col_joinkey = static_cast<const ColumnInt32 &>(*columns[2]);
         const JoinKey & join_key = col_joinkey.getElement(row_num);
@@ -198,7 +198,7 @@ class AggregateFunctionBitMapJoinAndCard2 final : public IAggregateFunctionDataH
             args.emplace_back(col_arg.getDataAt(row_num).toString());
         }

-        this->data(place).add(bitmap_ptr, positionInUnion, join_key, attr_val, args, union_num);
+        this->data(place).add(bitmap_ptr, position_in_union, join_key, attr_val, args, union_num);
     }

     void merge(AggregateDataPtr __restrict place, ConstAggregateDataPtr __restrict rhs, Arena *) const override
@@ -232,20 +232,20 @@ class AggregateFunctionBitMapJoinAndCard2 final : public IAggregateFunctionDataH
     {
         auto & input_tuples = this->data(place).input_tuples;
-        std::vector<PositionTuples> tuplesByPosition;
+        std::vector<PositionTuples> tuples_by_position;

         for (size_t i = 0; i < union_num + 1; ++i)
         {
-            tuplesByPosition.emplace_back(i, JoinTuplePtrs());
+            tuples_by_position.emplace_back(i, JoinTuplePtrs());
         }

         //partition all input tuples by position
         for (auto & p : input_tuples)
         {
             Int32 pos = std::get<1>(p);
-            tuplesByPosition.at(pos-1).addTuple(p);
+            tuples_by_position.at(pos-1).addTuple(p);
         }

-        const auto res = calcJoin(tuplesByPosition);
+        const auto res = calcJoin(tuples_by_position);

         auto & col = static_cast<ColumnArray &>(to);
         auto &col_offsets = static_cast<ColumnArray::ColumnOffsets &>(col.getOffsetsColumn());
@@ -257,16 +257,16 @@ class AggregateFunctionBitMapJoinAndCard2 final : public IAggregateFunctionDataH

         size_t args_num = arguments_num - 4;

-        for (auto & p : res)
+        for (const auto & p : res)
         {
-            for (auto rt = p.begin(); rt != p.end(); ++rt)
+            for (const auto & rt : p)
             {
                 UInt64 bitmap_cardinality;
                 JoinKey joinkey;
                 Strings attr_vals;
                 Strings args;
-                std::tie(bitmap_cardinality, std::ignore, joinkey, attr_vals, args) = std::move(*rt);
+                std::tie(bitmap_cardinality, std::ignore, joinkey, attr_vals, args) = std::move(rt);

                 col_bitmap_card.insert(bitmap_cardinality);
                 col_joinkey.insert(joinkey);
@@ -293,17 +293,17 @@ class AggregateFunctionBitMapJoinAndCard2 final : public IAggregateFunctionDataH
         std::vector<std::vector<JoinTuple>> res_tuples_buckets(thread_num_, std::vector<JoinTuple>()); // It store the final result of the last JOIN

         ThreadGroupStatusPtr thread_group = CurrentThread::getGroup();
-        auto runJoinAndCard = [&] (size_t index)
+        auto run_join_and_card = [&] (size_t index)
         {
-            setThreadName("bitmapJoinAndCard");
+            setThreadName("JoinAndCard2");
             CurrentThread::attachToIfDetached(thread_group);
             JoinTuplePtrs tuples_tmp;
             std::vector<JoinTuple> res_tuples_in_a_thread;
             auto & left = res_ptr->at(index);

-            for (auto rt = rhs->tuples.begin(); rt != rhs->tuples.end(); ++rt)
+            for (auto & rt : rhs->tuples)
             {
-                for (auto lt = left.begin(); lt != left.end(); ++lt)
+                for (auto & lt : left)
                 {
                     BitMapPtr bitmap_ptr, rt_bitmap_ptr;
                     Int32 pos, rt_pos;
@@ -311,8 +311,8 @@ class AggregateFunctionBitMapJoinAndCard2 final : public IAggregateFunctionDataH
                     Strings attr_vals, rt_attr_vals;
                     Strings args, rt_args;

-                    std::tie(bitmap_ptr, pos, joinkey, attr_vals, args) = *(*lt);
-                    std::tie(rt_bitmap_ptr, rt_pos, std::ignore, rt_attr_vals, rt_args) = *(*rt);
+                    std::tie(bitmap_ptr, pos, joinkey, attr_vals, args) = *lt;
+                    std::tie(rt_bitmap_ptr, rt_pos, std::ignore, rt_attr_vals, rt_args) = *rt;

                     BitMap64 bitmap(*bitmap_ptr);
                     bitmap &= *rt_bitmap_ptr;
@@ -344,15 +344,15 @@ class AggregateFunctionBitMapJoinAndCard2 final : public IAggregateFunctionDataH
             res_tuples_buckets[index] = std::move(res_tuples_in_a_thread);
         };

-        std::unique_ptr<ThreadPool> threadPool = std::make_unique<ThreadPool>(thread_num_);
+        std::unique_ptr<ThreadPool> thread_pool = std::make_unique<ThreadPool>(thread_num_);

         for (size_t i = 0; i < thread_num; ++i)
         {
-            auto joinAndCardFunc = std::bind(runJoinAndCard, i);
-            threadPool->scheduleOrThrowOnError(joinAndCardFunc);
+            auto join_and_card_func = [&run_join_and_card, i]() { run_join_and_card(i); };
+            thread_pool->scheduleOrThrowOnError(join_and_card_func);
         }
-        threadPool->wait();
+        thread_pool->wait();

         res_ptr = std::make_shared<std::vector<JoinTuplePtrs>>(std::move(intermediate_tuples_bucktes)); // For intermediate JOIN, a empty object returned,
@@ -364,7 +364,7 @@ class AggregateFunctionBitMapJoinAndCard2 final : public IAggregateFunctionDataH
     {
         //partition the entire position tuples into several parts
         if (position_tuples.empty())
-            throw Exception("BitMapJoinAndCard::calcJoin: empty input data!", DB::ErrorCodes::LOGICAL_ERROR);
+            throw Exception("BitMapJoinAndCard::calcJoin: empty input data!", DB::ErrorCodes::BAD_ARGUMENTS);

         //look up for the largest parts
         size_t max_size = 0;
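
One non-cosmetic detail in these two headers is the setThreadName change: on Linux a thread name is capped at 15 visible characters (16 bytes including the terminating NUL), and "bitmapJoinAndCard" is 17 characters, so it cannot be applied, while the shortened "JoinAndCard"/"JoinAndCard2" fit. That limit is presumably what motivated the rename. A small Linux-only sketch of the limit, assuming glibc's pthread_setname_np (compile with -pthread):

    #include <pthread.h>
    #include <cstdio>
    #include <cstring>

    int main()
    {
        // Linux caps thread names at 15 chars (16 bytes with the NUL).
        // "bitmapJoinAndCard" is 17 chars, so this call fails with ERANGE;
        // the shortened "JoinAndCard2" succeeds.
        int rc_long = pthread_setname_np(pthread_self(), "bitmapJoinAndCard");
        int rc_short = pthread_setname_np(pthread_self(), "JoinAndCard2");
        std::printf("long name rc=%d (%s), short name rc=%d\n",
                    rc_long, std::strerror(rc_long), rc_short);
        return 0;
    }
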
diff --git a/src/AggregateFunctions/AggregateFunctionBitmapColumnDiff.cpp b/src/AggregateFunctions/AggregateFunctionBitmapColumnDiff.cpp
index dad1c81af24..af43a47bd32 100644
--- a/src/AggregateFunctions/AggregateFunctionBitmapColumnDiff.cpp
+++ b/src/AggregateFunctions/AggregateFunctionBitmapColumnDiff.cpp
@@ -28,33 +28,33 @@ AggregateFunctionPtr createAggregateFunctionBitmapColumnDiff(const std::string &
     if (argument_types.size() != 2)
         throw Exception("AggregateFunction " + name + " need only two arguments", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

-    UInt64 return_type_{0}, diff_step_{1};
+    UInt64 return_type{0}, diff_step{1};
     String diff_direction_str{"forward"};
     if (!parameters.empty() && parameters.size() != 3)
         throw Exception("AggregateFunction " + name + " need three parameters", ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

     if (!parameters.empty())
     {
-        parameters[0].tryGet(return_type_);
+        parameters[0].tryGet(return_type);
         parameters[1].tryGet(diff_direction_str);
-        parameters[2].tryGet(diff_step_);
+        parameters[2].tryGet(diff_step);
     }

     if (!isBitmap64(argument_types[1]))
         throw Exception("AggregateFunction " + name + " need BitMap64 type for its second argument", ErrorCodes::NOT_IMPLEMENTED);

-    DataTypePtr data_type_0 = argument_types[0];
+    const DataTypePtr& data_type_0 = argument_types[0];
     if (!WhichDataType(data_type_0).isDate() && !WhichDataType(data_type_0).isUInt()
         && !WhichDataType(data_type_0).isInt() && !WhichDataType(data_type_0).isString())
         throw Exception("AggregateFunction " + name + " need Date/Int/UInt/String type for its first argument, for order sorting.", ErrorCodes::NOT_IMPLEMENTED);

     if (WhichDataType(data_type_0).isDate())
-        return std::make_shared<AggregateFunctionBitMapColumnDiff<DataTypeDate::FieldType>>(argument_types, return_type_, diff_direction_str, diff_step_, true);
+        return std::make_shared<AggregateFunctionBitMapColumnDiff<DataTypeDate::FieldType>>(argument_types, return_type, diff_direction_str, diff_step, true);
     else if (WhichDataType(data_type_0).isString())
-        return std::make_shared<AggregateFunctionBitMapColumnDiff<String>>(argument_types, return_type_, diff_direction_str, diff_step_);
+        return std::make_shared<AggregateFunctionBitMapColumnDiff<String>>(argument_types, return_type, diff_direction_str, diff_step);
     else
     {
         AggregateFunctionPtr res;
-        res.reset(createWithNumericType<AggregateFunctionBitMapColumnDiff>(*data_type_0, argument_types, return_type_, diff_direction_str, diff_step_));
+        res.reset(createWithNumericType<AggregateFunctionBitMapColumnDiff>(*data_type_0, argument_types, return_type, diff_direction_str, diff_step));
         return res;
     }
 }
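
The Date/String/numeric branching above ends in createWithNumericType, the factory helper that maps a runtime column type onto a template instantiation of the aggregate function. A simplified model of that dispatch — illustrative only; the real helper in AggregateFunctions/Helpers.h expands a macro list over every numeric type, and BitmapDiff/Tag below are hypothetical stand-ins:

    #include <cstdio>
    #include <memory>

    // Illustrative stand-in for the factory's numeric dispatch: map a
    // runtime type tag onto a template instantiation.
    enum class Tag { Int32, UInt64 };

    struct Base { virtual ~Base() = default; virtual void name() const = 0; };

    template <typename T>
    struct BitmapDiff : Base // stand-in for AggregateFunctionBitMapColumnDiff<T>
    {
        void name() const override { std::printf("instantiated with %zu-byte key\n", sizeof(T)); }
    };

    template <template <typename> class F>
    std::unique_ptr<Base> createWithNumericType(Tag tag)
    {
        switch (tag)
        {
            case Tag::Int32:  return std::make_unique<F<int>>();
            case Tag::UInt64: return std::make_unique<F<unsigned long long>>();
        }
        return nullptr;
    }

    int main()
    {
        createWithNumericType<BitmapDiff>(Tag::Int32)->name();
        createWithNumericType<BitmapDiff>(Tag::UInt64)->name();
    }

The payoff of this pattern is that the per-type template code is written once and the runtime branch is confined to the factory.
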
diff --git a/src/AggregateFunctions/AggregateFunctionBitmapColumnDiff.h b/src/AggregateFunctions/AggregateFunctionBitmapColumnDiff.h
index f6077abb366..18d36812f4e 100644
--- a/src/AggregateFunctions/AggregateFunctionBitmapColumnDiff.h
+++ b/src/AggregateFunctions/AggregateFunctionBitmapColumnDiff.h
@@ -35,7 +35,6 @@ namespace DB
 {
 namespace ErrorCodes
 {
-    extern const int LOGICAL_ERROR;
     extern const int BAD_ARGUMENTS;
     extern const int TOO_MANY_ARGUMENTS_FOR_FUNCTION;
     extern const int TOO_FEW_ARGUMENTS_FOR_FUNCTION;
@@ -56,10 +55,12 @@ struct AggregateFunctionBitMapColumnDiffData
     void add(const T key, const BitMap64 & bitmap)
     {
-        auto [it, inserted] = data.try_emplace(key, std::make_unique<BitMap64>(std::move(const_cast<BitMap64 &>(bitmap))));
-        if (!inserted) {
+        auto it = data.find(key);
+
+        if (it != data.end())
             *(it->second) |= bitmap;
-        }
+        else
+            data.emplace(key, std::make_unique<BitMap64>(const_cast<BitMap64 &>(bitmap)));
     }

     void merge(AggregateFunctionBitMapColumnDiffData & rhs)
@@ -133,7 +134,7 @@ enum DiffDirection
 struct DiffDirectionOp
 {
     DiffDirectionOp() : diff_direc(DiffDirection::FORWARD) {}
-    DiffDirectionOp(String diff_dir_op)
+    explicit DiffDirectionOp(String diff_dir_op)
     {
         std::transform(diff_dir_op.begin(), diff_dir_op.end(), diff_dir_op.begin(), ::tolower);
         if (diff_dir_op.empty() || diff_dir_op == "forward")
@@ -227,7 +228,7 @@ using DiffPair = typename std::pair;
             return;

         if (diff_step >= input_data.size())
-            throw Exception(getName() + ": the step " + std::to_string(diff_step) + " is larger than data size", ErrorCodes::LOGICAL_ERROR);
+            throw Exception(getName() + ": the step " + std::to_string(diff_step) + " is larger than data size", ErrorCodes::BAD_ARGUMENTS);

         std::vector<DiffPair> all_data;
         std::unordered_map<T, std::vector<DiffPair>> intermediate_res;
diff --git a/src/AggregateFunctions/AggregateFunctionBitmapExpressionCalculation.cpp b/src/AggregateFunctions/AggregateFunctionBitmapExpressionCalculation.cpp
index 12b63ca2c3e..6ea7ae7c6e7 100644
--- a/src/AggregateFunctions/AggregateFunctionBitmapExpressionCalculation.cpp
+++ b/src/AggregateFunctions/AggregateFunctionBitmapExpressionCalculation.cpp
@@ -17,30 +17,37 @@
 #include
 #include
 #include
-
-#pragma GCC diagnostic ignored "-Wunused"
-#pragma GCC diagnostic ignored "-Wunused-parameter"
+#include

 namespace DB
 {
 struct Settings;

+namespace ErrorCodes
+{
+    const extern int TYPE_MISMATCH;
+    const extern int SIZES_OF_COLUMNS_DOESNT_MATCH;
+    const extern int AGGREGATE_FUNCTION_THROW;
+}
+
 namespace
 {

 template