Merge remote-tracking branch 'origin/kafka2' into kafka2
mshustov committed Oct 20, 2023
2 parents 1930a45 + 4f54f45 commit ac6baba
Showing 32 changed files with 337 additions and 132 deletions.
91 changes: 48 additions & 43 deletions clickhouseapi.js
@@ -85,55 +85,60 @@ function generateDocusaurusMarkdown(spec, groupedEndpoints, prefix) {
}

if (operation.responses) {
markdownContent += `\n### Response\n\n`;

markdownContent += `#### Response Schema\n\n`;

const rawSchema = operation.responses['200'].content["application/json"].schema
const result = rawSchema.properties.result
const schema = rawSchema.properties.result.type === 'array' ?
result.items['$ref'].split('/').pop() : result['$ref'].split('/').pop()

const bodyParamAttrs = spec.components.schemas[schema].properties
const bodyParams = Object.keys(bodyParamAttrs)
const sampleResponseObj = {}

markdownContent += `| Name | Type | Description |\n`
markdownContent += `| :--- | :--- | :---------- |\n`

for (const parameter of bodyParams) {
const paramType = bodyParamAttrs[parameter].format || bodyParamAttrs[parameter].type
markdownContent += `| ${parameter} | ${paramType || ''} | ${bodyParamAttrs[parameter].description || ''} | \n`

switch (paramType) {
case 'uuid':
sampleResponseObj[parameter] = 'uuid';
break;
case 'string':
sampleResponseObj[parameter] = 'string';
break;
case 'number':
sampleResponseObj[parameter] = 0;
break;
case 'array':
sampleResponseObj[parameter] = 'Array';
break;
case 'boolean':
sampleResponseObj[parameter] = 'boolean';
break;
case 'date-time':
sampleResponseObj[parameter] = 'date-time';
break;
case 'email':
sampleResponseObj[parameter] = 'email';
break;
if (result) {
markdownContent += `\n### Response\n\n`;

markdownContent += `#### Response Schema\n\n`;

const schema = rawSchema.properties.result.type === 'array' ?
result.items['$ref'].split('/').pop() : result['$ref'].split('/').pop()

const bodyParamAttrs = spec.components.schemas[schema].properties
const bodyParams = Object.keys(bodyParamAttrs)
const sampleResponseObj = {}

markdownContent += `| Name | Type | Description |\n`
markdownContent += `| :--- | :--- | :---------- |\n`

for (const parameter of bodyParams) {
const paramType = bodyParamAttrs[parameter].format || bodyParamAttrs[parameter].type
markdownContent += `| ${parameter} | ${paramType || ''} | ${bodyParamAttrs[parameter].description || ''} | \n`

switch (paramType) {
case 'uuid':
sampleResponseObj[parameter] = 'uuid';
break;
case 'string':
sampleResponseObj[parameter] = 'string';
break;
case 'number':
sampleResponseObj[parameter] = 0;
break;
case 'array':
sampleResponseObj[parameter] = 'Array';
break;
case 'boolean':
sampleResponseObj[parameter] = 'boolean';
break;
case 'date-time':
sampleResponseObj[parameter] = 'date-time';
break;
case 'email':
sampleResponseObj[parameter] = 'email';
break;
}
}

markdownContent += `\n#### Sample response\n\n`;
markdownContent += '```\n'
markdownContent += `${JSON.stringify(sampleResponseObj, 0, 2)}`
markdownContent += '\n```\n'
}

markdownContent += `\n#### Sample response\n\n`;
markdownContent += '```\n'
markdownContent += `${JSON.stringify(sampleResponseObj, 0, 2)}`
markdownContent += '\n```\n'

}
}
}
28 changes: 27 additions & 1 deletion docs/en/about-us/adopters.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion docs/en/cloud/bestpractices/avoidnullablecolumns.md
@@ -4,7 +4,7 @@ sidebar_label: Avoid Nullable Columns
title: Avoid Nullable Columns
---

[`Nullable` column](/docs/en/sql-reference/data-types/nullable/) (e.g. `Nullable(String))` creates a separate column of `UInt8` type. This additional column has to be processed every time a user works with a nullable column. This leads to additional storage space used and almost always negatively affects performance.
[`Nullable` column](/docs/en/sql-reference/data-types/nullable/) (e.g. `Nullable(String)`) creates a separate column of `UInt8` type. This additional column has to be processed every time a user works with a nullable column. This leads to additional storage space used and almost always negatively affects performance.

To avoid `Nullable` columns, consider setting a default value for that column. For example, instead of:
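A minimal sketch of this pattern, using a hypothetical table (the table and column names are illustrative):

```sql
-- Sketch only: a Nullable column makes ClickHouse maintain a separate UInt8
-- null-mask column alongside the data.
CREATE TABLE events_nullable
(
    id      UInt32,
    comment Nullable(String)
)
ENGINE = MergeTree
ORDER BY id;

-- Prefer a non-nullable column with a default value that stands in for "no value".
CREATE TABLE events_default
(
    id      UInt32,
    comment String DEFAULT ''
)
ENGINE = MergeTree
ORDER BY id;
```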

34 changes: 33 additions & 1 deletion docs/en/cloud/reference/changelog.md
@@ -5,6 +5,39 @@ title: Cloud Changelog
---

In addition to this ClickHouse Cloud changelog, please see the [Cloud Compatibility](/docs/en/cloud/reference/cloud-compatibility.md) page.

## October 19, 2023

This release brings usability and performance improvements in the SQL console, better IP data type handling in the Metabase connector, and new functionality in the Java and Node.js clients.

### Console changes
- Improved usability of the SQL console (e.g. preserving column width between query executions)
- Improved performance of the SQL console

### Integrations changes
- Java client:
- Switched the default network library to improve performance and reuse open connections
- Added proxy support
- Added support for secure connections using Trust Store
- Node.js client: Fixed keep-alive behavior for insert queries
- Metabase: Fixed IPv4/IPv6 column serialization

## September 28, 2023

This release brings general availability of ClickPipes for Kafka, Confluent Cloud, and Amazon MSK, general availability of the Kafka Connect ClickHouse Sink, a self-service workflow to secure access to Amazon S3 via IAM roles, and AI-assisted query suggestions (private preview).

### Console changes
- Added a self-service workflow to secure [access to Amazon S3 via IAM roles](/docs/en/cloud/manage/security/secure-s3)
- Introduced AI-assisted query suggestions in private preview (please [contact ClickHouse Cloud support](https://clickhouse.cloud/support) to try it out!)

### Integrations changes
- Announced general availability of ClickPipes - a turnkey data ingestion service - for Kafka, Confluent Cloud, and Amazon MSK (see the [release blog](https://clickhouse.com/blog/clickpipes-is-generally-available))
- Reached general availability of the Kafka Connect ClickHouse Sink
- Extended support for customized ClickHouse settings using the `clickhouse.settings` property
- Improved deduplication behavior to account for dynamic fields
- Added support for `tableRefreshInterval` to re-fetch table changes from ClickHouse
- Fixed an SSL connection issue and type mappings between [PowerBI](/docs/en/integrations/powerbi) and ClickHouse data types

## September 7, 2023

This release brings the beta release of the PowerBI Desktop official connector, improved credit card payment handling for India, and multiple improvements across supported language clients.
@@ -20,7 +53,6 @@ This release brings the beta release of the PowerBI Desktop official connector,
- Node.js client: added default_format setting support
- Golang client: fixed bool type handling, removed string limits


## Aug 24, 2023

This release adds support for the MySQL interface to the ClickHouse database, introduces a new official PowerBI connector, adds a new “Running Queries” view in the cloud console, and updates the ClickHouse version to 23.7.
15 changes: 9 additions & 6 deletions docs/en/cloud/reference/cloud-compatibility.md
@@ -27,9 +27,10 @@ For the most part, the DDL syntax of ClickHouse Cloud should match what is avail
- Support for `CREATE AS SELECT`, which is currently not available. As a workaround, we suggest using `CREATE ... EMPTY ... AS SELECT` and then inserting into that table (see [this blog](https://clickhouse.com/blog/getting-data-into-clickhouse-part-1) for an example, and the sketch after this list).
- Some experimental syntax may be disabled, for instance, the `ALTER TABLE … MODIFY QUERY` statement.
- Some introspection functionality may be disabled for security purposes, for example, the `addressToLine` SQL function.
- Do not use `ON CLUSTER` parameters in ClickHouse Cloud - these are not needed. While these are mostly no-ops, they can still cause an error if you are trying to use [macros](https://clickhouse.com/docs/en/operations/server-configuration-parameters/settings#macros). Macros often do not work and are not needed in ClickHouse Cloud.
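A sketch of the `CREATE ... EMPTY ... AS SELECT` workaround referenced in the list above; the table names, engine clauses, and exact placement of `EMPTY` are assumptions - check the linked blog post and the `CREATE TABLE` reference for your version:

```sql
-- Sketch only: create an empty table whose structure is inferred from the SELECT ...
CREATE TABLE events_copy
ENGINE = MergeTree
ORDER BY event_time
EMPTY AS SELECT *
FROM events;

-- ... then populate it in a separate step.
INSERT INTO events_copy
SELECT *
FROM events;
```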

### Database and table engines
ClickHouse Cloud provides a highly-available, replicated service by default. As a result, the database engine is Replicated and the following table engines are supported:
ClickHouse Cloud provides a highly-available, replicated service by default. As a result, all database and table engines are "Replicated":
- ReplicatedMergeTree (default, when none is specified)
- ReplicatedSummingMergeTree
- ReplicatedAggregatingMergeTree
@@ -57,6 +58,8 @@ ClickHouse Cloud provides a highly-available, replicated service by default. As
- PostgreSQL
- S3

Please note: in ClickHouse Cloud, you do not need to add the "Replicated" term to your specified database or table engine. All *MergeTree tables are replicated in ClickHouse Cloud automatically.
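As an illustration of the note above (the table and columns are made up for this sketch):

```sql
-- In ClickHouse Cloud, a plain MergeTree definition like this is run as a
-- replicated table automatically - no "Replicated" prefix is needed.
CREATE TABLE page_views
(
    user_id UInt32,
    url     String,
    ts      DateTime
)
ENGINE = MergeTree
ORDER BY (user_id, ts);
```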

### Interfaces
ClickHouse Cloud supports HTTPS and Native interfaces. Support for more interfaces such as MySQL and Postgres is coming soon.

@@ -83,20 +86,20 @@ Experimental features can be self-enabled by users in Development services. They

### Kafka

The [Kafka Table Engine](/docs/en/integrations/data-ingestion/kafka/index.md) is not available in ClickHouse Cloud. Instead, we recommend relying on architectures that decouple the Kafka connectivity components from the ClickHouse service to achieve a separation of concerns. We recommend considering the alternatives listed in the [Kafka User Guide](/docs/en/integrations/data-ingestion/kafka/index.md)
The [Kafka Table Engine](/docs/en/integrations/data-ingestion/kafka/index.md) is not generally available in ClickHouse Cloud. Instead, we recommend relying on architectures that decouple the Kafka connectivity components from the ClickHouse service to achieve a separation of concerns. We recommend [ClickPipes](https://clickhouse.com/cloud/clickpipes) for pulling data from a Kafka stream. Alternatively, consider the push-based alternatives listed in the [Kafka User Guide](/docs/en/integrations/data-ingestion/kafka/index.md)

## Operational Defaults and Considerations
The following are default settings for ClickHouse Cloud services. In some cases, these settings are fixed to ensure the correct operation of the service, and in others, they can be adjusted.

### Operational limits

### `max_parts_in_total: 10,000`
#### `max_parts_in_total: 10,000`
The default value of the `max_parts_in_total` setting for MergeTree tables has been lowered from 100,000 to 10,000. The reason for this change is that we observed that a large number of data parts is likely to cause a slow startup time of services in the cloud. A large number of parts usually indicates a partition key that is too granular, which is typically accidental and should be avoided. The change of default will allow these cases to be detected earlier.
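As a small sketch, the effective value of this MergeTree default can be checked from SQL (`system.merge_tree_settings` is a standard ClickHouse system table; the output reflects your service):

```sql
-- Inspect the effective MergeTree default on a running service.
SELECT name, value, description
FROM system.merge_tree_settings
WHERE name = 'max_parts_in_total';
```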

### `max_concurrent_queries: 1,000`
#### `max_concurrent_queries: 1,000`
Increased this per-server setting from the default of 100 to 1,000 to allow for more concurrency. This will result in 2,000 concurrent queries for development services and 3,000 for production.

### `max_table_size_to_drop: 1,000,000,000,000`
#### `max_table_size_to_drop: 1,000,000,000,000`
Increased this setting from 50GB to allow for dropping of tables/partitions up to 1TB.

### System settings
@@ -112,7 +115,7 @@ The table below summarizes our efforts to expand some of the capabilities descri
|-------------------------------------------------------------------------|:----------------------------------------|
|Dictionary support: PostgreSQL, MySQL, remote and local ClickHouse servers, Redis, MongoDB and HTTP sources | **Added in GA** |
|SQL user-defined functions (UDFs) | **Added in GA** |
|MySQL and Postgres engine | **Added in GA** |
|MySQL and PostgreSQL engine | **Added in GA** |
|Engines for SQLite, ODBC, JDBC, Redis, RabbitMQ, HDFS, and Hive ||
|MySQL & Postgres interfaces ||
|Kafka Table Engine | Not recommended; see alternatives above |
17 changes: 17 additions & 0 deletions docs/en/cloud/security/compliance-and-certification.md
@@ -0,0 +1,17 @@
---
slug: /en/manage/security/compliance-and-certification
sidebar_label: Compliance and Certification
title: Compliance and Certification
---

# Compliance and Certification

ClickHouse Cloud adheres to the following compliance frameworks:
- [SOC 2](https://secureframe.com/hub/soc-2/what-is-soc-2)
- [ISO 27001](https://www.iso.org/standard/27001)
- [GDPR](https://gdpr-info.eu/)
- [CCPA](https://oag.ca.gov/privacy/ccpa)

We also provide a secure method to pay by credit card that is compliant with [PCI SAQ A v4.0](https://www.pcisecuritystandards.org/document_library/).

To download detailed reports, please see our [Trust Center](https://trust.clickhouse.com/).
4 changes: 2 additions & 2 deletions docs/en/guides/creating-tables.md
@@ -5,7 +5,7 @@ sidebar_label: Creating Tables

# Creating Tables in ClickHouse

Like most database management systems, ClickHouse logically groups tables into **databases**. Use the `CREATE DATABASE` command to create a new database in ClickHouse:
Like most databases, ClickHouse logically groups tables into **databases**. Use the `CREATE DATABASE` command to create a new database in ClickHouse:

```sql
CREATE DATABASE IF NOT EXISTS helloworld
@@ -62,4 +62,4 @@ In the example above, `my_first_table` is a `MergeTree` table with four columns:

:::tip
For more details, check out the [Creating Databases and Tables](https://learn.clickhouse.com/visitor_catalog_class/show/1043458/) training course in ClickHouse Academy.
:::
:::
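For context, a sketch of what such a four-column `MergeTree` definition can look like - the column names and types here are illustrative and may differ from the guide's actual example:

```sql
-- Illustrative four-column MergeTree table (names and types are assumptions).
CREATE TABLE helloworld.my_first_table
(
    user_id   UInt32,
    message   String,
    timestamp DateTime,
    metric    Float32
)
ENGINE = MergeTree()
PRIMARY KEY (user_id, timestamp);
```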
9 changes: 7 additions & 2 deletions docs/en/guides/inserting-data.md
@@ -22,10 +22,15 @@ Let's verify it worked - you should see the four rows of data that were inserted
SELECT * FROM helloworld.my_first_table
```

:::tip
Insert a large number of rows per batch - tens of thousands or even millions of rows at once. Don't worry - ClickHouse can easily handle that type of volume!
:::note Need help inserting large datasets?
If you need help inserting large datasets or encounter any errors when importing data into ClickHouse Cloud, please contact us at [email protected] and we can assist.
:::


## Insert large batches

Insert a large number of rows per batch - tens of thousands or even millions of rows at once. Inserting in batches optimizes for insert performance. Don't worry - ClickHouse can easily handle that type of volume!

:::tip
If you cannot insert a lot of rows at once and you are using an HTTP client, use the [`async_insert` setting](../operations/settings/settings.md#async-insert), which batches your smaller inserts before inserting them into the table.
:::
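A sketch of both approaches - one large batched `INSERT`, and a small insert buffered server-side with `async_insert` (the table and values are illustrative; `async_insert` and `wait_for_async_insert` are standard ClickHouse settings):

```sql
-- Batched insert: many rows in a single INSERT statement (illustrative values).
INSERT INTO helloworld.my_first_table (user_id, message, timestamp, metric) VALUES
    (101, 'first row',  now(), 1.0),
    (102, 'second row', now(), 2.5),
    (103, 'third row',  now(), 3.7);

-- Small, frequent inserts: let the server batch them with async_insert.
INSERT INTO helloworld.my_first_table (user_id, message, timestamp, metric)
SETTINGS async_insert = 1, wait_for_async_insert = 1
VALUES (104, 'late arrival', now(), 4.2);
```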
2 changes: 1 addition & 1 deletion docs/en/guides/sre/keeper/index.md
@@ -379,7 +379,7 @@ The following features are available:

### Migration from ZooKeeper {#migration-from-zookeeper}

Seamlessly migration from ZooKeeper to ClickHouse Keeper is impossible you have to stop your ZooKeeper cluster, convert data and start ClickHouse Keeper. `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration:
Seamless migration from ZooKeeper to ClickHouse Keeper is not possible. You have to stop your ZooKeeper cluster, convert data, and start ClickHouse Keeper. `clickhouse-keeper-converter` tool allows converting ZooKeeper logs and snapshots to ClickHouse Keeper snapshot. It works only with ZooKeeper > 3.4. Steps for migration:

1. Stop all ZooKeeper nodes.

1 change: 0 additions & 1 deletion docs/en/integrations/data-ingestion/clickpipes/index.md
@@ -142,7 +142,6 @@ Nullable versions of the above are also supported with these exceptions:

## Current Limitations

- During the Private Preview phase, ClickPipes is available only on the services backed by Amazon Web Services, in the `us-east-2` and `eu-central-1` regions.
- Private Link support isn't currently available for ClickPipes but will be released in the near future.

## List of Static IPs
@@ -22,7 +22,7 @@ import ConnectionDetails from '@site/docs/en/_snippets/_gather_your_details_http
## 3. Download the ClickHouse JDBC driver

1. Visit the <a href="https://github.com/ClickHouse/clickhouse-java/releases" target="_blank">ClickHouse JDBC driver release page</a> on GitHub and look for the latest JDBC release version
2. In the release version, click on "Show all xx assets" and look for the JAR file containing the keyword "shaded" or "all", for example, `clickhouse-jdbc-0.4.6-all.jar`
2. In the release version, click on "Show all xx assets" and look for the JAR file containing the keyword "shaded" or "all", for example, `clickhouse-jdbc-0.5.0-all.jar`
3. Place the JAR file in a folder accessible by Apache NiFi and take note of the absolute path

## 4. Add DBCPConnectionPool Controller Service and configure its properties
13 changes: 13 additions & 0 deletions docs/en/integrations/data-ingestion/kafka/confluent/index.md
@@ -0,0 +1,13 @@
---
sidebar_label: Confluent Platform
sidebar_position: 1
slug: /en/integrations/kafka/cloud/confluent
description: Kafka Connectivity with Confluent Cloud
---

# Integrating Confluent Cloud with ClickHouse

The Confluent platform provides two options for integrating with ClickHouse:

* [ClickHouse Connect Sink on Confluent Cloud](./custom-connector.md) using the custom connectors feature
* [HTTP Sink Connector for Confluent Platform](./kafka-connect-http.md) that integrates Apache Kafka with an API via HTTP or HTTPS