Skip to content

Commit

Permalink
[add] add the way of create paimon table before test to paimonwriter.md
Browse files Browse the repository at this point in the history
  • Loading branch information
awol2005ex committed Jan 23, 2025
1 parent 1784cf0 commit 30ce1e7
Showing 1 changed file with 266 additions and 0 deletions.
266 changes: 266 additions & 0 deletions docs/writer/paimonwriter.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,272 @@ Paimon Writer 提供向 已有的paimon表写入数据的能力。
### paimonConfig

`paimonConfig` 里可以配置与 Paimon catalog 和 Hadoop 相关的一些高级参数,比如 HA 的配置。

在本地目录创建 Paimon 表(使用 Paimon Java API,先按下面的 pom.xml 准备 Maven 工程,再运行后面的 Java 程序)

pom.xml

```xml
<?xml version="1.0" encoding="UTF-8"?>
<!--
  Minimal Maven project for the CreatePaimonTable example in this document.
  It pulls in the Paimon bundle plus the Hadoop client libraries, with a set of
  transitive dependencies excluded from the Hadoop artifacts.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.test</groupId>
<artifactId>paimon-java-api-test</artifactId>
<version>1.0-SNAPSHOT</version>

<properties>
<!-- Compile for Java 8 source and bytecode level. -->
<maven.compiler.source>8</maven.compiler.source>
<maven.compiler.target>8</maven.compiler.target>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Single version shared by every org.apache.hadoop artifact below. -->
<hadoop.version>3.2.4</hadoop.version>
<!-- Version of woodstox-core declared explicitly at the end of the dependency list. -->
<woodstox.version>7.0.0</woodstox.version>
</properties>
<dependencies>
<!-- Paimon Java API used by the example (catalog, schema, data types). -->
<dependency>
<groupId>org.apache.paimon</groupId>
<artifactId>paimon-bundle</artifactId>
<version>1.0.0</version>
</dependency>


<!-- hadoop-common with a number of transitive dependencies excluded;
     woodstox-core is excluded here and re-declared explicitly below. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-common</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.woodstox</groupId>
<artifactId>woodstox-core</artifactId>
</exclusion>
<exclusion>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</exclusion>
<exclusion>
<groupId>commons-net</groupId>
<artifactId>commons-net</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jettison</groupId>
<artifactId>jettison</artifactId>
</exclusion>
<exclusion>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
</exclusion>
<exclusion>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
</exclusion>
</exclusions>
</dependency>

<!-- hadoop-aws with the same exclusion list as hadoop-common above. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-aws</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-core-asl</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jackson</groupId>
<artifactId>jackson-mapper-asl</artifactId>
</exclusion>
<exclusion>
<groupId>com.fasterxml.woodstox</groupId>
<artifactId>woodstox-core</artifactId>
</exclusion>
<exclusion>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</exclusion>
<exclusion>
<groupId>commons-net</groupId>
<artifactId>commons-net</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>log4j</groupId>
<artifactId>log4j</artifactId>
</exclusion>
<exclusion>
<groupId>net.minidev</groupId>
<artifactId>json-smart</artifactId>
</exclusion>
<exclusion>
<groupId>org.codehaus.jettison</groupId>
<artifactId>jettison</artifactId>
</exclusion>
<exclusion>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-server</artifactId>
</exclusion>
<exclusion>
<groupId>org.xerial.snappy</groupId>
<artifactId>snappy-java</artifactId>
</exclusion>
<exclusion>
<groupId>org.apache.zookeeper</groupId>
<artifactId>zookeeper</artifactId>
</exclusion>
<exclusion>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
</exclusion>
</exclusions>
</dependency>

<!-- hadoop-mapreduce-client-core with a shorter exclusion list. -->
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-mapreduce-client-core</artifactId>
<version>${hadoop.version}</version>
<exclusions>
<exclusion>
<groupId>com.fasterxml.jackson.core</groupId>
<artifactId>jackson-databind</artifactId>
</exclusion>
<exclusion>
<groupId>commons-codec</groupId>
<artifactId>commons-codec</artifactId>
</exclusion>
<exclusion>
<groupId>io.netty</groupId>
<artifactId>netty</artifactId>
</exclusion>
<exclusion>
<groupId>org.eclipse.jetty</groupId>
<artifactId>jetty-util</artifactId>
</exclusion>
</exclusions>
</dependency>


<!-- woodstox-core is excluded from hadoop-common and hadoop-aws above and
     declared here at the version set in ${woodstox.version}. -->
<dependency>
<groupId>com.fasterxml.woodstox</groupId>
<artifactId>woodstox-core</artifactId>
<version>${woodstox.version}</version>
</dependency>
</dependencies>
</project>
```

```java

import org.apache.paimon.catalog.Catalog;
import org.apache.paimon.catalog.CatalogContext;
import org.apache.paimon.catalog.CatalogFactory;
import org.apache.paimon.catalog.Identifier;
import org.apache.paimon.fs.Path;
import org.apache.paimon.schema.Schema;
import org.apache.paimon.types.DataTypes;

import java.util.HashMap;
import java.util.Map;

/**
 * Creates the sample Paimon table {@code test.test2} in a local filesystem warehouse so that
 * the Paimon writer has an existing table to write into.
 *
 * <p>The table has two columns, {@code id INT} (primary key) and {@code name STRING}, and is
 * stored as ORC with LZ4 compression in a single bucket.
 */
public class CreatePaimonTable {

    /** Root directory of the local filesystem warehouse; adjust to your environment. */
    private static final String WAREHOUSE_PATH = "file:///g:/paimon";

    /** Database that holds the sample table. */
    private static final String DATABASE_NAME = "test";

    /** Name of the sample table. */
    private static final String TABLE_NAME = "test2";

    /**
     * Opens a filesystem catalog rooted at {@link #WAREHOUSE_PATH}.
     *
     * @return a new catalog; the caller is responsible for closing it
     */
    public static Catalog createFilesystemCatalog() {
        CatalogContext context = CatalogContext.create(new Path(WAREHOUSE_PATH));
        return CatalogFactory.createCatalog(context);
    }

    /**
     * Creates the database (if missing) and the table (if missing).
     *
     * @param args unused
     * @throws IllegalStateException if the database or table cannot be created
     */
    public static void main(String[] args) {
        Schema.Builder schemaBuilder = Schema.newBuilder();
        schemaBuilder.primaryKey("id");
        schemaBuilder.column("id", DataTypes.INT());
        schemaBuilder.column("name", DataTypes.STRING());

        Map<String, String> options = new HashMap<>();
        // Original author's note: a limitation of the Paimon Java API requires bucket > 0.
        options.put("bucket", "1");
        options.put("bucket-key", "id");
        options.put("file.format", "orc");
        options.put("file.compression", "lz4");
        options.put("lookup.cache-spill-compression", "lz4");
        options.put("spill-compression", "LZ4");
        options.put("orc.compress", "lz4");
        options.put("manifest.format", "orc");
        schemaBuilder.options(options);

        Schema schema = schemaBuilder.build();
        Identifier identifier = Identifier.create(DATABASE_NAME, TABLE_NAME);

        // try-with-resources releases the catalog's underlying resources when done.
        try (Catalog catalog = createFilesystemCatalog()) {
            // A fresh warehouse has no "test" database; without this call createTable
            // fails with DatabaseNotExistException. ignoreIfExists=true makes both
            // calls safe to repeat.
            catalog.createDatabase(DATABASE_NAME, true);
            catalog.createTable(identifier, schema, true);
        } catch (Exception e) {
            // Fail loudly: a stack trace alone would let the program exit normally
            // even though the table was never created.
            throw new IllegalStateException(
                    "Failed to create Paimon table " + DATABASE_NAME + "." + TABLE_NAME
                            + " in warehouse " + WAREHOUSE_PATH,
                    e);
        }
    }
}


```

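Spark 或者 Flink 环境创建表(下面是 Spark SQL 语法;在 Flink SQL 中需要把 `TBLPROPERTIES (...)` 换成 `WITH (...)`,并在列定义中用 `PRIMARY KEY (id) NOT ENFORCED` 声明主键)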

```sql
-- Spark SQL: creates the same table as the Java example above
-- (columns id INT and name STRING, primary key id, one fixed bucket, ORC + LZ4).
CREATE TABLE if not exists test.test2 (
    id INT,
    name STRING
) tblproperties (
    'primary-key' = 'id',
    'bucket' = '1',
    'bucket-key' = 'id',
    'file.format'='orc',
    'file.compression'='lz4',
    'lookup.cache-spill-compression'='lz4',
    'spill-compression'='LZ4',
    'orc.compress'='lz4',
    'manifest.format'='orc'
)

```


```json
{
"name": "paimonwriter",
Expand Down

0 comments on commit 30ce1e7

Please sign in to comment.