spotify · clairemcginty · Jul 9, 2024 · Jul 9, 2024 · Sep 12, 2024 · Sep 12, 2024
diff --git a/build.sbt b/build.sbt
@@ -693,6 +693,7 @@ lazy val tools = project
       "com.google.apis" % "google-api-services-bigquery" % bigqueryVersion,
       "org.apache.avro" % "avro" % avroVersion % Provided,
       "org.apache.parquet" % "parquet-hadoop" % parquetVersion,
+      "org.apache.hadoop" % "hadoop-common" % hadoopVersion,
       "org.typelevel" %% "paiges-core" % paigesVersion
     )
   )
@@ -707,6 +708,7 @@ lazy val jmh: Project = project
     cats % Test,
     datastore % Test,
     guava % Test,
+    parquet % Test,
     protobuf % "test->test",
     scalacheck % Test,
     tensorflow % Test,
@@ -726,7 +728,12 @@ lazy val jmh: Project = project
       "com.google.apis" % "google-api-services-bigquery" % bigqueryVersion % Test,
       "com.google.cloud.datastore" % "datastore-v1-proto-client" % datastoreVersion % Test,
       "org.apache.avro" % "avro" % avroVersion % Test,
-      "org.tensorflow" % "tensorflow-core-api" % tensorflowVersion % Test
+      "org.tensorflow" % "tensorflow-core-api" % tensorflowVersion % Test,
+      "org.apache.parquet" % "parquet-avro" % parquetVersion % Test,
+      "org.apache.parquet" % "parquet-column" % parquetVersion % Test,
+      "org.apache.parquet" % "parquet-hadoop" % parquetVersion % Test,
+      "org.apache.hadoop" % "hadoop-common" % hadoopVersion % Test,
+      "org.apache.hadoop" % "hadoop-mapreduce-client-core" % hadoopVersion % Test
     )
   )
 

diff --git a/parquet/src/main/scala/magnolify/parquet/MagnolifyParquetProperties.scala b/parquet/src/main/scala/magnolify/parquet/MagnolifyParquetProperties.scala
@@ -0,0 +1,34 @@
+/*
+ * Copyright 2024 Spotify AB
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package magnolify.parquet
+
+import org.apache.hadoop.conf.Configuration
+
+/**
+ * Properties for reading and writing Magnolify ParquetType classes, configurable via a Hadoop
+ * [[Configuration]] instance.
+ */
+object MagnolifyParquetProperties {
+  // Property name and the Boolean held as its default.
+  // NOTE(review): presumably the default applies when the key is unset; confirm at usage sites.
+  val WriteGroupedArrays: String = "magnolify.parquet.write-grouped-arrays"
+  val WriteGroupedArraysDefault: Boolean = false
+
+  // Property name and the Boolean held as its default (same pairing as above).
+  val WriteAvroSchemaToMetadata: String = "magnolify.parquet.write-avro-schema"
+  val WriteAvroSchemaToMetadataDefault: Boolean = true
+
+  // Property names for the read/write type entries; annotated explicitly like the other
+  // public members so the API type does not depend on inference.
+  // NOTE(review): no defaults are declared for these here; confirm how callers populate them.
+  val ReadTypeKey: String = "parquet.type.read.type"
+  val WriteTypeKey: String = "parquet.type.write.type"
+}