Skip to content

Commit

Permalink
apacheGH-21815: [JS] Add support for Duration type (apache#37341)
Browse files Browse the repository at this point in the history
### Rationale for this change

The `Duration` type is currently not supported, and trying to deserialize a Table containing the type (e.g. using `tableFromIPC`) fails with an `Unrecognized type` error. This PR aims to fix that.

### What changes are included in this PR?

- definition of the `Duration` data type
- updates to the visitor classes so that things like parsing work correctly
- test coverage for the type
- documentation update

### Are these changes tested?

Yes, I extended the data generator with the new type so that the type is tested by the existing tests.

### Are there any user-facing changes?

Yes, I've updated the documentation status page. I also noticed that it was outdated for JavaScript, i.e. there is already support for `Decimal` type so I updated this as well.

Closes: apache#21815
Closes: apache#35439
* Closes: apache#21815

Lead-authored-by: František Necas <[email protected]>
Co-authored-by: ptaylor <[email protected]>
Signed-off-by: Dominik Moritz <[email protected]>
  • Loading branch information
2 people authored and loicalleyne committed Nov 13, 2023
1 parent 7b3368f commit 2046f23
Show file tree
Hide file tree
Showing 31 changed files with 338 additions and 18 deletions.
3 changes: 1 addition & 2 deletions dev/archery/archery/integration/datagen.py
Original file line number Diff line number Diff line change
Expand Up @@ -1805,8 +1805,7 @@ def _temp_path():
generate_datetime_case(),

generate_duration_case()
.skip_tester('C#')
.skip_tester('JS'), # TODO(ARROW-5239): Intervals + JS
.skip_tester('C#'),

generate_interval_case()
.skip_tester('C#')
Expand Down
4 changes: 2 additions & 2 deletions docs/source/status.rst
Original file line number Diff line number Diff line change
Expand Up @@ -46,15 +46,15 @@ Data Types
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Decimal128 |||||||| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Decimal256 |||| |||| |
| Decimal256 |||| |||| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Date32/64 |||||||||
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Time32/64 |||||||||
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Timestamp |||||||| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Duration |||| | ||| |
| Duration |||| | ||| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
| Interval |||| | ||| |
+-------------------+-------+-------+-------+------------+-------+-------+-------+-------+
Expand Down
2 changes: 2 additions & 0 deletions js/src/Arrow.dom.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ export {
Union, DenseUnion, SparseUnion,
Dictionary,
Interval, IntervalDayTime, IntervalYearMonth,
Duration, DurationSecond, DurationMillisecond, DurationMicrosecond, DurationNanosecond,
FixedSizeList,
Map_, MapRow,
Table, makeTable, tableFromArrays,
Expand Down Expand Up @@ -86,6 +87,7 @@ export {
FixedSizeListBuilder,
FloatBuilder, Float16Builder, Float32Builder, Float64Builder,
IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder,
DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder, DurationMicrosecondBuilder, DurationNanosecondBuilder,
IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint8Builder, Uint16Builder, Uint32Builder, Uint64Builder,
ListBuilder,
MapBuilder,
Expand Down
2 changes: 2 additions & 0 deletions js/src/Arrow.ts
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ export {
Union, DenseUnion, SparseUnion,
Dictionary,
Interval, IntervalDayTime, IntervalYearMonth,
Duration, DurationSecond, DurationMillisecond, DurationMicrosecond, DurationNanosecond,
FixedSizeList,
Map_
} from './type.js';
Expand Down Expand Up @@ -75,6 +76,7 @@ export { IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder, Uint
export { TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder } from './builder/time.js';
export { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder } from './builder/timestamp.js';
export { IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder } from './builder/interval.js';
export { DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder, DurationMicrosecondBuilder, DurationNanosecondBuilder } from './builder/duration.js';
export { Utf8Builder } from './builder/utf8.js';
export { BinaryBuilder } from './builder/binary.js';
export { ListBuilder } from './builder/list.js';
Expand Down
6 changes: 3 additions & 3 deletions js/src/builder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ import { MapRow, kKeys } from './row/map.js';
import {
DataType, strideForType,
Float, Int, Decimal, FixedSizeBinary,
Date_, Time, Timestamp, Interval,
Date_, Time, Timestamp, Interval, Duration,
Utf8, Binary, List, Map_,
} from './type.js';
import { createIsValidFunction } from './builder/valid.js';
Expand Down Expand Up @@ -290,7 +290,7 @@ export abstract class Builder<T extends DataType = any, TNull = any> {
} else if (valueOffsets = _offsets?.flush(length)) { // Variable-width primitives (Binary, Utf8), and Lists
// Binary, Utf8
data = _values?.flush(_offsets.last());
} else { // Fixed-width primitives (Int, Float, Decimal, Time, Timestamp, and Interval)
} else { // Fixed-width primitives (Int, Float, Decimal, Time, Timestamp, Duration and Interval)
data = _values?.flush(length);
}

Expand Down Expand Up @@ -342,7 +342,7 @@ export abstract class Builder<T extends DataType = any, TNull = any> {
(Builder.prototype as any)._isValid = () => true;

/** @ignore */
export abstract class FixedWidthBuilder<T extends Int | Float | FixedSizeBinary | Date_ | Timestamp | Time | Decimal | Interval = any, TNull = any> extends Builder<T, TNull> {
export abstract class FixedWidthBuilder<T extends Int | Float | FixedSizeBinary | Date_ | Timestamp | Time | Decimal | Interval | Duration = any, TNull = any> extends Builder<T, TNull> {
constructor(opts: BuilderOptions<T, TNull>) {
super(opts);
this._values = new DataBufferBuilder(new this.ArrayType(0), this.stride);
Expand Down
46 changes: 46 additions & 0 deletions js/src/builder/duration.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@

// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

import { FixedWidthBuilder } from '../builder.js';
import { Duration, DurationSecond, DurationMillisecond, DurationMicrosecond, DurationNanosecond } from '../type.js';
import { setDuration, setDurationSecond, setDurationMillisecond, setDurationMicrosecond, setDurationNanosecond } from '../visitor/set.js';

/**
 * Builder for `Duration` values of any unit. The class body is empty: it adds
 * nothing of its own on top of `FixedWidthBuilder`; the only specialization is
 * the `_setValue` function installed on the prototype right after the class.
 * @ignore
 */
export class DurationBuilder<T extends Duration = Duration, TNull = any> extends FixedWidthBuilder<T, TNull> { }

// Install the value writer on the prototype (not declared as a method), using
// `setDuration` imported from '../visitor/set.js'. The `as any` cast is needed
// because `_setValue` is assigned from outside the class body.
(DurationBuilder.prototype as any)._setValue = setDuration;

/**
 * `DurationBuilder` specialized to the `DurationSecond` data type. The class
 * body is empty; it differs from its parent only in the `_setValue` function
 * installed on its prototype below.
 * @ignore
 */
export class DurationSecondBuilder<TNull = any> extends DurationBuilder<DurationSecond, TNull> { }

// Override the inherited `_setValue` with the second-specific setter from '../visitor/set.js'.
(DurationSecondBuilder.prototype as any)._setValue = setDurationSecond;

/**
 * `DurationBuilder` specialized to the `DurationMillisecond` data type. The
 * class body is empty; it differs from its parent only in the `_setValue`
 * function installed on its prototype below.
 * @ignore
 */
export class DurationMillisecondBuilder<TNull = any> extends DurationBuilder<DurationMillisecond, TNull> { }

// Override the inherited `_setValue` with the millisecond-specific setter from '../visitor/set.js'.
(DurationMillisecondBuilder.prototype as any)._setValue = setDurationMillisecond;

/**
 * `DurationBuilder` specialized to the `DurationMicrosecond` data type. The
 * class body is empty; it differs from its parent only in the `_setValue`
 * function installed on its prototype below.
 * @ignore
 */
export class DurationMicrosecondBuilder<TNull = any> extends DurationBuilder<DurationMicrosecond, TNull> { }

// Override the inherited `_setValue` with the microsecond-specific setter from '../visitor/set.js'.
(DurationMicrosecondBuilder.prototype as any)._setValue = setDurationMicrosecond;

/**
 * `DurationBuilder` specialized to the `DurationNanosecond` data type. The
 * class body is empty; it differs from its parent only in the `_setValue`
 * function installed on its prototype below.
 * @ignore
 */
export class DurationNanosecondBuilder<TNull = any> extends DurationBuilder<DurationNanosecond, TNull> { }

// Override the inherited `_setValue` with the nanosecond-specific setter from '../visitor/set.js'.
(DurationNanosecondBuilder.prototype as any)._setValue = setDurationNanosecond;
11 changes: 11 additions & 0 deletions js/src/data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,7 @@ import {
Int,
Date_,
Interval,
Duration,
Time,
Timestamp,
Union, DenseUnion, SparseUnion,
Expand Down Expand Up @@ -390,6 +391,13 @@ class MakeDataVisitor extends Visitor {
const { ['length']: length = data.length / strideForType(type), ['nullCount']: nullCount = props['nullBitmap'] ? -1 : 0, } = props;
return new Data(type, offset, length, nullCount, [undefined, data, nullBitmap]);
}
/**
 * Assemble a `Data` instance for a Duration type from caller-supplied props.
 *
 * Defaults applied when a prop is `undefined`:
 *  - `offset`    -> 0
 *  - `length`    -> the element count of the converted values buffer
 *  - `nullCount` -> -1 if a `nullBitmap` prop was supplied, otherwise 0
 *
 * Props are read with bracket notation, as elsewhere in this visitor.
 */
public visitDuration<T extends Duration>(props: DurationDataProps<T>) {
    const type = props['type'];
    const requestedOffset = props['offset'];
    const offset = requestedOffset === undefined ? 0 : requestedOffset;

    // Normalize the raw inputs into the buffer types `Data` stores.
    const validity = toUint8Array(props['nullBitmap']);
    const values = toArrayBufferView(type.ArrayType, props['data']);

    const requestedLength = props['length'];
    const length = requestedLength === undefined ? values.length : requestedLength;

    const requestedNullCount = props['nullCount'];
    const nullCount = requestedNullCount === undefined
        ? (props['nullBitmap'] ? -1 : 0)
        : requestedNullCount;

    // Buffer slots: [no offsets buffer, values, validity bitmap].
    return new Data(type, offset, length, nullCount, [undefined, values, validity]);
}
public visitFixedSizeList<T extends FixedSizeList>(props: FixedSizeListDataProps<T>) {
const { ['type']: type, ['offset']: offset = 0, ['child']: child = new MakeDataVisitor().visit({ type: type.valueType }) } = props;
const nullBitmap = toUint8Array(props['nullBitmap']);
Expand Down Expand Up @@ -424,6 +432,7 @@ interface Date_DataProps<T extends Date_> extends DataProps_<T> { data?: DataBuf
interface TimeDataProps<T extends Time> extends DataProps_<T> { data?: DataBuffer<T> }
interface TimestampDataProps<T extends Timestamp> extends DataProps_<T> { data?: DataBuffer<T> }
interface IntervalDataProps<T extends Interval> extends DataProps_<T> { data?: DataBuffer<T> }
interface DurationDataProps<T extends Duration> extends DataProps_<T> { data?: DataBuffer<T> }
interface FixedSizeBinaryDataProps<T extends FixedSizeBinary> extends DataProps_<T> { data?: DataBuffer<T> }
interface BinaryDataProps<T extends Binary> extends DataProps_<T> { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer<T> }
interface Utf8DataProps<T extends Utf8> extends DataProps_<T> { valueOffsets: ValueOffsetsBuffer; data?: DataBuffer<T> }
Expand All @@ -446,6 +455,7 @@ export type DataProps<T extends DataType> = (
T extends Time /* */ ? TimeDataProps<T> :
T extends Timestamp /* */ ? TimestampDataProps<T> :
T extends Interval /* */ ? IntervalDataProps<T> :
T extends Duration /* */ ? DurationDataProps<T> :
T extends FixedSizeBinary /* */ ? FixedSizeBinaryDataProps<T> :
T extends Binary /* */ ? BinaryDataProps<T> :
T extends Utf8 /* */ ? Utf8DataProps<T> :
Expand All @@ -471,6 +481,7 @@ export function makeData<T extends Date_>(props: Date_DataProps<T>): Data<T>;
export function makeData<T extends Time>(props: TimeDataProps<T>): Data<T>;
export function makeData<T extends Timestamp>(props: TimestampDataProps<T>): Data<T>;
export function makeData<T extends Interval>(props: IntervalDataProps<T>): Data<T>;
export function makeData<T extends Duration>(props: DurationDataProps<T>): Data<T>;
export function makeData<T extends FixedSizeBinary>(props: FixedSizeBinaryDataProps<T>): Data<T>;
export function makeData<T extends Binary>(props: BinaryDataProps<T>): Data<T>;
export function makeData<T extends Utf8>(props: Utf8DataProps<T>): Data<T>;
Expand Down
7 changes: 6 additions & 1 deletion js/src/enum.ts
Original file line number Diff line number Diff line change
Expand Up @@ -137,7 +137,7 @@ export enum MessageHeader {
* nested type consisting of other data types, or another data type (e.g. a
* timestamp encoded as an int64).
*
* **Note**: Only enum values 0-17 (NONE through Map) are written to an Arrow
* **Note**: Only enum values 0-18 (NONE through Duration) are written to an Arrow
* IPC payload.
*
* The rest of the values are specified here so TypeScript can narrow the type
Expand Down Expand Up @@ -174,6 +174,7 @@ export enum Type {
FixedSizeBinary = 15, /** Fixed-size binary. Each value occupies the same number of bytes */
FixedSizeList = 16, /** Fixed-size list. Each value occupies the same number of bytes */
Map = 17, /** Map of named logical types */
Duration = 18, /** Measure of elapsed time in either seconds, milliseconds, microseconds or nanoseconds. */

Dictionary = -1, /** Dictionary aka Category type */
Int8 = -2,
Expand Down Expand Up @@ -201,6 +202,10 @@ export enum Type {
SparseUnion = -24,
IntervalDayTime = -25,
IntervalYearMonth = -26,
DurationSecond = -27,
DurationMillisecond = -28,
DurationMicrosecond = -29,
DurationNanosecond = -30
}

export enum BufferType {
Expand Down
16 changes: 16 additions & 0 deletions js/src/interfaces.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import type { IntBuilder, Int8Builder, Int16Builder, Int32Builder, Int64Builder,
import type { TimeBuilder, TimeSecondBuilder, TimeMillisecondBuilder, TimeMicrosecondBuilder, TimeNanosecondBuilder } from './builder/time.js';
import type { TimestampBuilder, TimestampSecondBuilder, TimestampMillisecondBuilder, TimestampMicrosecondBuilder, TimestampNanosecondBuilder } from './builder/timestamp.js';
import type { IntervalBuilder, IntervalDayTimeBuilder, IntervalYearMonthBuilder } from './builder/interval.js';
import type { DurationBuilder, DurationSecondBuilder, DurationMillisecondBuilder, DurationMicrosecondBuilder, DurationNanosecondBuilder } from './builder/duration.js';
import type { Utf8Builder } from './builder/utf8.js';
import type { BinaryBuilder } from './builder/binary.js';
import type { ListBuilder } from './builder/list.js';
Expand Down Expand Up @@ -222,6 +223,11 @@ export type TypeToDataType<T extends Type> = {
[Type.Interval]: type.Interval;
[Type.IntervalDayTime]: type.IntervalDayTime;
[Type.IntervalYearMonth]: type.IntervalYearMonth;
[Type.Duration]: type.Duration;
[Type.DurationSecond]: type.DurationSecond;
[Type.DurationMillisecond]: type.DurationMillisecond;
[Type.DurationMicrosecond]: type.DurationMicrosecond;
[Type.DurationNanosecond]: type.DurationNanosecond;
[Type.Map]: type.Map_;
[Type.List]: type.List;
[Type.Struct]: type.Struct;
Expand Down Expand Up @@ -270,6 +276,11 @@ type TypeToBuilder<T extends Type = any, TNull = any> = {
[Type.Interval]: IntervalBuilder<any, TNull>;
[Type.IntervalDayTime]: IntervalDayTimeBuilder<TNull>;
[Type.IntervalYearMonth]: IntervalYearMonthBuilder<TNull>;
// Duration: the generic tag maps to the base builder; each unit-specific tag
// maps to its dedicated builder (mirrors the Duration entries of DataTypeToBuilder).
[Type.Duration]: DurationBuilder<any, TNull>;
[Type.DurationSecond]: DurationSecondBuilder<TNull>;
[Type.DurationMillisecond]: DurationMillisecondBuilder<TNull>;
[Type.DurationMicrosecond]: DurationMicrosecondBuilder<TNull>;
[Type.DurationNanosecond]: DurationNanosecondBuilder<TNull>;
[Type.Map]: MapBuilder<any, any, TNull>;
[Type.List]: ListBuilder<any, TNull>;
[Type.Struct]: StructBuilder<any, TNull>;
Expand Down Expand Up @@ -318,6 +329,11 @@ type DataTypeToBuilder<T extends DataType = any, TNull = any> = {
[Type.Interval]: T extends type.Interval ? IntervalBuilder<T, TNull> : never;
[Type.IntervalDayTime]: T extends type.IntervalDayTime ? IntervalDayTimeBuilder<TNull> : never;
[Type.IntervalYearMonth]: T extends type.IntervalYearMonth ? IntervalYearMonthBuilder<TNull> : never;
[Type.Duration]: T extends type.Duration ? DurationBuilder<T, TNull>: never;
[Type.DurationSecond]: T extends type.DurationSecond ? DurationSecondBuilder<TNull> : never;
[Type.DurationMillisecond]: T extends type.DurationMillisecond ? DurationMillisecondBuilder<TNull> : never;
[Type.DurationMicrosecond]: T extends type.DurationMicrosecond ? DurationMicrosecondBuilder<TNull>: never;
[Type.DurationNanosecond]: T extends type.DurationNanosecond ? DurationNanosecondBuilder<TNull>: never;
[Type.Map]: T extends type.Map_ ? MapBuilder<T['keyType'], T['valueType'], TNull> : never;
[Type.List]: T extends type.List ? ListBuilder<T['valueType'], TNull> : never;
[Type.Struct]: T extends type.Struct ? StructBuilder<T['dataTypes'], TNull> : never;
Expand Down
6 changes: 5 additions & 1 deletion js/src/ipc/metadata/json.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ import {
DataType, Dictionary, TimeBitWidth,
Utf8, Binary, Decimal, FixedSizeBinary,
List, FixedSizeList, Map_, Struct, Union,
Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys,
Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, Duration,
} from '../../type.js';

import { DictionaryBatch, RecordBatch, FieldNode, BufferRegion } from './message.js';
Expand Down Expand Up @@ -185,6 +185,10 @@ function typeFromJSON(f: any, children?: Field[]): DataType<any> {
const t = f['type'];
return new Interval(IntervalUnit[t['unit']] as any);
}
case 'duration': {
const t = f['type'];
return new Duration(TimeUnit[t['unit']] as any);
}
case 'union': {
const t = f['type'];
const [m, ...ms] = (t['mode'] + '').toLowerCase();
Expand Down
7 changes: 6 additions & 1 deletion js/src/ipc/metadata/message.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ import { Date as _Date } from '../../fb/date.js';
import { Time as _Time } from '../../fb/time.js';
import { Timestamp as _Timestamp } from '../../fb/timestamp.js';
import { Interval as _Interval } from '../../fb/interval.js';
import { Duration as _Duration } from '../../fb/duration.js';
import { Union as _Union } from '../../fb/union.js';
import { FixedSizeBinary as _FixedSizeBinary } from '../../fb/fixed-size-binary.js';
import { FixedSizeList as _FixedSizeList } from '../../fb/fixed-size-list.js';
Expand All @@ -57,7 +58,7 @@ import {
DataType, Dictionary, TimeBitWidth,
Utf8, Binary, Decimal, FixedSizeBinary,
List, FixedSizeList, Map_, Struct, Union,
Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys,
Bool, Null, Int, Float, Date_, Time, Interval, Timestamp, IntBitWidth, Int32, TKeys, Duration,
} from '../../type.js';

/**
Expand Down Expand Up @@ -466,6 +467,10 @@ function decodeFieldType(f: _Field, children?: Field[]): DataType<any> {
const t = f.type(new _Interval())!;
return new Interval(t.unit());
}
case Type['Duration']: {
const t = f.type(new _Duration())!;
return new Duration(t.unit());
}
case Type['Union']: {
const t = f.type(new _Union())!;
return new Union(t.mode(), t.typeIdsArray() || [], children || []);
Expand Down
34 changes: 34 additions & 0 deletions js/src/type.ts
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ export abstract class DataType<TType extends Type = Type, TChildren extends Type
/** @nocollapse */ static isTime(x: any): x is Time_ { return x?.typeId === Type.Time; }
/** @nocollapse */ static isTimestamp(x: any): x is Timestamp_ { return x?.typeId === Type.Timestamp; }
/** @nocollapse */ static isInterval(x: any): x is Interval_ { return x?.typeId === Type.Interval; }
/** @nocollapse */ static isDuration(x: any): x is Duration { return x?.typeId === Type.Duration; }
/** @nocollapse */ static isList(x: any): x is List { return x?.typeId === Type.List; }
/** @nocollapse */ static isStruct(x: any): x is Struct { return x?.typeId === Type.Struct; }
/** @nocollapse */ static isUnion(x: any): x is Union_ { return x?.typeId === Type.Union; }
Expand Down Expand Up @@ -433,6 +434,39 @@ export class IntervalDayTime extends Interval_<Type.IntervalDayTime> { construct
/** @ignore */
export class IntervalYearMonth extends Interval_<Type.IntervalYearMonth> { constructor() { super(IntervalUnit.YEAR_MONTH); } }

/**
 * Every `Type` enum tag that a `Duration` data type may be parameterized with:
 * the generic `Type.Duration` plus one tag per unit-specific variant.
 * @ignore
 */
type Durations =
    | Type.Duration
    | Type.DurationSecond
    | Type.DurationMillisecond
    | Type.DurationMicrosecond
    | Type.DurationNanosecond;
/**
 * Type-level members of a Duration data type. This interface shares its name
 * with the `Duration` class, so the members declared here are merged into the
 * class's instance type.
 * @ignore
 */
export interface Duration<T extends Durations = Durations> extends DataType<T> {
    /** Typed-array type associated with this data type. */
    TArray: BigInt64Array;
    /** Type of a single element. */
    TValue: bigint;
    /**
     * Constructor of the typed array, not an instance: the class installs
     * `BigInt64Array` itself on the prototype, so the declared type must be
     * the constructor type for `new type.ArrayType(...)` to type-check.
     */
    ArrayType: BigInt64ArrayConstructor;
}

/**
 * Data type carrying a `TimeUnit`. The unit is supplied at construction and
 * exposed as the public read-only `unit` property.
 * @ignore
 */
export class Duration<T extends Durations = Durations> extends DataType<T> {
constructor(public readonly unit: TimeUnit) {
super();
}
/** Returns `Type.Duration` (cast to `T`). */
public get typeId() { return Type.Duration as T; }
/** Renders as `Duration<...>`, with the text inside the brackets taken from the `TimeUnit[this.unit]` lookup. */
public toString() { return `Duration<${TimeUnit[this.unit]}>`; }
// Static initializer: the arrow function is invoked immediately with
// `Duration.prototype`. It seeds prototype-level values for `unit` (null) and
// `ArrayType` (the `BigInt64Array` constructor), sets the prototype's
// `Symbol.toStringTag` to 'Duration', and that same string becomes the value
// of this static property.
protected static [Symbol.toStringTag] = ((proto: Duration) => {
(<any>proto).unit = null;
(<any>proto).ArrayType = BigInt64Array;
return proto[Symbol.toStringTag] = 'Duration';
})(Duration.prototype);
}

/**
 * `Duration` whose unit is fixed to `TimeUnit.SECOND`; takes no constructor arguments.
 * @ignore
 */
export class DurationSecond extends Duration<Type.DurationSecond> {
    constructor() {
        super(TimeUnit.SECOND);
    }
}
/**
 * `Duration` whose unit is fixed to `TimeUnit.MILLISECOND`; takes no constructor arguments.
 * @ignore
 */
export class DurationMillisecond extends Duration<Type.DurationMillisecond> {
    constructor() {
        super(TimeUnit.MILLISECOND);
    }
}
/**
 * `Duration` whose unit is fixed to `TimeUnit.MICROSECOND`; takes no constructor arguments.
 * @ignore
 */
export class DurationMicrosecond extends Duration<Type.DurationMicrosecond> {
    constructor() {
        super(TimeUnit.MICROSECOND);
    }
}
/**
 * `Duration` whose unit is fixed to `TimeUnit.NANOSECOND`; takes no constructor arguments.
 * @ignore
 */
export class DurationNanosecond extends Duration<Type.DurationNanosecond> {
    constructor() {
        super(TimeUnit.NANOSECOND);
    }
}


/** @ignore */
export interface List<T extends DataType = any> extends DataType<Type.List, { [0]: T }> {
TArray: Array<T>;
Expand Down
Loading

0 comments on commit 2046f23

Please sign in to comment.