org.apache.iceberg.data.Record

Here are examples of the Java API org.apache.iceberg.data.Record, taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.

123 Examples
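
Before the project-specific examples, here is a minimal, self-contained sketch of the pattern most of them share: define a Schema, create a GenericRecord bound to it, set fields by name, and copy() it to stamp out further rows. The schema and field names below are illustrative only and are not taken from any of the listed projects.

import org.apache.iceberg.Schema;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.types.Types;

public class RecordExample {

    public static void main(String[] args) {
        // Illustrative schema: a required int id and an optional string payload.
        Schema schema = new Schema(
                Types.NestedField.required(1, "id", Types.IntegerType.get()),
                Types.NestedField.optional(2, "data", Types.StringType.get()));

        // GenericRecord.create(schema) returns an empty Record bound to that schema.
        Record record = GenericRecord.create(schema);
        record.setField("id", 1);
        record.setField("data", "hello");

        // copy() produces an independent Record with the same values; useful as a row template.
        Record next = record.copy();
        next.setField("id", 2);

        System.out.println(record.getField("id") + " / " + next.getField("id"));
    }
}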

19 Source : IcebergWriterStageTest.java
with Apache License 2.0
from Netflix

class IcebergWriterStageTest {

    private static final Schema SCHEMA = new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get()));

    private TestScheduler scheduler;

    private TestSubscriber<DataFile> subscriber;

    private IcebergWriterStage.Transformer transformer;

    private Catalog catalog;

    private Context context;

    private IcebergWriterPool writerPool;

    private Partitioner partitioner;

    private Observable<DataFile> flow;

    private Record record;

    @BeforeEach
    void setUp() {
        record = GenericRecord.create(SCHEMA);
        record.setField("id", 1);
        this.scheduler = new TestScheduler();
        this.subscriber = new TestSubscriber<>();
        // Writer
        Parameters parameters = StageOverrideParameters.newParameters();
        WriterConfig config = new WriterConfig(parameters, mock(Configuration.class));
        WriterMetrics metrics = new WriterMetrics();
        IcebergWriterFactory factory = FakeIcebergWriter::new;
        this.writerPool = spy(new FixedIcebergWriterPool(factory, config.getWriterFlushFrequencyBytes(), config.getWriterMaximumPoolSize()));
        doReturn(Collections.singleton(record)).when(writerPool).getFlushableWriters();
        this.partitioner = mock(Partitioner.class);
        when(partitioner.partition(record)).thenReturn(record);
        this.transformer = new IcebergWriterStage.Transformer(config, metrics, this.writerPool, this.partitioner, this.scheduler, this.scheduler);
        // Catalog
        ServiceLocator serviceLocator = mock(ServiceLocator.class);
        when(serviceLocator.service(Configuration.class)).thenReturn(mock(Configuration.class));
        this.catalog = mock(Catalog.class);
        Table table = mock(Table.class);
        PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("id").build();
        when(table.spec()).thenReturn(spec);
        when(this.catalog.loadTable(any())).thenReturn(table);
        when(serviceLocator.service(Catalog.class)).thenReturn(this.catalog);
        when(serviceLocator.service(PartitionerFactory.class)).thenReturn(mock(PartitionerFactory.class));
        // Mantis Context
        this.context = mock(Context.class);
        when(this.context.getParameters()).thenReturn(parameters);
        when(this.context.getServiceLocator()).thenReturn(serviceLocator);
        // Flow
        Observable<Record> source = Observable.interval(1, TimeUnit.MILLISECONDS, this.scheduler).map(i -> record);
        this.flow = source.compose(this.transformer);
    }

    @Test
    void shouldAddWriterOnNewPartition() throws IOException {
        Record recordWithNewPartition = GenericRecord.create(SCHEMA);
        recordWithNewPartition.setField("id", 2);
        // Identity partitioning.
        when(partitioner.partition(recordWithNewPartition)).thenReturn(recordWithNewPartition);
        Observable<Record> source = Observable.just(record, record, recordWithNewPartition, record).concatMap(r -> Observable.just(r).delay(1, TimeUnit.MILLISECONDS, scheduler));
        flow = source.compose(transformer);
        flow.subscribeOn(scheduler).subscribe(subscriber);
        // Same partition; no other thresholds (size, time) met.
        scheduler.advanceTimeBy(2, TimeUnit.MILLISECONDS);
        subscriber.assertNoValues();
        // New partition detected; no thresholds met yet.
        scheduler.advanceTimeBy(1, TimeUnit.MILLISECONDS);
        subscriber.assertNoValues();
        // Existing partition detected; no thresholds met yet.
        scheduler.advanceTimeBy(1, TimeUnit.MILLISECONDS);
        subscriber.assertNoValues();
        verify(writerPool, times(1)).open(record);
        verify(writerPool, times(1)).open(recordWithNewPartition);
        verify(writerPool, times(3)).write(eq(record), any());
        verify(writerPool, times(1)).write(eq(recordWithNewPartition), any());
        verify(writerPool, times(0)).close(any());
    }

    @Test
    void shouldCloseOnSizeThreshold() throws IOException {
        flow.subscribeOn(scheduler).subscribe(subscriber);
        scheduler.advanceTimeBy(100, TimeUnit.MILLISECONDS);
        subscriber.assertValueCount(1);
        verify(writerPool, times(100)).write(any(), any());
        verify(writerPool, times(1)).close(record);
    }

    @Test
    void shouldNotCloseWhenUnderSizeThreshold() throws IOException {
        doReturn(new HashSet<>()).when(writerPool).getFlushableWriters();
        flow.subscribeOn(scheduler).subscribe(subscriber);
        // Size is checked at row-group-size config, but under size-threshold, so no-op.
        scheduler.advanceTimeBy(100, TimeUnit.MILLISECONDS);
        subscriber.assertNoValues();
        subscriber.assertNoTerminalEvent();
        verify(writerPool, times(100)).write(eq(record), any());
        verify(writerPool, times(0)).close(any());
    }

    @Test
    void shouldCloseOnlyFlushableWritersOnSizeThreshold() throws IOException {
        Record recordWithNewPartition = GenericRecord.create(SCHEMA);
        when(partitioner.partition(recordWithNewPartition)).thenReturn(recordWithNewPartition);
        Observable<Record> source = Observable.just(record, recordWithNewPartition).concatMap(r -> Observable.just(r).delay(1, TimeUnit.MILLISECONDS, scheduler)).repeat();
        flow = source.compose(transformer);
        flow.subscribeOn(scheduler).subscribe(subscriber);
        scheduler.advanceTimeBy(100, TimeUnit.MILLISECONDS);
        subscriber.assertValueCount(1);
        scheduler.advanceTimeBy(1, TimeUnit.MILLISECONDS);
        subscriber.assertValueCount(1);
        subscriber.assertNoTerminalEvent();
        verify(writerPool, times(101)).write(any(), any());
        verify(writerPool, times(1)).close(record);
        verify(writerPool, times(0)).close(recordWithNewPartition);
    }

    @Test
    void shouldCloseAllWritersOnTimeThresholdWhenLowVolume() throws IOException {
        Record recordWithNewPartition = GenericRecord.create(SCHEMA);
        when(partitioner.partition(recordWithNewPartition)).thenReturn(recordWithNewPartition);
        doReturn(new HashSet<>()).when(writerPool).getFlushableWriters();
        // Low volume stream.
        Observable<Record> source = Observable.just(record, recordWithNewPartition).concatMap(r -> Observable.just(r).delay(50, TimeUnit.MILLISECONDS, scheduler)).repeat();
        flow = source.compose(transformer);
        flow.subscribeOn(scheduler).subscribe(subscriber);
        // Over the size threshold, but not yet checked at row-group-size config.
        scheduler.advanceTimeBy(50, TimeUnit.MILLISECONDS);
        subscriber.assertNoValues();
        // Hits time threshold and there's data to write; proceed to close.
        scheduler.advanceTimeBy(450, TimeUnit.MILLISECONDS);
        subscriber.assertValueCount(2);
        subscriber.assertNoTerminalEvent();
        verify(writerPool, times(10)).write(any(), any());
        verify(writerPool, times(2)).close(any());
    }

    @Test
    void shouldCloseAllWritersOnTimeThresholdWhenHighVolume() throws IOException {
        Record recordWithNewPartition = GenericRecord.create(SCHEMA);
        when(partitioner.partition(recordWithNewPartition)).thenReturn(recordWithNewPartition);
        doReturn(new HashSet<>()).when(writerPool).getFlushableWriters();
        Observable<Record> source = Observable.just(record, recordWithNewPartition).concatMap(r -> Observable.just(r).delay(1, TimeUnit.MILLISECONDS, scheduler)).repeat();
        flow = source.compose(transformer);
        flow.subscribeOn(scheduler).subscribe(subscriber);
        scheduler.advanceTimeBy(1, TimeUnit.MILLISECONDS);
        subscriber.assertNoValues();
        // Size is checked at row-group-size config, but under size threshold, so no-op.
        scheduler.advanceTimeBy(99, TimeUnit.MILLISECONDS);
        subscriber.assertNoValues();
        // Hits time threshold; proceed to close.
        scheduler.advanceTimeBy(400, TimeUnit.MILLISECONDS);
        subscriber.assertValueCount(2);
        subscriber.assertNoTerminalEvent();
        verify(writerPool, times(500)).write(any(), any());
        verify(writerPool, times(2)).close(any());
    }

    @Test
    void shouldNoOpOnTimeThresholdWhenNoData() throws IOException {
        doReturn(new HashSet<>()).when(writerPool).getFlushableWriters();
        // Low volume stream.
        Observable<Record> source = Observable.interval(900, TimeUnit.MILLISECONDS, scheduler).map(i -> record);
        flow = source.compose(transformer);
        flow.subscribeOn(scheduler).subscribe(subscriber);
        // No event yet.
        scheduler.advanceTimeBy(500, TimeUnit.MILLISECONDS);
        subscriber.assertNoValues();
        // 1 event, timer threshold met, size threshold not met: flush.
        scheduler.advanceTimeBy(500, TimeUnit.MILLISECONDS);
        subscriber.assertValueCount(1);
        // No event yet again, writer exists but is closed from previous flush, timer threshold met: noop.
        scheduler.advanceTimeBy(500, TimeUnit.MILLISECONDS);
        // Count should not increase.
        subscriber.assertValueCount(1);
        subscriber.assertNoErrors();
        subscriber.assertNoTerminalEvent();
        verify(writerPool, times(1)).open(any());
        verify(writerPool, times(1)).write(any(), any());
        // 2nd close is a noop.
        verify(writerPool, times(1)).close(any());
    }

    @Test
    void shouldNoOpWhenFailedToOpen() throws IOException {
        doThrow(new IOException()).when(writerPool).open(any());
        flow.subscribeOn(scheduler).subscribe(subscriber);
        scheduler.advanceTimeBy(1, TimeUnit.MILLISECONDS);
        subscriber.assertError(RuntimeException.class);
        subscriber.assertTerminalEvent();
        verify(writerPool).open(any());
        subscriber.assertNoValues();
    }

    @Test
    void shouldContinueOnWriteFailure() {
        doThrow(new RuntimeException()).when(writerPool).write(any(), any());
        flow.subscribeOn(scheduler).subscribe(subscriber);
        scheduler.advanceTimeBy(1, TimeUnit.MILLISECONDS);
        subscriber.assertNoTerminalEvent();
        scheduler.advanceTimeBy(1, TimeUnit.MILLISECONDS);
        subscriber.assertNoTerminalEvent();
        verify(writerPool, times(2)).write(any(), any());
    }

    @Test
    @Disabled("Will never terminate: Source terminates, but timer will continue to tick")
    void shouldCloseOnTerminate() throws IOException {
        Observable<Record> source = Observable.just(record);
        Observable<DataFile> flow = source.compose(transformer);
        flow.subscribeOn(scheduler).subscribe(subscriber);
        scheduler.triggerActions();
        subscriber.assertNoErrors();
        verify(writerPool).open(record);
        verify(writerPool).write(any(), any());
        verify(writerPool, times(2)).isClosed(record);
        verify(writerPool, times(1)).close(record);
    }

    @Test
    void shouldInitializeWithExistingTable() {
        IcebergWriterStage stage = new IcebergWriterStage();
        assertDoesNotThrow(() -> stage.init(context));
    }

    @Test
    void shouldFailToInitializeWithMissingTable() {
        when(catalog.loadTable(any())).thenThrow(new RuntimeException());
        IcebergWriterStage stage = new IcebergWriterStage();
        assertThrows(RuntimeException.class, () -> stage.init(context));
    }

    private static class FakeIcebergWriter implements IcebergWriter {

        private static final DataFile DATA_FILE = new DataFiles.Builder().withPath("/datafile.parquet").withFileSizeInBytes(1L).withRecordCount(1L).build();

        private final Object object;

        private Object fileAppender;

        private StructLike partitionKey;

        public FakeIcebergWriter() {
            this.object = new Object();
            this.fileAppender = null;
        }

        @Override
        public void open() throws IOException {
            open(null);
        }

        @Override
        public void open(StructLike newPartitionKey) throws IOException {
            fileAppender = object;
            partitionKey = newPartitionKey;
        }

        @Override
        public void write(Record record) {
        }

        @Override
        public DataFile close() throws IOException {
            if (fileAppender != null) {
                fileAppender = null;
                return DATA_FILE;
            }
            return null;
        }

        @Override
        public boolean isClosed() {
            return fileAppender == null;
        }

        @Override
        public long length() {
            return 0;
        }

        @Override
        public StructLike getPartitionKey() {
            return partitionKey;
        }
    }
}

19 Source : FixedIcebergWriterPoolTest.java
with Apache License 2.0
from Netflix

class FixedIcebergWriterPoolTest {

    private static final Schema SCHEMA = new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get()));

    private IcebergWriter writer;

    private IcebergWriterPool writerPool;

    private Record record;

    private StructLike partition;

    @BeforeEach
    void setUp() {
        Parameters parameters = StageOverrideParameters.newParameters();
        WriterConfig config = new WriterConfig(parameters, mock(Configuration.class));
        IcebergWriterFactory factory = mock(IcebergWriterFactory.class);
        this.writer = mock(IcebergWriter.class);
        when(this.writer.length()).thenReturn(Long.MAX_VALUE);
        when(factory.newIcebergWriter()).thenReturn(this.writer);
        this.writerPool = spy(new FixedIcebergWriterPool(factory, config.getWriterFlushFrequencyBytes(), config.getWriterMaximumPoolSize()));
        this.record = GenericRecord.create(SCHEMA);
        this.record.setField("id", 1);
        // Identity partitioning (without explicitly using a Partitioner).
        this.partition = this.record.copy();
    }

    @Test
    void shouldOpenNewWriter() {
        assertDoesNotThrow(() -> writerPool.open(record));
    }

    @Test
    void shouldFailToOpenNewWriterWhenMaximumPoolSizeExceeded() {
        writerPool = spy(new FixedIcebergWriterPool(mock(IcebergWriterFactory.class), 0, 0));
        assertThrows(IOException.class, () -> writerPool.open(any()));
    }

    @Test
    void shouldOpenWhenWriterExists() {
        assertDoesNotThrow(() -> writerPool.open(record));
        assertDoesNotThrow(() -> writerPool.open(record));
    }

    @Test
    void shouldFailToWriteWhenNoWriterExists() {
        assertThrows(RuntimeException.class, () -> writerPool.write(partition, record));
    }

    @Test
    void shouldWriteWhenWriterExists() throws IOException {
        writerPool.open(partition);
        assertDoesNotThrow(() -> writerPool.write(partition, record));
    }

    @Test
    void shouldFailToCloseWhenNoWriterExists() {
        assertThrows(RuntimeException.class, () -> writerPool.close(record));
    }

    @Test
    void shouldCloseWhenWriterExists() throws IOException {
        writerPool.open(partition);
        assertDoesNotThrow(() -> writerPool.close(partition));
    }

    @Test
    void shouldGetFlushableWriters() throws IOException {
        writerPool.open(partition);
        assertFalse(writerPool.getFlushableWriters().isEmpty());
        when(writer.length()).thenReturn(Long.MIN_VALUE);
        assertTrue(writerPool.getFlushableWriters().isEmpty());
    }
}

19 Source : IcebergWritable.java
with Apache License 2.0
from ExpediaGroup

class IcebergWritable implements Writable {

    private Record record;

    private Schema schema;

    public IcebergWritable() {
    }

    public void setRecord(Record record) {
        this.record = record;
    }

    public Record getRecord() {
        return record;
    }

    public Schema getSchema() {
        return schema;
    }

    public void setSchema(Schema schema) {
        this.schema = schema;
    }

    @Override
    public void write(DataOutput dataOutput) throws IOException {
    }

    @Override
    public void readFields(DataInput dataInput) throws IOException {
    }
}

19 Source : IcebergWritable.java
with Apache License 2.0
from ExpediaGroup

public void setRecord(Record record) {
    this.record = record;
}

19 Source : HiveIcebergRecordWriter.java
with Apache License 2.0
from apache

@Override
protected PartitionKey partition(Record row) {
    currentKey.partition(row);
    return currentKey;
}

19 Source : SimpleDataUtil.java
with Apache License 2.0
from apache

public class SimpleDataUtil {

    private SimpleDataUtil() {
    }

    public static final Schema SCHEMA = new Schema(Types.NestedField.optional(1, "id", Types.IntegerType.get()), Types.NestedField.optional(2, "data", Types.StringType.get()));

    public static final TableSchema FLINK_SCHEMA = TableSchema.builder().field("id", DataTypes.INT()).field("data", DataTypes.STRING()).build();

    public static final RowType ROW_TYPE = (RowType) FLINK_SCHEMA.toRowDataType().getLogicalType();

    public static final Record RECORD = GenericRecord.create(SCHEMA);

    public static Table createTable(String path, Map<String, String> properties, boolean partitioned) {
        PartitionSpec spec;
        if (partitioned) {
            spec = PartitionSpec.builderFor(SCHEMA).identity("data").build();
        } else {
            spec = PartitionSpec.unpartitioned();
        }
        return new HadoopTables().create(SCHEMA, spec, properties, path);
    }

    public static Record createRecord(Integer id, String data) {
        Record record = RECORD.copy();
        record.setField("id", id);
        record.setField("data", data);
        return record;
    }

    public static RowData createRowData(Integer id, String data) {
        return GenericRowData.of(id, StringData.fromString(data));
    }

    public static RowData createInsert(Integer id, String data) {
        return GenericRowData.ofKind(RowKind.INSERT, id, StringData.fromString(data));
    }

    public static RowData createDelete(Integer id, String data) {
        return GenericRowData.ofKind(RowKind.DELETE, id, StringData.fromString(data));
    }

    public static RowData createUpdateBefore(Integer id, String data) {
        return GenericRowData.ofKind(RowKind.UPDATE_BEFORE, id, StringData.fromString(data));
    }

    public static RowData createUpdateAfter(Integer id, String data) {
        return GenericRowData.ofKind(RowKind.UPDATE_AFTER, id, StringData.fromString(data));
    }

    public static DataFile writeFile(Schema schema, PartitionSpec spec, Configuration conf, String location, String filename, List<RowData> rows) throws IOException {
        Path path = new Path(location, filename);
        FileFormat fileFormat = FileFormat.fromFileName(filename);
        Preconditions.checkNotNull(fileFormat, "Cannot determine format for file: %s", filename);
        RowType flinkSchema = FlinkSchemaUtil.convert(schema);
        FileAppenderFactory<RowData> appenderFactory = new FlinkAppenderFactory(schema, flinkSchema, ImmutableMap.of(), spec);
        FileAppender<RowData> appender = appenderFactory.newAppender(fromPath(path, conf), fileFormat);
        try (FileAppender<RowData> closeableAppender = appender) {
            closeableAppender.addAll(rows);
        }
        return DataFiles.builder(spec).withInputFile(HadoopInputFile.fromPath(path, conf)).withMetrics(appender.metrics()).build();
    }

    public static DeleteFile writeEqDeleteFile(Table table, FileFormat format, String tablePath, String filename, FileAppenderFactory<RowData> appenderFactory, List<RowData> deletes) throws IOException {
        EncryptedOutputFile outputFile = table.encryption().encrypt(fromPath(new Path(tablePath, filename), new Configuration()));
        EqualityDeleteWriter<RowData> eqWriter = appenderFactory.newEqDeleteWriter(outputFile, format, null);
        try (EqualityDeleteWriter<RowData> writer = eqWriter) {
            writer.deleteAll(deletes);
        }
        return eqWriter.toDeleteFile();
    }

    public static DeleteFile writePosDeleteFile(Table table, FileFormat format, String tablePath, String filename, FileAppenderFactory<RowData> appenderFactory, List<Pair<CharSequence, Long>> positions) throws IOException {
        EncryptedOutputFile outputFile = table.encryption().encrypt(fromPath(new Path(tablePath, filename), new Configuration()));
        PositionDeleteWriter<RowData> posWriter = appenderFactory.newPosDeleteWriter(outputFile, format, null);
        try (PositionDeleteWriter<RowData> writer = posWriter) {
            for (Pair<CharSequence, Long> p : positions) {
                writer.delete(p.first(), p.second());
            }
        }
        return posWriter.toDeleteFile();
    }

    private static List<Record> convertToRecords(List<RowData> rows) {
        List<Record> records = Lists.newArrayList();
        for (RowData row : rows) {
            Integer id = row.isNullAt(0) ? null : row.getInt(0);
            String data = row.isNullAt(1) ? null : row.getString(1).toString();
            records.add(createRecord(id, data));
        }
        return records;
    }

    public static void assertTableRows(String tablePath, List<RowData> expected) throws IOException {
        assertTableRecords(tablePath, convertToRecords(expected));
    }

    public static void assertTableRows(Table table, List<RowData> expected) throws IOException {
        assertTableRecords(table, convertToRecords(expected));
    }

    public static void assertTableRecords(Table table, List<Record> expected) throws IOException {
        table.refresh();
        try (CloseableIterable<Record> iterable = IcebergGenerics.read(table).build()) {
            Assert.assertEquals("Should produce the expected record", HashMultiset.create(expected), HashMultiset.create(iterable));
        }
    }

    public static void assertTableRecords(String tablePath, List<Record> expected) throws IOException {
        Preconditions.checkArgument(expected != null, "expected records shouldn't be null");
        assertTableRecords(new HadoopTables().load(tablePath), expected);
    }

    public static StructLikeSet expectedRowSet(Table table, Record... records) {
        StructLikeSet set = StructLikeSet.create(table.schema().asStruct());
        Collections.addAll(set, records);
        return set;
    }

    public static StructLikeSet actualRowSet(Table table, String... columns) throws IOException {
        table.refresh();
        StructLikeSet set = StructLikeSet.create(table.schema().asStruct());
        try (CloseableIterable<Record> reader = IcebergGenerics.read(table).select(columns).build()) {
            reader.forEach(set::add);
        }
        return set;
    }
}

19 Source : TestGenericSortedPosDeleteWriter.java
with Apache License 2.0
from apache

private DataFile prepareDataFile(FileAppenderFactory<Record> appenderFactory, List<Record> rowSet) throws IOException {
    DataWriter<Record> writer = appenderFactory.newDataWriter(createEncryptedOutputFile(), format, null);
    try (DataWriter<Record> closeableWriter = writer) {
        for (Record record : rowSet) {
            closeableWriter.add(record);
        }
    }
    return writer.toDataFile();
}

19 Source : TestGenericReadProjection.java
with Apache License 2.0
from apache

@Override
protected Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, Record record) throws IOException {
    File file = temp.newFile(desc + ".orc");
    file.delete();
    try (FileAppender<Record> appender = ORC.write(Files.localOutput(file)).schema(writeSchema).createWriterFunc(GenericOrcWriter::buildWriter).build()) {
        appender.add(record);
    }
    Iterable<Record> records = ORC.read(Files.localInput(file)).project(readSchema).createReaderFunc(fileSchema -> GenericOrcReader.buildReader(readSchema, fileSchema)).build();
    return Iterables.getOnlyElement(records);
}

19 Source : TestGenericReadProjection.java
with Apache License 2.0
from apache

@Override
protected Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, Record record) throws IOException {
    File file = temp.newFile(desc + ".avro");
    file.delete();
    try (FileAppender<Record> appender = Avro.write(Files.localOutput(file)).schema(writeSchema).createWriterFunc(DataWriter::create).build()) {
        appender.add(record);
    }
    Iterable<Record> records = Avro.read(Files.localInput(file)).project(readSchema).createReaderFunc(DataReader::create).build();
    return Iterables.getOnlyElement(records);
}

19 Source : TestMetrics.java
with Apache License 2.0
from apache

/**
 * Tests for Metrics.
 */
public abstract class TestMetrics {

    private static final StructType LEAF_STRUCT_TYPE = StructType.of(optional(5, "leafLongCol", LongType.get()), optional(6, "leafBinaryCol", BinaryType.get()));

    private static final StructType NESTED_STRUCT_TYPE = StructType.of(required(3, "longCol", LongType.get()), required(4, "leafStructCol", LEAF_STRUCT_TYPE), required(7, "doubleCol", DoubleType.get()));

    private static final Schema NESTED_SCHEMA = new Schema(required(1, "intCol", IntegerType.get()), required(2, "nestedStructCol", NESTED_STRUCT_TYPE));

    private static final Schema SIMPLE_SCHEMA = new Schema(optional(1, "booleanCol", BooleanType.get()), required(2, "intCol", IntegerType.get()), optional(3, "longCol", LongType.get()), required(4, "floatCol", FloatType.get()), optional(5, "doubleCol", DoubleType.get()), optional(6, "decimalCol", DecimalType.of(10, 2)), required(7, "stringCol", StringType.get()), optional(8, "dateCol", DateType.get()), required(9, "timeCol", TimeType.get()), required(10, "timestampColAboveEpoch", TimestampType.withoutZone()), required(11, "fixedCol", FixedType.ofLength(4)), required(12, "binaryCol", BinaryType.get()), required(13, "timestampColBelowEpoch", TimestampType.withoutZone()));

    private static final Schema FLOAT_DOUBLE_ONLY_SCHEMA = new Schema(optional(1, "floatCol", FloatType.get()), optional(2, "doubleCol", DoubleType.get()));

    private static final Record FLOAT_DOUBLE_RECORD_1 = createRecordWithFloatAndDouble(1.2F, 3.4D);

    private static final Record FLOAT_DOUBLE_RECORD_2 = createRecordWithFloatAndDouble(5.6F, 7.8D);

    private static final Record NAN_ONLY_RECORD = createRecordWithFloatAndDouble(Float.NaN, Double.NaN);

    private final byte[] fixed = "abcd".getBytes(StandardCharsets.UTF_8);

    private static Record createRecordWithFloatAndDouble(float floatValue, double doubleValue) {
        Record record = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
        record.setField("floatCol", floatValue);
        record.setField("doubleCol", doubleValue);
        return record;
    }

    public abstract FileFormat fileFormat();

    public abstract Metrics getMetrics(Schema schema, MetricsConfig metricsConfig, Record... records) throws IOException;

    public abstract Metrics getMetrics(Schema schema, Record... records) throws IOException;

    protected abstract Metrics getMetricsForRecordsWithSmallRowGroups(Schema schema, OutputFile outputFile, Record... records) throws IOException;

    public abstract int splitCount(InputFile inputFile) throws IOException;

    public boolean supportsSmallRowGroups() {
        return false;
    }

    protected abstract OutputFile createOutputFile() throws IOException;

    @Test
    public void testMetricsForRepeatedValues() throws IOException {
        Record record = GenericRecord.create(SIMPLE_SCHEMA);
        record.setField("booleanCol", true);
        record.setField("intCol", 3);
        record.setField("longCol", null);
        record.setField("floatCol", Float.NaN);
        record.setField("doubleCol", 2.0D);
        record.setField("decimalCol", new BigDecimal("3.50"));
        record.setField("stringCol", "AAA");
        record.setField("dateCol", DateTimeUtil.dateFromDays(1500));
        record.setField("timeCol", DateTimeUtil.timeFromMicros(2000L));
        record.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(0L));
        record.setField("fixedCol", fixed);
        record.setField("binaryCol", ByteBuffer.wrap("S".getBytes()));
        record.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros(0L));
        Metrics metrics = getMetrics(SIMPLE_SCHEMA, record, record);
        Assert.assertEquals(2L, (long) metrics.recordCount());
        assertCounts(1, 2L, 0L, metrics);
        assertCounts(2, 2L, 0L, metrics);
        assertCounts(3, 2L, 2L, metrics);
        assertCounts(4, 2L, 0L, 2L, metrics);
        assertCounts(5, 2L, 0L, 0L, metrics);
        assertCounts(6, 2L, 0L, metrics);
        assertCounts(7, 2L, 0L, metrics);
        assertCounts(8, 2L, 0L, metrics);
        assertCounts(9, 2L, 0L, metrics);
        assertCounts(10, 2L, 0L, metrics);
        assertCounts(11, 2L, 0L, metrics);
        assertCounts(12, 2L, 0L, metrics);
        assertCounts(13, 2L, 0L, metrics);
    }

    @Test
    public void testMetricsForTopLevelFields() throws IOException {
        Record firstRecord = GenericRecord.create(SIMPLE_SCHEMA);
        firstRecord.setField("booleanCol", true);
        firstRecord.setField("intCol", 3);
        firstRecord.setField("longCol", 5L);
        firstRecord.setField("floatCol", 2.0F);
        firstRecord.setField("doubleCol", 2.0D);
        firstRecord.setField("decimalCol", new BigDecimal("3.50"));
        firstRecord.setField("stringCol", "AAA");
        firstRecord.setField("dateCol", DateTimeUtil.dateFromDays(1500));
        firstRecord.setField("timeCol", DateTimeUtil.timeFromMicros(2000L));
        firstRecord.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(0L));
        firstRecord.setField("fixedCol", fixed);
        firstRecord.setField("binaryCol", ByteBuffer.wrap("S".getBytes()));
        firstRecord.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros(-1_900_300L));
        Record secondRecord = GenericRecord.create(SIMPLE_SCHEMA);
        secondRecord.setField("booleanCol", false);
        secondRecord.setField("intCol", Integer.MIN_VALUE);
        secondRecord.setField("longCol", null);
        secondRecord.setField("floatCol", 1.0F);
        secondRecord.setField("doubleCol", null);
        secondRecord.setField("decimalCol", null);
        secondRecord.setField("stringCol", "ZZZ");
        secondRecord.setField("dateCol", null);
        secondRecord.setField("timeCol", DateTimeUtil.timeFromMicros(3000L));
        secondRecord.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(900L));
        secondRecord.setField("fixedCol", fixed);
        secondRecord.setField("binaryCol", ByteBuffer.wrap("W".getBytes()));
        secondRecord.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros(-7_000L));
        Metrics metrics = getMetrics(SIMPLE_SCHEMA, firstRecord, secondRecord);
        Assert.assertEquals(2L, (long) metrics.recordCount());
        assertCounts(1, 2L, 0L, metrics);
        assertBounds(1, BooleanType.get(), false, true, metrics);
        assertCounts(2, 2L, 0L, metrics);
        assertBounds(2, IntegerType.get(), Integer.MIN_VALUE, 3, metrics);
        assertCounts(3, 2L, 1L, metrics);
        assertBounds(3, LongType.get(), 5L, 5L, metrics);
        assertCounts(4, 2L, 0L, 0L, metrics);
        assertBounds(4, FloatType.get(), 1.0F, 2.0F, metrics);
        assertCounts(5, 2L, 1L, 0L, metrics);
        assertBounds(5, DoubleType.get(), 2.0D, 2.0D, metrics);
        assertCounts(6, 2L, 1L, metrics);
        assertBounds(6, DecimalType.of(10, 2), new BigDecimal("3.50"), new BigDecimal("3.50"), metrics);
        assertCounts(7, 2L, 0L, metrics);
        assertBounds(7, StringType.get(), CharBuffer.wrap("AAA"), CharBuffer.wrap("ZZZ"), metrics);
        assertCounts(8, 2L, 1L, metrics);
        assertBounds(8, DateType.get(), 1500, 1500, metrics);
        assertCounts(9, 2L, 0L, metrics);
        assertBounds(9, TimeType.get(), 2000L, 3000L, metrics);
        assertCounts(10, 2L, 0L, metrics);
        assertBounds(10, TimestampType.withoutZone(), 0L, 900L, metrics);
        assertCounts(11, 2L, 0L, metrics);
        assertBounds(11, FixedType.ofLength(4), ByteBuffer.wrap(fixed), ByteBuffer.wrap(fixed), metrics);
        assertCounts(12, 2L, 0L, metrics);
        assertBounds(12, BinaryType.get(), ByteBuffer.wrap("S".getBytes()), ByteBuffer.wrap("W".getBytes()), metrics);
        if (fileFormat() == FileFormat.ORC) {
            // TODO: The special condition for ORC can be removed when ORC-342 is fixed
            // ORC-342: ORC writer creates inaccurate timestamp data and stats 1 sec below epoch
            // Values in the range `[1969-12-31 23:59:59.000,1969-12-31 23:59:59.999]` will have 1 sec added to them
            // So the upper bound value of -7_000 micros becomes 993_000 micros
            assertBounds(13, TimestampType.withoutZone(), -1_900_300L, 993_000L, metrics);
        } else {
            assertBounds(13, TimestampType.withoutZone(), -1_900_300L, -7_000L, metrics);
        }
    }

    @Test
    public void testMetricsForDecimals() throws IOException {
        Schema schema = new Schema(required(1, "decimalAsInt32", DecimalType.of(4, 2)), required(2, "decimalAsInt64", DecimalType.of(14, 2)), required(3, "decimalAsFixed", DecimalType.of(22, 2)));
        Record record = GenericRecord.create(schema);
        record.setField("decimalAsInt32", new BigDecimal("2.55"));
        record.setField("decimalAsInt64", new BigDecimal("4.75"));
        record.setField("decimalAsFixed", new BigDecimal("5.80"));
        Metrics metrics = getMetrics(schema, record);
        Assert.assertEquals(1L, (long) metrics.recordCount());
        assertCounts(1, 1L, 0L, metrics);
        assertBounds(1, DecimalType.of(4, 2), new BigDecimal("2.55"), new BigDecimal("2.55"), metrics);
        assertCounts(2, 1L, 0L, metrics);
        assertBounds(2, DecimalType.of(14, 2), new BigDecimal("4.75"), new BigDecimal("4.75"), metrics);
        assertCounts(3, 1L, 0L, metrics);
        assertBounds(3, DecimalType.of(22, 2), new BigDecimal("5.80"), new BigDecimal("5.80"), metrics);
    }

    @Test
    public void testMetricsForNestedStructFields() throws IOException {
        Metrics metrics = getMetrics(NESTED_SCHEMA, buildNestedTestRecord());
        Assert.assertEquals(1L, (long) metrics.recordCount());
        assertCounts(1, 1L, 0L, metrics);
        assertBounds(1, IntegerType.get(), Integer.MAX_VALUE, Integer.MAX_VALUE, metrics);
        assertCounts(3, 1L, 0L, metrics);
        assertBounds(3, LongType.get(), 100L, 100L, metrics);
        assertCounts(5, 1L, 0L, metrics);
        assertBounds(5, LongType.get(), 20L, 20L, metrics);
        assertCounts(6, 1L, 0L, metrics);
        assertBounds(6, BinaryType.get(), ByteBuffer.wrap("A".getBytes()), ByteBuffer.wrap("A".getBytes()), metrics);
        assertCounts(7, 1L, 0L, 1L, metrics);
        assertBounds(7, DoubleType.get(), Double.NaN, Double.NaN, metrics);
    }

    private Record buildNestedTestRecord() {
        Record leafStruct = GenericRecord.create(LEAF_STRUCT_TYPE);
        leafStruct.setField("leafLongCol", 20L);
        leafStruct.setField("leafBinaryCol", ByteBuffer.wrap("A".getBytes()));
        Record nestedStruct = GenericRecord.create(NESTED_STRUCT_TYPE);
        nestedStruct.setField("longCol", 100L);
        nestedStruct.setField("leafStructCol", leafStruct);
        nestedStruct.setField("doubleCol", Double.NaN);
        Record record = GenericRecord.create(NESTED_SCHEMA);
        record.setField("intCol", Integer.MAX_VALUE);
        record.setField("nestedStructCol", nestedStruct);
        return record;
    }

    @Test
    public void testMetricsForListAndMapElements() throws IOException {
        StructType structType = StructType.of(required(1, "leafIntCol", IntegerType.get()), optional(2, "leafStringCol", StringType.get()));
        Schema schema = new Schema(optional(3, "intListCol", ListType.ofRequired(4, IntegerType.get())), optional(5, "mapCol", MapType.ofRequired(6, 7, StringType.get(), structType)));
        Record record = GenericRecord.create(schema);
        record.setField("intListCol", Lists.newArrayList(10, 11, 12));
        Record struct = GenericRecord.create(structType);
        struct.setField("leafIntCol", 1);
        struct.setField("leafStringCol", "BBB");
        Map<String, Record> map = Maps.newHashMap();
        map.put("4", struct);
        record.set(1, map);
        Metrics metrics = getMetrics(schema, record);
        Assert.assertEquals(1L, (long) metrics.recordCount());
        if (fileFormat() != FileFormat.ORC) {
            assertCounts(1, 1L, 0L, metrics);
            assertCounts(2, 1L, 0L, metrics);
            assertCounts(4, 3L, 0L, metrics);
            assertCounts(6, 1L, 0L, metrics);
        } else {
            assertCounts(1, null, null, metrics);
            assertCounts(2, null, null, metrics);
            assertCounts(4, null, null, metrics);
            assertCounts(6, null, null, metrics);
        }
        assertBounds(1, IntegerType.get(), null, null, metrics);
        assertBounds(2, StringType.get(), null, null, metrics);
        assertBounds(4, IntegerType.get(), null, null, metrics);
        assertBounds(6, StringType.get(), null, null, metrics);
        assertBounds(7, structType, null, null, metrics);
    }

    @Test
    public void testMetricsForNullColumns() throws IOException {
        Schema schema = new Schema(optional(1, "intCol", IntegerType.get()));
        Record firstRecord = GenericRecord.create(schema);
        firstRecord.setField("intCol", null);
        Record secondRecord = GenericRecord.create(schema);
        secondRecord.setField("intCol", null);
        Metrics metrics = getMetrics(schema, firstRecord, secondRecord);
        Assert.assertEquals(2L, (long) metrics.recordCount());
        assertCounts(1, 2L, 2L, metrics);
        assertBounds(1, IntegerType.get(), null, null, metrics);
    }

    @Test
    public void testMetricsForNaNColumns() throws IOException {
        Metrics metrics = getMetrics(FLOAT_DOUBLE_ONLY_SCHEMA, NAN_ONLY_RECORD, NAN_ONLY_RECORD);
        Assert.assertEquals(2L, (long) metrics.recordCount());
        assertCounts(1, 2L, 0L, 2L, metrics);
        assertCounts(2, 2L, 0L, 2L, metrics);
        // below: current behavior; will be null once NaN is excluded from upper/lower bound
        assertBounds(1, FloatType.get(), Float.NaN, Float.NaN, metrics);
        assertBounds(2, DoubleType.get(), Double.NaN, Double.NaN, metrics);
    }

    @Test
    public void testColumnBoundsWithNaNValueAtFront() throws IOException {
        Metrics metrics = getMetrics(FLOAT_DOUBLE_ONLY_SCHEMA, NAN_ONLY_RECORD, FLOAT_DOUBLE_RECORD_1, FLOAT_DOUBLE_RECORD_2);
        Assert.assertEquals(3L, (long) metrics.recordCount());
        assertCounts(1, 3L, 0L, 1L, metrics);
        assertCounts(2, 3L, 0L, 1L, metrics);
        // below: current behavior; will be non-NaN values once NaN is excluded from upper/lower bound. ORC and Parquet's
        // behaviors differ due to their implementation of comparison being different.
        if (fileFormat() == FileFormat.ORC) {
            assertBounds(1, FloatType.get(), Float.NaN, Float.NaN, metrics);
            assertBounds(2, DoubleType.get(), Double.NaN, Double.NaN, metrics);
        } else {
            assertBounds(1, FloatType.get(), 1.2F, Float.NaN, metrics);
            assertBounds(2, DoubleType.get(), 3.4D, Double.NaN, metrics);
        }
    }

    @Test
    public void testColumnBoundsWithNaNValueInMiddle() throws IOException {
        Metrics metrics = getMetrics(FLOAT_DOUBLE_ONLY_SCHEMA, FLOAT_DOUBLE_RECORD_1, NAN_ONLY_RECORD, FLOAT_DOUBLE_RECORD_2);
        Assert.assertEquals(3L, (long) metrics.recordCount());
        assertCounts(1, 3L, 0L, 1L, metrics);
        assertCounts(2, 3L, 0L, 1L, metrics);
        // below: current behavior; will be non-NaN values once NaN is excluded from upper/lower bound. ORC and Parquet's
        // behaviors differ due to their implementation of comparison being different.
        if (fileFormat() == FileFormat.ORC) {
            assertBounds(1, FloatType.get(), 1.2F, 5.6F, metrics);
            assertBounds(2, DoubleType.get(), 3.4D, 7.8D, metrics);
        } else {
            assertBounds(1, FloatType.get(), 1.2F, Float.NaN, metrics);
            assertBounds(2, DoubleType.get(), 3.4D, Double.NaN, metrics);
        }
    }

    @Test
    public void testColumnBoundsWithNaNValueAtEnd() throws IOException {
        Metrics metrics = getMetrics(FLOAT_DOUBLE_ONLY_SCHEMA, FLOAT_DOUBLE_RECORD_1, FLOAT_DOUBLE_RECORD_2, NAN_ONLY_RECORD);
        Assert.assertEquals(3L, (long) metrics.recordCount());
        assertCounts(1, 3L, 0L, 1L, metrics);
        assertCounts(2, 3L, 0L, 1L, metrics);
        // below: current behavior; will be non-NaN values once NaN is excluded from upper/lower bound. ORC and Parquet's
        // behaviors differ due to their implementation of comparison being different.
        if (fileFormat() == FileFormat.ORC) {
            assertBounds(1, FloatType.get(), 1.2F, 5.6F, metrics);
            assertBounds(2, DoubleType.get(), 3.4D, 7.8D, metrics);
        } else {
            assertBounds(1, FloatType.get(), 1.2F, Float.NaN, metrics);
            assertBounds(2, DoubleType.get(), 3.4D, Double.NaN, metrics);
        }
    }

    @Test
    public void testMetricsForTopLevelWithMultipleRowGroup() throws Exception {
        Assume.assumeTrue("Skip test for formats that do not support small row groups", supportsSmallRowGroups());
        int recordCount = 201;
        List<Record> records = new ArrayList<>(recordCount);
        for (int i = 0; i < recordCount; i++) {
            Record newRecord = GenericRecord.create(SIMPLE_SCHEMA);
            newRecord.setField("booleanCol", i == 0 ? false : true);
            newRecord.setField("intCol", i + 1);
            newRecord.setField("longCol", i == 0 ? null : i + 1L);
            newRecord.setField("floatCol", i + 1.0F);
            newRecord.setField("doubleCol", i == 0 ? null : i + 1.0D);
            newRecord.setField("decimalCol", i == 0 ? null : new BigDecimal(i + "").add(new BigDecimal("1.00")));
            newRecord.setField("stringCol", "AAA");
            newRecord.setField("dateCol", DateTimeUtil.dateFromDays(i + 1));
            newRecord.setField("timeCol", DateTimeUtil.timeFromMicros(i + 1L));
            newRecord.setField("timestampColAboveEpoch", DateTimeUtil.timestampFromMicros(i + 1L));
            newRecord.setField("fixedCol", fixed);
            newRecord.setField("binaryCol", ByteBuffer.wrap("S".getBytes()));
            newRecord.setField("timestampColBelowEpoch", DateTimeUtil.timestampFromMicros((i + 1L) * -1L));
            records.add(newRecord);
        }
        // create file with multiple row groups. by using smaller number of bytes
        OutputFile outputFile = createOutputFile();
        Metrics metrics = getMetricsForRecordsWithSmallRowGroups(SIMPLE_SCHEMA, outputFile, records.toArray(new Record[0]));
        InputFile recordsFile = outputFile.toInputFile();
        Assert.assertNotNull(recordsFile);
        // rowgroup size should be > 1
        Assert.assertEquals(3, splitCount(recordsFile));
        Assert.assertEquals(201L, (long) metrics.recordCount());
        assertCounts(1, 201L, 0L, metrics);
        assertBounds(1, Types.BooleanType.get(), false, true, metrics);
        assertBounds(2, Types.IntegerType.get(), 1, 201, metrics);
        assertCounts(3, 201L, 1L, metrics);
        assertBounds(3, Types.LongType.get(), 2L, 201L, metrics);
        assertCounts(4, 201L, 0L, 0L, metrics);
        assertBounds(4, Types.FloatType.get(), 1.0F, 201.0F, metrics);
        assertCounts(5, 201L, 1L, 0L, metrics);
        assertBounds(5, Types.DoubleType.get(), 2.0D, 201.0D, metrics);
        assertCounts(6, 201L, 1L, metrics);
        assertBounds(6, Types.DecimalType.of(10, 2), new BigDecimal("2.00"), new BigDecimal("201.00"), metrics);
    }

    @Test
    public void testMetricsForNestedStructFieldsWithMultipleRowGroup() throws IOException {
        Assume.assumeTrue("Skip test for formats that do not support small row groups", supportsSmallRowGroups());
        int recordCount = 201;
        List<Record> records = Lists.newArrayListWithExpectedSize(recordCount);
        for (int i = 0; i < recordCount; i++) {
            Record newLeafStruct = GenericRecord.create(LEAF_STRUCT_TYPE);
            newLeafStruct.setField("leafLongCol", i + 1L);
            newLeafStruct.setField("leafBinaryCol", ByteBuffer.wrap("A".getBytes()));
            Record newNestedStruct = GenericRecord.create(NESTED_STRUCT_TYPE);
            newNestedStruct.setField("longCol", i + 1L);
            newNestedStruct.setField("leafStructCol", newLeafStruct);
            newNestedStruct.setField("doubleCol", Double.NaN);
            Record newRecord = GenericRecord.create(NESTED_SCHEMA);
            newRecord.setField("intCol", i + 1);
            newRecord.setField("nestedStructCol", newNestedStruct);
            records.add(newRecord);
        }
        // create file with multiple row groups. by using smaller number of bytes
        OutputFile outputFile = createOutputFile();
        Metrics metrics = getMetricsForRecordsWithSmallRowGroups(NESTED_SCHEMA, outputFile, records.toArray(new Record[0]));
        InputFile recordsFile = outputFile.toInputFile();
        Assert.assertNotNull(recordsFile);
        // rowgroup size should be > 1
        Assert.assertEquals(3, splitCount(recordsFile));
        Assert.assertEquals(201L, (long) metrics.recordCount());
        assertCounts(1, 201L, 0L, metrics);
        assertBounds(1, IntegerType.get(), 1, 201, metrics);
        assertCounts(3, 201L, 0L, metrics);
        assertBounds(3, LongType.get(), 1L, 201L, metrics);
        assertCounts(5, 201L, 0L, metrics);
        assertBounds(5, LongType.get(), 1L, 201L, metrics);
        assertCounts(6, 201L, 0L, metrics);
        assertBounds(6, BinaryType.get(), ByteBuffer.wrap("A".getBytes()), ByteBuffer.wrap("A".getBytes()), metrics);
        assertCounts(7, 201L, 0L, 201L, metrics);
        assertBounds(7, DoubleType.get(), Double.NaN, Double.NaN, metrics);
    }

    @Test
    public void testNoneMetricsMode() throws IOException {
        Metrics metrics = getMetrics(NESTED_SCHEMA, MetricsConfig.fromProperties(ImmutableMap.of("write.metadata.metrics.default", "none")), buildNestedTestRecord());
        Assert.assertEquals(1L, (long) metrics.recordCount());
        Assert.assertTrue(metrics.columnSizes().values().stream().allMatch(Objects::nonNull));
        assertCounts(1, null, null, metrics);
        assertBounds(1, Types.IntegerType.get(), null, null, metrics);
        assertCounts(3, null, null, metrics);
        assertBounds(3, Types.LongType.get(), null, null, metrics);
        assertCounts(5, null, null, metrics);
        assertBounds(5, Types.LongType.get(), null, null, metrics);
        assertCounts(6, null, null, metrics);
        assertBounds(6, Types.BinaryType.get(), null, null, metrics);
        assertCounts(7, null, null, metrics);
        assertBounds(7, Types.DoubleType.get(), null, null, metrics);
    }

    @Test
    public void testCountsMetricsMode() throws IOException {
        Metrics metrics = getMetrics(NESTED_SCHEMA, MetricsConfig.fromProperties(ImmutableMap.of("write.metadata.metrics.default", "counts")), buildNestedTestRecord());
        Assert.assertEquals(1L, (long) metrics.recordCount());
        Assert.assertTrue(metrics.columnSizes().values().stream().allMatch(Objects::nonNull));
        assertCounts(1, 1L, 0L, metrics);
        assertBounds(1, Types.IntegerType.get(), null, null, metrics);
        assertCounts(3, 1L, 0L, metrics);
        assertBounds(3, Types.LongType.get(), null, null, metrics);
        assertCounts(5, 1L, 0L, metrics);
        assertBounds(5, Types.LongType.get(), null, null, metrics);
        assertCounts(6, 1L, 0L, metrics);
        assertBounds(6, Types.BinaryType.get(), null, null, metrics);
        assertCounts(7, 1L, 0L, 1L, metrics);
        assertBounds(7, Types.DoubleType.get(), null, null, metrics);
    }

    @Test
    public void testFullMetricsMode() throws IOException {
        Metrics metrics = getMetrics(NESTED_SCHEMA, MetricsConfig.fromProperties(ImmutableMap.of("write.metadata.metrics.default", "full")), buildNestedTestRecord());
        Assert.assertEquals(1L, (long) metrics.recordCount());
        Assert.assertTrue(metrics.columnSizes().values().stream().allMatch(Objects::nonNull));
        assertCounts(1, 1L, 0L, metrics);
        assertBounds(1, Types.IntegerType.get(), Integer.MAX_VALUE, Integer.MAX_VALUE, metrics);
        assertCounts(3, 1L, 0L, metrics);
        assertBounds(3, Types.LongType.get(), 100L, 100L, metrics);
        assertCounts(5, 1L, 0L, metrics);
        assertBounds(5, Types.LongType.get(), 20L, 20L, metrics);
        assertCounts(6, 1L, 0L, metrics);
        assertBounds(6, Types.BinaryType.get(), ByteBuffer.wrap("A".getBytes()), ByteBuffer.wrap("A".getBytes()), metrics);
        assertCounts(7, 1L, 0L, 1L, metrics);
        assertBounds(7, Types.DoubleType.get(), Double.NaN, Double.NaN, metrics);
    }

    @Test
    public void testTruncateStringMetricsMode() throws IOException {
        String colName = "str_to_truncate";
        Schema singleStringColSchema = new Schema(required(1, colName, Types.StringType.get()));
        String value = "Lorem ipsum dolor sit amet";
        Record record = GenericRecord.create(singleStringColSchema);
        record.setField(colName, value);
        Metrics metrics = getMetrics(singleStringColSchema, MetricsConfig.fromProperties(ImmutableMap.of("write.metadata.metrics.default", "truncate(10)")), record);
        CharBuffer expectedMinBound = CharBuffer.wrap("Lorem ipsu");
        CharBuffer expectedMaxBound = CharBuffer.wrap("Lorem ipsv");
        Assert.assertEquals(1L, (long) metrics.recordCount());
        Assert.assertTrue(metrics.columnSizes().values().stream().allMatch(Objects::nonNull));
        assertCounts(1, 1L, 0L, metrics);
        assertBounds(1, Types.StringType.get(), expectedMinBound, expectedMaxBound, metrics);
    }

    @Test
    public void testTruncateBinaryMetricsMode() throws IOException {
        String colName = "bin_to_truncate";
        Schema singleBinaryColSchema = new Schema(required(1, colName, Types.BinaryType.get()));
        byte[] value = new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0x7, 0x8, 0x9, 0x10, 0xA, 0xB };
        Record record = GenericRecord.create(singleBinaryColSchema);
        record.setField(colName, ByteBuffer.wrap(value));
        Metrics metrics = getMetrics(singleBinaryColSchema, MetricsConfig.fromProperties(ImmutableMap.of("write.metadata.metrics.default", "truncate(5)")), record);
        ByteBuffer expectedMinBounds = ByteBuffer.wrap(new byte[] { 0x1, 0x2, 0x3, 0x4, 0x5 });
        ByteBuffer expectedMaxBounds = ByteBuffer.wrap(new byte[] { 0x1, 0x2, 0x3, 0x4, 0x6 });
        Assert.assertEquals(1L, (long) metrics.recordCount());
        Assert.assertTrue(metrics.columnSizes().values().stream().allMatch(Objects::nonNull));
        assertCounts(1, 1L, 0L, metrics);
        assertBounds(1, Types.BinaryType.get(), expectedMinBounds, expectedMaxBounds, metrics);
    }

    protected void assertCounts(int fieldId, Long valueCount, Long nullValueCount, Metrics metrics) {
        assertCounts(fieldId, valueCount, nullValueCount, null, metrics);
    }

    protected void assertCounts(int fieldId, Long valueCount, Long nullValueCount, Long nanValueCount, Metrics metrics) {
        Map<Integer, Long> valueCounts = metrics.valueCounts();
        Map<Integer, Long> nullValueCounts = metrics.nullValueCounts();
        Map<Integer, Long> nanValueCounts = metrics.nanValueCounts();
        Assert.assertEquals(valueCount, valueCounts.get(fieldId));
        Assert.assertEquals(nullValueCount, nullValueCounts.get(fieldId));
        Assert.assertEquals(nanValueCount, nanValueCounts.get(fieldId));
    }

    protected <T> void assertBounds(int fieldId, Type type, T lowerBound, T upperBound, Metrics metrics) {
        Map<Integer, ByteBuffer> lowerBounds = metrics.lowerBounds();
        Map<Integer, ByteBuffer> upperBounds = metrics.upperBounds();
        Assert.assertEquals(lowerBound, lowerBounds.containsKey(fieldId) ? fromByteBuffer(type, lowerBounds.get(fieldId)) : null);
        Assert.assertEquals(upperBound, upperBounds.containsKey(fieldId) ? fromByteBuffer(type, upperBounds.get(fieldId)) : null);
    }
}

18 Source : DefaultIcebergWriter.java
with Apache License 2.0
from Netflix

@Override
public void write(Record record) {
    appender.add(record);
}

18 Source : TestIcebergRecordObjectInspector.java
with Apache License 2.0
from apache

@Test
public void testIcebergRecordObjectInspector() {
    Schema schema = new Schema(required(1, "integer_field", Types.IntegerType.get()), required(2, "struct_field", Types.StructType.of(Types.NestedField.required(3, "string_field", Types.StringType.get()))));
    Record record = RandomGenericData.generate(schema, 1, 0L).get(0);
    Record innerRecord = record.get(1, Record.class);
    StructObjectInspector soi = (StructObjectInspector) IcebergObjectInspector.create(schema);
    Assert.assertEquals(ImmutableList.of(record.get(0), record.get(1)), soi.getStructFieldsDataAsList(record));
    StructField integerField = soi.getStructFieldRef("integer_field");
    Assert.assertEquals(record.get(0), soi.getStructFieldData(record, integerField));
    StructField structField = soi.getStructFieldRef("struct_field");
    Object innerData = soi.getStructFieldData(record, structField);
    Assert.assertEquals(innerRecord, innerData);
    StructObjectInspector innerSoi = (StructObjectInspector) structField.getFieldObjectInspector();
    StructField stringField = innerSoi.getStructFieldRef("string_field");
    Assert.assertEquals(ImmutableList.of(innerRecord.get(0)), innerSoi.getStructFieldsDataAsList(innerRecord));
    Assert.assertEquals(innerRecord.get(0), innerSoi.getStructFieldData(innerData, stringField));
}

18 Source : HiveIcebergTestUtils.java
with Apache License 2.0
from apache

/**
 * Hive values for the test record.
 * @param record The original Iceberg record
 * @return The Hive 'record' containing the same values
 */
public static List<Object> valuesForTestRecord(Record record) {
    return Arrays.asList(new BooleanWritable(Boolean.TRUE), new IntWritable(record.get(1, Integer.class)), new LongWritable(record.get(2, Long.class)), new FloatWritable(record.get(3, Float.class)), new DoubleWritable(record.get(4, Double.class)), new DateWritable((int) record.get(5, LocalDate.class).toEpochDay()), new TimestampWritable(Timestamp.from(record.get(6, OffsetDateTime.class).toInstant())), new TimestampWritable(Timestamp.valueOf(record.get(7, LocalDateTime.class))), new Text(record.get(8, String.class)), new BytesWritable(record.get(9, byte[].class)), new BytesWritable(ByteBuffers.toByteArray(record.get(10, ByteBuffer.class))), new HiveDecimalWritable(HiveDecimal.create(record.get(11, BigDecimal.class))), new Text(record.get(12, LocalTime.class).toString()), new Text(record.get(13, UUID.class).toString()));
}

18 Source : TestFlinkIcebergSinkV2.java
with Apache License 2.0
from apache

private StructLikeSet expectedRowSet(Record... records) {
    return SimpleDataUtil.expectedRowSet(table, records);
}

18 Source : TestParquetMetrics.java
with Apache License 2.0
from apache

@Override
public Metrics getMetrics(Schema schema, Record... records) throws IOException {
    return getMetrics(schema, MetricsConfig.getDefault(), records);
}

18 Source : TestGenericSortedPosDeleteWriter.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public class TestGenericSortedPosDeleteWriter extends TableTestBase {

    private static final int FORMAT_V2 = 2;

    private final FileFormat format;

    private OutputFileFactory fileFactory;

    private Record gRecord;

    @Parameterized.Parameters(name = "FileFormat={0}")
    public static Object[] parameters() {
        return new Object[][] { new Object[] { "avro" }, new Object[] { "parquet" } };
    }

    public TestGenericSortedPosDeleteWriter(String fileFormat) {
        super(FORMAT_V2);
        this.format = FileFormat.valueOf(fileFormat.toUpperCase(Locale.ENGLISH));
    }

    @Before
    public void setupTable() throws IOException {
        this.tableDir = temp.newFolder();
        Assert.assertTrue(tableDir.delete());
        this.metadataDir = new File(tableDir, "metadata");
        this.table = create(SCHEMA, PartitionSpec.unpartitioned());
        this.gRecord = GenericRecord.create(SCHEMA);
        this.fileFactory = new OutputFileFactory(table.spec(), format, table.locationProvider(), table.io(), table.encryption(), 1, 1);
        table.updateProperties().defaultFormat(format).commit();
    }

    private EncryptedOutputFile createEncryptedOutputFile() {
        return fileFactory.newOutputFile();
    }

    private DataFile prepareDataFile(FileAppenderFactory<Record> appenderFactory, List<Record> rowSet) throws IOException {
        DataWriter<Record> writer = appenderFactory.newDataWriter(createEncryptedOutputFile(), format, null);
        try (DataWriter<Record> closeableWriter = writer) {
            for (Record record : rowSet) {
                closeableWriter.add(record);
            }
        }
        return writer.toDataFile();
    }

    private Record createRow(Integer id, String data) {
        Record row = gRecord.copy();
        row.setField("id", id);
        row.setField("data", data);
        return row;
    }

    private StructLikeSet expectedRowSet(Iterable<Record> records) {
        StructLikeSet set = StructLikeSet.create(table.schema().asStruct());
        records.forEach(set::add);
        return set;
    }

    private StructLikeSet actualRowSet(String... columns) throws IOException {
        StructLikeSet set = StructLikeSet.create(table.schema().asStruct());
        try (CloseableIterable<Record> reader = IcebergGenerics.read(table).select(columns).build()) {
            reader.forEach(set::add);
        }
        return set;
    }

    @Test
    public void testSortedPosDelete() throws IOException {
        List<Record> rowSet = Lists.newArrayList(createRow(0, "aaa"), createRow(1, "bbb"), createRow(2, "ccc"), createRow(3, "ddd"), createRow(4, "eee"));
        FileAppenderFactory<Record> appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(), null, null, null);
        DataFile dataFile = prepareDataFile(appenderFactory, rowSet);
        SortedPosDeleteWriter<Record> writer = new SortedPosDeleteWriter<>(appenderFactory, fileFactory, format, null, 100);
        try (SortedPosDeleteWriter<Record> closeableWriter = writer) {
            for (int index = rowSet.size() - 1; index >= 0; index -= 2) {
                closeableWriter.delete(dataFile.path(), index);
            }
        }
        List<DeleteFile> deleteFiles = writer.complete();
        Assert.assertEquals(1, deleteFiles.size());
        DeleteFile deleteFile = deleteFiles.get(0);
        // Check whether the path-pos pairs are sorted as expected.
        Schema pathPosSchema = DeleteSchemaUtil.pathPosSchema();
        Record record = GenericRecord.create(pathPosSchema);
        List<Record> expectedDeletes = Lists.newArrayList(record.copy("file_path", dataFile.path(), "pos", 0L), record.copy("file_path", dataFile.path(), "pos", 2L), record.copy("file_path", dataFile.path(), "pos", 4L));
        Assert.assertEquals(expectedDeletes, readRecordsAsList(pathPosSchema, deleteFile.path()));
        table.newRowDelta().addRows(dataFile).addDeletes(deleteFiles.get(0)).validateDataFilesExist(writer.referencedDataFiles()).validateDeletedFiles().commit();
        List<Record> expectedData = Lists.newArrayList(createRow(1, "bbb"), createRow(3, "ddd"));
        Assert.assertEquals("Should have the expected records", expectedRowSet(expectedData), actualRowSet("*"));
    }

    @Test
    public void testSortedPosDeleteWithSchemaAndNullRow() throws IOException {
        List<Record> rowSet = Lists.newArrayList(createRow(0, "aaa"), createRow(1, "bbb"), createRow(2, "ccc"));
        // Create a FileAppenderFactory which requires pos-delete row schema.
        FileAppenderFactory<Record> appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(), null, null, table.schema());
        DataFile dataFile = prepareDataFile(appenderFactory, rowSet);
        SortedPosDeleteWriter<Record> writer = new SortedPosDeleteWriter<>(appenderFactory, fileFactory, format, null, 1);
        boolean caughtError = false;
        try {
            writer.delete(dataFile.path(), 0L);
        } catch (Exception e) {
            caughtError = true;
        }
        Assert.assertTrue("Should fail because the appender requires non-null rows to write", caughtError);
    }

    @Test
    public void testSortedPosDeleteWithRow() throws IOException {
        List<Record> rowSet = Lists.newArrayList(createRow(0, "aaa"), createRow(1, "bbb"), createRow(2, "ccc"), createRow(3, "ddd"), createRow(4, "eee"));
        FileAppenderFactory<Record> appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(), null, null, table.schema());
        DataFile dataFile = prepareDataFile(appenderFactory, rowSet);
        SortedPosDeleteWriter<Record> writer = new SortedPosDeleteWriter<>(appenderFactory, fileFactory, format, null, 100);
        try (SortedPosDeleteWriter<Record> closeableWriter = writer) {
            for (int index = rowSet.size() - 1; index >= 0; index -= 2) {
                // Write deletes with row.
                closeableWriter.delete(dataFile.path(), index, rowSet.get(index));
            }
        }
        List<DeleteFile> deleteFiles = writer.complete();
        Assert.assertEquals(1, deleteFiles.size());
        DeleteFile deleteFile = deleteFiles.get(0);
        // Check whether the path-pos pairs are sorted as expected.
        Schema pathPosSchema = DeleteSchemaUtil.posDeleteSchema(table.schema());
        Record record = GenericRecord.create(pathPosSchema);
        List<Record> expectedDeletes = Lists.newArrayList(record.copy("file_path", dataFile.path(), "pos", 0L, "row", createRow(0, "aaa")), record.copy("file_path", dataFile.path(), "pos", 2L, "row", createRow(2, "ccc")), record.copy("file_path", dataFile.path(), "pos", 4L, "row", createRow(4, "eee")));
        Assert.assertEquals(expectedDeletes, readRecordsAsList(pathPosSchema, deleteFile.path()));
        table.newRowDelta().addRows(dataFile).addDeletes(deleteFiles.get(0)).validateDataFilesExist(writer.referencedDataFiles()).validateDeletedFiles().commit();
        List<Record> expectedData = Lists.newArrayList(createRow(1, "bbb"), createRow(3, "ddd"));
        Assert.assertEquals("Should have the expected records", expectedRowSet(expectedData), actualRowSet("*"));
    }

    @Test
    public void testMultipleFlush() throws IOException {
        FileAppenderFactory<Record> appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(), null, null, null);
        // It will produce 5 record lists, each list will write into a separate data file:
        // The 1st file has: <0  , val-0>   , <1  , val-1>   , ... , <99 , val-99>
        // The 2nd file has: <100, val-100> , <101, val-101> , ... , <199, val-199>
        // The 3rd file has: <200, val-200> , <201, val-201> , ... , <299, val-299>
        // The 4th file has: <300, val-300> , <301, val-301> , ... , <399, val-399>
        // The 5th file has: <400, val-400> , <401, val-401> , ... , <499, val-499>
        List<DataFile> dataFiles = Lists.newArrayList();
        for (int fileIndex = 0; fileIndex < 5; fileIndex++) {
            List<Record> recordList = Lists.newLinkedList();
            for (int recordIndex = 0; recordIndex < 100; recordIndex++) {
                int id = fileIndex * 100 + recordIndex;
                recordList.add(createRow(id, String.format("val-%s", id)));
            }
            // Write the records and generate the data file.
            dataFiles.add(prepareDataFile(appenderFactory, recordList));
        }
        // Commit those data files to iceberg table.
        RowDelta rowDelta = table.newRowDelta();
        dataFiles.forEach(rowDelta::addRows);
        rowDelta.commit();
        SortedPosDeleteWriter<Record> writer = new SortedPosDeleteWriter<>(appenderFactory, fileFactory, format, null, 50);
        try (SortedPosDeleteWriter<Record> closeableWriter = writer) {
            for (int pos = 0; pos < 100; pos++) {
                for (int fileIndex = 4; fileIndex >= 0; fileIndex--) {
                    closeableWriter.delete(dataFiles.get(fileIndex).path(), pos);
                }
            }
        }
        List<DeleteFile> deleteFiles = writer.complete();
        Assert.assertEquals(10, deleteFiles.size());
        Schema pathPosSchema = DeleteSchemaUtil.pathPosSchema();
        Record record = GenericRecord.create(pathPosSchema);
        for (int deleteFileIndex = 0; deleteFileIndex < 10; deleteFileIndex++) {
            List<Record> expectedDeletes = Lists.newArrayList();
            for (int dataFileIndex = 0; dataFileIndex < 5; dataFileIndex++) {
                DataFile dataFile = dataFiles.get(dataFileIndex);
                for (long pos = deleteFileIndex * 10; pos < deleteFileIndex * 10 + 10; pos++) {
                    expectedDeletes.add(record.copy("file_path", dataFile.path(), "pos", pos));
                }
            }
            DeleteFile deleteFile = deleteFiles.get(deleteFileIndex);
            Assert.assertEquals(expectedDeletes, readRecordsAsList(pathPosSchema, deleteFile.path()));
        }
        rowDelta = table.newRowDelta();
        deleteFiles.forEach(rowDelta::addDeletes);
        rowDelta.commit();
        Assert.assertEquals("Should have no record.", expectedRowSet(ImmutableList.of()), actualRowSet("*"));
    }

    private List<Record> readRecordsAsList(Schema schema, CharSequence path) throws IOException {
        CloseableIterable<Record> iterable;
        InputFile inputFile = Files.localInput(path.toString());
        switch(format) {
            case PARQUET:
                iterable = Parquet.read(inputFile).project(schema).createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema)).build();
                break;
            case AVRO:
                iterable = Avro.read(inputFile).project(schema).createReaderFunc(DataReader::create).build();
                break;
            default:
                throw new UnsupportedOperationException("Unsupported file format: " + format);
        }
        try (CloseableIterable<Record> closeableIterable = iterable) {
            return Lists.newArrayList(closeableIterable);
        }
    }
}

18 Source : TestAppenderFactory.java
with Apache License 2.0
from apache

private PartitionKey createPartitionKey() {
    if (table.spec().isUnpartitioned()) {
        return null;
    }
    Record record = GenericRecord.create(table.schema()).copy(ImmutableMap.of("data", "aaa"));
    PartitionKey partitionKey = new PartitionKey(table.spec(), table.schema());
    partitionKey.partition(record);
    return partitionKey;
}

18 Source : TestGenericReadProjection.java
with Apache License 2.0
from apache

@Override
protected Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, Record record) throws IOException {
    File file = temp.newFile(desc + ".parquet");
    file.delete();
    try (FileAppender<Record> appender = Parquet.write(Files.localOutput(file)).schema(writeSchema).createWriterFunc(GenericParquetWriter::buildWriter).build()) {
        appender.add(record);
    }
    Iterable<Record> records = Parquet.read(Files.localInput(file)).project(readSchema).createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(readSchema, fileSchema)).build();
    return Iterables.getOnlyElement(records);
}

17 Source : IcebergCodecsTest.java
with Apache License 2.0
from Netflix

@Test
void shouldEncodeAndDecodeRecord() {
    Record expected = GenericRecord.create(SCHEMA);
    expected.setField("id", 1);
    byte[] encoded = recordCodec.encode(expected);
    Record actual = recordCodec.decode(encoded);
    assertEquals(expected, actual);
}

17 Source : FixedIcebergWriterPool.java
with Apache License 2.0
from Netflix

@Override
public void write(StructLike partition, Record record) {
    IcebergWriter writer = pool.get(partition);
    if (writer == null) {
        throw new RuntimeException("writer does not exist in writer pool");
    }
    writer.write(record);
}

17 Source : TestDeserializer.java
with Apache License 2.0
from apache

@Test
public void testDeserializeEverySupportedType() {
    Assume.assumeFalse("No test yet for Hive3 (Date/Timestamp creation)", MetastoreUtil.hive3PresentOnClasspath());
    Deserializer deserializer = new Deserializer.Builder().schema(HiveIcebergTestUtils.FULL_SCHEMA).writerInspector((StructObjectInspector) IcebergObjectInspector.create(HiveIcebergTestUtils.FULL_SCHEMA)).sourceInspector(HiveIcebergTestUtils.FULL_SCHEMA_OBJECT_INSPECTOR).build();
    Record expected = HiveIcebergTestUtils.getTestRecord();
    Record actual = deserializer.deserialize(HiveIcebergTestUtils.valuesForTestRecord(expected));
    HiveIcebergTestUtils.assertEquals(expected, actual);
}

17 Source : HiveIcebergTestUtils.java
with Apache License 2.0
from apache

/**
 * Record with every field set to null.
 * @return Empty record
 */
public static Record getNullTestRecord() {
    Record record = GenericRecord.create(HiveIcebergTestUtils.FULL_SCHEMA);
    for (int i = 0; i < HiveIcebergTestUtils.FULL_SCHEMA.columns().size(); i++) {
        record.set(i, null);
    }
    return record;
}

17 Source : SimpleDataUtil.java
with Apache License 2.0
from apache

public static Record createRecord(Integer id, String data) {
    Record record = RECORD.copy();
    record.setField("id", id);
    record.setField("data", data);
    return record;
}
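
Beyond the copy-then-setField pattern above, Record also offers copy overloads that apply field overrides in a single call, as the partition-key and pos-delete examples elsewhere on this page use. A minimal sketch, with the schema and values assumed purely for illustration:

// Illustrative schema and base record (field names and values are assumptions).
Schema schema = new Schema(
    Types.NestedField.required(1, "id", Types.IntegerType.get()),
    Types.NestedField.optional(2, "data", Types.StringType.get()));
Record base = GenericRecord.create(schema);
base.setField("id", 0);
base.setField("data", "aaa");

// Copy with a map of field overrides.
Record viaMap = base.copy(ImmutableMap.of("id", 1, "data", "bbb"));

// Copy with alternating field-name / value pairs.
Record viaPairs = base.copy("id", 2, "data", "ccc");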

17 Source : RowDataConverter.java
with Apache License 2.0
from apache

public static RowData convert(Schema iSchema, Record record) {
    return convert(iSchema.asStruct(), record);
}

17 Source : TestParquetMetrics.java
with Apache License 2.0
from apache

@Override
public Metrics getMetrics(Schema schema, MetricsConfig metricsConfig, Record... records) throws IOException {
    return getMetrics(schema, createOutputFile(), ImmutableMap.of(), metricsConfig, records);
}

17 Source : TestParquetMetrics.java
with Apache License 2.0
from apache

@Override
protected Metrics getMetricsForRecordsWithSmallRowGroups(Schema schema, OutputFile outputFile, Record... records) throws IOException {
    return getMetrics(schema, outputFile, SMALL_ROW_GROUP_CONFIG, MetricsConfig.getDefault(), records);
}

17 Source : TestOrcMetrics.java
with Apache License 2.0
from apache

@Override
protected Metrics getMetricsForRecordsWithSmallRowGroups(Schema schema, OutputFile outputFile, Record... records) {
    throw new UnsupportedOperationException("supportsSmallRowGroups = " + supportsSmallRowGroups());
}

17 Source : TestSingleMessageEncoding.java
with Apache License 2.0
from apache

private static Record v2Record(long id, String message, Double data) {
    Record rec = GenericRecord.create(SCHEMA_V2.asStruct());
    rec.setField("id", id);
    rec.setField("message", message);
    rec.setField("data", data);
    return rec;
}

17 Source : TestSingleMessageEncoding.java
with Apache License 2.0
from apache

private static Record v1Record(int id, String msg) {
    Record rec = GenericRecord.create(SCHEMA_V1.asStruct());
    rec.setField("id", id);
    rec.setField("msg", msg);
    return rec;
}

17 Source : TestMetrics.java
with Apache License 2.0
from apache

private static Record createRecordWithFloatAndDouble(float floatValue, double doubleValue) {
    Record record = GenericRecord.create(FLOAT_DOUBLE_ONLY_SCHEMA);
    record.setField("floatCol", floatValue);
    record.setField("doubleCol", doubleValue);
    return record;
}

16 Source : SystemTableUtil.java
with Apache License 2.0
from ExpediaGroup

protected static Record recordWithVirtualColumn(Record record, long snapshotId, Schema oldSchema, String columnName) {
    Schema newSchema = schemaWithVirtualColumn(oldSchema, columnName);
    Record newRecord = GenericRecord.create(newSchema);
    for (Types.NestedField field : oldSchema.columns()) {
        newRecord.setField(field.name(), record.getField(field.name()));
    }
    newRecord.setField(columnName, snapshotId);
    return newRecord;
}

16 Source : TestFilteredScan.java
with Apache License 2.0
from apache

private static Record record(Schema schema, Object... values) {
    Record rec = GenericRecord.create(schema);
    for (int i = 0; i < values.length; i += 1) {
        rec.set(i, values[i]);
    }
    return rec;
}

16 Source : TestDeserializer.java
with Apache License 2.0
from apache

@Test
public void testStructDeserialize() {
    Deserializer deserializer = new Deserializer.Builder().schema(CUSTOMER_SCHEMA).writerInspector((StructObjectInspector) IcebergObjectInspector.create(CUSTOMER_SCHEMA)).sourceInspector(CUSTOMER_OBJECT_INSPECTOR).build();
    Record expected = GenericRecord.create(CUSTOMER_SCHEMA);
    expected.set(0, 1L);
    expected.set(1, "Bob");
    Record actual = deserializer.deserialize(new Object[] { new LongWritable(1L), new Text("Bob") });
    Assert.assertEquals(expected, actual);
}

16 Source : HiveIcebergTestUtils.java
with Apache License 2.0
from apache

/**
 * Check if 2 Iceberg records are the same or not. Compares OffsetDateTimes only by the Instant they represent.
 * @param expected The expected record
 * @param actual The actual record
 */
public static void assertEquals(Record expected, Record actual) {
    for (int i = 0; i < expected.size(); ++i) {
        if (expected.get(i) instanceof OffsetDateTime) {
            // For OffsetDateTime we just compare the actual instant
            Assert.assertEquals(((OffsetDateTime) expected.get(i)).toInstant(), ((OffsetDateTime) actual.get(i)).toInstant());
        } else if (expected.get(i) instanceof byte[]) {
            Assert.assertArrayEquals((byte[]) expected.get(i), (byte[]) actual.get(i));
        } else {
            Assert.assertEquals(expected.get(i), actual.get(i));
        }
    }
}

16 Source : TestParquetMetrics.java
with Apache License 2.0
from apache

private Metrics getMetrics(Schema schema, OutputFile file, Map<String, String> properties, MetricsConfig metricsConfig, Record... records) throws IOException {
    FileAppender<Record> writer = Parquet.write(file).schema(schema).setAll(properties).createWriterFunc(GenericParquetWriter::buildWriter).metricsConfig(metricsConfig).build();
    try (FileAppender<Record> appender = writer) {
        appender.addAll(Lists.newArrayList(records));
    }
    return writer.metrics();
}

16 Source : TestOrcMetrics.java
with Apache License 2.0
from apache

private Metrics getMetrics(Schema schema, OutputFile file, Map<String, String> properties, MetricsConfig metricsConfig, Record... records) throws IOException {
    FileAppender<Record> writer = ORC.write(file).schema(schema).setAll(properties).createWriterFunc(GenericOrcWriter::buildWriter).metricsConfig(metricsConfig).build();
    try (FileAppender<Record> appender = writer) {
        appender.addAll(Lists.newArrayList(records));
    }
    return writer.metrics();
}

15 Source : TestIcebergSerDe.java
with Apache License 2.0
from ExpediaGroup

@Test
public void testDeserializePrimitives() {
    Schema schema = new Schema(required(1, "string_type", Types.StringType.get()), required(2, "int_type", Types.IntegerType.get()), required(3, "long_type", Types.LongType.get()), required(4, "boolean_type", Types.BooleanType.get()), required(5, "float_type", Types.FloatType.get()), required(6, "double_type", Types.DoubleType.get()), required(7, "date_type", Types.DateType.get()));
    List<?> expected = Arrays.asList("foo", 12, 3000L, true, 3.01F, 3.0D, "1998-11-13");
    Record record = TestHelpers.createCustomRecord(schema, expected);
    IcebergWritable writable = new IcebergWritable();
    writable.setRecord(record);
    writable.setSchema(schema);
    IcebergSerDe serDe = new IcebergSerDe();
    List<Object> result = (List<Object>) serDe.deserialize(writable);
    assertEquals(expected, result);
}

15 Source : TestReadProjection.java
with Apache License 2.0
from apache

@Test
public void testFullProjection() throws Exception {
    Schema schema = new Schema(Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()));
    Record record = GenericRecord.create(schema);
    record.setField("id", 34L);
    record.setField("data", "test");
    Record projected = writeAndRead("full_projection", schema, schema, record);
    Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id"));
    int cmp = Comparators.charSequences().compare("test", (CharSequence) projected.getField("data"));
    Assert.assertEquals("Should contain the correct data value", 0, cmp);
}

15 Source : TestReadProjection.java
with Apache License 2.0
from apache

@Test
public void testRename() throws Exception {
    Schema writeSchema = new Schema(Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()));
    Record record = GenericRecord.create(writeSchema);
    record.setField("id", 34L);
    record.setField("data", "test");
    Schema readSchema = new Schema(Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "renamed", Types.StringType.get()));
    Record projected = writeAndRead("project_and_rename", writeSchema, readSchema, record);
    Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id"));
    int cmp = Comparators.charSequences().compare("test", (CharSequence) projected.getField("renamed"));
    Assert.assertEquals("Should contain the correct data/renamed value", 0, cmp);
}

15 Source : TestHiveIcebergStorageHandlerWithEngine.java
with Apache License 2.0
from apache

private String insertQueryForComplexType(String tableName, String dummyTableName, Schema schema, Record record) {
    StringBuilder query = new StringBuilder("INSERT INTO TABLE ").append(tableName).append(" SELECT ").append(record.get(0)).append(", ");
    Type type = schema.asStruct().fields().get(1).type();
    query.append(buildComplexTypeInnerQuery(record.get(1), type));
    query.setLength(query.length() - 1);
    query.append(" FROM ").append(dummyTableName).append(" LIMIT 1");
    return query.toString();
}

15 Source : TestHiveIcebergSerDe.java
with Apache License 2.0
from apache

@Test
public void testDeserialize() {
    HiveIcebergSerDe serDe = new HiveIcebergSerDe();
    Record record = RandomGenericData.generate(schema, 1, 0).get(0);
    Container<Record> container = new Container<>();
    container.set(record);
    Assert.assertEquals(record, serDe.deserialize(container));
}

15 Source : HiveIcebergTestUtils.java
with Apache License 2.0
from apache

/**
 * Converts a list of Object arrays to a list of Iceberg records.
 * @param schema The schema of the Iceberg record
 * @param rows The data of the records
 * @return The list of the converted records
 */
public static List<Record> valueForRow(Schema schema, List<Object[]> rows) {
    return rows.stream().map(row -> {
        Record record = GenericRecord.create(schema);
        for (int i = 0; i < row.length; ++i) {
            record.set(i, row[i]);
        }
        return record;
    }).collect(Collectors.toList());
}
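
As a quick, hedged usage sketch of the helper above (the schema and row values are assumptions for illustration):

// Each Object[] becomes one Record with values set by position.
Schema schema = new Schema(
    Types.NestedField.required(1, "id", Types.LongType.get()),
    Types.NestedField.required(2, "name", Types.StringType.get()));

List<Record> records = HiveIcebergTestUtils.valueForRow(schema, Arrays.asList(
    new Object[] { 1L, "Alice" },
    new Object[] { 2L, "Bob" }));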

15 Source : IcebergRecordObjectInspector.java
with Apache License 2.0
from apache

@Override
public List<Object> getStructFieldsDataAsList(Object o) {
    Record record = (Record) o;
    return structFields.stream().map(f -> record.get(f.position())).collect(Collectors.toList());
}
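
The inspector above reads fields by position. For reference, a small hedged sketch (schema and values assumed) of the two access styles Record exposes, positional and name-based:

Schema schema = new Schema(
    Types.NestedField.required(1, "id", Types.LongType.get()),
    Types.NestedField.optional(2, "data", Types.StringType.get()));

Record record = GenericRecord.create(schema);
record.set(0, 34L);                     // positional write
record.setField("data", "test");        // name-based write

Long id = record.get(0, Long.class);    // positional read with an expected class
Object data = record.getField("data");  // name-based read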

15 Source : TestMergingMetrics.java
with Apache License 2.0
from apache

@Test
public void verifySingleRecordMetric() throws Exception {
    Record record = GenericRecord.create(SCHEMA);
    record.setField("id", 3);
    // FLOAT_FIELD - 1
    record.setField("float", Float.NaN);
    // DOUBLE_FIELD - 1
    record.setField("double", Double.NaN);
    // FLOAT_LIST - 2
    record.setField("floatlist", ImmutableList.of(3.3F, 2.8F, Float.NaN, -25.1F, Float.NaN));
    // MAP_FIELD_1 - 1
    record.setField("map1", ImmutableMap.of(Float.NaN, "a", 0F, "b"));
    // MAP_FIELD_2 - 3
    record.setField("map2", ImmutableMap.of(0, 0D, 1, Double.NaN, 2, 2D, 3, Double.NaN, 4, Double.NaN));
    FileAppender<T> appender = writeAndGetAppender(ImmutableList.of(record));
    Map<Integer, Long> nanValueCount = appender.metrics().nanValueCounts();
    assertNaNCountMatch(1L, nanValueCount, FLOAT_FIELD);
    assertNaNCountMatch(1L, nanValueCount, DOUBLE_FIELD);
    assertNaNCountMatch(2L, nanValueCount, FLOAT_LIST);
    assertNaNCountMatch(1L, nanValueCount, MAP_FIELD_1);
    assertNaNCountMatch(3L, nanValueCount, MAP_FIELD_2);
}

15 Source : TestBaseTaskWriter.java
with Apache License 2.0
from apache

@Test
public void testAbort() throws IOException {
    List<Record> records = Lists.newArrayList();
    for (int i = 0; i < 2000; i++) {
        records.add(createRecord(i, "aaa"));
    }
    List<Path> files;
    try (TestTaskWriter taskWriter = createTaskWriter(4)) {
        for (Record record : records) {
            taskWriter.write(record);
            taskWriter.delete(record);
        }
        // Close the current opened files.
        taskWriter.close();
        // Assert the current data file count.
        files = Files.list(Paths.get(tableDir.getPath(), "data")).filter(p -> !p.toString().endsWith(".crc")).collect(Collectors.toList());
        Assert.assertEquals("Should have 4 files but the files are: " + files, 4, files.size());
        // Abort to clean all delete files and data files.
        taskWriter.abort();
    }
    for (Path path : files) {
        Assert.assertFalse(Files.exists(path));
    }
}

15 Source : TestMetrics.java
with Apache License 2.0
from apache

private Record buildNestedTestRecord() {
    Record leafStruct = GenericRecord.create(LEAF_STRUCT_TYPE);
    leafStruct.setField("leafLongCol", 20L);
    leafStruct.setField("leafBinaryCol", ByteBuffer.wrap("A".getBytes()));
    Record nestedStruct = GenericRecord.create(NESTED_STRUCT_TYPE);
    nestedStruct.setField("longCol", 100L);
    nestedStruct.setField("leafStructCol", leafStruct);
    nestedStruct.setField("doubleCol", Double.NaN);
    Record record = GenericRecord.create(NESTED_SCHEMA);
    record.setField("intCol", Integer.MAX_VALUE);
    record.setField("nestedStructCol", nestedStruct);
    return record;
}

14 Source : TestIcebergSerDe.java
with Apache License 2.0
from ExpediaGroup

@Test
public void testDeserializeList() {
    Schema schema = new Schema(required(1, "list_type", Types.ListType.ofRequired(17, Types.LongType.get())));
    List<Long> expected = Arrays.asList(1000L, 2000L, 3000L);
    List<List> data = new ArrayList<>();
    data.add(expected);
    Record record = TestHelpers.createCustomRecord(schema, data);
    IcebergWritable writable = new IcebergWritable();
    writable.setRecord(record);
    writable.setSchema(schema);
    IcebergSerDe serDe = new IcebergSerDe();
    List<Object> deserialized = (List<Object>) serDe.deserialize(writable);
    List result = (List) deserialized.get(0);
    assertEquals(expected, result);
}

14 Source : SnapshotIterable.java
with Apache License 2.0
from ExpediaGroup

/**
 * Populating a Record with snapshot metadata
 */
private Record createSnapshotRecord(Snapshot snapshot) {
    Record snapRecord = GenericRecord.create(table.schema());
    snapRecord.setField("committed_at", snapshot.timestampMillis());
    snapRecord.setField("snapshot_id", snapshot.snapshotId());
    snapRecord.setField("parent_id", snapshot.parentId());
    snapRecord.setField("operation", snapshot.operation());
    snapRecord.setField("manifest_list", snapshot.manifestListLocation());
    snapRecord.setField("summary", snapshot.summary());
    return snapRecord;
}

14 Source : TestReadProjection.java
with Apache License 2.0
from apache

@Test
public void testBasicProjection() throws Exception {
    Schema writeSchema = new Schema(Types.NestedField.required(0, "id", Types.LongType.get()), Types.NestedField.optional(1, "data", Types.StringType.get()));
    Record record = GenericRecord.create(writeSchema);
    record.setField("id", 34L);
    record.setField("data", "test");
    Schema idOnly = new Schema(Types.NestedField.required(0, "id", Types.LongType.get()));
    Record projected = writeAndRead("basic_projection_id", writeSchema, idOnly, record);
    Assert.assertNull("Should not project data", projected.getField("data"));
    Assert.assertEquals("Should contain the correct id value", 34L, (long) projected.getField("id"));
    Schema dataOnly = new Schema(Types.NestedField.optional(1, "data", Types.StringType.get()));
    projected = writeAndRead("basic_projection_data", writeSchema, dataOnly, record);
    Assert.assertNull("Should not project id", projected.getField("id"));
    int cmp = Comparators.charSequences().compare("test", (CharSequence) projected.getField("data"));
    Assert.assertEquals("Should contain the correct data value", 0, cmp);
}
