org.apache.iceberg.Files.localOutput()

Here are examples of the Java API org.apache.iceberg.Files.localOutput() taken from open source projects. By voting up the examples you can indicate which are most useful and appropriate.
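Before the community examples, here is a minimal, self-contained sketch of the pattern they all share: Files.localOutput() wraps a local java.io.File as an Iceberg OutputFile, which a format builder such as Parquet.write() can then append records to. The schema, field names, and temp-file path below are illustrative assumptions, not taken from any of the projects listed.

import java.io.File;
import java.io.IOException;
import org.apache.iceberg.Files;
import org.apache.iceberg.Schema;
import org.apache.iceberg.data.GenericRecord;
import org.apache.iceberg.data.Record;
import org.apache.iceberg.data.parquet.GenericParquetWriter;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.types.Types;

public class LocalOutputSketch {
    // hypothetical schema used only for this sketch
    private static final Schema SCHEMA = new Schema(
        Types.NestedField.required(1, "id", Types.LongType.get()),
        Types.NestedField.optional(2, "data", Types.StringType.get()));

    public static void main(String[] args) throws IOException {
        File tmp = File.createTempFile("local-output-sketch", ".parquet");
        tmp.delete(); // the appender expects to create the file itself

        // Files.localOutput() turns the local File into an Iceberg OutputFile
        OutputFile out = Files.localOutput(tmp);

        Record record = GenericRecord.create(SCHEMA);
        try (FileAppender<Record> writer = Parquet.write(out)
                .schema(SCHEMA)
                .createWriterFunc(GenericParquetWriter::buildWriter)
                .build()) {
            writer.add(record.copy("id", 1L, "data", "a"));
        }
    }
}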

73 Examples

18 Source : SparkParquetReadersNestedDataBenchmark.java
with Apache License 2.0
from apache

@Setup
public void setupBenchmark() throws IOException {
    dataFile = File.createTempFile("parquet-nested-data-benchmark", ".parquet");
    dataFile.delete();
    List<GenericData.Record> records = RandomData.generateList(SCHEMA, NUM_RECORDS, 0L);
    try (FileAppender<GenericData.Record> writer = Parquet.write(Files.localOutput(dataFile)).schema(SCHEMA).named("benchmark").build()) {
        writer.addAll(records);
    }
}

18 Source : SparkParquetReadersFlatDataBenchmark.java
with Apache License 2.0
from apache

@Setup
public void setupBenchmark() throws IOException {
    dataFile = File.createTempFile("parquet-flat-data-benchmark", ".parquet");
    dataFile.delete();
    List<GenericData.Record> records = RandomData.generateList(SCHEMA, NUM_RECORDS, 0L);
    try (FileAppender<GenericData.Record> writer = Parquet.write(Files.localOutput(dataFile)).schema(SCHEMA).named("benchmark").build()) {
        writer.addAll(records);
    }
}

18 Source : TestOrcRowIterator.java
with Apache License 2.0
from apache

@Before
public void writeFile() throws IOException {
    testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    try (FileAppender<Record> writer = ORC.write(Files.localOutput(testFile)).createWriterFunc(GenericOrcWriter::buildWriter).schema(DATA_SCHEMA).config("iceberg.orc.vectorbatch.size", "1000").config(OrcConf.ROW_INDEX_STRIDE.getAttribute(), "1000").config(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute(), "4000").config(OrcConf.STRIPE_SIZE.getAttribute(), "1").build()) {
        writer.addAll(DATA_ROWS);
    }
}

17 Source : TestParquetMetrics.java
with Apache License 2.0
from apache

@Override
protected OutputFile createOutputFile() throws IOException {
    File tmpFolder = temp.newFolder("parquet");
    String filename = UUID.randomUUID().toString();
    return Files.localOutput(new File(tmpFolder, FileFormat.PARQUET.addExtension(filename)));
}

17 Source : TestOrcMetrics.java
with Apache License 2.0
from apache

@Override
protected OutputFile createOutputFile() throws IOException {
    File tmpFolder = temp.newFolder("orc");
    String filename = UUID.randomUUID().toString();
    return Files.localOutput(new File(tmpFolder, FileFormat.ORC.addExtension(filename)));
}

16 Source : SparkParquetWritersNestedDataBenchmark.java
with Apache License 2.0
from apache

@Benchmark
@Threads(1)
public void writeUsingIcebergWriter() throws IOException {
    try (FileAppender<InternalRow> writer = Parquet.write(Files.localOutput(dataFile)).createWriterFunc(msgType -> SparkParquetWriters.buildWriter(SparkSchemaUtil.convert(SCHEMA), msgType)).schema(SCHEMA).build()) {
        writer.addAll(rows);
    }
}

16 Source : TestParquetAvroReader.java
with Apache License 2.0
from apache

private File writeTestData(Schema schema, int numRecords, int seed) throws IOException {
    File testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    try (FileAppender<Record> writer = Parquet.write(Files.localOutput(testFile)).schema(schema).build()) {
        writer.addAll(RandomData.generate(schema, numRecords, seed));
    }
    return testFile;
}

16 Source : TestParquetVectorizedReads.java
with Apache License 2.0
from apache

FileAppender<GenericData.Record> getParquetWriter(Schema schema, File testFile) throws IOException {
    return Parquet.write(Files.localOutput(testFile)).schema(schema).named("test").build();
}

15 Source : SparkParquetWritersNestedDataBenchmark.java
with Apache License 2.0
from apache

@Benchmark
@Threads(1)
public void writeUsingSparkWriter() throws IOException {
    StructType sparkSchema = SparkSchemaUtil.convert(SCHEMA);
    try (FileAppender<InternalRow> writer = Parquet.write(Files.localOutput(dataFile)).writeSupport(new ParquetWriteSupport()).set("org.apache.spark.sql.parquet.row.attributes", sparkSchema.json()).set("spark.sql.parquet.writeLegacyFormat", "false").set("spark.sql.parquet.binaryAsString", "false").set("spark.sql.parquet.int96AsTimestamp", "false").set("spark.sql.parquet.outputTimestampType", "TIMESTAMP_MICROS").schema(SCHEMA).build()) {
        writer.addAll(rows);
    }
}

15 Source : TestParquetDictionaryFallbackToPlainEncodingVectorizedReads.java
with Apache License 2.0
from apache

@Override
FileAppender<GenericData.Record> getParquetWriter(Schema schema, File testFile) throws IOException {
    return Parquet.write(Files.localOutput(testFile)).schema(schema).named("test").set(TableProperties.PARQUET_DICT_SIZE_BYTES, "512000").build();
}

15 Source : TestAvroFileSplit.java
with Apache License 2.0
from apache

@Before
public void writeDataFile() throws IOException {
    this.expected = Lists.newArrayList();
    OutputFile out = Files.localOutput(temp.newFile());
    try (FileAppender<Object> writer = Avro.write(out).set(TableProperties.AVRO_COMPRESSION, "uncompressed").createWriterFunc(DataWriter::create).schema(SCHEMA).overwrite().build()) {
        Record record = GenericRecord.create(SCHEMA);
        for (long i = 0; i < NUM_RECORDS; i += 1) {
            Record next = record.copy(ImmutableMap.of("id", i, "data", UUID.randomUUID().toString()));
            expected.add(next);
            writer.add(next);
        }
    }
    this.file = out.toInputFile();
}

14 Source : TestSparkOrcReadMetadataColumns.java
with Apache License 2.0
from apache

@Before
public void writeFile() throws IOException {
    testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    try (FileAppender<InternalRow> writer = ORC.write(Files.localOutput(testFile)).createWriterFunc(SparkOrcWriter::new).schema(DATA_SCHEMA).config("iceberg.orc.vectorbatch.size", "100").config(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute(), "100").config(OrcConf.STRIPE_SIZE.getAttribute(), "1").build()) {
        writer.addAll(DATA_ROWS);
    }
}

14 Source : TestGenericAvro.java
with Apache License 2.0
from apache

@Override
protected void writeAndValidate(Schema schema) throws IOException {
    List<Record> expected = RandomAvroData.generate(schema, 100, 0L);
    File testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    try (FileAppender<Record> writer = Avro.write(Files.localOutput(testFile)).schema(schema).named("test").build()) {
        for (Record rec : expected) {
            writer.add(rec);
        }
    }
    List<Record> rows;
    try (AvroIterable<Record> reader = Avro.read(Files.localInput(testFile)).project(schema).build()) {
        rows = Lists.newArrayList(reader);
    }
    for (int i = 0; i < expected.size(); i += 1) {
        AvroTestHelpers.assertEquals(schema.asStruct(), expected.get(i), rows.get(i));
    }
}

14 Source : TestAvroReadProjection.java
with Apache License 2.0
from apache

@Override
protected GenericData.Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, GenericData.Record record) throws IOException {
    File file = temp.newFile(desc + ".avro");
    file.delete();
    try (FileAppender<GenericData.Record> appender = Avro.write(Files.localOutput(file)).schema(writeSchema).build()) {
        appender.add(record);
    }
    Iterable<GenericData.Record> records = Avro.read(Files.localInput(file)).project(readSchema).build();
    return Iterables.getOnlyElement(records);
}

13 Source : GenericAppenderHelper.java
with Apache License 2.0
from apache

private static DataFile appendToLocalFile(Table table, File file, FileFormat format, StructLike partition, List<Record> records) throws IOException {
    FileAppender<Record> appender = new GenericAppenderFactory(table.schema()).newAppender(Files.localOutput(file), format);
    try (FileAppender<Record> fileAppender = appender) {
        fileAppender.addAll(records);
    }
    return DataFiles.builder(table.spec()).withRecordCount(records.size()).withFileSizeInBytes(file.length()).withPath(Files.localInput(file).location()).withMetrics(appender.metrics()).withFormat(format).withPartition(partition).build();
}

12 Source : TestSparkAvroReader.java
with Apache License 2.0
from apache

@Override
protected void writeAndValidate(Schema schema) throws IOException {
    List<Record> expected = RandomData.generateList(schema, 100, 0L);
    File testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    try (FileAppender<Record> writer = Avro.write(Files.localOutput(testFile)).schema(schema).named("test").build()) {
        for (Record rec : expected) {
            writer.add(rec);
        }
    }
    List<InternalRow> rows;
    try (AvroIterable<InternalRow> reader = Avro.read(Files.localInput(testFile)).createReaderFunc(SparkAvroReader::new).project(schema).build()) {
        rows = Lists.newArrayList(reader);
    }
    for (int i = 0; i < expected.size(); i += 1) {
        assertEqualsUnsafe(schema.asStruct(), expected.get(i), rows.get(i));
    }
}

12 Source : TestNessieTable.java
with Apache License 2.0
from apache

private static String addRecordsToFile(Table table, String filename) throws IOException {
    GenericRecordBuilder recordBuilder = new GenericRecordBuilder(AvroSchemaUtil.convert(schema, "test"));
    List<GenericData.Record> records = new ArrayList<>();
    records.add(recordBuilder.set("id", 1L).build());
    records.add(recordBuilder.set("id", 2L).build());
    records.add(recordBuilder.set("id", 3L).build());
    String fileLocation = table.location().replace("file:", "") + String.format("/data/%s.avro", filename);
    try (FileAppender<GenericData.Record> writer = Avro.write(Files.localOutput(fileLocation)).schema(schema).named("test").build()) {
        for (GenericData.Record rec : records) {
            writer.add(rec);
        }
    }
    return fileLocation;
}

12 Source : TestGenericData.java
with Apache License 2.0
from apache

@Override
protected void writeAndValidate(Schema schema) throws IOException {
    List<Record> expected = RandomGenericData.generate(schema, 100, 0L);
    File testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    try (FileAppender<Record> appender = Parquet.write(Files.localOutput(testFile)).schema(schema).createWriterFunc(GenericParquetWriter::buildWriter).build()) {
        appender.addAll(expected);
    }
    List<Record> rows;
    try (CloseableIterable<Record> reader = Parquet.read(Files.localInput(testFile)).project(schema).createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema)).build()) {
        rows = Lists.newArrayList(reader);
    }
    for (int i = 0; i < expected.size(); i += 1) {
        DataTestHelpers.assertEquals(schema.asStruct(), expected.get(i), rows.get(i));
    }
}

12 Source : DeleteReadTests.java
with Apache License 2.0
from apache

@Test
public void testMultipleEqualityDeleteSchemas() throws IOException {
    Schema dataSchema = table.schema().select("data");
    Record dataDelete = GenericRecord.create(dataSchema);
    List<Record> dataDeletes = Lists.newArrayList(
        dataDelete.copy("data", "a"), // id = 29
        dataDelete.copy("data", "d"), // id = 89
        dataDelete.copy("data", "g")); // id = 122
    DeleteFile dataEqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), dataDeletes, dataSchema);
    Schema idSchema = table.schema().select("id");
    Record idDelete = GenericRecord.create(idSchema);
    List<Record> idDeletes = Lists.newArrayList(
        idDelete.copy("id", 121), // id = 121
        idDelete.copy("id", 29)); // id = 29
    DeleteFile idEqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), idDeletes, idSchema);
    table.newRowDelta().addDeletes(dataEqDeletes).addDeletes(idEqDeletes).commit();
    StructLikeSet expected = rowSetWithoutIds(29, 89, 121, 122);
    StructLikeSet actual = rowSet(tableName, table, "*");
    Assert.assertEquals("Table should contain expected rows", expected, actual);
}

12 Source : DeleteReadTests.java
with Apache License 2.0
from apache

@Test
public void testEqualityDeletesWithRequiredEqColumn() throws IOException {
    Schema deleteRowSchema = table.schema().select("data");
    Record dataDelete = GenericRecord.create(deleteRowSchema);
    List<Record> dataDeletes = Lists.newArrayList(
        dataDelete.copy("data", "a"), // id = 29
        dataDelete.copy("data", "d"), // id = 89
        dataDelete.copy("data", "g")); // id = 122
    DeleteFile eqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), dataDeletes, deleteRowSchema);
    table.newRowDelta().addDeletes(eqDeletes).commit();
    StructLikeSet expected = selectColumns(rowSetWithoutIds(29, 89, 122), "id");
    StructLikeSet actual = rowSet(tableName, table, "id");
    if (expectPruned()) {
        Assert.assertEquals("Table should contain expected rows", expected, actual);
    } else {
        // data is added by the reader to apply the eq deletes, use StructProjection to remove it from comparison
        Assert.assertEquals("Table should contain expected rows", expected, selectColumns(actual, "id"));
    }
}

12 Source : DeleteReadTests.java
with Apache License 2.0
from apache

@Test
public void testEqualityDeletes() throws IOException {
    Schema deleteRowSchema = table.schema().select("data");
    Record dataDelete = GenericRecord.create(deleteRowSchema);
    List<Record> dataDeletes = Lists.newArrayList(
        dataDelete.copy("data", "a"), // id = 29
        dataDelete.copy("data", "d"), // id = 89
        dataDelete.copy("data", "g")); // id = 122
    DeleteFile eqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), dataDeletes, deleteRowSchema);
    table.newRowDelta().addDeletes(eqDeletes).commit();
    StructLikeSet expected = rowSetWithoutIds(29, 89, 122);
    StructLikeSet actual = rowSet(tableName, table, "*");
    Assert.assertEquals("Table should contain expected rows", expected, actual);
}

12 Source : TestGenericData.java
with Apache License 2.0
from apache

@Override
protected void writeAndValidate(Schema schema) throws IOException {
    List<Record> expected = RandomGenericData.generate(schema, 100, 0L);
    File testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    try (FileAppender<Record> writer = Avro.write(Files.localOutput(testFile)).schema(schema).createWriterFunc(DataWriter::create).named("test").build()) {
        for (Record rec : expected) {
            writer.add(rec);
        }
    }
    List<Record> rows;
    try (AvroIterable<Record> reader = Avro.read(Files.localInput(testFile)).project(schema).createReaderFunc(DataReader::create).build()) {
        rows = Lists.newArrayList(reader);
    }
    for (int i = 0; i < expected.size(); i += 1) {
        DataTestHelpers.assertEquals(schema.asStruct(), expected.get(i), rows.get(i));
    }
}

11 Source : TestSparkOrcReader.java
with Apache License 2.0
from apache

private void writeAndValidateRecords(Schema schema, Iterable<InternalRow> expected) throws IOException {
    final File testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    try (FileAppender<InternalRow> writer = ORC.write(Files.localOutput(testFile)).createWriterFunc(SparkOrcWriter::new).schema(schema).build()) {
        writer.addAll(expected);
    }
    try (CloseableIterable<InternalRow> reader = ORC.read(Files.localInput(testFile)).project(schema).createReaderFunc(readOrcSchema -> new SparkOrcReader(schema, readOrcSchema)).build()) {
        final Iterator<InternalRow> actualRows = reader.iterator();
        final Iterator<InternalRow> expectedRows = expected.iterator();
        while (expectedRows.hasNext()) {
            Assert.assertTrue("Should have expected number of rows", actualRows.hasNext());
            assertEquals(schema, expectedRows.next(), actualRows.next());
        }
        Assert.assertFalse("Should not have extra rows", actualRows.hasNext());
    }
    try (CloseableIterable<ColumnarBatch> reader = ORC.read(Files.localInput(testFile)).project(schema).createBatchedReaderFunc(readOrcSchema -> VectorizedSparkOrcReaders.buildReader(schema, readOrcSchema, ImmutableMap.of())).build()) {
        final Iterator<InternalRow> actualRows = batchesToRows(reader.iterator());
        final Iterator<InternalRow> expectedRows = expected.iterator();
        while (expectedRows.hasNext()) {
            Assert.assertTrue("Should have expected number of rows", actualRows.hasNext());
            assertEquals(schema, expectedRows.next(), actualRows.next());
        }
        Assert.assertFalse("Should not have extra rows", actualRows.hasNext());
    }
}

11 Source : Parquet.java
with Apache License 2.0
from apache

/**
 * Combines several files into one
 *
 * @param inputFiles   an {@link Iterable} of parquet files. The order of iteration determines the order in which
 *                     content of files are read and written to the {@code outputFile}
 * @param outputFile   the output parquet file containing all the data from {@code inputFiles}
 * @param rowGroupSize the row group size to use when writing the {@code outputFile}
 * @param schema       the schema of the data
 * @param metadata     extraMetadata to write at the footer of the {@code outputFile}
 */
public static void concat(Iterable<File> inputFiles, File outputFile, int rowGroupSize, Schema schema, Map<String, String> metadata) throws IOException {
    OutputFile file = Files.localOutput(outputFile);
    ParquetFileWriter writer = new ParquetFileWriter(ParquetIO.file(file), ParquetSchemaUtil.convert(schema, "table"), ParquetFileWriter.Mode.CREATE, rowGroupSize, 0);
    writer.start();
    for (File inputFile : inputFiles) {
        writer.appendFile(ParquetIO.file(Files.localInput(inputFile)));
    }
    writer.end(metadata);
}
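The concat utility above takes only local files, so a hedged usage sketch might look like the following. The file names, schema, and row-group size are illustrative assumptions, not values from the Iceberg source.

import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.iceberg.Schema;
import org.apache.iceberg.parquet.Parquet;
import org.apache.iceberg.types.Types;

public class ConcatSketch {
    public static void main(String[] args) throws IOException {
        // hypothetical schema shared by all input files
        Schema schema = new Schema(
            Types.NestedField.required(1, "id", Types.LongType.get()));

        // hypothetical local Parquet files written with the same schema
        List<File> parts = Arrays.asList(new File("part-0.parquet"), new File("part-1.parquet"));
        File merged = new File("merged.parquet");

        // combine the inputs into one file with 128 MB row groups and no extra footer metadata
        Parquet.concat(parts, merged, 128 * 1024 * 1024, schema, Collections.emptyMap());
    }
}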

10 Source : TestSparkParquetWriter.java
with Apache License 2.0
from apache

@Test
public void testCorrectness() throws IOException {
    int numRows = 50_000;
    Iterable<InternalRow> records = RandomData.generateSpark(COMPLEX_SCHEMA, numRows, 19981);
    File testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    try (FileAppender<InternalRow> writer = Parquet.write(Files.localOutput(testFile)).schema(COMPLEX_SCHEMA).createWriterFunc(msgType -> SparkParquetWriters.buildWriter(SparkSchemaUtil.convert(COMPLEX_SCHEMA), msgType)).build()) {
        writer.addAll(records);
    }
    try (CloseableIterable<InternalRow> reader = Parquet.read(Files.localInput(testFile)).project(COMPLEX_SCHEMA).createReaderFunc(type -> SparkParquetReaders.buildReader(COMPLEX_SCHEMA, type)).build()) {
        Iterator<InternalRow> expected = records.iterator();
        Iterator<InternalRow> rows = reader.iterator();
        for (int i = 0; i < numRows; i += 1) {
            Assert.assertTrue("Should have expected number of rows", rows.hasNext());
            TestHelpers.assertEquals(COMPLEX_SCHEMA, expected.next(), rows.next());
        }
        Assert.assertFalse("Should not have extra rows", rows.hasNext());
    }
}

10 Source : TestParquetAvroWriter.java
with Apache License 2.0
from apache

@Test
public void testCorrectness() throws IOException {
    Iterable<Record> records = RandomData.generate(COMPLEX_SCHEMA, 50_000, 34139);
    File testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    try (FileAppender<Record> writer = Parquet.write(Files.localOutput(testFile)).schema(COMPLEX_SCHEMA).createWriterFunc(ParquetAvroWriter::buildWriter).build()) {
        writer.addAll(records);
    }
    // RandomData uses the root record name "test", which must match for records to be equal
    MessageType readSchema = ParquetSchemaUtil.convert(COMPLEX_SCHEMA, "test");
    // verify that the new read path is correct
    try (CloseableIterable<Record> reader = Parquet.read(Files.localInput(testFile)).project(COMPLEX_SCHEMA).createReaderFunc(fileSchema -> ParquetAvroValueReaders.buildReader(COMPLEX_SCHEMA, readSchema)).build()) {
        int recordNum = 0;
        Iterator<Record> iter = records.iterator();
        for (Record actual : reader) {
            Record expected = iter.next();
            Assert.assertEquals("Record " + recordNum + " should match expected", expected, actual);
            recordNum += 1;
        }
    }
}

10 Source : TestParquetAvroReader.java
with Apache License 2.0
from apache

@Test
public void testCorrectness() throws IOException {
    Iterable<Record> records = RandomData.generate(COMPLEX_SCHEMA, 50_000, 34139);
    File testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    try (FileAppender<Record> writer = Parquet.write(Files.localOutput(testFile)).schema(COMPLEX_SCHEMA).build()) {
        writer.addAll(records);
    }
    // RandomData uses the root record name "test", which must match for records to be equal
    MessageType readSchema = ParquetSchemaUtil.convert(COMPLEX_SCHEMA, "test");
    // verify that the new read path is correct
    try (CloseableIterable<Record> reader = Parquet.read(Files.localInput(testFile)).project(COMPLEX_SCHEMA).createReaderFunc(fileSchema -> ParquetAvroValueReaders.buildReader(COMPLEX_SCHEMA, readSchema)).reuseContainers().build()) {
        int recordNum = 0;
        Iterator<Record> iter = records.iterator();
        for (Record actual : reader) {
            Record expected = iter.next();
            Assert.assertEquals("Record " + recordNum + " should match expected", expected, actual);
            recordNum += 1;
        }
    }
}

10 Source : TestParquetReadProjection.java
with Apache License 2.0
from apache

@Override
protected GenericData.Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, GenericData.Record record) throws IOException {
    File file = temp.newFile(desc + ".parquet");
    file.delete();
    try (FileAppender<GenericData.Record> appender = Parquet.write(Files.localOutput(file)).schema(writeSchema).build()) {
        appender.add(record);
    }
    Iterable<GenericData.Record> records = Parquet.read(Files.localInput(file)).project(readSchema).callInit().build();
    return Iterables.getOnlyElement(records);
}

10 Source : DeleteReadTests.java
with Apache License 2.0
from apache

@Test
public void testEqualityDeletesSpanningMultipleDataFiles() throws IOException {
    // Add another DataFile with common values
    GenericRecord record = GenericRecord.create(table.schema());
    records.add(record.copy("id", 144, "data", "a"));
    this.dataFile = FileHelpers.writeDataFile(table, Files.localOutput(temp.newFile()), Row.of(0), records);
    table.newAppend().appendFile(dataFile).commit();
    Schema deleteRowSchema = table.schema().select("data");
    Record dataDelete = GenericRecord.create(deleteRowSchema);
    List<Record> dataDeletes = Lists.newArrayList(
        dataDelete.copy("data", "a"), // id = 29, 144
        dataDelete.copy("data", "d"), // id = 89
        dataDelete.copy("data", "g")); // id = 122
    DeleteFile eqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), dataDeletes, deleteRowSchema);
    table.newRowDelta().addDeletes(eqDeletes).commit();
    StructLikeSet expected = rowSetWithoutIds(29, 89, 122, 144);
    StructLikeSet actual = rowSet(tableName, table, "*");
    Assert.assertEquals("Table should contain expected rows", expected, actual);
}

9 Source : TestSparkRecordOrcReaderWriter.java
with Apache License 2.0
from apache

private void writeAndValidate(Schema schema, List<Record> expectedRecords) throws IOException {
    final File originalFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", originalFile.delete());
    // Write few generic records into the original test file.
    try (FileAppender<Record> writer = ORC.write(Files.localOutput(originalFile)).createWriterFunc(GenericOrcWriter::buildWriter).schema(schema).build()) {
        writer.addAll(expectedRecords);
    }
    // Read into spark InternalRow from the original test file.
    List<InternalRow> internalRows = Lists.newArrayList();
    try (CloseableIterable<InternalRow> reader = ORC.read(Files.localInput(originalFile)).project(schema).createReaderFunc(readOrcSchema -> new SparkOrcReader(schema, readOrcSchema)).build()) {
        reader.forEach(internalRows::add);
        assertEqualsUnsafe(schema.asStruct(), expectedRecords, reader, expectedRecords.size());
    }
    final File anotherFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", anotherFile.delete());
    // Write those spark InternalRows into a new file again.
    try (FileAppender<InternalRow> writer = ORC.write(Files.localOutput(anotherFile)).createWriterFunc(SparkOrcWriter::new).schema(schema).build()) {
        writer.addAll(internalRows);
    }
    // Check whether the InternalRows are expected records.
    try (CloseableIterable<InternalRow> reader = ORC.read(Files.localInput(anotherFile)).project(schema).createReaderFunc(readOrcSchema -> new SparkOrcReader(schema, readOrcSchema)).build()) {
        assertEqualsUnsafe(schema.asStruct(), expectedRecords, reader, expectedRecords.size());
    }
    // Read into iceberg GenericRecord and check again.
    try (CloseableIterable<Record> reader = ORC.read(Files.localInput(anotherFile)).createReaderFunc(typeDesc -> GenericOrcReader.buildReader(schema, typeDesc)).project(schema).build()) {
        assertRecordEquals(expectedRecords, reader, expectedRecords.size());
    }
}

9 Source : TestOrcWrite.java
with Apache License 2.0
from apache

@Test
public void splitOffsets() throws IOException {
    File testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    Iterable<InternalRow> rows = RandomData.generateSpark(SCHEMA, 1, 0L);
    FileAppender<InternalRow> writer = ORC.write(Files.localOutput(testFile)).createWriterFunc(SparkOrcWriter::new).schema(SCHEMA).build();
    writer.addAll(rows);
    writer.close();
    Assert.assertNotNull("Split offsets not present", writer.splitOffsets());
}

9 Source : TestFlinkParquetReader.java
with Apache License 2.0
from apache

private void writeAndValidate(Iterable<Record> iterable, Schema schema) throws IOException {
    File testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    try (FileAppender<Record> writer = Parquet.write(Files.localOutput(testFile)).schema(schema).createWriterFunc(GenericParquetWriter::buildWriter).build()) {
        writer.addAll(iterable);
    }
    try (CloseableIterable<RowData> reader = Parquet.read(Files.localInput(testFile)).project(schema).createReaderFunc(type -> FlinkParquetReaders.buildReader(schema, type)).build()) {
        Iterator<Record> expected = iterable.iterator();
        Iterator<RowData> rows = reader.iterator();
        LogicalType rowType = FlinkSchemaUtil.convert(schema);
        for (int i = 0; i < NUM_RECORDS; i += 1) {
            Assert.assertTrue("Should have expected number of rows", rows.hasNext());
            TestHelpers.assertRowData(schema.asStruct(), rowType, expected.next(), rows.next());
        }
        Assert.assertFalse("Should not have extra rows", rows.hasNext());
    }
}

9 Source : TestMetricsRowGroupFilterTypes.java
with Apache License 2.0
from apache

public void createParquetInputFile(List<Record> records) throws IOException {
    if (PARQUET_FILE.exists()) {
        Assert.assertTrue(PARQUET_FILE.delete());
    }
    OutputFile outFile = Files.localOutput(PARQUET_FILE);
    try (FileAppender<Record> appender = Parquet.write(outFile).schema(FILE_SCHEMA).createWriterFunc(GenericParquetWriter::buildWriter).build()) {
        appender.addAll(records);
    }
    InputFile inFile = Files.localInput(PARQUET_FILE);
    try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) {
        Assert.assertEquals("Should create only one row group", 1, reader.getRowGroups().size());
        rowGroupMetadata = reader.getRowGroups().get(0);
        parquetSchema = reader.getFileMetaData().getSchema();
    }
    PARQUET_FILE.deleteOnExit();
}

9 Source : TestGenericReadProjection.java
with Apache License 2.0
from apache

@Override
protected Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, Record record) throws IOException {
    File file = temp.newFile(desc + ".parquet");
    file.delete();
    try (FileAppender<Record> appender = Parquet.write(Files.localOutput(file)).schema(writeSchema).createWriterFunc(GenericParquetWriter::buildWriter).build()) {
        appender.add(record);
    }
    Iterable<Record> records = Parquet.read(Files.localInput(file)).project(readSchema).createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(readSchema, fileSchema)).build();
    return Iterables.getOnlyElement(records);
}

9 Source : TestGenericReadProjection.java
with Apache License 2.0
from apache

@Override
protected Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, Record record) throws IOException {
    File file = temp.newFile(desc + ".orc");
    file.delete();
    try (FileAppender<Record> appender = ORC.write(Files.localOutput(file)).schema(writeSchema).createWriterFunc(GenericOrcWriter::buildWriter).build()) {
        appender.add(record);
    }
    Iterable<Record> records = ORC.read(Files.localInput(file)).project(readSchema).createReaderFunc(fileSchema -> GenericOrcReader.buildReader(readSchema, fileSchema)).build();
    return Iterables.getOnlyElement(records);
}

9 Source : DeleteReadTests.java
with Apache License 2.0
from apache

@Test
public void testEqualityDeleteByNull() throws IOException {
    // data is required in the test table; make it optional for this test
    table.updateSchema().makeColumnOptional("data").commit();
    // add a new data file with a record where data is null
    Record record = GenericRecord.create(table.schema());
    DataFile dataFileWithNull = FileHelpers.writeDataFile(table, Files.localOutput(temp.newFile()), Row.of(0), Lists.newArrayList(record.copy("id", 131, "data", null)));
    table.newAppend().appendFile(dataFileWithNull).commit();
    // delete where data is null
    Schema dataSchema = table.schema().select("data");
    Record dataDelete = GenericRecord.create(dataSchema);
    List<Record> dataDeletes = Lists.newArrayList(
        dataDelete.copy("data", null)); // id = 131
    DeleteFile eqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), dataDeletes, dataSchema);
    table.newRowDelta().addDeletes(eqDeletes).commit();
    StructLikeSet expected = rowSetWithoutIds(131);
    StructLikeSet actual = rowSet(tableName, table, "*");
    Assert.assertEquals("Table should contain expected rows", expected, actual);
}

9 Source : DeleteReadTests.java
with Apache License 2.0
from apache

@Test
public void testPositionDeletes() throws IOException {
    List<Pair<CharSequence, Long>> deletes = Lists.newArrayList(
        Pair.of(dataFile.path(), 0L), // id = 29
        Pair.of(dataFile.path(), 3L), // id = 89
        Pair.of(dataFile.path(), 6L)); // id = 122
    Pair<DeleteFile, Set<CharSequence>> posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), deletes);
    table.newRowDelta().addDeletes(posDeletes.first()).validateDataFilesExist(posDeletes.second()).commit();
    StructLikeSet expected = rowSetWithoutIds(29, 89, 122);
    StructLikeSet actual = rowSet(tableName, table, "*");
    Assert.assertEquals("Table should contain expected rows", expected, actual);
}

9 Source : TestGenericReadProjection.java
with Apache License 2.0
from apache

@Override
protected Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, Record record) throws IOException {
    File file = temp.newFile(desc + ".avro");
    file.delete();
    try (FileAppender<Record> appender = Avro.write(Files.localOutput(file)).schema(writeSchema).createWriterFunc(DataWriter::create).build()) {
        appender.add(record);
    }
    Iterable<Record> records = Avro.read(Files.localInput(file)).project(readSchema).createReaderFunc(DataReader::create).build();
    return Iterables.getOnlyElement(records);
}

8 Source : TestRowProjection.java
with Apache License 2.0
from apache

private RowData writeAndRead(String desc, Schema writeSchema, Schema readSchema, RowData row) throws IOException {
    File file = temp.newFile(desc + ".avro");
    Assert.assertTrue(file.delete());
    try (FileAppender<RowData> appender = Avro.write(Files.localOutput(file)).schema(writeSchema).createWriterFunc(ignore -> new FlinkAvroWriter(FlinkSchemaUtil.convert(writeSchema))).build()) {
        appender.add(row);
    }
    Iterable<RowData> records = Avro.read(Files.localInput(file)).project(readSchema).createReaderFunc(FlinkAvroReader::new).build();
    return Iterables.getOnlyElement(records);
}

8 Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache

@Test
public void testEqualityDeletePlanningStats() throws IOException {
    table.newAppend().appendFile(dataFile).commit();
    List<Record> deletes = Lists.newArrayList();
    Schema deleteRowSchema = SCHEMA.select("data");
    Record delete = GenericRecord.create(deleteRowSchema);
    deletes.add(delete.copy("data", "d"));
    DeleteFile posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes, deleteRowSchema);
    table.newRowDelta().addDeletes(posDeletes).commit();
    List<FileScanTask> tasks;
    try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
        tasks = Lists.newArrayList(tasksIterable);
    }
    Assert.assertEquals("Should produce one task", 1, tasks.size());
    FileScanTask task = tasks.get(0);
    Assert.assertEquals("Should have one delete file, data contains a matching value", 1, task.deletes().size());
}

8 Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache

@Test
public void testEqualityDeletePlanningStatsSomeNullValuesWithSomeNullDeletes() throws IOException {
    // note that there are some nulls in the data column
    table.newAppend().appendFile(dataFile).commit();
    List<Record> deletes = Lists.newArrayList();
    Schema deleteRowSchema = SCHEMA.select("data");
    Record delete = GenericRecord.create(deleteRowSchema);
    // the data and delete ranges do not overlap, but both contain null
    deletes.add(delete.copy("data", null));
    deletes.add(delete.copy("data", "x"));
    DeleteFile posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes, deleteRowSchema);
    table.newRowDelta().addDeletes(posDeletes).commit();
    List<FileScanTask> tasks;
    try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
        tasks = Lists.newArrayList(tasksIterable);
    }
    Assert.assertEquals("Should produce one task", 1, tasks.size());
    FileScanTask task = tasks.get(0);
    Assert.assertEquals("Should have one delete file, data and deletes have null values", 1, task.deletes().size());
}

8 Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache

@Test
public void testEqualityDeletePlanningStatsAllNullValuesWithNoNullDeletes() throws IOException {
    // note that there are only nulls in the data column
    table.newAppend().appendFile(dataFileOnlyNulls).commit();
    List<Record> deletes = Lists.newArrayList();
    Schema deleteRowSchema = SCHEMA.select("data");
    Record delete = GenericRecord.create(deleteRowSchema);
    deletes.add(delete.copy("data", "d"));
    DeleteFile posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes, deleteRowSchema);
    table.newRowDelta().addDeletes(posDeletes).commit();
    List<FileScanTask> tasks;
    try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
        tasks = Lists.newArrayList(tasksIterable);
    }
    Assert.assertEquals("Should produce one task", 1, tasks.size());
    FileScanTask task = tasks.get(0);
    Assert.assertEquals("Should have no delete files, data contains no null values", 0, task.deletes().size());
}

8 Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache

@Test
public void testEqualityDeletePlanningStatsNullValueWithAllNullDeletes() throws IOException {
    table.newAppend().appendFile(dataFile).commit();
    List<Record> deletes = Lists.newArrayList();
    Schema deleteRowSchema = SCHEMA.select("data");
    Record delete = GenericRecord.create(deleteRowSchema);
    deletes.add(delete.copy("data", null));
    DeleteFile posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes, deleteRowSchema);
    table.newRowDelta().addDeletes(posDeletes).commit();
    List<FileScanTask> tasks;
    try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
        tasks = Lists.newArrayList(tasksIterable);
    }
    Assert.assertEquals("Should produce one task", 1, tasks.size());
    FileScanTask task = tasks.get(0);
    Assert.assertEquals("Should have delete file, data contains a null value", 1, task.deletes().size());
}

8 Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache

@Test
public void testEqualityDeletePlanningStatsNoNullValuesWithAllNullDeletes() throws IOException {
    // note that there are no nulls in the data column
    table.newAppend().appendFile(dataFileWithoutNulls).commit();
    List<Record> deletes = Lists.newArrayList();
    Schema deleteRowSchema = SCHEMA.select("data");
    Record delete = GenericRecord.create(deleteRowSchema);
    deletes.add(delete.copy("data", null));
    DeleteFile posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes, deleteRowSchema);
    table.newRowDelta().addDeletes(posDeletes).commit();
    List<FileScanTask> tasks;
    try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
        tasks = Lists.newArrayList(tasksIterable);
    }
    Assert.assertEquals("Should produce one task", 1, tasks.size());
    FileScanTask task = tasks.get(0);
    Assert.assertEquals("Should have no delete files, data contains no null values", 0, task.deletes().size());
}

8 Source : TestGenericData.java
with Apache License 2.0
from apache

private void writeAndValidateRecords(Schema schema, List<Record> expected) throws IOException {
    File testFile = temp.newFile();
    Assert.assertTrue("Delete should succeed", testFile.delete());
    try (FileAppender<Record> writer = ORC.write(Files.localOutput(testFile)).schema(schema).createWriterFunc(GenericOrcWriter::buildWriter).build()) {
        for (Record rec : expected) {
            writer.add(rec);
        }
    }
    List<Record> rows;
    try (CloseableIterable<Record> reader = ORC.read(Files.localInput(testFile)).project(schema).createReaderFunc(fileSchema -> GenericOrcReader.buildReader(schema, fileSchema)).build()) {
        rows = Lists.newArrayList(reader);
    }
    for (int i = 0; i < expected.size(); i += 1) {
        DataTestHelpers.assertEquals(schema.asStruct(), expected.get(i), rows.get(i));
    }
}

7 Source : TestMetricsRowGroupFilterTypes.java
with Apache License 2.0
from apache

public void createOrcInputFile(List<Record> records) throws IOException {
    if (ORC_FILE.exists()) {
        Assert.assertTrue(ORC_FILE.delete());
    }
    OutputFile outFile = Files.localOutput(ORC_FILE);
    try (FileAppender<Record> appender = ORC.write(outFile).schema(FILE_SCHEMA).createWriterFunc(GenericOrcWriter::buildWriter).build()) {
        appender.addAll(records);
    }
    InputFile inFile = Files.localInput(ORC_FILE);
    try (Reader reader = OrcFile.createReader(new Path(inFile.location()), OrcFile.readerOptions(new Configuration()))) {
        Assert.assertEquals("Should create only one stripe", 1, reader.getStripes().size());
    }
    ORC_FILE.deleteOnExit();
}

7 Source : TestMetricsRowGroupFilter.java
with Apache License 2.0
from apache

public void createOrcInputFile() throws IOException {
    this.orcFile = temp.newFile();
    Assert.assertTrue(orcFile.delete());
    OutputFile outFile = Files.localOutput(orcFile);
    try (FileAppender<GenericRecord> appender = ORC.write(outFile).schema(FILE_SCHEMA).createWriterFunc(GenericOrcWriter::buildWriter).build()) {
        GenericRecord record = GenericRecord.create(FILE_SCHEMA);
        // create 50 records
        for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) {
            // min=30, max=79, num-nulls=0
            record.setField("_id", INT_MIN_VALUE + i);
            // value longer than 4k will produce no stats in Parquet, but will produce stats for ORC
            record.setField("_no_stats_parquet", TOO_LONG_FOR_STATS_PARQUET);
            // required, always non-null
            record.setField("_required", "req");
            // never non-null
            record.setField("_all_nulls", null);
            // includes some null values
            record.setField("_some_nulls", (i % 10 == 0) ? null : "some");
            // optional, but always non-null
            record.setField("_no_nulls", "");
            record.setField("_str", i + "str" + i);
            // never non-nan
            record.setField("_all_nans", Double.NaN);
            // includes some nan values
            record.setField("_some_nans", (i % 10 == 0) ? Float.NaN : 2F);
            // optional, but always non-nan
            record.setField("_no_nans", 3D);
            GenericRecord structNotNull = GenericRecord.create(_structFieldType);
            structNotNull.setField("_int_field", INT_MIN_VALUE + i);
            // struct with int
            record.setField("_struct_not_null", structNotNull);
            appender.add(record);
        }
    }
    InputFile inFile = Files.localInput(orcFile);
    try (Reader reader = OrcFile.createReader(new Path(inFile.location()), OrcFile.readerOptions(new Configuration()))) {
        Assert.assertEquals("Should create only one stripe", 1, reader.getStripes().size());
    }
    orcFile.deleteOnExit();
}

7 Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache

@Test
public void testEqualityDeletePlanningStatsFilter() throws IOException {
    table.newAppend().appendFile(dataFile).commit();
    List<Record> deletes = Lists.newArrayList();
    Schema deleteRowSchema = table.schema().select("data");
    Record delete = GenericRecord.create(deleteRowSchema);
    deletes.add(delete.copy("data", "x"));
    deletes.add(delete.copy("data", "y"));
    deletes.add(delete.copy("data", "z"));
    DeleteFile posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes, deleteRowSchema);
    table.newRowDelta().addDeletes(posDeletes).commit();
    List<FileScanTask> tasks;
    try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
        tasks = Lists.newArrayList(tasksIterable);
    }
    Assert.assertEquals("Should produce one task", 1, tasks.size());
    FileScanTask task = tasks.get(0);
    Assert.assertEquals("Should not have delete file, filtered by data column stats", 0, task.deletes().size());
}

7 Source : DeleteReadTests.java
with Apache License 2.0
from apache

@Test
public void testMixedPositionAndEqualityDeletes() throws IOException {
    Schema dataSchema = table.schema().select("data");
    Record dataDelete = GenericRecord.create(dataSchema);
    List<Record> dataDeletes = Lists.newArrayList(
        dataDelete.copy("data", "a"), // id = 29
        dataDelete.copy("data", "d"), // id = 89
        dataDelete.copy("data", "g")); // id = 122
    DeleteFile eqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), dataDeletes, dataSchema);
    List<Pair<CharSequence, Long>> deletes = Lists.newArrayList(
        Pair.of(dataFile.path(), 3L), // id = 89
        Pair.of(dataFile.path(), 5L)); // id = 121
    Pair<DeleteFile, Set<CharSequence>> posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), deletes);
    table.newRowDelta().addDeletes(eqDeletes).addDeletes(posDeletes.first()).validateDataFilesExist(posDeletes.second()).commit();
    StructLikeSet expected = rowSetWithoutIds(29, 89, 121, 122);
    StructLikeSet actual = rowSet(tableName, table, "*");
    Assert.assertEquals("Table should contain expected rows", expected, actual);
}

5 Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache

@Test
public void testPositionDeletePlanningPathFilter() throws IOException {
    table.newAppend().appendFile(dataFile).commit();
    List<Pair<CharSequence, Long>> deletes = Lists.newArrayList();
    deletes.add(Pair.of("some-other-file.parquet", 0L));
    deletes.add(Pair.of("some-other-file.parquet", 1L));
    Pair<DeleteFile, Set<CharSequence>> posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes);
    table.newRowDelta().addDeletes(posDeletes.first()).validateDataFilesExist(posDeletes.second()).commit();
    List<FileScanTask> tasks;
    try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
        tasks = Lists.newArrayList(tasksIterable);
    }
    Assert.assertEquals("Should produce one task", 1, tasks.size());
    FileScanTask task = tasks.get(0);
    Assert.assertEquals("Should not have delete file, filtered by file_path stats", 0, task.deletes().size());
}
