Here are examples of the Java API org.apache.iceberg.Files.localOutput() taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
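Before the examples, a minimal sketch (not taken from any project below) of the pattern most of them share: Files.localOutput(File) wraps a local file as an Iceberg OutputFile, and a format builder such as Parquet.write(...) turns that OutputFile into a FileAppender. The schema, record list, and target file here are placeholder parameters for illustration only.

import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.avro.generic.GenericData;
import org.apache.iceberg.Files;
import org.apache.iceberg.Schema;
import org.apache.iceberg.io.FileAppender;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.parquet.Parquet;

class LocalOutputSketch {
    // schema, records, and target are supplied by the caller; nothing here is tied to one example below
    static void writeLocalParquet(Schema schema, List<GenericData.Record> records, File target) throws IOException {
        // wrap the local file as an Iceberg OutputFile
        OutputFile out = Files.localOutput(target);
        // build a Parquet appender over that OutputFile and write the records
        try (FileAppender<GenericData.Record> writer = Parquet.write(out).schema(schema).named("sketch").build()) {
            writer.addAll(records);
        }
    }
}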
73 Examples
18
Source : SparkParquetReadersNestedDataBenchmark.java
with Apache License 2.0
from apache
@Setup
public void setupBenchmark() throws IOException {
dataFile = File.createTempFile("parquet-nested-data-benchmark", ".parquet");
dataFile.delete();
List<GenericData.Record> records = RandomData.generateList(SCHEMA, NUM_RECORDS, 0L);
try (FileAppender<GenericData.Record> writer = Parquet.write(Files.localOutput(dataFile)).schema(SCHEMA).named("benchmark").build()) {
writer.addAll(records);
}
}
18
Source : SparkParquetReadersFlatDataBenchmark.java
with Apache License 2.0
from apache
@Setup
public void setupBenchmark() throws IOException {
dataFile = File.createTempFile("parquet-flat-data-benchmark", ".parquet");
dataFile.delete();
List<GenericData.Record> records = RandomData.generateList(SCHEMA, NUM_RECORDS, 0L);
try (FileAppender<GenericData.Record> writer = Parquet.write(Files.localOutput(dataFile)).schema(SCHEMA).named("benchmark").build()) {
writer.addAll(records);
}
}
18
Source : TestOrcRowIterator.java
with Apache License 2.0
from apache
@Before
public void writeFile() throws IOException {
testFile = temp.newFile();
replacedert.replacedertTrue("Delete should succeed", testFile.delete());
try (FileAppender<Record> writer = ORC.write(Files.localOutput(testFile)).createWriterFunc(GenericOrcWriter::buildWriter).schema(DATA_SCHEMA).config("iceberg.orc.vectorbatch.size", "1000").config(OrcConf.ROW_INDEX_STRIDE.getAttribute(), "1000").config(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute(), "4000").config(OrcConf.STRIPE_SIZE.getAttribute(), "1").build()) {
writer.addAll(DATA_ROWS);
}
}
17
Source : TestParquetMetrics.java
with Apache License 2.0
from apache
@Override
protected OutputFile createOutputFile() throws IOException {
File tmpFolder = temp.newFolder("parquet");
String filename = UUID.randomUUID().toString();
return Files.localOutput(new File(tmpFolder, FileFormat.PARQUET.addExtension(filename)));
}
17
Source : TestOrcMetrics.java
with Apache License 2.0
from apache
@Override
protected OutputFile createOutputFile() throws IOException {
File tmpFolder = temp.newFolder("orc");
String filename = UUID.randomUUID().toString();
return Files.localOutput(new File(tmpFolder, FileFormat.ORC.addExtension(filename)));
}
16
Source : SparkParquetWritersNestedDataBenchmark.java
with Apache License 2.0
from apache
@Benchmark
@Threads(1)
public void writeUsingIcebergWriter() throws IOException {
try (FileAppender<InternalRow> writer = Parquet.write(Files.localOutput(dataFile)).createWriterFunc(msgType -> SparkParquetWriters.buildWriter(SparkSchemaUtil.convert(SCHEMA), msgType)).schema(SCHEMA).build()) {
writer.addAll(rows);
}
}
16
Source : TestParquetAvroReader.java
with Apache License 2.0
from apache
private File writeTestData(Schema schema, int numRecords, int seed) throws IOException {
File testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());
try (FileAppender<Record> writer = Parquet.write(Files.localOutput(testFile)).schema(schema).build()) {
writer.addAll(RandomData.generate(schema, numRecords, seed));
}
return testFile;
}
16
Source : TestParquetVectorizedReads.java
with Apache License 2.0
from apache
FileAppender<GenericData.Record> getParquetWriter(Schema schema, File testFile) throws IOException {
return Parquet.write(Files.localOutput(testFile)).schema(schema).named("test").build();
}
15
Source : SparkParquetWritersNestedDataBenchmark.java
with Apache License 2.0
from apache
@Benchmark
@Threads(1)
public void writeUsingSparkWriter() throws IOException {
StructType sparkSchema = SparkSchemaUtil.convert(SCHEMA);
try (FileAppender<InternalRow> writer = Parquet.write(Files.localOutput(dataFile))
    .writeSupport(new ParquetWriteSupport())
    .set("org.apache.spark.sql.parquet.row.attributes", sparkSchema.json())
    .set("spark.sql.parquet.writeLegacyFormat", "false")
    .set("spark.sql.parquet.binaryAsString", "false")
    .set("spark.sql.parquet.int96AsTimestamp", "false")
    .set("spark.sql.parquet.outputTimestampType", "TIMESTAMP_MICROS")
    .schema(SCHEMA)
    .build()) {
writer.addAll(rows);
}
}
15
Source : TestParquetDictionaryFallbackToPlainEncodingVectorizedReads.java
with Apache License 2.0
from apache
@Override
FileAppender<GenericData.Record> getParquetWriter(Schema schema, File testFile) throws IOException {
return Parquet.write(Files.localOutput(testFile)).schema(schema).named("test").set(TableProperties.PARQUET_DICT_SIZE_BYTES, "512000").build();
}
15
Source : TestAvroFileSplit.java
with Apache License 2.0
from apache
@Before
public void writeDataFile() throws IOException {
this.expected = Lists.newArrayList();
OutputFile out = Files.localOutput(temp.newFile());
try (FileAppender<Object> writer = Avro.write(out).set(TableProperties.AVRO_COMPRESSION, "uncompressed").createWriterFunc(DataWriter::create).schema(SCHEMA).overwrite().build()) {
Record record = GenericRecord.create(SCHEMA);
for (long i = 0; i < NUM_RECORDS; i += 1) {
Record next = record.copy(ImmutableMap.of("id", i, "data", UUID.randomUUID().toString()));
expected.add(next);
writer.add(next);
}
}
this.file = out.toInputFile();
}
14
Source : TestSparkOrcReadMetadataColumns.java
with Apache License 2.0
from apache
@Before
public void writeFile() throws IOException {
testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());
try (FileAppender<InternalRow> writer = ORC.write(Files.localOutput(testFile))
    .createWriterFunc(SparkOrcWriter::new)
    .schema(DATA_SCHEMA)
    .config("iceberg.orc.vectorbatch.size", "100")
    .config(OrcConf.ROWS_BETWEEN_CHECKS.getAttribute(), "100")
    .config(OrcConf.STRIPE_SIZE.getAttribute(), "1")
    .build()) {
writer.addAll(DATA_ROWS);
}
}
14
Source : TestGenericAvro.java
with Apache License 2.0
from apache
@Override
protected void writeAndValidate(Schema schema) throws IOException {
List<Record> expected = RandomAvroData.generate(schema, 100, 0L);
File testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());
try (FileAppender<Record> writer = Avro.write(Files.localOutput(testFile)).schema(schema).named("test").build()) {
for (Record rec : expected) {
writer.add(rec);
}
}
List<Record> rows;
try (AvroIterable<Record> reader = Avro.read(Files.localInput(testFile)).project(schema).build()) {
rows = Lists.newArrayList(reader);
}
for (int i = 0; i < expected.size(); i += 1) {
AvroTestHelpers.assertEquals(schema.asStruct(), expected.get(i), rows.get(i));
}
}
14
Source : TestAvroReadProjection.java
with Apache License 2.0
from apache
@Override
protected GenericData.Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, GenericData.Record record) throws IOException {
File file = temp.newFile(desc + ".avro");
file.delete();
try (FileAppender<GenericData.Record> appender = Avro.write(Files.localOutput(file)).schema(writeSchema).build()) {
appender.add(record);
}
Iterable<GenericData.Record> records = Avro.read(Files.localInput(file)).project(readSchema).build();
return Iterables.getOnlyElement(records);
}
13
Source : GenericAppenderHelper.java
with Apache License 2.0
from apache
private static DataFile appendToLocalFile(Table table, File file, FileFormat format, StructLike partition, List<Record> records) throws IOException {
FileAppender<Record> appender = new GenericAppenderFactory(table.schema()).newAppender(Files.localOutput(file), format);
try (FileAppender<Record> fileAppender = appender) {
fileAppender.addAll(records);
}
return DataFiles.builder(table.spec())
    .withRecordCount(records.size())
    .withFileSizeInBytes(file.length())
    .withPath(Files.localInput(file).location())
    .withMetrics(appender.metrics())
    .withFormat(format)
    .withPartition(partition)
    .build();
}
12
Source : TestSparkAvroReader.java
with Apache License 2.0
from apache
@Override
protected void writeAndValidate(Schema schema) throws IOException {
List<Record> expected = RandomData.generateList(schema, 100, 0L);
File testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());
try (FileAppender<Record> writer = Avro.write(Files.localOutput(testFile)).schema(schema).named("test").build()) {
for (Record rec : expected) {
writer.add(rec);
}
}
List<InternalRow> rows;
try (AvroIterable<InternalRow> reader = Avro.read(Files.localInput(testFile)).createReaderFunc(SparkAvroReader::new).project(schema).build()) {
rows = Lists.newArrayList(reader);
}
for (int i = 0; i < expected.size(); i += 1) {
assertEqualsUnsafe(schema.asStruct(), expected.get(i), rows.get(i));
}
}
12
Source : TestNessieTable.java
with Apache License 2.0
from apache
private static String addRecordsToFile(Table table, String filename) throws IOException {
GenericRecordBuilder recordBuilder = new GenericRecordBuilder(AvroSchemaUtil.convert(schema, "test"));
List<GenericData.Record> records = new ArrayList<>();
records.add(recordBuilder.set("id", 1L).build());
records.add(recordBuilder.set("id", 2L).build());
records.add(recordBuilder.set("id", 3L).build());
String fileLocation = table.location().replace("file:", "") + String.format("/data/%s.avro", filename);
try (FileAppender<GenericData.Record> writer = Avro.write(Files.localOutput(fileLocation)).schema(schema).named("test").build()) {
for (GenericData.Record rec : records) {
writer.add(rec);
}
}
return fileLocation;
}
12
Source : TestGenericData.java
with Apache License 2.0
from apache
@Override
protected void writeAndValidate(Schema schema) throws IOException {
List<Record> expected = RandomGenericData.generate(schema, 100, 0L);
File testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());
try (FileAppender<Record> appender = Parquet.write(Files.localOutput(testFile)).schema(schema).createWriterFunc(GenericParquetWriter::buildWriter).build()) {
appender.addAll(expected);
}
List<Record> rows;
try (CloseableIterable<Record> reader = Parquet.read(Files.localInput(testFile)).project(schema).createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema)).build()) {
rows = Lists.newArrayList(reader);
}
for (int i = 0; i < expected.size(); i += 1) {
DataTestHelpers.assertEquals(schema.asStruct(), expected.get(i), rows.get(i));
}
}
12
Source : DeleteReadTests.java
with Apache License 2.0
from apache
@Test
public void testMultipleEqualityDeleteSchemas() throws IOException {
Schema dataSchema = table.schema().select("data");
Record dataDelete = GenericRecord.create(dataSchema);
List<Record> dataDeletes = Lists.newArrayList(
    dataDelete.copy("data", "a"), // id = 29
    dataDelete.copy("data", "d"), // id = 89
    dataDelete.copy("data", "g")); // id = 122
DeleteFile dataEqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), dataDeletes, dataSchema);
Schema idSchema = table.schema().select("id");
Record idDelete = GenericRecord.create(idSchema);
List<Record> idDeletes = Lists.newArrayList(
    idDelete.copy("id", 121), // id = 121
    idDelete.copy("id", 29)); // id = 29
DeleteFile idEqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), idDeletes, idSchema);
table.newRowDelta().addDeletes(dataEqDeletes).addDeletes(idEqDeletes).commit();
StructLikeSet expected = rowSetWithoutIds(29, 89, 121, 122);
StructLikeSet actual = rowSet(tableName, table, "*");
replacedert.replacedertEquals("Table should contain expected rows", expected, actual);
}
12
Source : DeleteReadTests.java
with Apache License 2.0
from apache
@Test
public void testEqualityDeletesWithRequiredEqColumn() throws IOException {
Schema deleteRowSchema = table.schema().select("data");
Record dataDelete = GenericRecord.create(deleteRowSchema);
List<Record> dataDeletes = Lists.newArrayList(
    dataDelete.copy("data", "a"), // id = 29
    dataDelete.copy("data", "d"), // id = 89
    dataDelete.copy("data", "g")); // id = 122
DeleteFile eqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), dataDeletes, deleteRowSchema);
table.newRowDelta().addDeletes(eqDeletes).commit();
StructLikeSet expected = selectColumns(rowSetWithoutIds(29, 89, 122), "id");
StructLikeSet actual = rowSet(tableName, table, "id");
if (expectPruned()) {
replacedert.replacedertEquals("Table should contain expected rows", expected, actual);
} else {
// data is added by the reader to apply the eq deletes, use StructProjection to remove it from comparison
replacedert.replacedertEquals("Table should contain expected rows", expected, selectColumns(actual, "id"));
}
}
12
Source : DeleteReadTests.java
with Apache License 2.0
from apache
@Test
public void testEqualityDeletes() throws IOException {
Schema deleteRowSchema = table.schema().select("data");
Record dataDelete = GenericRecord.create(deleteRowSchema);
List<Record> dataDeletes = Lists.newArrayList(
    dataDelete.copy("data", "a"), // id = 29
    dataDelete.copy("data", "d"), // id = 89
    dataDelete.copy("data", "g")); // id = 122
DeleteFile eqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), dataDeletes, deleteRowSchema);
table.newRowDelta().addDeletes(eqDeletes).commit();
StructLikeSet expected = rowSetWithoutIds(29, 89, 122);
StructLikeSet actual = rowSet(tableName, table, "*");
replacedert.replacedertEquals("Table should contain expected rows", expected, actual);
}
12
Source : TestGenericData.java
with Apache License 2.0
from apache
@Override
protected void writeAndValidate(Schema schema) throws IOException {
List<Record> expected = RandomGenericData.generate(schema, 100, 0L);
File testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());
try (FileAppender<Record> writer = Avro.write(Files.localOutput(testFile)).schema(schema).createWriterFunc(DataWriter::create).named("test").build()) {
for (Record rec : expected) {
writer.add(rec);
}
}
List<Record> rows;
try (AvroIterable<Record> reader = Avro.read(Files.localInput(testFile)).project(schema).createReaderFunc(DataReader::create).build()) {
rows = Lists.newArrayList(reader);
}
for (int i = 0; i < expected.size(); i += 1) {
DataTestHelpers.assertEquals(schema.asStruct(), expected.get(i), rows.get(i));
}
}
11
Source : TestSparkOrcReader.java
with Apache License 2.0
from apache
private void writeAndValidateRecords(Schema schema, Iterable<InternalRow> expected) throws IOException {
final File testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());
try (FileAppender<InternalRow> writer = ORC.write(Files.localOutput(testFile)).createWriterFunc(SparkOrcWriter::new).schema(schema).build()) {
writer.addAll(expected);
}
try (CloseableIterable<InternalRow> reader = ORC.read(Files.localInput(testFile)).project(schema).createReaderFunc(readOrcSchema -> new SparkOrcReader(schema, readOrcSchema)).build()) {
final Iterator<InternalRow> actualRows = reader.iterator();
final Iterator<InternalRow> expectedRows = expected.iterator();
while (expectedRows.hasNext()) {
replacedert.replacedertTrue("Should have expected number of rows", actualRows.hasNext());
replacedertEquals(schema, expectedRows.next(), actualRows.next());
}
replacedert.replacedertFalse("Should not have extra rows", actualRows.hasNext());
}
try (CloseableIterable<ColumnarBatch> reader = ORC.read(Files.localInput(testFile)).project(schema).createBatchedReaderFunc(readOrcSchema -> VectorizedSparkOrcReaders.buildReader(schema, readOrcSchema, ImmutableMap.of())).build()) {
final Iterator<InternalRow> actualRows = batchesToRows(reader.iterator());
final Iterator<InternalRow> expectedRows = expected.iterator();
while (expectedRows.hasNext()) {
replacedert.replacedertTrue("Should have expected number of rows", actualRows.hasNext());
replacedertEquals(schema, expectedRows.next(), actualRows.next());
}
replacedert.replacedertFalse("Should not have extra rows", actualRows.hasNext());
}
}
11
Source : Parquet.java
with Apache License 2.0
from apache
/**
* Combines several files into one
*
* @param inputFiles an {@link Iterable} of parquet files. The order of iteration determines the order in which
* content of files are read and written to the {@code outputFile}
* @param outputFile the output parquet file containing all the data from {@code inputFiles}
* @param rowGroupSize the row group size to use when writing the {@code outputFile}
* @param schema the schema of the data
* @param metadata extraMetadata to write at the footer of the {@code outputFile}
*/
public static void concat(Iterable<File> inputFiles, File outputFile, int rowGroupSize, Schema schema, Map<String, String> metadata) throws IOException {
OutputFile file = Files.localOutput(outputFile);
ParquetFileWriter writer = new ParquetFileWriter(ParquetIO.file(file), ParquetSchemaUtil.convert(schema, "table"), ParquetFileWriter.Mode.CREATE, rowGroupSize, 0);
writer.start();
for (File inputFile : inputFiles) {
writer.appendFile(ParquetIO.file(Files.localInput(inputFile)));
}
writer.end(metadata);
}
10
Source : TestSparkParquetWriter.java
with Apache License 2.0
from apache
@Test
public void testCorrectness() throws IOException {
int numRows = 50_000;
Iterable<InternalRow> records = RandomData.generateSpark(COMPLEX_SCHEMA, numRows, 19981);
File testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());
try (FileAppender<InternalRow> writer = Parquet.write(Files.localOutput(testFile)).schema(COMPLEX_SCHEMA).createWriterFunc(msgType -> SparkParquetWriters.buildWriter(SparkSchemaUtil.convert(COMPLEX_SCHEMA), msgType)).build()) {
writer.addAll(records);
}
try (CloseableIterable<InternalRow> reader = Parquet.read(Files.localInput(testFile)).project(COMPLEX_SCHEMA).createReaderFunc(type -> SparkParquetReaders.buildReader(COMPLEX_SCHEMA, type)).build()) {
Iterator<InternalRow> expected = records.iterator();
Iterator<InternalRow> rows = reader.iterator();
for (int i = 0; i < numRows; i += 1) {
replacedert.replacedertTrue("Should have expected number of rows", rows.hasNext());
TestHelpers.replacedertEquals(COMPLEX_SCHEMA, expected.next(), rows.next());
}
replacedert.replacedertFalse("Should not have extra rows", rows.hasNext());
}
}
10
Source : TestParquetAvroWriter.java
with Apache License 2.0
from apache
@Test
public void testCorrectness() throws IOException {
Iterable<Record> records = RandomData.generate(COMPLEX_SCHEMA, 50_000, 34139);
File testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());
try (FileAppender<Record> writer = Parquet.write(Files.localOutput(testFile)).schema(COMPLEX_SCHEMA).createWriterFunc(ParquetAvroWriter::buildWriter).build()) {
writer.addAll(records);
}
// RandomData uses the root record name "test", which must match for records to be equal
MessageType readSchema = ParquetSchemaUtil.convert(COMPLEX_SCHEMA, "test");
// verify that the new read path is correct
try (CloseableIterable<Record> reader = Parquet.read(Files.localInput(testFile)).project(COMPLEX_SCHEMA).createReaderFunc(fileSchema -> ParquetAvroValueReaders.buildReader(COMPLEX_SCHEMA, readSchema)).build()) {
int recordNum = 0;
Iterator<Record> iter = records.iterator();
for (Record actual : reader) {
Record expected = iter.next();
replacedert.replacedertEquals("Record " + recordNum + " should match expected", expected, actual);
recordNum += 1;
}
}
}
10
Source : TestParquetAvroReader.java
with Apache License 2.0
from apache
@Test
public void testCorrectness() throws IOException {
Iterable<Record> records = RandomData.generate(COMPLEX_SCHEMA, 50_000, 34139);
File testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());
try (FileAppender<Record> writer = Parquet.write(Files.localOutput(testFile)).schema(COMPLEX_SCHEMA).build()) {
writer.addAll(records);
}
// RandomData uses the root record name "test", which must match for records to be equal
MessageType readSchema = ParquetSchemaUtil.convert(COMPLEX_SCHEMA, "test");
// verify that the new read path is correct
try (CloseableIterable<Record> reader = Parquet.read(Files.localInput(testFile)).project(COMPLEX_SCHEMA).createReaderFunc(fileSchema -> ParquetAvroValueReaders.buildReader(COMPLEX_SCHEMA, readSchema)).reuseContainers().build()) {
int recordNum = 0;
Iterator<Record> iter = records.iterator();
for (Record actual : reader) {
Record expected = iter.next();
replacedert.replacedertEquals("Record " + recordNum + " should match expected", expected, actual);
recordNum += 1;
}
}
}
10
Source : TestParquetReadProjection.java
with Apache License 2.0
from apache
@Override
protected GenericData.Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, GenericData.Record record) throws IOException {
File file = temp.newFile(desc + ".parquet");
file.delete();
try (FileAppender<GenericData.Record> appender = Parquet.write(Files.localOutput(file)).schema(writeSchema).build()) {
appender.add(record);
}
Iterable<GenericData.Record> records = Parquet.read(Files.localInput(file)).project(readSchema).callInit().build();
return Iterables.getOnlyElement(records);
}
10
Source : DeleteReadTests.java
with Apache License 2.0
from apache
@Test
public void testEqualityDeletesSpanningMultipleDataFiles() throws IOException {
// Add another DataFile with common values
GenericRecord record = GenericRecord.create(table.schema());
records.add(record.copy("id", 144, "data", "a"));
this.dataFile = FileHelpers.writeDataFile(table, Files.localOutput(temp.newFile()), Row.of(0), records);
table.newAppend().appendFile(dataFile).commit();
Schema deleteRowSchema = table.schema().select("data");
Record dataDelete = GenericRecord.create(deleteRowSchema);
List<Record> dataDeletes = Lists.newArrayList(
    dataDelete.copy("data", "a"), // id = 29, 144
    dataDelete.copy("data", "d"), // id = 89
    dataDelete.copy("data", "g")); // id = 122
DeleteFile eqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), dataDeletes, deleteRowSchema);
table.newRowDelta().addDeletes(eqDeletes).commit();
StructLikeSet expected = rowSetWithoutIds(29, 89, 122, 144);
StructLikeSet actual = rowSet(tableName, table, "*");
replacedert.replacedertEquals("Table should contain expected rows", expected, actual);
}
9
Source : TestSparkRecordOrcReaderWriter.java
with Apache License 2.0
from apache
private void writeAndValidate(Schema schema, List<Record> expectedRecords) throws IOException {
final File originalFile = temp.newFile();
replacedert.replacedertTrue("Delete should succeed", originalFile.delete());
// Write few generic records into the original test file.
try (FileAppender<Record> writer = ORC.write(Files.localOutput(originalFile)).createWriterFunc(GenericOrcWriter::buildWriter).schema(schema).build()) {
writer.addAll(expectedRecords);
}
// Read into spark InternalRow from the original test file.
List<InternalRow> internalRows = Lists.newArrayList();
try (CloseableIterable<InternalRow> reader = ORC.read(Files.localInput(originalFile)).project(schema).createReaderFunc(readOrcSchema -> new SparkOrcReader(schema, readOrcSchema)).build()) {
reader.forEach(internalRows::add);
assertEqualsUnsafe(schema.asStruct(), expectedRecords, reader, expectedRecords.size());
}
final File anotherFile = temp.newFile();
replacedert.replacedertTrue("Delete should succeed", anotherFile.delete());
// Write those spark InternalRows into a new file again.
try (FileAppender<InternalRow> writer = ORC.write(Files.localOutput(anotherFile)).createWriterFunc(SparkOrcWriter::new).schema(schema).build()) {
writer.addAll(internalRows);
}
// Check whether the InternalRows are expected records.
try (CloseableIterable<InternalRow> reader = ORC.read(Files.localInput(anotherFile)).project(schema).createReaderFunc(readOrcSchema -> new SparkOrcReader(schema, readOrcSchema)).build()) {
assertEqualsUnsafe(schema.asStruct(), expectedRecords, reader, expectedRecords.size());
}
// Read into iceberg GenericRecord and check again.
try (CloseableIterable<Record> reader = ORC.read(Files.localInput(anotherFile)).createReaderFunc(typeDesc -> GenericOrcReader.buildReader(schema, typeDesc)).project(schema).build()) {
assertRecordEquals(expectedRecords, reader, expectedRecords.size());
}
}
9
Source : TestOrcWrite.java
with Apache License 2.0
from apache
@Test
public void splitOffsets() throws IOException {
File testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());
Iterable<InternalRow> rows = RandomData.generateSpark(SCHEMA, 1, 0L);
FileAppender<InternalRow> writer = ORC.write(Files.localOutput(testFile)).createWriterFunc(SparkOrcWriter::new).schema(SCHEMA).build();
writer.addAll(rows);
writer.close();
replacedert.replacedertNotNull("Split offsets not present", writer.splitOffsets());
}
9
Source : TestFlinkParquetReader.java
with Apache License 2.0
from apache
private void writeAndValidate(Iterable<Record> iterable, Schema schema) throws IOException {
File testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());
try (FileAppender<Record> writer = Parquet.write(Files.localOutput(testFile)).schema(schema).createWriterFunc(GenericParquetWriter::buildWriter).build()) {
writer.addAll(iterable);
}
try (CloseableIterable<RowData> reader = Parquet.read(Files.localInput(testFile)).project(schema).createReaderFunc(type -> FlinkParquetReaders.buildReader(schema, type)).build()) {
Iterator<Record> expected = iterable.iterator();
Iterator<RowData> rows = reader.iterator();
LogicalType rowType = FlinkSchemaUtil.convert(schema);
for (int i = 0; i < NUM_RECORDS; i += 1) {
replacedert.replacedertTrue("Should have expected number of rows", rows.hasNext());
TestHelpers.replacedertRowData(schema.replacedtruct(), rowType, expected.next(), rows.next());
}
replacedert.replacedertFalse("Should not have extra rows", rows.hasNext());
}
}
9
Source : TestMetricsRowGroupFilterTypes.java
with Apache License 2.0
from apache
public void createParquetInputFile(List<Record> records) throws IOException {
if (PARQUET_FILE.exists()) {
Assert.assertTrue(PARQUET_FILE.delete());
}
OutputFile outFile = Files.localOutput(PARQUET_FILE);
try (FileAppender<Record> appender = Parquet.write(outFile).schema(FILE_SCHEMA).createWriterFunc(GenericParquetWriter::buildWriter).build()) {
appender.addAll(records);
}
InputFile inFile = Files.localInput(PARQUET_FILE);
try (ParquetFileReader reader = ParquetFileReader.open(parquetInputFile(inFile))) {
replacedert.replacedertEquals("Should create only one row group", 1, reader.getRowGroups().size());
rowGroupMetadata = reader.getRowGroups().get(0);
parquetSchema = reader.getFileMetaData().getSchema();
}
PARQUET_FILE.deleteOnExit();
}
9
Source : TestGenericReadProjection.java
with Apache License 2.0
from apache
@Override
protected Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, Record record) throws IOException {
File file = temp.newFile(desc + ".parquet");
file.delete();
try (FileAppender<Record> appender = Parquet.write(Files.localOutput(file)).schema(writeSchema).createWriterFunc(GenericParquetWriter::buildWriter).build()) {
appender.add(record);
}
Iterable<Record> records = Parquet.read(Files.localInput(file)).project(readSchema).createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(readSchema, fileSchema)).build();
return Iterables.getOnlyElement(records);
}
9
Source : TestGenericReadProjection.java
with Apache License 2.0
from apache
@Override
protected Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, Record record) throws IOException {
File file = temp.newFile(desc + ".orc");
file.delete();
try (FileAppender<Record> appender = ORC.write(Files.localOutput(file)).schema(writeSchema).createWriterFunc(GenericOrcWriter::buildWriter).build()) {
appender.add(record);
}
Iterable<Record> records = ORC.read(Files.localInput(file)).project(readSchema).createReaderFunc(fileSchema -> GenericOrcReader.buildReader(readSchema, fileSchema)).build();
return Iterables.getOnlyElement(records);
}
9
Source : DeleteReadTests.java
with Apache License 2.0
from apache
@Test
public void testEqualityDeleteByNull() throws IOException {
// data is required in the test table; make it optional for this test
table.updateSchema().makeColumnOptional("data").commit();
// add a new data file with a record where data is null
Record record = GenericRecord.create(table.schema());
DataFile dataFileWithNull = FileHelpers.writeDataFile(table, Files.localOutput(temp.newFile()), Row.of(0), Lists.newArrayList(record.copy("id", 131, "data", null)));
table.newAppend().appendFile(dataFileWithNull).commit();
// delete where data is null
Schema dataSchema = table.schema().select("data");
Record dataDelete = GenericRecord.create(dataSchema);
List<Record> dataDeletes = Lists.newArrayList(
    dataDelete.copy("data", null)); // id = 131
DeleteFile eqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), dataDeletes, dataSchema);
table.newRowDelta().addDeletes(eqDeletes).commit();
StructLikeSet expected = rowSetWithoutIds(131);
StructLikeSet actual = rowSet(tableName, table, "*");
replacedert.replacedertEquals("Table should contain expected rows", expected, actual);
}
9
Source : DeleteReadTests.java
with Apache License 2.0
from apache
@Test
public void testPositionDeletes() throws IOException {
List<Pair<CharSequence, Long>> deletes = Lists.newArrayList(
    Pair.of(dataFile.path(), 0L), // id = 29
    Pair.of(dataFile.path(), 3L), // id = 89
    Pair.of(dataFile.path(), 6L)); // id = 122
Pair<DeleteFile, Set<CharSequence>> posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), deletes);
table.newRowDelta().addDeletes(posDeletes.first()).validateDataFilesExist(posDeletes.second()).commit();
StructLikeSet expected = rowSetWithoutIds(29, 89, 122);
StructLikeSet actual = rowSet(tableName, table, "*");
replacedert.replacedertEquals("Table should contain expected rows", expected, actual);
}
9
Source : TestGenericReadProjection.java
with Apache License 2.0
from apache
@Override
protected Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, Record record) throws IOException {
File file = temp.newFile(desc + ".avro");
file.delete();
try (FileAppender<Record> appender = Avro.write(Files.localOutput(file)).schema(writeSchema).createWriterFunc(DataWriter::create).build()) {
appender.add(record);
}
Iterable<Record> records = Avro.read(Files.localInput(file)).project(readSchema).createReaderFunc(DataReader::create).build();
return Iterables.getOnlyElement(records);
}
8
Source : TestRowProjection.java
with Apache License 2.0
from apache
private RowData writeAndRead(String desc, Schema writeSchema, Schema readSchema, RowData row) throws IOException {
File file = temp.newFile(desc + ".avro");
Assert.assertTrue(file.delete());
try (FileAppender<RowData> appender = Avro.write(Files.localOutput(file)).schema(writeSchema).createWriterFunc(ignore -> new FlinkAvroWriter(FlinkSchemaUtil.convert(writeSchema))).build()) {
appender.add(row);
}
Iterable<RowData> records = Avro.read(Files.localInput(file)).project(readSchema).createReaderFunc(FlinkAvroReader::new).build();
return Iterables.getOnlyElement(records);
}
8
Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache
@Test
public void testEqualityDeletePlanningStats() throws IOException {
table.newAppend().appendFile(dataFile).commit();
List<Record> deletes = Lists.newArrayList();
Schema deleteRowSchema = SCHEMA.select("data");
Record delete = GenericRecord.create(deleteRowSchema);
deletes.add(delete.copy("data", "d"));
DeleteFile posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes, deleteRowSchema);
table.newRowDelta().addDeletes(posDeletes).commit();
List<FileScanTask> tasks;
try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
tasks = Lists.newArrayList(tasksIterable);
}
replacedert.replacedertEquals("Should produce one task", 1, tasks.size());
FileScanTask task = tasks.get(0);
replacedert.replacedertEquals("Should have one delete file, data contains a matching value", 1, task.deletes().size());
}
8
Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache
@Test
public void testEqualityDeletePlanningStatsSomeNullValuesWithSomeNullDeletes() throws IOException {
// note that there are some nulls in the data column
table.newAppend().appendFile(dataFile).commit();
List<Record> deletes = Lists.newArrayList();
Schema deleteRowSchema = SCHEMA.select("data");
Record delete = GenericRecord.create(deleteRowSchema);
// the data and delete ranges do not overlap, but both contain null
deletes.add(delete.copy("data", null));
deletes.add(delete.copy("data", "x"));
DeleteFile posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes, deleteRowSchema);
table.newRowDelta().addDeletes(posDeletes).commit();
List<FileScanTask> tasks;
try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
tasks = Lists.newArrayList(tasksIterable);
}
replacedert.replacedertEquals("Should produce one task", 1, tasks.size());
FileScanTask task = tasks.get(0);
replacedert.replacedertEquals("Should have one delete file, data and deletes have null values", 1, task.deletes().size());
}
8
Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache
@Test
public void testEqualityDeletePlanningStatsAllNullValuesWithNoNullDeletes() throws IOException {
// note that there are only nulls in the data column
table.newAppend().appendFile(dataFileOnlyNulls).commit();
List<Record> deletes = Lists.newArrayList();
Schema deleteRowSchema = SCHEMA.select("data");
Record delete = GenericRecord.create(deleteRowSchema);
deletes.add(delete.copy("data", "d"));
DeleteFile posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes, deleteRowSchema);
table.newRowDelta().addDeletes(posDeletes).commit();
List<FileScanTask> tasks;
try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
tasks = Lists.newArrayList(tasksIterable);
}
replacedert.replacedertEquals("Should produce one task", 1, tasks.size());
FileScanTask task = tasks.get(0);
replacedert.replacedertEquals("Should have no delete files, data contains no null values", 0, task.deletes().size());
}
8
Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache
@Test
public void testEqualityDeletePlanningStatsNullValueWithAllNullDeletes() throws IOException {
table.newAppend().appendFile(dataFile).commit();
List<Record> deletes = Lists.newArrayList();
Schema deleteRowSchema = SCHEMA.select("data");
Record delete = GenericRecord.create(deleteRowSchema);
deletes.add(delete.copy("data", null));
DeleteFile posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes, deleteRowSchema);
table.newRowDelta().addDeletes(posDeletes).commit();
List<FileScanTask> tasks;
try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
tasks = Lists.newArrayList(tasksIterable);
}
replacedert.replacedertEquals("Should produce one task", 1, tasks.size());
FileScanTask task = tasks.get(0);
replacedert.replacedertEquals("Should have delete file, data contains a null value", 1, task.deletes().size());
}
8
Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache
@Test
public void testEqualityDeletePlanningStatsNoNullValuesWithAllNullDeletes() throws IOException {
// note that there are no nulls in the data column
table.newAppend().appendFile(dataFileWithoutNulls).commit();
List<Record> deletes = Lists.newArrayList();
Schema deleteRowSchema = SCHEMA.select("data");
Record delete = GenericRecord.create(deleteRowSchema);
deletes.add(delete.copy("data", null));
DeleteFile posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes, deleteRowSchema);
table.newRowDelta().addDeletes(posDeletes).commit();
List<FileScanTask> tasks;
try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
tasks = Lists.newArrayList(tasksIterable);
}
replacedert.replacedertEquals("Should produce one task", 1, tasks.size());
FileScanTask task = tasks.get(0);
replacedert.replacedertEquals("Should have no delete files, data contains no null values", 0, task.deletes().size());
}
8
Source : TestGenericData.java
with Apache License 2.0
from apache
private void writeAndValidateRecords(Schema schema, List<Record> expected) throws IOException {
File testFile = temp.newFile();
Assert.assertTrue("Delete should succeed", testFile.delete());
try (FileAppender<Record> writer = ORC.write(Files.localOutput(testFile)).schema(schema).createWriterFunc(GenericOrcWriter::buildWriter).build()) {
for (Record rec : expected) {
writer.add(rec);
}
}
List<Record> rows;
try (CloseableIterable<Record> reader = ORC.read(Files.localInput(testFile)).project(schema).createReaderFunc(fileSchema -> GenericOrcReader.buildReader(schema, fileSchema)).build()) {
rows = Lists.newArrayList(reader);
}
for (int i = 0; i < expected.size(); i += 1) {
DataTestHelpers.assertEquals(schema.asStruct(), expected.get(i), rows.get(i));
}
}
7
Source : TestMetricsRowGroupFilterTypes.java
with Apache License 2.0
from apache
public void createOrcInputFile(List<Record> records) throws IOException {
if (ORC_FILE.exists()) {
Assert.assertTrue(ORC_FILE.delete());
}
OutputFile outFile = Files.localOutput(ORC_FILE);
try (FileAppender<Record> appender = ORC.write(outFile).schema(FILE_SCHEMA).createWriterFunc(GenericOrcWriter::buildWriter).build()) {
appender.addAll(records);
}
InputFile inFile = Files.localInput(ORC_FILE);
try (Reader reader = OrcFile.createReader(new Path(inFile.location()), OrcFile.readerOptions(new Configuration()))) {
replacedert.replacedertEquals("Should create only one stripe", 1, reader.getStripes().size());
}
ORC_FILE.deleteOnExit();
}
7
Source : TestMetricsRowGroupFilter.java
with Apache License 2.0
from apache
public void createOrcInputFile() throws IOException {
this.orcFile = temp.newFile();
Assert.assertTrue(orcFile.delete());
OutputFile outFile = Files.localOutput(orcFile);
try (FileAppender<GenericRecord> appender = ORC.write(outFile).schema(FILE_SCHEMA).createWriterFunc(GenericOrcWriter::buildWriter).build()) {
GenericRecord record = GenericRecord.create(FILE_SCHEMA);
// create 50 records
for (int i = 0; i < INT_MAX_VALUE - INT_MIN_VALUE + 1; i += 1) {
// min=30, max=79, num-nulls=0
record.setField("_id", INT_MIN_VALUE + i);
// value longer than 4k will produce no stats in Parquet, but will produce stats for ORC
record.setField("_no_stats_parquet", TOO_LONG_FOR_STATS_PARQUET);
// required, always non-null
record.setField("_required", "req");
// never non-null
record.setField("_all_nulls", null);
// includes some null values
record.setField("_some_nulls", (i % 10 == 0) ? null : "some");
// optional, but always non-null
record.setField("_no_nulls", "");
record.setField("_str", i + "str" + i);
// never non-nan
record.setField("_all_nans", Double.NaN);
// includes some nan values
record.setField("_some_nans", (i % 10 == 0) ? Float.NaN : 2F);
// optional, but always non-nan
record.setField("_no_nans", 3D);
GenericRecord structNotNull = GenericRecord.create(_structFieldType);
structNotNull.setField("_int_field", INT_MIN_VALUE + i);
// struct with int
record.setField("_struct_not_null", structNotNull);
appender.add(record);
}
}
InputFile inFile = Files.localInput(orcFile);
try (Reader reader = OrcFile.createReader(new Path(inFile.location()), OrcFile.readerOptions(new Configuration()))) {
replacedert.replacedertEquals("Should create only one stripe", 1, reader.getStripes().size());
}
orcFile.deleteOnExit();
}
7
Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache
@Test
public void testEqualityDeletePlanningStatsFilter() throws IOException {
table.newAppend().appendFile(dataFile).commit();
List<Record> deletes = Lists.newArrayList();
Schema deleteRowSchema = table.schema().select("data");
Record delete = GenericRecord.create(deleteRowSchema);
deletes.add(delete.copy("data", "x"));
deletes.add(delete.copy("data", "y"));
deletes.add(delete.copy("data", "z"));
DeleteFile posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes, deleteRowSchema);
table.newRowDelta().addDeletes(posDeletes).commit();
List<FileScanTask> tasks;
try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
tasks = Lists.newArrayList(tasksIterable);
}
replacedert.replacedertEquals("Should produce one task", 1, tasks.size());
FileScanTask task = tasks.get(0);
replacedert.replacedertEquals("Should not have delete file, filtered by data column stats", 0, task.deletes().size());
}
7
Source : DeleteReadTests.java
with Apache License 2.0
from apache
@Test
public void testMixedPositionAndEqualityDeletes() throws IOException {
Schema dataSchema = table.schema().select("data");
Record dataDelete = GenericRecord.create(dataSchema);
List<Record> dataDeletes = Lists.newArrayList(
    dataDelete.copy("data", "a"), // id = 29
    dataDelete.copy("data", "d"), // id = 89
    dataDelete.copy("data", "g")); // id = 122
DeleteFile eqDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), dataDeletes, dataSchema);
List<Pair<CharSequence, Long>> deletes = Lists.newArrayList(
    Pair.of(dataFile.path(), 3L), // id = 89
    Pair.of(dataFile.path(), 5L)); // id = 121
Pair<DeleteFile, Set<CharSequence>> posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), Row.of(0), deletes);
table.newRowDelta().addDeletes(eqDeletes).addDeletes(posDeletes.first()).validateDataFilesExist(posDeletes.second()).commit();
StructLikeSet expected = rowSetWithoutIds(29, 89, 121, 122);
StructLikeSet actual = rowSet(tableName, table, "*");
replacedert.replacedertEquals("Table should contain expected rows", expected, actual);
}
5
Source : TestDataFileIndexStatsFilters.java
with Apache License 2.0
from apache
@Test
public void testPositionDeletePlanningPathFilter() throws IOException {
table.newAppend().appendFile(dataFile).commit();
List<Pair<CharSequence, Long>> deletes = Lists.newArrayList();
deletes.add(Pair.of("some-other-file.parquet", 0L));
deletes.add(Pair.of("some-other-file.parquet", 1L));
Pair<DeleteFile, Set<CharSequence>> posDeletes = FileHelpers.writeDeleteFile(table, Files.localOutput(temp.newFile()), deletes);
table.newRowDelta().addDeletes(posDeletes.first()).validateDataFilesExist(posDeletes.second()).commit();
List<FileScanTask> tasks;
try (CloseableIterable<FileScanTask> tasksIterable = table.newScan().planFiles()) {
tasks = Lists.newArrayList(tasksIterable);
}
replacedert.replacedertEquals("Should produce one task", 1, tasks.size());
FileScanTask task = tasks.get(0);
replacedert.replacedertEquals("Should not have delete file, filtered by file_path stats", 0, task.deletes().size());
}