Here are examples of the Java API org.apache.iceberg.io.OutputFile taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
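For orientation before the examples: OutputFile is Iceberg's handle for a writable file location, with four core methods (create, createOrOverwrite, location, toInputFile). Below is a minimal hedged sketch using the local-filesystem factory org.apache.iceberg.Files; the path and bytes are illustrative placeholders.

import java.io.File;
import java.io.IOException;
import org.apache.iceberg.Files;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.io.PositionOutputStream;

static void outputFileBasics() throws IOException {
    // FileIO implementations (e.g. a table's ops().io()) provide newOutputFile(path)
    // for object stores; Files.localOutput covers the local-filesystem case.
    OutputFile out = Files.localOutput(new File("/tmp/example.bin")); // illustrative path
    // create() fails if the file already exists; createOrOverwrite() replaces it.
    try (PositionOutputStream stream = out.createOrOverwrite()) {
        stream.write(new byte[] { 1, 2, 3 });
    }
    String location = out.location();       // where the bytes were written
    InputFile readBack = out.toInputFile(); // readable view of the same location
}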
77 Examples
19
Source : HiveTableOperations.java
with Apache License 2.0
from trinodb
private String writeNewMetadata(TableMetadata metadata, int newVersion) {
String newTableMetadataFilePath = newTableMetadataFilePath(metadata, newVersion);
OutputFile newMetadataLocation = fileIo.newOutputFile(newTableMetadataFilePath);
// write the new metadata
TableMetadataParser.write(metadata, newMetadataLocation);
return newTableMetadataFilePath;
}
19
Source : TestManifestFileSerialization.java
with Apache License 2.0
from apache
private ManifestFile writeManifest(DataFile... files) throws IOException {
File manifestFile = temp.newFile("input.m0.avro");
Assert.assertTrue(manifestFile.delete());
OutputFile outputFile = FILE_IO.newOutputFile(manifestFile.getCanonicalPath());
ManifestWriter writer = ManifestFiles.write(SPEC, outputFile);
try {
for (DataFile file : files) {
writer.add(file);
}
} finally {
writer.close();
}
return writer.toManifestFile();
}
19
Source : Parquet.java
with Apache License 2.0
from apache
public static DeleteWriteBuilder writeDeletes(OutputFile file) {
return new DeleteWriteBuilder(file);
}
19
Source : Parquet.java
with Apache License 2.0
from apache
public static WriteBuilder write(OutputFile file) {
return new WriteBuilder(file);
}
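The WriteBuilder returned above produces a FileAppender, as the FileHelpers and metrics-test entries later in this list show. A condensed hedged sketch combining those pieces (schema and rows are placeholders):

static Metrics writeParquet(Schema schema, OutputFile out, List<Record> rows) throws IOException {
    FileAppender<Record> appender = Parquet.write(out)
        .schema(schema)
        .createWriterFunc(GenericParquetWriter::buildWriter)
        .overwrite()
        .build();
    try (FileAppender<Record> closing = appender) {
        closing.addAll(rows);
    }
    // metrics() and length() are meaningful once the appender is closed.
    return appender.metrics();
}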
19
Source : TestParquetMetrics.java
with Apache License 2.0
from apache
@Override
protected Metrics getMetricsForRecordsWithSmallRowGroups(Schema schema, OutputFile outputFile, Record... records) throws IOException {
return getMetrics(schema, outputFile, SMALL_ROW_GROUP_CONFIG, MetricsConfig.getDefault(), records);
}
19
Source : TestOrcMetrics.java
with Apache License 2.0
from apache
@Override
protected Metrics getMetricsForRecordsWithSmallRowGroups(Schema schema, OutputFile outputFile, Record... records) {
throw new UnsupportedOperationException("supportsSmallRowGroups = " + supportsSmallRowGroups());
}
19
Source : FileHelpers.java
with Apache License 2.0
from apache
public static Pair<DeleteFile, Set<CharSequence>> writeDeleteFile(Table table, OutputFile out, List<Pair<CharSequence, Long>> deletes) throws IOException {
return writeDeleteFile(table, out, null, deletes);
}
19
Source : FileHelpers.java
with Apache License 2.0
from apache
public static DeleteFile writeDeleteFile(Table table, OutputFile out, StructLike partition, List<Record> deletes, Schema deleteRowSchema) throws IOException {
EqualityDeleteWriter<Record> writer = Parquet.writeDeletes(out)
    .forTable(table)
    .withPartition(partition)
    .rowSchema(deleteRowSchema)
    .createWriterFunc(GenericParquetWriter::buildWriter)
    .overwrite()
    .equalityFieldIds(deleteRowSchema.columns().stream().mapToInt(Types.NestedField::fieldId).toArray())
    .buildEqualityWriter();
try (Closeable toClose = writer) {
writer.deleteAll(deletes);
}
return writer.toDeleteFile();
}
19
Source : FileHelpers.java
with Apache License 2.0
from apache
public static DeleteFile writeDeleteFile(Table table, OutputFile out, List<Record> deletes, Schema deleteRowSchema) throws IOException {
return writeDeleteFile(table, out, null, deletes, deleteRowSchema);
}
19
Source : TestManifestWriterVersions.java
with Apache License 2.0
from apache
private ManifestFile writeManifest(DataFile file, int formatVersion) throws IOException {
OutputFile manifestFile = Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString()));
ManifestWriter<DataFile> writer = ManifestFiles.write(formatVersion, SPEC, manifestFile, SNAPSHOT_ID);
try {
writer.add(file);
} finally {
writer.close();
}
return writer.toManifestFile();
}
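A recurring pattern in these tests is building the destination path with FileFormat.addExtension before wrapping it in an OutputFile. A hedged sketch of just that piece (the directory is illustrative):

// addExtension appends ".avro" only if the path does not already end with it.
String path = FileFormat.AVRO.addExtension("/tmp/manifests/m0"); // -> "/tmp/manifests/m0.avro"
OutputFile manifestFile = Files.localOutput(path);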
19
Source : TestManifestWriterVersions.java
with Apache License 2.0
from apache
private ManifestFile writeDeleteManifest(int formatVersion) throws IOException {
OutputFile manifestFile = Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString()));
ManifestWriter<DeleteFile> writer = ManifestFiles.writeDeleteManifest(formatVersion, SPEC, manifestFile, SNAPSHOT_ID);
try {
writer.add(DELETE_FILE);
} finally {
writer.close();
}
return writer.toManifestFile();
}
19
Source : TestManifestWriterVersions.java
with Apache License 2.0
from apache
private ManifestFile rewriteManifest(ManifestFile manifest, int formatVersion) throws IOException {
OutputFile manifestFile = Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString()));
ManifestWriter<DataFile> writer = ManifestFiles.write(formatVersion, SPEC, manifestFile, SNAPSHOT_ID);
try {
writer.existing(readManifest(manifest));
} finally {
writer.close();
}
return writer.toManifestFile();
}
19
Source : TableTestBase.java
with Apache License 2.0
from apache
ManifestFile writeDeleteManifest(int newFormatVersion, Long snapshotId, DeleteFile... deleteFiles) throws IOException {
OutputFile manifestFile = org.apache.iceberg.Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString()));
ManifestWriter<DeleteFile> writer = ManifestFiles.writeDeleteManifest(newFormatVersion, SPEC, manifestFile, snapshotId);
try {
for (DeleteFile deleteFile : deleteFiles) {
writer.add(deleteFile);
}
} finally {
writer.close();
}
return writer.toManifestFile();
}
19
Source : TableTestBase.java
with Apache License 2.0
from apache
ManifestFile writeManifestWithName(String name, DataFile... files) throws IOException {
File manifestFile = temp.newFile(name + ".avro");
Assert.assertTrue(manifestFile.delete());
OutputFile outputFile = table.ops().io().newOutputFile(manifestFile.getCanonicalPath());
ManifestWriter<DataFile> writer = ManifestFiles.write(formatVersion, table.spec(), outputFile, null);
try {
for (DataFile file : files) {
writer.add(file);
}
} finally {
writer.close();
}
return writer.toManifestFile();
}
19
Source : TableTestBase.java
with Apache License 2.0
from apache
ManifestFile writeManifest(Long snapshotId, DataFile... files) throws IOException {
File manifestFile = temp.newFile("input.m0.avro");
Assert.assertTrue(manifestFile.delete());
OutputFile outputFile = table.ops().io().newOutputFile(manifestFile.getCanonicalPath());
ManifestWriter<DataFile> writer = ManifestFiles.write(formatVersion, table.spec(), outputFile, snapshotId);
try {
for (DataFile file : files) {
writer.add(file);
}
} finally {
writer.close();
}
return writer.toManifestFile();
}
19
Source : TableMetadataParser.java
with Apache License 2.0
from apache
public static void write(TableMetadata metadata, OutputFile outputFile) {
internalWrite(metadata, outputFile, false);
}
19
Source : TableMetadataParser.java
with Apache License 2.0
from apache
public static void overwrite(TableMetadata metadata, OutputFile outputFile) {
internalWrite(metadata, outputFile, true);
}
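The two entry points above differ only in the boolean passed to internalWrite; presumably write maps to OutputFile.create() (failing if the destination already exists) while overwrite maps to createOrOverwrite(). A hedged sketch of choosing between them (persistMetadata is a hypothetical helper name):

static void persistMetadata(TableMetadata metadata, FileIO io, String path, boolean replace) {
    OutputFile outputFile = io.newOutputFile(path);
    if (replace) {
        TableMetadataParser.overwrite(metadata, outputFile); // replaces an existing file
    } else {
        TableMetadataParser.write(metadata, outputFile);     // expected to fail if the file exists
    }
}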
19
Source : MergingSnapshotProducer.java
with Apache License 2.0
from apache
private ManifestFile copyManifest(ManifestFile manifest) {
TableMetadata current = ops.current();
InputFile toCopy = ops.io().newInputFile(manifest.path());
OutputFile newManifestPath = newManifestOutput();
return ManifestFiles.copyAppendManifest(current.formatVersion(), toCopy, current.specsById(), newManifestPath, snapshotId(), appendedManifestsSummary);
}
19
Source : ManifestWriter.java
with Apache License 2.0
from apache
/**
* Writer for manifest files.
*
* @param <F> Java class of files written to the manifest, either {@link DataFile} or {@link DeleteFile}.
*/
public abstract class ManifestWriter<F extends ContentFile<F>> implements FileAppender<F> {
// stand-in for the current sequence number that will be assigned when the commit is successful
// this is replaced when writing a manifest list by the ManifestFile wrapper
static final long UNASSIGNED_SEQ = -1L;
private final OutputFile file;
private final int specId;
private final FileAppender<ManifestEntry<F>> writer;
private final Long snapshotId;
private final GenericManifestEntry<F> reused;
private final PartitionSummary stats;
private boolean closed = false;
private int addedFiles = 0;
private long addedRows = 0L;
private int existingFiles = 0;
private long existingRows = 0L;
private int deletedFiles = 0;
private long deletedRows = 0L;
private Long minSequenceNumber = null;
private ManifestWriter(PartitionSpec spec, OutputFile file, Long snapshotId) {
this.file = file;
this.specId = spec.specId();
this.writer = newAppender(spec, file);
this.snapshotId = snapshotId;
this.reused = new GenericManifestEntry<>(spec.partitionType());
this.stats = new PartitionSummary(spec);
}
protected abstract ManifestEntry<F> prepare(ManifestEntry<F> entry);
protected abstract FileAppender<ManifestEntry<F>> newAppender(PartitionSpec spec, OutputFile outputFile);
protected ManifestContent content() {
return ManifestContent.DATA;
}
void addEntry(ManifestEntry<F> entry) {
switch(entry.status()) {
case ADDED:
addedFiles += 1;
addedRows += entry.file().recordCount();
break;
case EXISTING:
existingFiles += 1;
existingRows += entry.file().recordCount();
break;
case DELETED:
deletedFiles += 1;
deletedRows += entry.file().recordCount();
break;
}
stats.update(entry.file().partition());
if (entry.sequenceNumber() != null && (minSequenceNumber == null || entry.sequenceNumber() < minSequenceNumber)) {
this.minSequenceNumber = entry.sequenceNumber();
}
writer.add(prepare(entry));
}
/**
* Add an added entry for a file.
* <p>
* The entry's snapshot ID will be this manifest's snapshot ID.
*
* @param addedFile a data file
*/
@Override
public void add(F addedFile) {
addEntry(reused.wrapAppend(snapshotId, addedFile));
}
void add(ManifestEntry<F> entry) {
addEntry(reused.wrapAppend(snapshotId, entry.file()));
}
/**
* Add an existing entry for a file.
*
* @param existingFile a file
* @param fileSnapshotId snapshot ID when the data file was added to the table
* @param sequenceNumber sequence number for the data file
*/
public void existing(F existingFile, long fileSnapshotId, long sequenceNumber) {
addEntry(reused.wrapExisting(fileSnapshotId, sequenceNumber, existingFile));
}
void existing(ManifestEntry<F> entry) {
addEntry(reused.wrapExisting(entry.snapshotId(), entry.sequenceNumber(), entry.file()));
}
/**
* Add a delete entry for a file.
* <p>
* The entry's snapshot ID will be this manifest's snapshot ID.
*
* @param deletedFile a file
*/
public void delete(F deletedFile) {
addEntry(reused.wrapDelete(snapshotId, deletedFile));
}
void delete(ManifestEntry<F> entry) {
// Use the current Snapshot ID for the delete. It is safe to delete the data file from disk
// when this Snapshot has been removed or when there are no Snapshots older than this one.
addEntry(reused.wrapDelete(snapshotId, entry.file()));
}
@Override
public Metrics metrics() {
return writer.metrics();
}
@Override
public long length() {
return writer.length();
}
public ManifestFile toManifestFile() {
Preconditions.checkState(closed, "Cannot build ManifestFile, writer is not closed");
// if the minSequenceNumber is null, then no manifests with a sequence number have been written, so the min
// sequence number is the one that will be assigned when this is committed. pass UNASSIGNED_SEQ to inherit it.
long minSeqNumber = minSequenceNumber != null ? minSequenceNumber : UNASSIGNED_SEQ;
return new GenericManifestFile(file.location(), writer.length(), specId, content(), UNASSIGNED_SEQ, minSeqNumber,
    snapshotId, addedFiles, addedRows, existingFiles, existingRows, deletedFiles, deletedRows, stats.summaries());
}
@Override
public void close() throws IOException {
this.closed = true;
writer.close();
}
static class V2Writer extends ManifestWriter<DataFile> {
private final V2Metadata.IndexedManifestEntry<DataFile> entryWrapper;
V2Writer(PartitionSpec spec, OutputFile file, Long snapshotId) {
super(spec, file, snapshotId);
this.entryWrapper = new V2Metadata.IndexedManifestEntry<>(snapshotId, spec.partitionType());
}
@Override
protected ManifestEntry<DataFile> prepare(ManifestEntry<DataFile> entry) {
return entryWrapper.wrap(entry);
}
@Override
protected FileAppender<ManifestEntry<DataFile>> newAppender(PartitionSpec spec, OutputFile file) {
Schema manifestSchema = V2Metadata.entrySchema(spec.partitionType());
try {
return Avro.write(file)
    .schema(manifestSchema)
    .named("manifest_entry")
    .meta("schema", SchemaParser.toJson(spec.schema()))
    .meta("partition-spec", PartitionSpecParser.toJsonFields(spec))
    .meta("partition-spec-id", String.valueOf(spec.specId()))
    .meta("format-version", "2")
    .meta("content", "data")
    .overwrite()
    .build();
} catch (IOException e) {
throw new RuntimeIOException(e, "Failed to create manifest writer for path: %s", file);
}
}
}
static class V2DeleteWriter extends ManifestWriter<DeleteFile> {
private final V2Metadata.IndexedManifestEntry<DeleteFile> entryWrapper;
V2DeleteWriter(PartitionSpec spec, OutputFile file, Long snapshotId) {
super(spec, file, snapshotId);
this.entryWrapper = new V2Metadata.IndexedManifestEntry<>(snapshotId, spec.partitionType());
}
@Override
protected ManifestEntry<DeleteFile> prepare(ManifestEntry<DeleteFile> entry) {
return entryWrapper.wrap(entry);
}
@Override
protected FileAppender<ManifestEntry<DeleteFile>> newAppender(PartitionSpec spec, OutputFile file) {
Schema manifestSchema = V2Metadata.entrySchema(spec.partitionType());
try {
return Avro.write(file)
    .schema(manifestSchema)
    .named("manifest_entry")
    .meta("schema", SchemaParser.toJson(spec.schema()))
    .meta("partition-spec", PartitionSpecParser.toJsonFields(spec))
    .meta("partition-spec-id", String.valueOf(spec.specId()))
    .meta("format-version", "2")
    .meta("content", "deletes")
    .overwrite()
    .build();
} catch (IOException e) {
throw new RuntimeIOException(e, "Failed to create manifest writer for path: %s", file);
}
}
@Override
protected ManifestContent content() {
return ManifestContent.DELETES;
}
}
static class V1Writer extends ManifestWriter<DataFile> {
private final V1Metadata.IndexedManifestEntry entryWrapper;
V1Writer(PartitionSpec spec, OutputFile file, Long snapshotId) {
super(spec, file, snapshotId);
this.entryWrapper = new V1Metadata.IndexedManifestEntry(spec.partitionType());
}
@Override
protected ManifestEntry<DataFile> prepare(ManifestEntry<DataFile> entry) {
return entryWrapper.wrap(entry);
}
@Override
protected FileAppender<ManifestEntry<DataFile>> newAppender(PartitionSpec spec, OutputFile file) {
Schema manifestSchema = V1Metadata.entrySchema(spec.partitionType());
try {
return Avro.write(file)
    .schema(manifestSchema)
    .named("manifest_entry")
    .meta("schema", SchemaParser.toJson(spec.schema()))
    .meta("partition-spec", PartitionSpecParser.toJsonFields(spec))
    .meta("partition-spec-id", String.valueOf(spec.specId()))
    .meta("format-version", "1")
    .overwrite()
    .build();
} catch (IOException e) {
throw new RuntimeIOException(e, "Failed to create manifest writer for path: %s", file);
}
}
}
}
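Tying the writer above to the ManifestFiles factories below, the life cycle is: create, add entries, close, then call toManifestFile() (which checks that the writer is closed). A hedged sketch condensed from the test examples in this list:

static ManifestFile writeSingleEntryManifest(PartitionSpec spec, OutputFile out, DataFile dataFile) throws IOException {
    // Passing null for snapshotId lets entries inherit the ID assigned on commit.
    ManifestWriter<DataFile> writer = ManifestFiles.write(2, spec, out, null);
    try {
        writer.add(dataFile); // recorded with status ADDED
    } finally {
        writer.close();       // toManifestFile() requires a closed writer
    }
    return writer.toManifestFile();
}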
19
Source : ManifestFiles.java
with Apache License 2.0
from apache
/**
* Create a new {@link ManifestWriter} for the given format version.
*
* @param formatVersion a target format version
* @param spec a {@link PartitionSpec}
* @param outputFile an {@link OutputFile} where the manifest will be written
* @param snapshotId a snapshot ID for the manifest entries, or null for an inherited ID
* @return a manifest writer
*/
public static ManifestWriter<DataFile> write(int formatVersion, PartitionSpec spec, OutputFile outputFile, Long snapshotId) {
switch(formatVersion) {
case 1:
return new ManifestWriter.V1Writer(spec, outputFile, snapshotId);
case 2:
return new ManifestWriter.V2Writer(spec, outputFile, snapshotId);
}
throw new UnsupportedOperationException("Cannot write manifest for table version: " + formatVersion);
}
19
Source : ManifestFiles.java
with Apache License 2.0
from apache
/**
* Create a new {@link ManifestWriter}.
* <p>
* Manifests created by this writer have all entry snapshot IDs set to null.
* All entries will inherit the snapshot ID that will be assigned to the manifest on commit.
*
* @param spec {@link PartitionSpec} used to produce {@link DataFile} partition tuples
* @param outputFile the destination file location
* @return a manifest writer
*/
public static ManifestWriter<DataFile> write(PartitionSpec spec, OutputFile outputFile) {
return write(1, spec, outputFile, null);
}
19
Source : ManifestFiles.java
with Apache License 2.0
from apache
/**
* Create a new {@link ManifestWriter} for the given format version.
*
* @param formatVersion a target format version
* @param spec a {@link PartitionSpec}
* @param outputFile an {@link OutputFile} where the manifest will be written
* @param snapshotId a snapshot ID for the manifest entries, or null for an inherited ID
* @return a manifest writer
*/
public static ManifestWriter<DeleteFile> writeDeleteManifest(int formatVersion, PartitionSpec spec, OutputFile outputFile, Long snapshotId) {
switch(formatVersion) {
case 1:
throw new IllegalArgumentException("Cannot write delete files in a v1 table");
case 2:
return new ManifestWriter.V2DeleteWriter(spec, outputFile, snapshotId);
}
throw new UnsupportedOperationException("Cannot write manifest for table version: " + formatVersion);
}
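Delete manifests are a format-version-2 feature, so the formatVersion argument must be 2. A hedged sketch mirroring the TestManifestWriterVersions example earlier in this list:

static ManifestFile writeDeleteManifestV2(PartitionSpec spec, OutputFile out, DeleteFile deleteFile, long snapshotId) throws IOException {
    // Passing 1 here throws IllegalArgumentException: v1 tables cannot store delete files.
    ManifestWriter<DeleteFile> writer = ManifestFiles.writeDeleteManifest(2, spec, out, snapshotId);
    try {
        writer.add(deleteFile);
    } finally {
        writer.close();
    }
    return writer.toManifestFile();
}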
19
Source : FastAppend.java
with Apache License 2.0
from apache
private ManifestFile copyManifest(ManifestFile manifest) {
TableMetadata current = ops.current();
InputFile toCopy = ops.io().newInputFile(manifest.path());
OutputFile newManifestPath = newManifestOutput();
return ManifestFiles.copyAppendManifest(current.formatVersion(), toCopy, current.specsById(), newManifestPath, snapshotId(), summaryBuilder);
}
19
Source : EncryptedFiles.java
with Apache License 2.0
from apache
public static EncryptedOutputFile encryptedOutput(OutputFile encryptingOutputFile, ByteBuffer keyMetadata) {
return encryptedOutput(encryptingOutputFile, BaseEncryptionKeyMetadata.fromKeyMetadata(keyMetadata));
}
19
Source : EncryptedFiles.java
with Apache License 2.0
from apache
public static EncryptedOutputFile encryptedOutput(OutputFile encryptingOutputFile, EncryptionKeyMetadata keyMetadata) {
return new BaseEncryptedOutputFile(encryptingOutputFile, keyMetadata);
}
19
Source : EncryptedFiles.java
with Apache License 2.0
from apache
public static EncryptedOutputFile encryptedOutput(OutputFile encryptedOutputFile, byte[] keyMetadata) {
return encryptedOutput(encryptedOutputFile, BaseEncryptionKeyMetadata.fromByteArray(keyMetadata));
}
19
Source : BaseEncryptedOutputFile.java
with Apache License 2.0
from apache
class BaseEncryptedOutputFile implements EncryptedOutputFile {
private final OutputFile encryptingOutputFile;
private final EncryptionKeyMetadata keyMetadata;
BaseEncryptedOutputFile(OutputFile encryptingOutputFile, EncryptionKeyMetadata keyMetadata) {
this.encryptingOutputFile = encryptingOutputFile;
this.keyMetadata = keyMetadata;
}
@Override
public OutputFile encryptingOutputFile() {
return encryptingOutputFile;
}
@Override
public EncryptionKeyMetadata keyMetadata() {
return keyMetadata;
}
}
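The wrapper above simply pairs an OutputFile with its key metadata. A hedged sketch of building one through the EncryptedFiles factories shown earlier; rawOutputFile is an assumed in-scope OutputFile, and the key bytes are an illustrative placeholder for metadata produced by a real key manager:

byte[] keyMetadataBytes = new byte[] { 0x01, 0x02 };                  // placeholder key metadata
EncryptedOutputFile encrypted = EncryptedFiles.encryptedOutput(rawOutputFile, keyMetadataBytes);
OutputFile toWrite = encrypted.encryptingOutputFile();                // write through this handle
EncryptionKeyMetadata keyMetadata = encrypted.keyMetadata();          // persist alongside the file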
19
Source : BaseRewriteManifests.java
with Apache License 2.0
from apache
private ManifestFile copyManifest(ManifestFile manifest) {
TableMetadata current = ops.current();
InputFile toCopy = ops.io().newInputFile(manifest.path());
OutputFile newFile = newManifestOutput();
return ManifestFiles.copyRewriteManifest(current.formatVersion(), toCopy, specsById, newFile, snapshotId(), summaryBuilder);
}
18
Source : RewriteManifestsAction.java
with Apache License 2.0
from apache
private static ManifestFile writeManifest(List<Row> rows, int startIndex, int endIndex, Broadcast<FileIO> io, String location, int format, PartitionSpec spec, StructType sparkType) throws IOException {
String manifestName = "optimized-m-" + UUID.randomUUID();
Path manifestPath = new Path(location, manifestName);
OutputFile outputFile = io.value().newOutputFile(FileFormat.AVRO.addExtension(manifestPath.toString()));
Types.StructType dataFileType = DataFile.getType(spec.partitionType());
SparkDataFile wrapper = new SparkDataFile(dataFileType, sparkType);
ManifestWriter writer = ManifestFiles.write(format, spec, outputFile, null);
try {
for (int index = startIndex; index < endIndex; index++) {
Row row = rows.get(index);
long snapshotId = row.getLong(0);
long sequenceNumber = row.getLong(1);
Row file = row.getStruct(2);
writer.existing(wrapper.wrap(file), snapshotId, sequenceNumber);
}
} finally {
writer.close();
}
return writer.toManifestFile();
}
18
Source : TestParquetDeleteWriters.java
with Apache License 2.0
from apache
@Test
public void testPositionDeleteWriter() throws IOException {
File deleteFile = temp.newFile();
Schema deleteSchema = new Schema(MetadataColumns.DELETE_FILE_PATH, MetadataColumns.DELETE_FILE_POS, NestedField.optional(MetadataColumns.DELETE_FILE_ROW_FIELD_ID, "row", SCHEMA.asStruct()));
String deletePath = "s3://bucket/path/file.parquet";
GenericRecord posDelete = GenericRecord.create(deleteSchema);
List<Record> expectedDeleteRecords = Lists.newArrayList();
OutputFile out = Files.localOutput(deleteFile);
PositionDeleteWriter<Record> deleteWriter = Parquet.writeDeletes(out)
    .createWriterFunc(GenericParquetWriter::buildWriter)
    .overwrite()
    .rowSchema(SCHEMA)
    .withSpec(PartitionSpec.unpartitioned())
    .buildPositionWriter();
try (PositionDeleteWriter<Record> writer = deleteWriter) {
for (int i = 0; i < records.size(); i += 1) {
int pos = i * 3 + 2;
writer.delete(deletePath, pos, records.get(i));
expectedDeleteRecords.add(posDelete.copy(ImmutableMap.of("file_path", deletePath, "pos", (long) pos, "row", records.get(i))));
}
}
DeleteFile metadata = deleteWriter.toDeleteFile();
replacedert.replacedertEquals("Format should be Parquet", FileFormat.PARQUET, metadata.format());
replacedert.replacedertEquals("Should be position deletes", FileContent.POSITION_DELETES, metadata.content());
replacedert.replacedertEquals("Record count should be correct", records.size(), metadata.recordCount());
replacedert.replacedertEquals("Parreplacedion should be empty", 0, metadata.parreplacedion().size());
replacedert.replacedertNull("Key metadata should be null", metadata.keyMetadata());
List<Record> deletedRecords;
try (CloseableIterable<Record> reader = Parquet.read(out.toInputFile())
    .project(deleteSchema)
    .createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(deleteSchema, fileSchema))
    .build()) {
deletedRecords = Lists.newArrayList(reader);
}
replacedert.replacedertEquals("Deleted records should match expected", expectedDeleteRecords, deletedRecords);
}
18
Source : TestParquetDeleteWriters.java
with Apache License 2.0
from apache
@Test
public void testPositionDeleteWriterWithEmptyRow() throws IOException {
File deleteFile = temp.newFile();
Schema deleteSchema = new Schema(MetadataColumns.DELETE_FILE_PATH, MetadataColumns.DELETE_FILE_POS);
String deletePath = "s3://bucket/path/file.parquet";
GenericRecord posDelete = GenericRecord.create(deleteSchema);
List<Record> expectedDeleteRecords = Lists.newArrayList();
OutputFile out = Files.localOutput(deleteFile);
PositionDeleteWriter<Void> deleteWriter = Parquet.writeDeletes(out)
    .createWriterFunc(GenericParquetWriter::buildWriter)
    .overwrite()
    .withSpec(PartitionSpec.unpartitioned())
    .buildPositionWriter();
try (PositionDeleteWriter<Void> writer = deleteWriter) {
for (int i = 0; i < records.size(); i += 1) {
int pos = i * 3 + 2;
writer.delete(deletePath, pos, null);
expectedDeleteRecords.add(posDelete.copy(ImmutableMap.of("file_path", deletePath, "pos", (long) pos)));
}
}
DeleteFile metadata = deleteWriter.toDeleteFile();
replacedert.replacedertEquals("Format should be Parquet", FileFormat.PARQUET, metadata.format());
replacedert.replacedertEquals("Should be position deletes", FileContent.POSITION_DELETES, metadata.content());
replacedert.replacedertEquals("Record count should be correct", records.size(), metadata.recordCount());
replacedert.replacedertEquals("Parreplacedion should be empty", 0, metadata.parreplacedion().size());
replacedert.replacedertNull("Key metadata should be null", metadata.keyMetadata());
List<Record> deletedRecords;
try (CloseableIterable<Record> reader = Parquet.read(out.toInputFile())
    .project(deleteSchema)
    .createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(deleteSchema, fileSchema))
    .build()) {
deletedRecords = Lists.newArrayList(reader);
}
replacedert.replacedertEquals("Deleted records should match expected", expectedDeleteRecords, deletedRecords);
}
18
Source : TestParquetDeleteWriters.java
with Apache License 2.0
from apache
@Test
public void testEqualityDeleteWriter() throws IOException {
File deleteFile = temp.newFile();
OutputFile out = Files.localOutput(deleteFile);
EqualityDeleteWriter<Record> deleteWriter = Parquet.writeDeletes(out)
    .createWriterFunc(GenericParquetWriter::buildWriter)
    .overwrite()
    .rowSchema(SCHEMA)
    .withSpec(PartitionSpec.unpartitioned())
    .equalityFieldIds(1)
    .buildEqualityWriter();
try (EqualityDeleteWriter<Record> writer = deleteWriter) {
writer.deleteAll(records);
}
DeleteFile metadata = deleteWriter.toDeleteFile();
replacedert.replacedertEquals("Format should be Parquet", FileFormat.PARQUET, metadata.format());
replacedert.replacedertEquals("Should be equality deletes", FileContent.EQUALITY_DELETES, metadata.content());
replacedert.replacedertEquals("Record count should be correct", records.size(), metadata.recordCount());
replacedert.replacedertEquals("Parreplacedion should be empty", 0, metadata.parreplacedion().size());
replacedert.replacedertNull("Key metadata should be null", metadata.keyMetadata());
List<Record> deletedRecords;
try (CloseableIterable<Record> reader = Parquet.read(out.toInputFile())
    .project(SCHEMA)
    .createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(SCHEMA, fileSchema))
    .build()) {
deletedRecords = Lists.newArrayList(reader);
}
replacedert.replacedertEquals("Deleted records should match expected", records, deletedRecords);
}
18
Source : OrcFileAppender.java
with Apache License 2.0
from apache
private static Writer newOrcWriter(OutputFile file, OrcFile.WriterOptions options, Map<String, byte[]> metadata) {
final Path locPath = new Path(file.location());
final Writer writer;
try {
writer = OrcFile.createWriter(locPath, options);
} catch (IOException ioe) {
throw new RuntimeIOException(ioe, "Can't create file %s", locPath);
}
metadata.forEach((key, value) -> writer.addUserMetadata(key, ByteBuffer.wrap(value)));
return writer;
}
18
Source : HiveIcebergOutputCommitter.java
with Apache License 2.0
from apache
private static void createFileForCommit(DataFile[] closedFiles, String location, FileIO io) throws IOException {
OutputFile fileForCommit = io.newOutputFile(location);
try (ObjectOutputStream oos = new ObjectOutputStream(fileForCommit.createOrOverwrite())) {
oos.writeObject(closedFiles);
}
LOG.debug("Iceberg committed file is created {}", fileForCommit);
}
18
Source : TestParquetMetrics.java
with Apache License 2.0
from apache
private Metrics getMetrics(Schema schema, OutputFile file, Map<String, String> properties, MetricsConfig metricsConfig, Record... records) throws IOException {
FileAppender<Record> writer = Parquet.write(file)
    .schema(schema)
    .setAll(properties)
    .createWriterFunc(GenericParquetWriter::buildWriter)
    .metricsConfig(metricsConfig)
    .build();
try (FileAppender<Record> appender = writer) {
appender.addAll(Lists.newArrayList(records));
}
return writer.metrics();
}
18
Source : TestOrcMetrics.java
with Apache License 2.0
from apache
private Metrics getMetrics(Schema schema, OutputFile file, Map<String, String> properties, MetricsConfig metricsConfig, Record... records) throws IOException {
FileAppender<Record> writer = ORC.write(file)
    .schema(schema)
    .setAll(properties)
    .createWriterFunc(GenericOrcWriter::buildWriter)
    .metricsConfig(metricsConfig)
    .build();
try (FileAppender<Record> appender = writer) {
appender.addAll(Lists.newArrayList(records));
}
return writer.metrics();
}
18
Source : FileHelpers.java
with Apache License 2.0
from apache
public static DataFile writeDataFile(Table table, OutputFile out, StructLike partition, List<Record> rows) throws IOException {
FileAppender<Record> writer = Parquet.write(out)
    .createWriterFunc(GenericParquetWriter::buildWriter)
    .schema(table.schema())
    .overwrite()
    .build();
try (Closeable toClose = writer) {
writer.addAll(rows);
}
return DataFiles.builder(table.spec())
    .withFormat(FileFormat.PARQUET)
    .withPath(out.location())
    .withPartition(partition)
    .withFileSizeInBytes(writer.length())
    .withSplitOffsets(writer.splitOffsets())
    .withMetrics(writer.metrics())
    .build();
}
18
Source : FileHelpers.java
with Apache License 2.0
from apache
public static DataFile writeDataFile(Table table, OutputFile out, List<Record> rows) throws IOException {
FileAppender<Record> writer = Parquet.write(out)
    .createWriterFunc(GenericParquetWriter::buildWriter)
    .schema(table.schema())
    .overwrite()
    .build();
try (Closeable toClose = writer) {
writer.addAll(rows);
}
return DataFiles.builder(table.spec())
    .withFormat(FileFormat.PARQUET)
    .withPath(out.location())
    .withFileSizeInBytes(writer.length())
    .withSplitOffsets(writer.splitOffsets())
    .withMetrics(writer.metrics())
    .build();
}
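FileHelpers is a test utility, but the DataFile it returns is committed like any other. A hedged sketch of appending one to a table (writeAndCommit is a hypothetical helper; table, out, and rows are placeholders):

static void writeAndCommit(Table table, OutputFile out, List<Record> rows) throws IOException {
    DataFile dataFile = FileHelpers.writeDataFile(table, out, rows);
    table.newAppend()
        .appendFile(dataFile)
        .commit(); // creates a new snapshot that includes the file
}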
18
Source : TestManifestWriterVersions.java
with Apache License 2.0
from apache
private InputFile writeManifestList(ManifestFile manifest, int formatVersion) throws IOException {
OutputFile manifestList = Files.localOutput(temp.newFile());
try (FileAppender<ManifestFile> writer = ManifestLists.write(formatVersion, manifestList, SNAPSHOT_ID, SNAPSHOT_ID - 1, formatVersion > 1 ? SEQUENCE_NUMBER : 0)) {
writer.add(manifest);
}
return manifestList.toInputFile();
}
18
Source : TestManifestListVersions.java
with Apache License 2.0
from apache
private InputFile writeManifestList(ManifestFile manifest, int formatVersion) throws IOException {
OutputFile manifestList = Files.localOutput(temp.newFile());
try (FileAppender<ManifestFile> writer = ManifestLists.write(formatVersion, manifestList, SNAPSHOT_ID, SNAPSHOT_ID - 1, formatVersion > 1 ? SEQ_NUM : 0)) {
writer.add(manifest);
}
return manifestList.toInputFile();
}
18
Source : TableTestBase.java
with Apache License 2.0
from apache
@SuppressWarnings("unchecked")
<F extends ContentFile<F>> ManifestFile writeManifest(Long snapshotId, String fileName, ManifestEntry<?>... entries) throws IOException {
File manifestFile = temp.newFile(fileName);
Assert.assertTrue(manifestFile.delete());
OutputFile outputFile = table.ops().io().newOutputFile(manifestFile.getCanonicalPath());
ManifestWriter<F> writer;
if (entries[0].file() instanceof DataFile) {
writer = (ManifestWriter<F>) ManifestFiles.write(formatVersion, table.spec(), outputFile, snapshotId);
} else {
writer = (ManifestWriter<F>) ManifestFiles.writeDeleteManifest(formatVersion, table.spec(), outputFile, snapshotId);
}
try {
for (ManifestEntry<?> entry : entries) {
writer.addEntry((ManifestEntry<F>) entry);
}
} finally {
writer.close();
}
return writer.toManifestFile();
}
18
Source : TestAvroDeleteWriters.java
with Apache License 2.0
from apache
@Test
public void testPositionDeleteWriterWithEmptyRow() throws IOException {
File deleteFile = temp.newFile();
Schema deleteSchema = new Schema(MetadataColumns.DELETE_FILE_PATH, MetadataColumns.DELETE_FILE_POS);
String deletePath = "s3://bucket/path/file.parquet";
GenericRecord posDelete = GenericRecord.create(deleteSchema);
List<Record> expectedDeleteRecords = Lists.newArrayList();
OutputFile out = Files.localOutput(deleteFile);
PositionDeleteWriter<Void> deleteWriter = Avro.writeDeletes(out)
    .createWriterFunc(DataWriter::create)
    .overwrite()
    .withSpec(PartitionSpec.unpartitioned())
    .buildPositionWriter();
try (PositionDeleteWriter<Void> writer = deleteWriter) {
for (int i = 0; i < records.size(); i += 1) {
int pos = i * 3 + 2;
writer.delete(deletePath, pos, null);
expectedDeleteRecords.add(posDelete.copy(ImmutableMap.of("file_path", deletePath, "pos", (long) pos)));
}
}
DeleteFile metadata = deleteWriter.toDeleteFile();
replacedert.replacedertEquals("Format should be Avro", FileFormat.AVRO, metadata.format());
replacedert.replacedertEquals("Should be position deletes", FileContent.POSITION_DELETES, metadata.content());
replacedert.replacedertEquals("Record count should be correct", records.size(), metadata.recordCount());
replacedert.replacedertEquals("Parreplacedion should be empty", 0, metadata.parreplacedion().size());
replacedert.replacedertNull("Key metadata should be null", metadata.keyMetadata());
List<Record> deletedRecords;
try (AvroIterable<Record> reader = Avro.read(out.toInputFile()).project(deleteSchema).createReaderFunc(DataReader::create).build()) {
deletedRecords = Lists.newArrayList(reader);
}
replacedert.replacedertEquals("Deleted records should match expected", expectedDeleteRecords, deletedRecords);
}
18
Source : TestAvroDeleteWriters.java
with Apache License 2.0
from apache
@Test
public void testPositionDeleteWriter() throws IOException {
File deleteFile = temp.newFile();
Schema deleteSchema = new Schema(MetadataColumns.DELETE_FILE_PATH, MetadataColumns.DELETE_FILE_POS, NestedField.optional(MetadataColumns.DELETE_FILE_ROW_FIELD_ID, "row", SCHEMA.asStruct()));
String deletePath = "s3://bucket/path/file.parquet";
GenericRecord posDelete = GenericRecord.create(deleteSchema);
List<Record> expectedDeleteRecords = Lists.newArrayList();
OutputFile out = Files.localOutput(deleteFile);
PositionDeleteWriter<Record> deleteWriter = Avro.writeDeletes(out)
    .createWriterFunc(DataWriter::create)
    .overwrite()
    .rowSchema(SCHEMA)
    .withSpec(PartitionSpec.unpartitioned())
    .buildPositionWriter();
try (PositionDeleteWriter<Record> writer = deleteWriter) {
for (int i = 0; i < records.size(); i += 1) {
int pos = i * 3 + 2;
writer.delete(deletePath, pos, records.get(i));
expectedDeleteRecords.add(posDelete.copy(ImmutableMap.of("file_path", deletePath, "pos", (long) pos, "row", records.get(i))));
}
}
DeleteFile metadata = deleteWriter.toDeleteFile();
replacedert.replacedertEquals("Format should be Avro", FileFormat.AVRO, metadata.format());
replacedert.replacedertEquals("Should be position deletes", FileContent.POSITION_DELETES, metadata.content());
replacedert.replacedertEquals("Record count should be correct", records.size(), metadata.recordCount());
replacedert.replacedertEquals("Parreplacedion should be empty", 0, metadata.parreplacedion().size());
replacedert.replacedertNull("Key metadata should be null", metadata.keyMetadata());
List<Record> deletedRecords;
try (AvroIterable<Record> reader = Avro.read(out.toInputFile()).project(deleteSchema).createReaderFunc(DataReader::create).build()) {
deletedRecords = Lists.newArrayList(reader);
}
replacedert.replacedertEquals("Deleted records should match expected", expectedDeleteRecords, deletedRecords);
}
18
Source : TestAvroDeleteWriters.java
with Apache License 2.0
from apache
@Test
public void testEqualityDeleteWriter() throws IOException {
File deleteFile = temp.newFile();
OutputFile out = Files.localOutput(deleteFile);
EqualityDeleteWriter<Record> deleteWriter = Avro.writeDeletes(out)
    .createWriterFunc(DataWriter::create)
    .overwrite()
    .rowSchema(SCHEMA)
    .withSpec(PartitionSpec.unpartitioned())
    .equalityFieldIds(1)
    .buildEqualityWriter();
try (EqualityDeleteWriter<Record> writer = deleteWriter) {
writer.deleteAll(records);
}
DeleteFile metadata = deleteWriter.toDeleteFile();
replacedert.replacedertEquals("Format should be Avro", FileFormat.AVRO, metadata.format());
replacedert.replacedertEquals("Should be equality deletes", FileContent.EQUALITY_DELETES, metadata.content());
replacedert.replacedertEquals("Record count should be correct", records.size(), metadata.recordCount());
replacedert.replacedertEquals("Parreplacedion should be empty", 0, metadata.parreplacedion().size());
replacedert.replacedertNull("Key metadata should be null", metadata.keyMetadata());
List<Record> deletedRecords;
try (AvroIterable<Record> reader = Avro.read(out.toInputFile()).project(SCHEMA).createReaderFunc(DataReader::create).build()) {
deletedRecords = Lists.newArrayList(reader);
}
replacedert.replacedertEquals("Deleted records should match expected", records, deletedRecords);
}
18
Source : SnapshotProducer.java
with Apache License 2.0
from apache
@Override
public Snapshot apply() {
this.base = refresh();
Long parentSnapshotId = base.currentSnapshot() != null ? base.currentSnapshot().snapshotId() : null;
long sequenceNumber = base.nextSequenceNumber();
// run validations from the child operation
validate(base);
List<ManifestFile> manifests = apply(base);
if (base.formatVersion() > 1 || base.propertyAsBoolean(MANIFEST_LISTS_ENABLED, MANIFEST_LISTS_ENABLED_DEFAULT)) {
OutputFile manifestList = manifestListPath();
try (ManifestListWriter writer = ManifestLists.write(ops.current().formatVersion(), manifestList, snapshotId(), parentSnapshotId, sequenceNumber)) {
// keep track of the manifest lists created
manifestLists.add(manifestList.location());
ManifestFile[] manifestFiles = new ManifestFile[manifests.size()];
Tasks.range(manifestFiles.length)
    .stopOnFailure()
    .throwFailureWhenFinished()
    .executeWith(ThreadPools.getWorkerPool())
    .run(index -> manifestFiles[index] = manifestsWithMetadata.get(manifests.get(index)));
writer.addAll(Arrays.asList(manifestFiles));
} catch (IOException e) {
throw new RuntimeIOException(e, "Failed to write manifest list file");
}
return new BaseSnapshot(ops.io(), sequenceNumber, snapshotId(), parentSnapshotId, System.currentTimeMillis(), operation(), summary(base), manifestList.location());
} else {
return new BaseSnapshot(ops.io(), snapshotId(), parentSnapshotId, System.currentTimeMillis(), operation(), summary(base), manifests);
}
}
18
Source : ManifestLists.java
with Apache License 2.0
from apache
static ManifestListWriter write(int formatVersion, OutputFile manifestListFile, long snapshotId, Long parentSnapshotId, long sequenceNumber) {
switch(formatVersion) {
case 1:
Preconditions.checkArgument(sequenceNumber == TableMetadata.INITIAL_SEQUENCE_NUMBER, "Invalid sequence number for v1 manifest list: %s", sequenceNumber);
return new ManifestListWriter.V1Writer(manifestListFile, snapshotId, parentSnapshotId);
case 2:
return new ManifestListWriter.V2Writer(manifestListFile, snapshotId, parentSnapshotId, sequenceNumber);
}
throw new UnsupportedOperationException("Cannot write manifest list for table version: " + formatVersion);
}
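The returned ManifestListWriter is a FileAppender<ManifestFile>, so it composes with try-with-resources as in the test examples earlier in this list. A condensed hedged sketch (note that ManifestLists is package-private, so this only compiles inside org.apache.iceberg; the sequence number is illustrative):

static InputFile writeManifestListV2(OutputFile manifestList, ManifestFile manifest, long snapshotId) throws IOException {
    long parentSnapshotId = snapshotId - 1; // illustrative parent
    try (FileAppender<ManifestFile> writer =
            ManifestLists.write(2, manifestList, snapshotId, parentSnapshotId, 1L)) {
        writer.add(manifest);
    }
    return manifestList.toInputFile();
}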
18
Source : ManifestFiles.java
with Apache License 2.0
from apache
private static ManifestFile copyManifestInternal(int formatVersion, ManifestReader<DataFile> reader, OutputFile outputFile, long snapshotId, SnapshotSummary.Builder summaryBuilder, ManifestEntry.Status allowedEntryStatus) {
ManifestWriter<DataFile> writer = write(formatVersion, reader.spec(), outputFile, snapshotId);
boolean threw = true;
try {
for (ManifestEntry<DataFile> entry : reader.entries()) {
Preconditions.checkArgument(allowedEntryStatus == entry.status(), "Invalid manifest entry status: %s (allowed status: %s)", entry.status(), allowedEntryStatus);
switch(entry.status()) {
case ADDED:
summaryBuilder.addedFile(reader.spec(), entry.file());
writer.add(entry);
break;
case EXISTING:
writer.existing(entry);
break;
case DELETED:
summaryBuilder.deletedFile(reader.spec(), entry.file());
writer.delete(entry);
break;
}
}
threw = false;
} finally {
try {
writer.close();
} catch (IOException e) {
if (!threw) {
throw new RuntimeIOException(e, "Failed to close manifest: %s", outputFile);
}
}
}
return writer.toManifestFile();
}
18
Source : ManifestFiles.java
with Apache License 2.0
from apache
static ManifestFile copyRewriteManifest(int formatVersion, InputFile toCopy, Map<Integer, PartitionSpec> specsById, OutputFile outputFile, long snapshotId, SnapshotSummary.Builder summaryBuilder) {
// for a rewritten manifest all snapshot ids should be set. use empty metadata to throw an exception if it is not
InheritableMetadata inheritableMetadata = InheritableMetadataFactory.empty();
try (ManifestReader<DataFile> reader = new ManifestReader<>(toCopy, specsById, inheritableMetadata, FileType.DATA_FILES)) {
return copyManifestInternal(formatVersion, reader, outputFile, snapshotId, summaryBuilder, ManifestEntry.Status.EXISTING);
} catch (IOException e) {
throw new RuntimeIOException(e, "Failed to close manifest: %s", toCopy.location());
}
}
18
Source : ManifestFiles.java
with Apache License 2.0
from apache
static ManifestFile copyAppendManifest(int formatVersion, InputFile toCopy, Map<Integer, PartitionSpec> specsById, OutputFile outputFile, long snapshotId, SnapshotSummary.Builder summaryBuilder) {
// use metadata that will add the current snapshot's ID for the rewrite
InheritableMetadata inheritableMetadata = InheritableMetadataFactory.forCopy(snapshotId);
try (ManifestReader<DataFile> reader = new ManifestReader<>(toCopy, specsById, inheritableMetadata, FileType.DATA_FILES)) {
return copyManifestInternal(formatVersion, reader, outputFile, snapshotId, summaryBuilder, ManifestEntry.Status.ADDED);
} catch (IOException e) {
throw new RuntimeIOException(e, "Failed to close manifest: %s", toCopy.location());
}
}
18
Source : PlaintextEncryptionManager.java
with Apache License 2.0
from apache
@Override
public EncryptedOutputFile encrypt(OutputFile rawOutput) {
return EncryptedFiles.encryptedOutput(rawOutput, (ByteBuffer) null);
}
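As the name suggests, this manager performs no encryption: the raw OutputFile is wrapped with null key metadata. A hedged sketch of the round trip, assuming a public no-argument constructor in this version of the library (rawOutput is a placeholder):

EncryptionManager manager = new PlaintextEncryptionManager();
EncryptedOutputFile encrypted = manager.encrypt(rawOutput);
// For plaintext tables the "encrypting" file is effectively the raw file itself.
OutputFile target = encrypted.encryptingOutputFile();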
See More Examples