org.apache.iceberg.io.OutputFile

Here are examples of the Java API org.apache.iceberg.io.OutputFile taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
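
Before the examples, a quick orientation: an OutputFile names a write location and opens a PositionOutputStream through create(), which fails if the file already exists, or createOrOverwrite(). Below is a minimal sketch assuming a writable local path; the path and payload are hypothetical.

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import org.apache.iceberg.Files;
import org.apache.iceberg.io.OutputFile;
import org.apache.iceberg.io.PositionOutputStream;

// Hypothetical sketch: write a few bytes through an OutputFile.
static void writeExample() throws IOException {
    OutputFile out = Files.localOutput(new File("/tmp/example.bin"));
    try (PositionOutputStream stream = out.createOrOverwrite()) {
        stream.write("hello".getBytes(StandardCharsets.UTF_8));
    }
    String location = out.location(); // where the bytes were written
}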

77 Examples

19 Source : HiveTableOperations.java
with Apache License 2.0
from trinodb

private String writeNewMetadata(TableMetadata metadata, int newVersion) {
    String newTableMetadataFilePath = newTableMetadataFilePath(metadata, newVersion);
    OutputFile newMetadataLocation = fileIo.newOutputFile(newTableMetadataFilePath);
    // write the new metadata
    TableMetadataParser.write(metadata, newMetadataLocation);
    return newTableMetadataFilePath;
}

19 Source : TestManifestFileSerialization.java
with Apache License 2.0
from apache

private ManifestFile writeManifest(DataFile... files) throws IOException {
    File manifestFile = temp.newFile("input.m0.avro");
    Assert.assertTrue(manifestFile.delete());
    OutputFile outputFile = FILE_IO.newOutputFile(manifestFile.getCanonicalPath());
    ManifestWriter writer = ManifestFiles.write(SPEC, outputFile);
    try {
        for (DataFile file : files) {
            writer.add(file);
        }
    } finally {
        writer.close();
    }
    return writer.toManifestFile();
}

19 Source : Parquet.java
with Apache License 2.0
from apache

public static DeleteWriteBuilder writeDeletes(OutputFile file) {
    return new DeleteWriteBuilder(file);
}

19 Source : Parquet.java
with Apache License 2.0
from apache

public static WriteBuilder write(OutputFile file) {
    return new WriteBuilder(file);
}
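
Parquet.write is the entry point for the FileAppender builder used by several tests below. A hedged usage sketch; SCHEMA is a placeholder assumed to be in scope, and the method name is hypothetical:

// Sketch: append a single record and return the file's metrics.
static Metrics appendOneRecord(OutputFile outputFile, Record record) throws IOException {
    FileAppender<Record> appender = Parquet.write(outputFile)
        .schema(SCHEMA)
        .createWriterFunc(GenericParquetWriter::buildWriter)
        .overwrite()
        .build();
    try (FileAppender<Record> closing = appender) {
        closing.add(record);
    }
    return appender.metrics(); // metrics are complete once the appender is closed
}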

19 Source : TestParquetMetrics.java
with Apache License 2.0
from apache

@Override
protected Metrics getMetricsForRecordsWithSmallRowGroups(Schema schema, OutputFile outputFile, Record... records) throws IOException {
    return getMetrics(schema, outputFile, SMALL_ROW_GROUP_CONFIG, MetricsConfig.getDefault(), records);
}

19 Source : TestOrcMetrics.java
with Apache License 2.0
from apache

@Override
protected Metrics getMetricsForRecordsWithSmallRowGroups(Schema schema, OutputFile outputFile, Record... records) {
    throw new UnsupportedOperationException("supportsSmallRowGroups = " + supportsSmallRowGroups());
}

19 Source : FileHelpers.java
with Apache License 2.0
from apache

public static Pair<DeleteFile, Set<CharSequence>> writeDeleteFile(Table table, OutputFile out, List<Pair<CharSequence, Long>> deletes) throws IOException {
    return writeDeleteFile(table, out, null, deletes);
}

19 Source : FileHelpers.java
with Apache License 2.0
from apache

public static DeleteFile writeDeleteFile(Table table, OutputFile out, StructLike partition, List<Record> deletes, Schema deleteRowSchema) throws IOException {
    EqualityDeleteWriter<Record> writer = Parquet.writeDeletes(out)
        .forTable(table)
        .withPartition(partition)
        .rowSchema(deleteRowSchema)
        .createWriterFunc(GenericParquetWriter::buildWriter)
        .overwrite()
        .equalityFieldIds(deleteRowSchema.columns().stream().mapToInt(Types.NestedField::fieldId).toArray())
        .buildEqualityWriter();
    try (Closeable toClose = writer) {
        writer.deleteAll(deletes);
    }
    return writer.toDeleteFile();
}

19 Source : FileHelpers.java
with Apache License 2.0
from apache

public static DeleteFile writeDeleteFile(Table table, OutputFile out, List<Record> deletes, Schema deleteRowSchema) throws IOException {
    return writeDeleteFile(table, out, null, deletes, deleteRowSchema);
}

19 Source : TestManifestWriterVersions.java
with Apache License 2.0
from apache

private ManifestFile writeManifest(DataFile file, int formatVersion) throws IOException {
    OutputFile manifestFile = Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString()));
    ManifestWriter<DataFile> writer = ManifestFiles.write(formatVersion, SPEC, manifestFile, SNAPSHOT_ID);
    try {
        writer.add(file);
    } finally {
        writer.close();
    }
    return writer.toManifestFile();
}

19 Source : TestManifestWriterVersions.java
with Apache License 2.0
from apache

private ManifestFile writeDeleteManifest(int formatVersion) throws IOException {
    OutputFile manifestFile = Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString()));
    ManifestWriter<DeleteFile> writer = ManifestFiles.writeDeleteManifest(formatVersion, SPEC, manifestFile, SNAPSHOT_ID);
    try {
        writer.add(DELETE_FILE);
    } finally {
        writer.close();
    }
    return writer.toManifestFile();
}

19 Source : TestManifestWriterVersions.java
with Apache License 2.0
from apache

private ManifestFile rewriteManifest(ManifestFile manifest, int formatVersion) throws IOException {
    OutputFile manifestFile = Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString()));
    ManifestWriter<DataFile> writer = ManifestFiles.write(formatVersion, SPEC, manifestFile, SNAPSHOT_ID);
    try {
        writer.existing(readManifest(manifest));
    } finally {
        writer.close();
    }
    return writer.toManifestFile();
}

19 Source : TableTestBase.java
with Apache License 2.0
from apache

ManifestFile writeDeleteManifest(int newFormatVersion, Long snapshotId, DeleteFile... deleteFiles) throws IOException {
    OutputFile manifestFile = org.apache.iceberg.Files.localOutput(FileFormat.AVRO.addExtension(temp.newFile().toString()));
    ManifestWriter<DeleteFile> writer = ManifestFiles.writeDeleteManifest(newFormatVersion, SPEC, manifestFile, snapshotId);
    try {
        for (DeleteFile deleteFile : deleteFiles) {
            writer.add(deleteFile);
        }
    } finally {
        writer.close();
    }
    return writer.toManifestFile();
}

19 Source : TableTestBase.java
with Apache License 2.0
from apache

ManifestFile writeManifestWithName(String name, DataFile... files) throws IOException {
    File manifestFile = temp.newFile(name + ".avro");
    Assert.assertTrue(manifestFile.delete());
    OutputFile outputFile = table.ops().io().newOutputFile(manifestFile.getCanonicalPath());
    ManifestWriter<DataFile> writer = ManifestFiles.write(formatVersion, table.spec(), outputFile, null);
    try {
        for (DataFile file : files) {
            writer.add(file);
        }
    } finally {
        writer.close();
    }
    return writer.toManifestFile();
}

19 Source : TableTestBase.java
with Apache License 2.0
from apache

ManifestFile writeManifest(Long snapshotId, DataFile... files) throws IOException {
    File manifestFile = temp.newFile("input.m0.avro");
    Assert.assertTrue(manifestFile.delete());
    OutputFile outputFile = table.ops().io().newOutputFile(manifestFile.getCanonicalPath());
    ManifestWriter<DataFile> writer = ManifestFiles.write(formatVersion, table.spec(), outputFile, snapshotId);
    try {
        for (DataFile file : files) {
            writer.add(file);
        }
    } finally {
        writer.close();
    }
    return writer.toManifestFile();
}

19 Source : TableMetadataParser.java
with Apache License 2.0
from apache

public static void write(TableMetadata metadata, OutputFile outputFile) {
    internalWrite(metadata, outputFile, false);
}

19 Source : TableMetadataParser.java
with Apache License 2.0
from apache

public static void overwrite(TableMetadata metadata, OutputFile outputFile) {
    internalWrite(metadata, outputFile, true);
}
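
The two entry points differ only in the overwrite flag passed to internalWrite: write fails if the destination already exists, while overwrite replaces it. A hedged sketch of the commit-side call, mirroring the Trino example at the top (method name and parameters are hypothetical):

// Sketch: persist new table metadata without clobbering an existing file.
static String writeNewMetadataSketch(FileIO fileIo, TableMetadata metadata, String path) {
    OutputFile newMetadataLocation = fileIo.newOutputFile(path);
    TableMetadataParser.write(metadata, newMetadataLocation); // fails if the file exists
    // TableMetadataParser.overwrite(metadata, newMetadataLocation) would replace it
    return path;
}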

19 Source : MergingSnapshotProducer.java
with Apache License 2.0
from apache

private ManifestFile copyManifest(ManifestFile manifest) {
    TableMetadata current = ops.current();
    InputFile toCopy = ops.io().newInputFile(manifest.path());
    OutputFile newManifestPath = newManifestOutput();
    return ManifestFiles.copyAppendManifest(current.formatVersion(), toCopy, current.specsById(), newManifestPath, snapshotId(), appendedManifestsSummary);
}

19 Source : ManifestWriter.java
with Apache License 2.0
from apache

/**
 * Writer for manifest files.
 *
 * @param <F> Java class of files written to the manifest, either {@link DataFile} or {@link DeleteFile}.
 */
public abstract class ManifestWriter<F extends ContentFile<F>> implements FileAppender<F> {

    // stand-in for the current sequence number that will be assigned when the commit is successful
    // this is replaced when writing a manifest list by the ManifestFile wrapper
    static final long UNASSIGNED_SEQ = -1L;

    private final OutputFile file;

    private final int specId;

    private final FileAppender<ManifestEntry<F>> writer;

    private final Long snapshotId;

    private final GenericManifestEntry<F> reused;

    private final PartitionSummary stats;

    private boolean closed = false;

    private int addedFiles = 0;

    private long addedRows = 0L;

    private int existingFiles = 0;

    private long existingRows = 0L;

    private int deletedFiles = 0;

    private long deletedRows = 0L;

    private Long minSequenceNumber = null;

    private ManifestWriter(PartitionSpec spec, OutputFile file, Long snapshotId) {
        this.file = file;
        this.specId = spec.specId();
        this.writer = newAppender(spec, file);
        this.snapshotId = snapshotId;
        this.reused = new GenericManifestEntry<>(spec.partitionType());
        this.stats = new PartitionSummary(spec);
    }

    protected abstract ManifestEntry<F> prepare(ManifestEntry<F> entry);

    protected abstract FileAppender<ManifestEntry<F>> newAppender(PartitionSpec spec, OutputFile outputFile);

    protected ManifestContent content() {
        return ManifestContent.DATA;
    }

    void addEntry(ManifestEntry<F> entry) {
        switch(entry.status()) {
            case ADDED:
                addedFiles += 1;
                addedRows += entry.file().recordCount();
                break;
            case EXISTING:
                existingFiles += 1;
                existingRows += entry.file().recordCount();
                break;
            case DELETED:
                deletedFiles += 1;
                deletedRows += entry.file().recordCount();
                break;
        }
        stats.update(entry.file().partition());
        if (entry.sequenceNumber() != null && (minSequenceNumber == null || entry.sequenceNumber() < minSequenceNumber)) {
            this.minSequenceNumber = entry.sequenceNumber();
        }
        writer.add(prepare(entry));
    }

    /**
     * Add an added entry for a file.
     * <p>
     * The entry's snapshot ID will be this manifest's snapshot ID.
     *
     * @param addedFile a data file
     */
    @Override
    public void add(F addedFile) {
        addEntry(reused.wrapAppend(snapshotId, addedFile));
    }

    void add(ManifestEntry<F> entry) {
        addEntry(reused.wrapAppend(snapshotId, entry.file()));
    }

    /**
     * Add an existing entry for a file.
     *
     * @param existingFile a file
     * @param fileSnapshotId snapshot ID when the data file was added to the table
     * @param sequenceNumber sequence number for the data file
     */
    public void existing(F existingFile, long fileSnapshotId, long sequenceNumber) {
        addEntry(reused.wrapExisting(fileSnapshotId, sequenceNumber, existingFile));
    }

    void existing(ManifestEntry<F> entry) {
        addEntry(reused.wrapExisting(entry.snapshotId(), entry.sequenceNumber(), entry.file()));
    }

    /**
     * Add a delete entry for a file.
     * <p>
     * The entry's snapshot ID will be this manifest's snapshot ID.
     *
     * @param deletedFile a file
     */
    public void delete(F deletedFile) {
        addEntry(reused.wrapDelete(snapshotId, deletedFile));
    }

    void delete(ManifestEntry<F> entry) {
        // Use the current Snapshot ID for the delete. It is safe to delete the data file from disk
        // when this Snapshot has been removed or when there are no Snapshots older than this one.
        addEntry(reused.wrapDelete(snapshotId, entry.file()));
    }

    @Override
    public Metrics metrics() {
        return writer.metrics();
    }

    @Override
    public long length() {
        return writer.length();
    }

    public ManifestFile toManifestFile() {
        Preconditions.checkState(closed, "Cannot build ManifestFile, writer is not closed");
        // if the minSequenceNumber is null, then no manifests with a sequence number have been written, so the min
        // sequence number is the one that will be assigned when this is committed. pass UNASSIGNED_SEQ to inherit it.
        long minSeqNumber = minSequenceNumber != null ? minSequenceNumber : UNASSIGNED_SEQ;
        return new GenericManifestFile(file.location(), writer.length(), specId, content(), UNASSIGNED_SEQ, minSeqNumber, snapshotId, addedFiles, addedRows, existingFiles, existingRows, deletedFiles, deletedRows, stats.summaries());
    }

    @Override
    public void close() throws IOException {
        this.closed = true;
        writer.close();
    }

    static class V2Writer extends ManifestWriter<DataFile> {

        private final V2Metadata.IndexedManifestEntry<DataFile> entryWrapper;

        V2Writer(PartitionSpec spec, OutputFile file, Long snapshotId) {
            super(spec, file, snapshotId);
            this.entryWrapper = new V2Metadata.IndexedManifestEntry<>(snapshotId, spec.partitionType());
        }

        @Override
        protected ManifestEntry<DataFile> prepare(ManifestEntry<DataFile> entry) {
            return entryWrapper.wrap(entry);
        }

        @Override
        protected FileAppender<ManifestEntry<DataFile>> newAppender(PartitionSpec spec, OutputFile file) {
            Schema manifestSchema = V2Metadata.entrySchema(spec.partitionType());
            try {
                return Avro.write(file)
                    .schema(manifestSchema)
                    .named("manifest_entry")
                    .meta("schema", SchemaParser.toJson(spec.schema()))
                    .meta("partition-spec", PartitionSpecParser.toJsonFields(spec))
                    .meta("partition-spec-id", String.valueOf(spec.specId()))
                    .meta("format-version", "2")
                    .meta("content", "data")
                    .overwrite()
                    .build();
            } catch (IOException e) {
                throw new RuntimeIOException(e, "Failed to create manifest writer for path: %s", file);
            }
        }
    }

    static class V2DeleteWriter extends ManifestWriter<DeleteFile> {

        private final V2Metadata.IndexedManifestEntry<DeleteFile> entryWrapper;

        V2DeleteWriter(PartitionSpec spec, OutputFile file, Long snapshotId) {
            super(spec, file, snapshotId);
            this.entryWrapper = new V2Metadata.IndexedManifestEntry<>(snapshotId, spec.partitionType());
        }

        @Override
        protected ManifestEntry<DeleteFile> prepare(ManifestEntry<DeleteFile> entry) {
            return entryWrapper.wrap(entry);
        }

        @Override
        protected FileAppender<ManifestEntry<DeleteFile>> newAppender(PartitionSpec spec, OutputFile file) {
            Schema manifestSchema = V2Metadata.entrySchema(spec.partitionType());
            try {
                return Avro.write(file)
                    .schema(manifestSchema)
                    .named("manifest_entry")
                    .meta("schema", SchemaParser.toJson(spec.schema()))
                    .meta("partition-spec", PartitionSpecParser.toJsonFields(spec))
                    .meta("partition-spec-id", String.valueOf(spec.specId()))
                    .meta("format-version", "2")
                    .meta("content", "deletes")
                    .overwrite()
                    .build();
            } catch (IOException e) {
                throw new RuntimeIOException(e, "Failed to create manifest writer for path: %s", file);
            }
        }

        @Override
        protected ManifestContent content() {
            return ManifestContent.DELETES;
        }
    }

    static class V1Writer extends ManifestWriter<DataFile> {

        private final V1Metadata.IndexedManifestEntry entryWrapper;

        V1Writer(PartitionSpec spec, OutputFile file, Long snapshotId) {
            super(spec, file, snapshotId);
            this.entryWrapper = new V1Metadata.IndexedManifestEntry(spec.partitionType());
        }

        @Override
        protected ManifestEntry<DataFile> prepare(ManifestEntry<DataFile> entry) {
            return entryWrapper.wrap(entry);
        }

        @Override
        protected FileAppender<ManifestEntry<DataFile>> newAppender(PartitionSpec spec, OutputFile file) {
            Schema manifestSchema = V1Metadata.entrySchema(spec.partitionType());
            try {
                return Avro.write(file)
                    .schema(manifestSchema)
                    .named("manifest_entry")
                    .meta("schema", SchemaParser.toJson(spec.schema()))
                    .meta("partition-spec", PartitionSpecParser.toJsonFields(spec))
                    .meta("partition-spec-id", String.valueOf(spec.specId()))
                    .meta("format-version", "1")
                    .overwrite()
                    .build();
            } catch (IOException e) {
                throw new RuntimeIOException(e, "Failed to create manifest writer for path: %s", file);
            }
        }
    }
}
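
Tying the writer's API together, a hedged usage sketch; the spec, output file, IDs, and data files are assumed to be in scope, and the method name is hypothetical:

// Sketch: stream entries of each status into a v2 manifest, then
// materialize the ManifestFile metadata once the writer is closed.
static ManifestFile writeMixedManifest(PartitionSpec spec, OutputFile outputFile, Long snapshotId, DataFile addedFile, DataFile existingFile, DataFile deletedFile) throws IOException {
    ManifestWriter<DataFile> writer = ManifestFiles.write(2, spec, outputFile, snapshotId);
    try {
        // status ADDED; the entry inherits this manifest's snapshot ID
        writer.add(addedFile);
        // status EXISTING; keeps the file's original snapshot ID and sequence number
        writer.existing(existingFile, 1L, 1L);
        // status DELETED
        writer.delete(deletedFile);
    } finally {
        writer.close();
    }
    return writer.toManifestFile(); // valid only after close()
}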

19 Source : ManifestFiles.java
with Apache License 2.0
from apache

/**
 * Create a new {@link ManifestWriter} for the given format version.
 *
 * @param formatVersion a target format version
 * @param spec a {@link PartitionSpec}
 * @param outputFile an {@link OutputFile} where the manifest will be written
 * @param snapshotId a snapshot ID for the manifest entries, or null for an inherited ID
 * @return a manifest writer
 */
public static ManifestWriter<DataFile> write(int formatVersion, PartitionSpec spec, OutputFile outputFile, Long snapshotId) {
    switch(formatVersion) {
        case 1:
            return new ManifestWriter.V1Writer(spec, outputFile, snapshotId);
        case 2:
            return new ManifestWriter.V2Writer(spec, outputFile, snapshotId);
    }
    throw new UnsupportedOperationException("Cannot write manifest for table version: " + formatVersion);
}

19 Source : ManifestFiles.java
with Apache License 2.0
from apache

/**
 * Create a new {@link ManifestWriter}.
 * <p>
 * Manifests created by this writer have all entry snapshot IDs set to null.
 * All entries will inherit the snapshot ID that will be assigned to the manifest on commit.
 *
 * @param spec {@link PartitionSpec} used to produce {@link DataFile} partition tuples
 * @param outputFile the destination file location
 * @return a manifest writer
 */
public static ManifestWriter<DataFile> write(PartitionSpec spec, OutputFile outputFile) {
    return write(1, spec, outputFile, null);
}

19 Source : ManifestFiles.java
with Apache License 2.0
from apache

/**
 * Create a new {@link ManifestWriter} for the given format version.
 *
 * @param formatVersion a target format version
 * @param spec a {@link PartitionSpec}
 * @param outputFile an {@link OutputFile} where the manifest will be written
 * @param snapshotId a snapshot ID for the manifest entries, or null for an inherited ID
 * @return a manifest writer
 */
public static ManifestWriter<DeleteFile> writeDeleteManifest(int formatVersion, PartitionSpec spec, OutputFile outputFile, Long snapshotId) {
    switch(formatVersion) {
        case 1:
            throw new IllegalArgumentException("Cannot write delete files in a v1 table");
        case 2:
            return new ManifestWriter.V2DeleteWriter(spec, outputFile, snapshotId);
    }
    throw new UnsupportedOperationException("Cannot write manifest for table version: " + formatVersion);
}

19 Source : FastAppend.java
with Apache License 2.0
from apache

private ManifestFile copyManifest(ManifestFile manifest) {
    TableMetadata current = ops.current();
    InputFile toCopy = ops.io().newInputFile(manifest.path());
    OutputFile newManifestPath = newManifestOutput();
    return ManifestFiles.copyAppendManifest(current.formatVersion(), toCopy, current.specsById(), newManifestPath, snapshotId(), summaryBuilder);
}

19 Source : EncryptedFiles.java
with Apache License 2.0
from apache

public static EncryptedOutputFile encryptedOutput(OutputFile encryptingOutputFile, ByteBuffer keyMetadata) {
    return encryptedOutput(encryptingOutputFile, BaseEncryptionKeyMetadata.fromKeyMetadata(keyMetadata));
}

19 Source : EncryptedFiles.java
with Apache License 2.0
from apache

public static EncryptedOutputFile encryptedOutput(OutputFile encryptingOutputFile, EncryptionKeyMetadata keyMetadata) {
    return new BaseEncryptedOutputFile(encryptingOutputFile, keyMetadata);
}

19 Source : EncryptedFiles.java
with Apache License 2.0
from apache

public static EncryptedOutputFile encryptedOutput(OutputFile encryptedOutputFile, byte[] keyMetadata) {
    return encryptedOutput(encryptedOutputFile, BaseEncryptionKeyMetadata.fromByteArray(keyMetadata));
}
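
All three overloads end at the same place: pairing an OutputFile with key metadata. A hedged sketch of the no-op case used by PlaintextEncryptionManager below (method name and parameters are hypothetical):

// Sketch: wrap a raw OutputFile; null key metadata means plaintext.
static OutputFile plaintextWrappedOutput(FileIO fileIo, String path) {
    OutputFile rawOutput = fileIo.newOutputFile(path);
    EncryptedOutputFile encrypted = EncryptedFiles.encryptedOutput(rawOutput, (ByteBuffer) null);
    return encrypted.encryptingOutputFile(); // callers write through this handle
}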

19 Source : BaseEncryptedOutputFile.java
with Apache License 2.0
from apache

class BaseEncryptedOutputFile implements EncryptedOutputFile {

    private final OutputFile encryptingOutputFile;

    private final EncryptionKeyMetadata keyMetadata;

    BaseEncryptedOutputFile(OutputFile encryptingOutputFile, EncryptionKeyMetadata keyMetadata) {
        this.encryptingOutputFile = encryptingOutputFile;
        this.keyMetadata = keyMetadata;
    }

    @Override
    public OutputFile encryptingOutputFile() {
        return encryptingOutputFile;
    }

    @Override
    public EncryptionKeyMetadata keyMetadata() {
        return keyMetadata;
    }
}

19 Source : BaseRewriteManifests.java
with Apache License 2.0
from apache

private ManifestFile copyManifest(ManifestFile manifest) {
    TableMetadata current = ops.current();
    InputFile toCopy = ops.io().newInputFile(manifest.path());
    OutputFile newFile = newManifestOutput();
    return ManifestFiles.copyRewriteManifest(current.formatVersion(), toCopy, specsById, newFile, snapshotId(), summaryBuilder);
}

18 Source : RewriteManifestsAction.java
with Apache License 2.0
from apache

private static ManifestFile writeManifest(List<Row> rows, int startIndex, int endIndex, Broadcast<FileIO> io, String location, int format, PartitionSpec spec, StructType sparkType) throws IOException {
    String manifestName = "optimized-m-" + UUID.randomUUID();
    Path manifestPath = new Path(location, manifestName);
    OutputFile outputFile = io.value().newOutputFile(FileFormat.AVRO.addExtension(manifestPath.toString()));
    Types.StructType dataFileType = DataFile.getType(spec.partitionType());
    SparkDataFile wrapper = new SparkDataFile(dataFileType, sparkType);
    ManifestWriter writer = ManifestFiles.write(format, spec, outputFile, null);
    try {
        for (int index = startIndex; index < endIndex; index++) {
            Row row = rows.get(index);
            long snapshotId = row.getLong(0);
            long sequenceNumber = row.getLong(1);
            Row file = row.getStruct(2);
            writer.existing(wrapper.wrap(file), snapshotId, sequenceNumber);
        }
    } finally {
        writer.close();
    }
    return writer.toManifestFile();
}

18 Source : TestParquetDeleteWriters.java
with Apache License 2.0
from apache

@Test
public void testPositionDeleteWriter() throws IOException {
    File deleteFile = temp.newFile();
    Schema deleteSchema = new Schema(MetadataColumns.DELETE_FILE_PATH, MetadataColumns.DELETE_FILE_POS, NestedField.optional(MetadataColumns.DELETE_FILE_ROW_FIELD_ID, "row", SCHEMA.asStruct()));
    String deletePath = "s3://bucket/path/file.parquet";
    GenericRecord posDelete = GenericRecord.create(deleteSchema);
    List<Record> expectedDeleteRecords = Lists.newArrayList();
    OutputFile out = Files.localOutput(deleteFile);
    PositionDeleteWriter<Record> deleteWriter = Parquet.writeDeletes(out)
        .createWriterFunc(GenericParquetWriter::buildWriter)
        .overwrite()
        .rowSchema(SCHEMA)
        .withSpec(PartitionSpec.unpartitioned())
        .buildPositionWriter();
    try (PositionDeleteWriter<Record> writer = deleteWriter) {
        for (int i = 0; i < records.size(); i += 1) {
            int pos = i * 3 + 2;
            writer.delete(deletePath, pos, records.get(i));
            expectedDeleteRecords.add(posDelete.copy(ImmutableMap.of("file_path", deletePath, "pos", (long) pos, "row", records.get(i))));
        }
    }
    DeleteFile metadata = deleteWriter.toDeleteFile();
    replacedert.replacedertEquals("Format should be Parquet", FileFormat.PARQUET, metadata.format());
    replacedert.replacedertEquals("Should be position deletes", FileContent.POSITION_DELETES, metadata.content());
    replacedert.replacedertEquals("Record count should be correct", records.size(), metadata.recordCount());
    replacedert.replacedertEquals("Parreplacedion should be empty", 0, metadata.parreplacedion().size());
    replacedert.replacedertNull("Key metadata should be null", metadata.keyMetadata());
    List<Record> deletedRecords;
    try (CloseableIterable<Record> reader = Parquet.read(out.toInputFile()).project(deleteSchema).createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(deleteSchema, fileSchema)).build()) {
        deletedRecords = Lists.newArrayList(reader);
    }
    replacedert.replacedertEquals("Deleted records should match expected", expectedDeleteRecords, deletedRecords);
}

18 Source : TestParquetDeleteWriters.java
with Apache License 2.0
from apache

@Test
public void testPositionDeleteWriterWithEmptyRow() throws IOException {
    File deleteFile = temp.newFile();
    Schema deleteSchema = new Schema(MetadataColumns.DELETE_FILE_PATH, MetadataColumns.DELETE_FILE_POS);
    String deletePath = "s3://bucket/path/file.parquet";
    GenericRecord posDelete = GenericRecord.create(deleteSchema);
    List<Record> expectedDeleteRecords = Lists.newArrayList();
    OutputFile out = Files.localOutput(deleteFile);
    PositionDeleteWriter<Void> deleteWriter = Parquet.writeDeletes(out)
        .createWriterFunc(GenericParquetWriter::buildWriter)
        .overwrite()
        .withSpec(PartitionSpec.unpartitioned())
        .buildPositionWriter();
    try (PositionDeleteWriter<Void> writer = deleteWriter) {
        for (int i = 0; i < records.size(); i += 1) {
            int pos = i * 3 + 2;
            writer.delete(deletePath, pos, null);
            expectedDeleteRecords.add(posDelete.copy(ImmutableMap.of("file_path", deletePath, "pos", (long) pos)));
        }
    }
    DeleteFile metadata = deleteWriter.toDeleteFile();
    replacedert.replacedertEquals("Format should be Parquet", FileFormat.PARQUET, metadata.format());
    replacedert.replacedertEquals("Should be position deletes", FileContent.POSITION_DELETES, metadata.content());
    replacedert.replacedertEquals("Record count should be correct", records.size(), metadata.recordCount());
    replacedert.replacedertEquals("Parreplacedion should be empty", 0, metadata.parreplacedion().size());
    replacedert.replacedertNull("Key metadata should be null", metadata.keyMetadata());
    List<Record> deletedRecords;
    try (CloseableIterable<Record> reader = Parquet.read(out.toInputFile()).project(deleteSchema).createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(deleteSchema, fileSchema)).build()) {
        deletedRecords = Lists.newArrayList(reader);
    }
    replacedert.replacedertEquals("Deleted records should match expected", expectedDeleteRecords, deletedRecords);
}

18 Source : TestParquetDeleteWriters.java
with Apache License 2.0
from apache

@Test
public void testEqualityDeleteWriter() throws IOException {
    File deleteFile = temp.newFile();
    OutputFile out = Files.localOutput(deleteFile);
    EqualityDeleteWriter<Record> deleteWriter = Parquet.writeDeletes(out)
        .createWriterFunc(GenericParquetWriter::buildWriter)
        .overwrite()
        .rowSchema(SCHEMA)
        .withSpec(PartitionSpec.unpartitioned())
        .equalityFieldIds(1)
        .buildEqualityWriter();
    try (EqualityDeleteWriter<Record> writer = deleteWriter) {
        writer.deleteAll(records);
    }
    DeleteFile metadata = deleteWriter.toDeleteFile();
    replacedert.replacedertEquals("Format should be Parquet", FileFormat.PARQUET, metadata.format());
    replacedert.replacedertEquals("Should be equality deletes", FileContent.EQUALITY_DELETES, metadata.content());
    replacedert.replacedertEquals("Record count should be correct", records.size(), metadata.recordCount());
    replacedert.replacedertEquals("Parreplacedion should be empty", 0, metadata.parreplacedion().size());
    replacedert.replacedertNull("Key metadata should be null", metadata.keyMetadata());
    List<Record> deletedRecords;
    try (CloseableIterable<Record> reader = Parquet.read(out.toInputFile()).project(SCHEMA).createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(SCHEMA, fileSchema)).build()) {
        deletedRecords = Lists.newArrayList(reader);
    }
    replacedert.replacedertEquals("Deleted records should match expected", records, deletedRecords);
}

18 Source : OrcFileAppender.java
with Apache License 2.0
from apache

private static Writer newOrcWriter(OutputFile file, OrcFile.WriterOptions options, Map<String, byte[]> metadata) {
    final Path locPath = new Path(file.location());
    final Writer writer;
    try {
        writer = OrcFile.createWriter(locPath, options);
    } catch (IOException ioe) {
        throw new RuntimeIOException(ioe, "Can't create file %s", locPath);
    }
    metadata.forEach((key, value) -> writer.addUserMetadata(key, ByteBuffer.wrap(value)));
    return writer;
}

18 Source : HiveIcebergOutputCommitter.java
with Apache License 2.0
from apache

private static void createFileForCommit(DataFile[] closedFiles, String location, FileIO io) throws IOException {
    OutputFile fileForCommit = io.newOutputFile(location);
    try (ObjectOutputStream oos = new ObjectOutputStream(fileForCommit.createOrOverwrite())) {
        oos.writeObject(closedFiles);
    }
    LOG.debug("Iceberg committed file is created {}", fileForCommit);
}
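
The read side of this round trip is symmetric, going through FileIO.newInputFile instead. A hedged sketch, not taken from the Hive committer itself (method name is hypothetical):

// Sketch: read the serialized DataFile array back through the FileIO.
static DataFile[] readFileForCommit(String location, FileIO io) throws IOException, ClassNotFoundException {
    InputFile fileForRead = io.newInputFile(location);
    try (ObjectInputStream ois = new ObjectInputStream(fileForRead.newStream())) {
        return (DataFile[]) ois.readObject();
    }
}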

18 Source : TestParquetMetrics.java
with Apache License 2.0
from apache

private Metrics getMetrics(Schema schema, OutputFile file, Map<String, String> properties, MetricsConfig metricsConfig, Record... records) throws IOException {
    FileAppender<Record> writer = Parquet.write(file).schema(schema).setAll(properties).createWriterFunc(GenericParquetWriter::buildWriter).metricsConfig(metricsConfig).build();
    try (FileAppender<Record> appender = writer) {
        appender.addAll(Lists.newArrayList(records));
    }
    return writer.metrics();
}

18 Source : TestOrcMetrics.java
with Apache License 2.0
from apache

private Metrics getMetrics(Schema schema, OutputFile file, Map<String, String> properties, MetricsConfig metricsConfig, Record... records) throws IOException {
    FileAppender<Record> writer = ORC.write(file).schema(schema).setAll(properties).createWriterFunc(GenericOrcWriter::buildWriter).metricsConfig(metricsConfig).build();
    try (FileAppender<Record> appender = writer) {
        appender.addAll(Lists.newArrayList(records));
    }
    return writer.metrics();
}

18 Source : FileHelpers.java
with Apache License 2.0
from apache

public static DataFile writeDataFile(Table table, OutputFile out, StructLike partition, List<Record> rows) throws IOException {
    FileAppender<Record> writer = Parquet.write(out).createWriterFunc(GenericParquetWriter::buildWriter).schema(table.schema()).overwrite().build();
    try (Closeable toClose = writer) {
        writer.addAll(rows);
    }
    return DataFiles.builder(table.spec())
        .withFormat(FileFormat.PARQUET)
        .withPath(out.location())
        .withPartition(partition)
        .withFileSizeInBytes(writer.length())
        .withSplitOffsets(writer.splitOffsets())
        .withMetrics(writer.metrics())
        .build();
}

18 Source : FileHelpers.java
with Apache License 2.0
from apache

public static DataFile writeDataFile(Table table, OutputFile out, List<Record> rows) throws IOException {
    FileAppender<Record> writer = Parquet.write(out).createWriterFunc(GenericParquetWriter::buildWriter).schema(table.schema()).overwrite().build();
    try (Closeable toClose = writer) {
        writer.addAll(rows);
    }
    return DataFiles.builder(table.spec())
        .withFormat(FileFormat.PARQUET)
        .withPath(out.location())
        .withFileSizeInBytes(writer.length())
        .withSplitOffsets(writer.splitOffsets())
        .withMetrics(writer.metrics())
        .build();
}

18 Source : TestManifestWriterVersions.java
with Apache License 2.0
from apache

private InputFile writeManifestList(ManifestFile manifest, int formatVersion) throws IOException {
    OutputFile manifestList = Files.localOutput(temp.newFile());
    try (FileAppender<ManifestFile> writer = ManifestLists.write(formatVersion, manifestList, SNAPSHOT_ID, SNAPSHOT_ID - 1, formatVersion > 1 ? SEQUENCE_NUMBER : 0)) {
        writer.add(manifest);
    }
    return manifestList.toInputFile();
}

18 Source : TestManifestListVersions.java
with Apache License 2.0
from apache

private InputFile writeManifestList(ManifestFile manifest, int formatVersion) throws IOException {
    OutputFile manifestList = Files.localOutput(temp.newFile());
    try (FileAppender<ManifestFile> writer = ManifestLists.write(formatVersion, manifestList, SNAPSHOT_ID, SNAPSHOT_ID - 1, formatVersion > 1 ? SEQ_NUM : 0)) {
        writer.add(manifest);
    }
    return manifestList.toInputFile();
}

18 Source : TableTestBase.java
with Apache License 2.0
from apache

@SuppressWarnings("unchecked")
<F extends ContentFile<F>> ManifestFile writeManifest(Long snapshotId, String fileName, ManifestEntry<?>... entries) throws IOException {
    File manifestFile = temp.newFile(fileName);
    Assert.assertTrue(manifestFile.delete());
    OutputFile outputFile = table.ops().io().newOutputFile(manifestFile.getCanonicalPath());
    ManifestWriter<F> writer;
    if (entries[0].file() instanceof DataFile) {
        writer = (ManifestWriter<F>) ManifestFiles.write(formatVersion, table.spec(), outputFile, snapshotId);
    } else {
        writer = (ManifestWriter<F>) ManifestFiles.writeDeleteManifest(formatVersion, table.spec(), outputFile, snapshotId);
    }
    try {
        for (ManifestEntry<?> entry : entries) {
            writer.addEntry((ManifestEntry<F>) entry);
        }
    } finally {
        writer.close();
    }
    return writer.toManifestFile();
}

18 Source : TestAvroDeleteWriters.java
with Apache License 2.0
from apache

@Test
public void testPositionDeleteWriterWithEmptyRow() throws IOException {
    File deleteFile = temp.newFile();
    Schema deleteSchema = new Schema(MetadataColumns.DELETE_FILE_PATH, MetadataColumns.DELETE_FILE_POS);
    String deletePath = "s3://bucket/path/file.parquet";
    GenericRecord posDelete = GenericRecord.create(deleteSchema);
    List<Record> expectedDeleteRecords = Lists.newArrayList();
    OutputFile out = Files.localOutput(deleteFile);
    PositionDeleteWriter<Void> deleteWriter = Avro.writeDeletes(out)
        .createWriterFunc(DataWriter::create)
        .overwrite()
        .withSpec(PartitionSpec.unpartitioned())
        .buildPositionWriter();
    try (PositionDeleteWriter<Void> writer = deleteWriter) {
        for (int i = 0; i < records.size(); i += 1) {
            int pos = i * 3 + 2;
            writer.delete(deletePath, pos, null);
            expectedDeleteRecords.add(posDelete.copy(ImmutableMap.of("file_path", deletePath, "pos", (long) pos)));
        }
    }
    DeleteFile metadata = deleteWriter.toDeleteFile();
    replacedert.replacedertEquals("Format should be Avro", FileFormat.AVRO, metadata.format());
    replacedert.replacedertEquals("Should be position deletes", FileContent.POSITION_DELETES, metadata.content());
    replacedert.replacedertEquals("Record count should be correct", records.size(), metadata.recordCount());
    replacedert.replacedertEquals("Parreplacedion should be empty", 0, metadata.parreplacedion().size());
    replacedert.replacedertNull("Key metadata should be null", metadata.keyMetadata());
    List<Record> deletedRecords;
    try (AvroIterable<Record> reader = Avro.read(out.toInputFile()).project(deleteSchema).createReaderFunc(DataReader::create).build()) {
        deletedRecords = Lists.newArrayList(reader);
    }
    replacedert.replacedertEquals("Deleted records should match expected", expectedDeleteRecords, deletedRecords);
}

18 Source : TestAvroDeleteWriters.java
with Apache License 2.0
from apache

@Test
public void testPositionDeleteWriter() throws IOException {
    File deleteFile = temp.newFile();
    Schema deleteSchema = new Schema(MetadataColumns.DELETE_FILE_PATH, MetadataColumns.DELETE_FILE_POS, NestedField.optional(MetadataColumns.DELETE_FILE_ROW_FIELD_ID, "row", SCHEMA.asStruct()));
    String deletePath = "s3://bucket/path/file.parquet";
    GenericRecord posDelete = GenericRecord.create(deleteSchema);
    List<Record> expectedDeleteRecords = Lists.newArrayList();
    OutputFile out = Files.localOutput(deleteFile);
    PositionDeleteWriter<Record> deleteWriter = Avro.writeDeletes(out)
        .createWriterFunc(DataWriter::create)
        .overwrite()
        .rowSchema(SCHEMA)
        .withSpec(PartitionSpec.unpartitioned())
        .buildPositionWriter();
    try (PositionDeleteWriter<Record> writer = deleteWriter) {
        for (int i = 0; i < records.size(); i += 1) {
            int pos = i * 3 + 2;
            writer.delete(deletePath, pos, records.get(i));
            expectedDeleteRecords.add(posDelete.copy(ImmutableMap.of("file_path", deletePath, "pos", (long) pos, "row", records.get(i))));
        }
    }
    DeleteFile metadata = deleteWriter.toDeleteFile();
    replacedert.replacedertEquals("Format should be Avro", FileFormat.AVRO, metadata.format());
    replacedert.replacedertEquals("Should be position deletes", FileContent.POSITION_DELETES, metadata.content());
    replacedert.replacedertEquals("Record count should be correct", records.size(), metadata.recordCount());
    replacedert.replacedertEquals("Parreplacedion should be empty", 0, metadata.parreplacedion().size());
    replacedert.replacedertNull("Key metadata should be null", metadata.keyMetadata());
    List<Record> deletedRecords;
    try (AvroIterable<Record> reader = Avro.read(out.toInputFile()).project(deleteSchema).createReaderFunc(DataReader::create).build()) {
        deletedRecords = Lists.newArrayList(reader);
    }
    replacedert.replacedertEquals("Deleted records should match expected", expectedDeleteRecords, deletedRecords);
}

18 Source : TestAvroDeleteWriters.java
with Apache License 2.0
from apache

@Test
public void testEqualityDeleteWriter() throws IOException {
    File deleteFile = temp.newFile();
    OutputFile out = Files.localOutput(deleteFile);
    EqualityDeleteWriter<Record> deleteWriter = Avro.writeDeletes(out)
        .createWriterFunc(DataWriter::create)
        .overwrite()
        .rowSchema(SCHEMA)
        .withSpec(PartitionSpec.unpartitioned())
        .equalityFieldIds(1)
        .buildEqualityWriter();
    try (EqualityDeleteWriter<Record> writer = deleteWriter) {
        writer.deleteAll(records);
    }
    DeleteFile metadata = deleteWriter.toDeleteFile();
    replacedert.replacedertEquals("Format should be Avro", FileFormat.AVRO, metadata.format());
    replacedert.replacedertEquals("Should be equality deletes", FileContent.EQUALITY_DELETES, metadata.content());
    replacedert.replacedertEquals("Record count should be correct", records.size(), metadata.recordCount());
    replacedert.replacedertEquals("Parreplacedion should be empty", 0, metadata.parreplacedion().size());
    replacedert.replacedertNull("Key metadata should be null", metadata.keyMetadata());
    List<Record> deletedRecords;
    try (AvroIterable<Record> reader = Avro.read(out.toInputFile()).project(SCHEMA).createReaderFunc(DataReader::create).build()) {
        deletedRecords = Lists.newArrayList(reader);
    }
    replacedert.replacedertEquals("Deleted records should match expected", records, deletedRecords);
}

18 Source : SnapshotProducer.java
with Apache License 2.0
from apache

@Override
public Snapshot apply() {
    this.base = refresh();
    Long parentSnapshotId = base.currentSnapshot() != null ? base.currentSnapshot().snapshotId() : null;
    long sequenceNumber = base.nextSequenceNumber();
    // run validations from the child operation
    validate(base);
    List<ManifestFile> manifests = apply(base);
    if (base.formatVersion() > 1 || base.propertyAsBoolean(MANIFEST_LISTS_ENABLED, MANIFEST_LISTS_ENABLED_DEFAULT)) {
        OutputFile manifestList = manifestListPath();
        try (ManifestListWriter writer = ManifestLists.write(ops.current().formatVersion(), manifestList, snapshotId(), parentSnapshotId, sequenceNumber)) {
            // keep track of the manifest lists created
            manifestLists.add(manifestList.location());
            ManifestFile[] manifestFiles = new ManifestFile[manifests.size()];
            Tasks.range(manifestFiles.length).stopOnFailure().throwFailureWhenFinished().executeWith(ThreadPools.getWorkerPool()).run(index -> manifestFiles[index] = manifestsWithMetadata.get(manifests.get(index)));
            writer.addAll(Arrays.asList(manifestFiles));
        } catch (IOException e) {
            throw new RuntimeIOException(e, "Failed to write manifest list file");
        }
        return new BaseSnapshot(ops.io(), sequenceNumber, snapshotId(), parentSnapshotId, System.currentTimeMillis(), operation(), summary(base), manifestList.location());
    } else {
        return new BaseSnapshot(ops.io(), snapshotId(), parentSnapshotId, System.currentTimeMillis(), operation(), summary(base), manifests);
    }
}

18 Source : ManifestLists.java
with Apache License 2.0
from apache

static ManifestListWriter write(int formatVersion, OutputFile manifestListFile, long snapshotId, Long parentSnapshotId, long sequenceNumber) {
    switch(formatVersion) {
        case 1:
            Preconditions.checkArgument(sequenceNumber == TableMetadata.INITIAL_SEQUENCE_NUMBER, "Invalid sequence number for v1 manifest list: %s", sequenceNumber);
            return new ManifestListWriter.V1Writer(manifestListFile, snapshotId, parentSnapshotId);
        case 2:
            return new ManifestListWriter.V2Writer(manifestListFile, snapshotId, parentSnapshotId, sequenceNumber);
    }
    throw new UnsupportedOperationException("Cannot write manifest list for table version: " + formatVersion);
}

18 Source : ManifestFiles.java
with Apache License 2.0
from apache

private static ManifestFile copyManifestInternal(int formatVersion, ManifestReader<DataFile> reader, OutputFile outputFile, long snapshotId, SnapshotSummary.Builder summaryBuilder, ManifestEntry.Status allowedEntryStatus) {
    ManifestWriter<DataFile> writer = write(formatVersion, reader.spec(), outputFile, snapshotId);
    boolean threw = true;
    try {
        for (ManifestEntry<DataFile> entry : reader.entries()) {
            Preconditions.checkArgument(allowedEntryStatus == entry.status(), "Invalid manifest entry status: %s (allowed status: %s)", entry.status(), allowedEntryStatus);
            switch(entry.status()) {
                case ADDED:
                    summaryBuilder.addedFile(reader.spec(), entry.file());
                    writer.add(entry);
                    break;
                case EXISTING:
                    writer.existing(entry);
                    break;
                case DELETED:
                    summaryBuilder.deletedFile(reader.spec(), entry.file());
                    writer.delete(entry);
                    break;
            }
        }
        threw = false;
    } finally {
        try {
            writer.close();
        } catch (IOException e) {
            if (!threw) {
                throw new RuntimeIOException(e, "Failed to close manifest: %s", outputFile);
            }
        }
    }
    return writer.toManifestFile();
}

18 Source : ManifestFiles.java
with Apache License 2.0
from apache

static ManifestFile copyRewriteManifest(int formatVersion, InputFile toCopy, Map<Integer, PartitionSpec> specsById, OutputFile outputFile, long snapshotId, SnapshotSummary.Builder summaryBuilder) {
    // for a rewritten manifest all snapshot ids should be set. use empty metadata to throw an exception if it is not
    InheritableMetadata inheritableMetadata = InheritableMetadataFactory.empty();
    try (ManifestReader<DataFile> reader = new ManifestReader<>(toCopy, specsById, inheritableMetadata, FileType.DATA_FILES)) {
        return copyManifestInternal(formatVersion, reader, outputFile, snapshotId, summaryBuilder, ManifestEntry.Status.EXISTING);
    } catch (IOException e) {
        throw new RuntimeIOException(e, "Failed to close manifest: %s", toCopy.location());
    }
}

18 Source : ManifestFiles.java
with Apache License 2.0
from apache

static ManifestFile copyAppendManifest(int formatVersion, InputFile toCopy, Map<Integer, PartitionSpec> specsById, OutputFile outputFile, long snapshotId, SnapshotSummary.Builder summaryBuilder) {
    // use metadata that will add the current snapshot's ID for the rewrite
    InheritableMetadata inheritableMetadata = InheritableMetadataFactory.forCopy(snapshotId);
    try (ManifestReader<DataFile> reader = new ManifestReader<>(toCopy, specsById, inheritableMetadata, FileType.DATA_FILES)) {
        return copyManifestInternal(formatVersion, reader, outputFile, snapshotId, summaryBuilder, ManifestEntry.Status.ADDED);
    } catch (IOException e) {
        throw new RuntimeIOException(e, "Failed to close manifest: %s", toCopy.location());
    }
}

18 Source : PlaintextEncryptionManager.java
with Apache License 2.0
from apache

@Override
public EncryptedOutputFile encrypt(OutputFile rawOutput) {
    return EncryptedFiles.encryptedOutput(rawOutput, (ByteBuffer) null);
}
