org.apache.iceberg.FileFormat

Here are examples of the Java API org.apache.iceberg.FileFormat, taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.

89 Examples
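Before the indexed examples, a minimal sketch (not taken from the projects below) of the FileFormat operations that recur throughout them: parsing a format name and deriving a data file name with the matching extension. The class and variable names here are illustrative only.

import java.util.Locale;

import org.apache.iceberg.FileFormat;

public class FileFormatBasics {

    public static void main(String[] args) {
        // Parse a case-insensitive format name, as several parameterized tests below do.
        FileFormat format = FileFormat.valueOf("parquet".toUpperCase(Locale.ROOT));
        // Append the format's file extension to a data file name.
        String fileName = format.addExtension("data-00001");
        // Prints: PARQUET -> data-00001.parquet
        System.out.println(format.name() + " -> " + fileName);
    }
}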

19 Source : IcebergQueryRunner.java
with Apache License 2.0
from trinodb

public static DistributedQueryRunner createIcebergQueryRunner(Map<String, String> extraProperties) throws Exception {
    FileFormat defaultFormat = new IcebergConfig().getFileFormat();
    return createIcebergQueryRunner(extraProperties, defaultFormat, TpchTable.getTables());
}

19 Source : IcebergQueryRunner.java
with Apache License 2.0
from trinodb

public static DistributedQueryRunner createIcebergQueryRunner(Map<String, String> extraProperties, FileFormat format) throws Exception {
    return createIcebergQueryRunner(extraProperties, format, TpchTable.getTables());
}

19 Source : IcebergQueryRunner.java
with Apache License 2.0
from trinodb

public static DistributedQueryRunner createIcebergQueryRunner(Map<String, String> extraProperties, List<TpchTable<?>> tpchTables) throws Exception {
    FileFormat defaultFormat = new IcebergConfig().getFileFormat();
    return createIcebergQueryRunner(extraProperties, defaultFormat, tpchTables);
}
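For context, a hedged sketch of how a caller might use the overloads above with an explicit format instead of the IcebergConfig default. The import paths and the ImmutableMap helper are assumptions about the surrounding trino-iceberg test module, and the wrapper class is hypothetical.

import com.google.common.collect.ImmutableMap;
import io.trino.testing.DistributedQueryRunner;
import org.apache.iceberg.FileFormat;

public class IcebergOrcQueryRunnerExample {

    public static DistributedQueryRunner create() throws Exception {
        // Force ORC for the loaded TPC-H tables; the single-argument overload above
        // would instead fall back to the format configured in IcebergConfig.
        return IcebergQueryRunner.createIcebergQueryRunner(ImmutableMap.of(), FileFormat.ORC);
    }
}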

19 Source : TestFlinkTableSource.java
with Apache License 2.0
from apache

public class TestFlinkTableSource extends FlinkTestBase {

    private static final String CATALOG_NAME = "test_catalog";

    private static final String DATABASE_NAME = "test_db";

    private static final String TABLE_NAME = "test_table";

    private final FileFormat format = FileFormat.AVRO;

    private static String warehouse;

    private int scanEventCount = 0;

    private ScanEvent lastScanEvent = null;

    public TestFlinkTableSource() {
        // register a scan event listener to validate pushdown
        Listeners.register(event -> {
            scanEventCount += 1;
            lastScanEvent = event;
        }, ScanEvent.class);
    }

    @Override
    protected TableEnvironment getTableEnv() {
        super.getTableEnv().getConfig().getConfiguration().set(CoreOptions.DEFAULT_PARALLELISM, 1);
        return super.getTableEnv();
    }

    @BeforeClass
    public static void createWarehouse() throws IOException {
        File warehouseFile = TEMPORARY_FOLDER.newFolder();
        Assert.assertTrue("The warehouse should be deleted", warehouseFile.delete());
        // before variables
        warehouse = "file:" + warehouseFile;
    }

    @Before
    public void before() {
        sql("CREATE CATALOG %s WITH ('type'='iceberg', 'catalog-type'='hadoop', 'warehouse'='%s')", CATALOG_NAME, warehouse);
        sql("USE CATALOG %s", CATALOG_NAME);
        sql("CREATE DATABASE %s", DATABASE_NAME);
        sql("USE %s", DATABASE_NAME);
        sql("CREATE TABLE %s (id INT, data VARCHAR,d DOUBLE) WITH ('write.format.default'='%s')", TABLE_NAME, format.name());
        sql("INSERT INTO %s VALUES (1,'iceberg',10),(2,'b',20),(3,CAST(NULL AS VARCHAR),30)", TABLE_NAME);
        this.scanEventCount = 0;
        this.lastScanEvent = null;
    }

    @After
    public void clean() {
        sql("DROP TABLE IF EXISTS %s.%s", DATABASE_NAME, TABLE_NAME);
        sql("DROP DATABASE IF EXISTS %s", DATABASE_NAME);
        sql("DROP CATALOG IF EXISTS %s", CATALOG_NAME);
    }

    @Test
    public void testLimitPushDown() {
        String querySql = String.format("SELECT * FROM %s LIMIT 1", TABLE_NAME);
        String explain = getTableEnv().explainSql(querySql);
        String expectedExplain = "limit=[1]";
        Object[] expectRecord = new Object[] { 1, "iceberg", 10.0 };
        Assert.assertTrue("Explain should contain LimitPushDown", explain.contains(expectedExplain));
        List<Object[]> result = sql(querySql);
        Assert.assertEquals("Should have 1 record", 1, result.size());
        Assert.assertArrayEquals("Should produce the expected records", expectRecord, result.get(0));
        AssertHelpers.assertThrows("Invalid limit number: -1 ", SqlParserException.class, () -> sql("SELECT * FROM %s LIMIT -1", TABLE_NAME));
        Assert.assertEquals("Should have 0 record", 0, sql("SELECT * FROM %s LIMIT 0", TABLE_NAME).size());
        String sqlLimitExceed = String.format("SELECT * FROM %s LIMIT 4", TABLE_NAME);
        List<Object[]> resultExceed = sql(sqlLimitExceed);
        Assert.assertEquals("Should have 3 records", 3, resultExceed.size());
        List<Object[]> expectedList = Lists.newArrayList();
        expectedList.add(new Object[] { 1, "iceberg", 10.0 });
        expectedList.add(new Object[] { 2, "b", 20.0 });
        expectedList.add(new Object[] { 3, null, 30.0 });
        Assert.assertArrayEquals("Should produce the expected records", expectedList.toArray(), resultExceed.toArray());
        String sqlMixed = String.format("SELECT * FROM %s WHERE id = 1 LIMIT 2", TABLE_NAME);
        List<Object[]> mixedResult = sql(sqlMixed);
        Assert.assertEquals("Should have 1 record", 1, mixedResult.size());
        Assert.assertArrayEquals("Should produce the expected records", expectRecord, mixedResult.get(0));
    }

    @Test
    public void testNoFilterPushDown() {
        String sql = String.format("SELECT * FROM %s ", TABLE_NAME);
        List<Object[]> result = sql(sql);
        List<Object[]> expectedRecords = Lists.newArrayList();
        expectedRecords.add(new Object[] { 1, "iceberg", 10.0 });
        expectedRecords.add(new Object[] { 2, "b", 20.0 });
        expectedRecords.add(new Object[] { 3, null, 30.0 });
        Assert.assertArrayEquals("Should produce the expected record", expectedRecords.toArray(), result.toArray());
        Assert.assertEquals("Should not push down a filter", Expressions.alwaysTrue(), lastScanEvent.filter());
    }

    @Test
    public void testFilterPushDownEqual() {
        String sqlLiteralRight = String.format("SELECT * FROM %s WHERE id = 1 ", TABLE_NAME);
        String expectedFilter = "ref(name=\"id\") == 1";
        Object[] expectRecord = new Object[] { 1, "iceberg", 10.0 };
        List<Object[]> result = sql(sqlLiteralRight);
        Assert.assertEquals("Should have 1 record", 1, result.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, result.get(0));
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownEqualNull() {
        String sqlEqualNull = String.format("SELECT * FROM %s WHERE data = NULL ", TABLE_NAME);
        List<Object[]> result = sql(sqlEqualNull);
        Assert.assertEquals("Should have 0 record", 0, result.size());
        Assert.assertNull("Should not push down a filter", lastScanEvent);
    }

    @Test
    public void testFilterPushDownEqualLiteralOnLeft() {
        String sqlLiteralLeft = String.format("SELECT * FROM %s WHERE 1 = id ", TABLE_NAME);
        String expectedFilter = "ref(name=\"id\") == 1";
        Object[] expectRecord = new Object[] { 1, "iceberg", 10.0 };
        List<Object[]> resultLeft = sql(sqlLiteralLeft);
        Assert.assertEquals("Should have 1 record", 1, resultLeft.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultLeft.get(0));
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownNoEqual() {
        String sqlNE = String.format("SELECT * FROM %s WHERE id <> 1 ", TABLE_NAME);
        String expectedFilter = "ref(name=\"id\") != 1";
        List<Object[]> resultNE = sql(sqlNE);
        Assert.assertEquals("Should have 2 records", 2, resultNE.size());
        List<Object[]> expectedNE = Lists.newArrayList();
        expectedNE.add(new Object[] { 2, "b", 20.0 });
        expectedNE.add(new Object[] { 3, null, 30.0 });
        Assert.assertArrayEquals("Should produce the expected record", expectedNE.toArray(), resultNE.toArray());
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownNoEqualNull() {
        String sqlNotEqualNull = String.format("SELECT * FROM %s WHERE data <> NULL ", TABLE_NAME);
        List<Object[]> resultNE = sql(sqlNotEqualNull);
        Assert.assertEquals("Should have 0 records", 0, resultNE.size());
        Assert.assertNull("Should not push down a filter", lastScanEvent);
    }

    @Test
    public void testFilterPushDownAnd() {
        String sqlAnd = String.format("SELECT * FROM %s WHERE id = 1 AND data = 'iceberg' ", TABLE_NAME);
        Object[] expectRecord = new Object[] { 1, "iceberg", 10.0 };
        List<Object[]> resultAnd = sql(sqlAnd);
        Assert.assertEquals("Should have 1 record", 1, resultAnd.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultAnd.get(0));
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        String expected = "(ref(name=\"id\") == 1 and ref(name=\"data\") == \"iceberg\")";
        Assert.assertEquals("Should contain the push down filter", expected, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownOr() {
        String sqlOr = String.format("SELECT * FROM %s WHERE id = 1 OR data = 'b' ", TABLE_NAME);
        String expectedFilter = "(ref(name=\"id\") == 1 or ref(name=\"data\") == \"b\")";
        List<Object[]> resultOr = sql(sqlOr);
        Assert.assertEquals("Should have 2 record", 2, resultOr.size());
        List<Object[]> expectedOR = Lists.newArrayList();
        expectedOR.add(new Object[] { 1, "iceberg", 10.0 });
        expectedOR.add(new Object[] { 2, "b", 20.0 });
        Assert.assertArrayEquals("Should produce the expected record", expectedOR.toArray(), resultOr.toArray());
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownGreaterThan() {
        String sqlGT = String.format("SELECT * FROM %s WHERE id > 1 ", TABLE_NAME);
        String expectedFilter = "ref(name=\"id\") > 1";
        List<Object[]> resultGT = sql(sqlGT);
        Assert.assertEquals("Should have 2 record", 2, resultGT.size());
        List<Object[]> expectedGT = Lists.newArrayList();
        expectedGT.add(new Object[] { 2, "b", 20.0 });
        expectedGT.add(new Object[] { 3, null, 30.0 });
        Assert.assertArrayEquals("Should produce the expected record", expectedGT.toArray(), resultGT.toArray());
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownGreaterThanNull() {
        String sqlGT = String.format("SELECT * FROM %s WHERE data > null ", TABLE_NAME);
        List<Object[]> resultGT = sql(sqlGT);
        Assert.assertEquals("Should have 0 record", 0, resultGT.size());
        Assert.assertNull("Should not push down a filter", lastScanEvent);
    }

    @Test
    public void testFilterPushDownGreaterThanLiteralOnLeft() {
        String sqlGT = String.format("SELECT * FROM %s WHERE 3 > id ", TABLE_NAME);
        String expectedFilter = "ref(name=\"id\") < 3";
        List<Object[]> resultGT = sql(sqlGT);
        Assert.assertEquals("Should have 2 records", 2, resultGT.size());
        List<Object[]> expectedGT = Lists.newArrayList();
        expectedGT.add(new Object[] { 1, "iceberg", 10.0 });
        expectedGT.add(new Object[] { 2, "b", 20.0 });
        Assert.assertArrayEquals("Should produce the expected record", expectedGT.toArray(), resultGT.toArray());
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownGreaterThanEqual() {
        String sqlGTE = String.format("SELECT * FROM %s WHERE id >= 2 ", TABLE_NAME);
        String expectedFilter = "ref(name=\"id\") >= 2";
        List<Object[]> resultGTE = sql(sqlGTE);
        Assert.assertEquals("Should have 2 records", 2, resultGTE.size());
        List<Object[]> expectedGTE = Lists.newArrayList();
        expectedGTE.add(new Object[] { 2, "b", 20.0 });
        expectedGTE.add(new Object[] { 3, null, 30.0 });
        Assert.assertArrayEquals("Should produce the expected record", expectedGTE.toArray(), resultGTE.toArray());
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownGreaterThanEqualNull() {
        String sqlGTE = String.format("SELECT * FROM %s WHERE data >= null ", TABLE_NAME);
        List<Object[]> resultGT = sql(sqlGTE);
        Assert.assertEquals("Should have 0 record", 0, resultGT.size());
        Assert.assertNull("Should not push down a filter", lastScanEvent);
    }

    @Test
    public void testFilterPushDownGreaterThanEqualLiteralOnLeft() {
        String sqlGTE = String.format("SELECT * FROM %s WHERE 2 >= id ", TABLE_NAME);
        String expectedFilter = "ref(name=\"id\") <= 2";
        List<Object[]> resultGTE = sql(sqlGTE);
        Assert.assertEquals("Should have 2 records", 2, resultGTE.size());
        List<Object[]> expectedGTE = Lists.newArrayList();
        expectedGTE.add(new Object[] { 1, "iceberg", 10.0 });
        expectedGTE.add(new Object[] { 2, "b", 20.0 });
        Assert.assertArrayEquals("Should produce the expected record", expectedGTE.toArray(), resultGTE.toArray());
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownLessThan() {
        String sqlLT = String.format("SELECT * FROM %s WHERE id < 2 ", TABLE_NAME);
        String expectedFilter = "ref(name=\"id\") < 2";
        Object[] expectRecord = new Object[] { 1, "iceberg", 10.0 };
        List<Object[]> resultLT = sql(sqlLT);
        Assert.assertEquals("Should have 1 record", 1, resultLT.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultLT.get(0));
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownLessThanNull() {
        String sqlLT = String.format("SELECT * FROM %s WHERE data < null ", TABLE_NAME);
        List<Object[]> resultGT = sql(sqlLT);
        Assert.assertEquals("Should have 0 record", 0, resultGT.size());
        Assert.assertNull("Should not push down a filter", lastScanEvent);
    }

    @Test
    public void testFilterPushDownLessThanLiteralOnLeft() {
        String sqlLT = String.format("SELECT * FROM %s WHERE 2 < id ", TABLE_NAME);
        Object[] expectRecord = new Object[] { 3, null, 30.0 };
        String expectedFilter = "ref(name=\"id\") > 2";
        List<Object[]> resultLT = sql(sqlLT);
        Assert.assertEquals("Should have 1 record", 1, resultLT.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultLT.get(0));
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownLessThanEqual() {
        String sqlLTE = String.format("SELECT * FROM %s WHERE id <= 1 ", TABLE_NAME);
        Object[] expectRecord = new Object[] { 1, "iceberg", 10.0 };
        String expectedFilter = "ref(name=\"id\") <= 1";
        List<Object[]> resultLTE = sql(sqlLTE);
        Assert.assertEquals("Should have 1 record", 1, resultLTE.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultLTE.get(0));
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownLessThanEqualNull() {
        String sqlLTE = String.format("SELECT * FROM %s WHERE data <= null ", TABLE_NAME);
        List<Object[]> resultGT = sql(sqlLTE);
        Assert.assertEquals("Should have 0 record", 0, resultGT.size());
        Assert.assertNull("Should not push down a filter", lastScanEvent);
    }

    @Test
    public void testFilterPushDownLessThanEqualLiteralOnLeft() {
        String sqlLTE = String.format("SELECT * FROM %s WHERE 3 <= id  ", TABLE_NAME);
        Object[] expectRecord = new Object[] { 3, null, 30.0 };
        String expectedFilter = "ref(name=\"id\") >= 3";
        List<Object[]> resultLTE = sql(sqlLTE);
        Assert.assertEquals("Should have 1 record", 1, resultLTE.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultLTE.get(0));
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownIn() {
        String sqlIN = String.format("SELECT * FROM %s WHERE id IN (1,2) ", TABLE_NAME);
        String expectedFilter = "(ref(name=\"id\") == 1 or ref(name=\"id\") == 2)";
        List<Object[]> resultIN = sql(sqlIN);
        Assert.assertEquals("Should have 2 records", 2, resultIN.size());
        List<Object[]> expectedIN = Lists.newArrayList();
        expectedIN.add(new Object[] { 1, "iceberg", 10.0 });
        expectedIN.add(new Object[] { 2, "b", 20.0 });
        Assert.assertArrayEquals("Should produce the expected record", expectedIN.toArray(), resultIN.toArray());
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownInNull() {
        String sqlInNull = String.format("SELECT * FROM %s WHERE data IN ('iceberg',NULL) ", TABLE_NAME);
        Object[] expectRecord = new Object[] { 1, "iceberg", 10.0 };
        List<Object[]> result = sql(sqlInNull);
        Assert.assertEquals("Should have 1 record", 1, result.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, result.get(0));
        Assert.assertEquals("Should not push down a filter", Expressions.alwaysTrue(), lastScanEvent.filter());
    }

    @Test
    public void testFilterPushDownNotIn() {
        String sqlNotIn = String.format("SELECT * FROM %s WHERE id NOT IN (3,2) ", TABLE_NAME);
        Object[] expectRecord = new Object[] { 1, "iceberg", 10.0 };
        List<Object[]> resultNotIn = sql(sqlNotIn);
        Assert.assertEquals("Should have 1 record", 1, resultNotIn.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultNotIn.get(0));
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        String expectedScan = "(ref(name=\"id\") != 2 and ref(name=\"id\") != 3)";
        Assert.assertEquals("Should contain the push down filter", expectedScan, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownNotInNull() {
        String sqlNotInNull = String.format("SELECT * FROM %s WHERE id NOT IN (1,2,NULL) ", TABLE_NAME);
        List<Object[]> resultGT = sql(sqlNotInNull);
        Assert.assertEquals("Should have 0 record", 0, resultGT.size());
        Assert.assertEquals("Should not push down a filter", Expressions.alwaysTrue(), lastScanEvent.filter());
    }

    @Test
    public void testFilterPushDownIsNotNull() {
        String sqlNotNull = String.format("SELECT * FROM %s WHERE data IS NOT NULL", TABLE_NAME);
        String expectedFilter = "not_null(ref(name=\"data\"))";
        List<Object[]> resultNotNull = sql(sqlNotNull);
        Assert.assertEquals("Should have 2 record", 2, resultNotNull.size());
        List<Object[]> expected = Lists.newArrayList();
        expected.add(new Object[] { 1, "iceberg", 10.0 });
        expected.add(new Object[] { 2, "b", 20.0 });
        Assert.assertArrayEquals("Should produce the expected record", expected.toArray(), resultNotNull.toArray());
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownIsNull() {
        String sqlNull = String.format("SELECT * FROM %s WHERE data IS  NULL", TABLE_NAME);
        Object[] expectRecord = new Object[] { 3, null, 30.0 };
        String expectedFilter = "is_null(ref(name=\"data\"))";
        List<Object[]> resultNull = sql(sqlNull);
        Assert.assertEquals("Should have 1 record", 1, resultNull.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultNull.get(0));
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownNot() {
        String sqlNot = String.format("SELECT * FROM %s WHERE NOT (id = 1 OR id = 2 ) ", TABLE_NAME);
        Object[] expectRecord = new Object[] { 3, null, 30.0 };
        List<Object[]> resultNot = sql(sqlNot);
        Assert.assertEquals("Should have 1 record", 1, resultNot.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultNot.get(0));
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        String expectedFilter = "(ref(name=\"id\") != 1 and ref(name=\"id\") != 2)";
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownBetween() {
        String sqlBetween = String.format("SELECT * FROM %s WHERE id BETWEEN 1 AND 2 ", TABLE_NAME);
        List<Object[]> resultBetween = sql(sqlBetween);
        Assert.assertEquals("Should have 2 record", 2, resultBetween.size());
        List<Object[]> expectedBetween = Lists.newArrayList();
        expectedBetween.add(new Object[] { 1, "iceberg", 10.0 });
        expectedBetween.add(new Object[] { 2, "b", 20.0 });
        Assert.assertArrayEquals("Should produce the expected record", expectedBetween.toArray(), resultBetween.toArray());
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        String expected = "(ref(name=\"id\") >= 1 and ref(name=\"id\") <= 2)";
        Assert.assertEquals("Should contain the push down filter", expected, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownNotBetween() {
        String sqlNotBetween = String.format("SELECT * FROM %s WHERE id  NOT BETWEEN 2 AND 3 ", TABLE_NAME);
        Object[] expectRecord = new Object[] { 1, "iceberg", 10.0 };
        String expectedFilter = "(ref(name=\"id\") < 2 or ref(name=\"id\") > 3)";
        List<Object[]> resultNotBetween = sql(sqlNotBetween);
        Assert.assertEquals("Should have 1 record", 1, resultNotBetween.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultNotBetween.get(0));
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterPushDownLike() {
        Object[] expectRecord = new Object[] { 1, "iceberg", 10.0 };
        String expectedFilter = "ref(name=\"data\") startsWith \"\"ice\"\"";
        String sqlLike = "SELECT * FROM " + TABLE_NAME + " WHERE data LIKE 'ice%%' ";
        List<Object[]> resultLike = sql(sqlLike);
        Assert.assertEquals("Should have 1 record", 1, resultLike.size());
        Assert.assertArrayEquals("The like result should produce the expected record", expectRecord, resultLike.get(0));
        Assert.assertEquals("Should create only one scan", 1, scanEventCount);
        Assert.assertEquals("Should contain the push down filter", expectedFilter, lastScanEvent.filter().toString());
    }

    @Test
    public void testFilterNotPushDownLike() {
        Object[] expectRecord = new Object[] { 1, "iceberg", 10.0 };
        String sqlNoPushDown = "SELECT * FROM " + TABLE_NAME + " WHERE data LIKE '%%i' ";
        List<Object[]> resultLike = sql(sqlNoPushDown);
        Assert.assertEquals("Should have 0 records", 0, resultLike.size());
        Assert.assertEquals("Should not push down a filter", Expressions.alwaysTrue(), lastScanEvent.filter());
        sqlNoPushDown = "SELECT * FROM " + TABLE_NAME + " WHERE data LIKE '%%i%%' ";
        resultLike = sql(sqlNoPushDown);
        Assert.assertEquals("Should have 1 record", 1, resultLike.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultLike.get(0));
        Assert.assertEquals("Should not push down a filter", Expressions.alwaysTrue(), lastScanEvent.filter());
        sqlNoPushDown = "SELECT * FROM  " + TABLE_NAME + "  WHERE data LIKE '%%ice%%g' ";
        resultLike = sql(sqlNoPushDown);
        Assert.assertEquals("Should have 1 record", 1, resultLike.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultLike.get(0));
        Assert.assertEquals("Should not push down a filter", Expressions.alwaysTrue(), lastScanEvent.filter());
        sqlNoPushDown = "SELECT * FROM  " + TABLE_NAME + "  WHERE data LIKE '%%' ";
        resultLike = sql(sqlNoPushDown);
        Assert.assertEquals("Should have 3 records", 3, resultLike.size());
        List<Object[]> expectedRecords = Lists.newArrayList();
        expectedRecords.add(new Object[] { 1, "iceberg", 10.0 });
        expectedRecords.add(new Object[] { 2, "b", 20.0 });
        expectedRecords.add(new Object[] { 3, null, 30.0 });
        Assert.assertArrayEquals("Should produce the expected record", expectedRecords.toArray(), resultLike.toArray());
        Assert.assertEquals("Should not push down a filter", Expressions.alwaysTrue(), lastScanEvent.filter());
        sqlNoPushDown = "SELECT * FROM  " + TABLE_NAME + "  WHERE data LIKE 'iceber_' ";
        resultLike = sql(sqlNoPushDown);
        Assert.assertEquals("Should have 1 record", 1, resultLike.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultLike.get(0));
        Assert.assertEquals("Should not push down a filter", Expressions.alwaysTrue(), lastScanEvent.filter());
        sqlNoPushDown = "SELECT * FROM  " + TABLE_NAME + "  WHERE data LIKE 'i%%g' ";
        resultLike = sql(sqlNoPushDown);
        Assert.assertEquals("Should have 1 record", 1, resultLike.size());
        Assert.assertArrayEquals("Should produce the expected record", expectRecord, resultLike.get(0));
        Assert.assertEquals("Should not push down a filter", Expressions.alwaysTrue(), lastScanEvent.filter());
    }

    @Test
    public void testFilterPushDown2Literal() {
        String sql2Literal = String.format("SELECT * FROM %s WHERE 1 > 0 ", TABLE_NAME);
        List<Object[]> result = sql(sql2Literal);
        List<Object[]> expectedRecords = Lists.newArrayList();
        expectedRecords.add(new Object[] { 1, "iceberg", 10.0 });
        expectedRecords.add(new Object[] { 2, "b", 20.0 });
        expectedRecords.add(new Object[] { 3, null, 30.0 });
        Assert.assertArrayEquals("Should produce the expected record", expectedRecords.toArray(), result.toArray());
        Assert.assertEquals("Should not push down a filter", Expressions.alwaysTrue(), lastScanEvent.filter());
    }

    /**
     * NaN is not supported by Flink yet, so this test asserts the parse error. When we upgrade to a Flink version
     * that supports NaN, we will delete this method and add test cases covering NaN.
     */
    @Test
    public void testSqlParseError() {
        String sqlParseErrorEqual = String.format("SELECT * FROM %s WHERE d = CAST('NaN' AS DOUBLE) ", TABLE_NAME);
        AssertHelpers.assertThrows("The NaN is not supported by flink now. ", NumberFormatException.class, () -> sql(sqlParseErrorEqual));
        String sqlParseErrorNotEqual = String.format("SELECT * FROM %s WHERE d <> CAST('NaN' AS DOUBLE) ", TABLE_NAME);
        AssertHelpers.assertThrows("The NaN is not supported by flink now. ", NumberFormatException.class, () -> sql(sqlParseErrorNotEqual));
        String sqlParseErrorGT = String.format("SELECT * FROM %s WHERE d > CAST('NaN' AS DOUBLE) ", TABLE_NAME);
        AssertHelpers.assertThrows("The NaN is not supported by flink now. ", NumberFormatException.class, () -> sql(sqlParseErrorGT));
        String sqlParseErrorLT = String.format("SELECT * FROM %s WHERE d < CAST('NaN' AS DOUBLE) ", TABLE_NAME);
        AssertHelpers.assertThrows("The NaN is not supported by flink now. ", NumberFormatException.class, () -> sql(sqlParseErrorLT));
        String sqlParseErrorGTE = String.format("SELECT * FROM %s WHERE d >= CAST('NaN' AS DOUBLE) ", TABLE_NAME);
        AssertHelpers.assertThrows("The NaN is not supported by flink now. ", NumberFormatException.class, () -> sql(sqlParseErrorGTE));
        String sqlParseErrorLTE = String.format("SELECT * FROM %s WHERE d <= CAST('NaN' AS DOUBLE) ", TABLE_NAME);
        AssertHelpers.assertThrows("The NaN is not supported by flink now. ", NumberFormatException.class, () -> sql(sqlParseErrorLTE));
    }
}

19 Source : TestFlinkCatalogTablePartitions.java
with Apache License 2.0
from apache

public class TestFlinkCatalogTablePartitions extends FlinkCatalogTestBase {

    private String tableName = "test_table";

    private final FileFormat format;

    @Parameterized.Parameters(name = "catalogName={0}, baseNamespace={1}, format={2}, cacheEnabled={3}")
    public static Iterable<Object[]> parameters() {
        List<Object[]> parameters = Lists.newArrayList();
        for (FileFormat format : new FileFormat[] { FileFormat.ORC, FileFormat.AVRO, FileFormat.PARQUET }) {
            for (Boolean cacheEnabled : new Boolean[] { true, false }) {
                for (Object[] catalogParams : FlinkCatalogTestBase.parameters()) {
                    String catalogName = (String) catalogParams[0];
                    Namespace baseNamespace = (Namespace) catalogParams[1];
                    parameters.add(new Object[] { catalogName, baseNamespace, format, cacheEnabled });
                }
            }
        }
        return parameters;
    }

    public TestFlinkCatalogTablePartitions(String catalogName, Namespace baseNamespace, FileFormat format, boolean cacheEnabled) {
        super(catalogName, baseNamespace);
        this.format = format;
        config.put(CACHE_ENABLED, String.valueOf(cacheEnabled));
    }

    @Before
    public void before() {
        super.before();
        sql("CREATE DATABASE %s", flinkDatabase);
        sql("USE CATALOG %s", catalogName);
        sql("USE %s", DATABASE);
    }

    @After
    public void cleanNamespaces() {
        sql("DROP TABLE IF EXISTS %s.%s", flinkDatabase, tableName);
        sql("DROP DATABASE IF EXISTS %s", flinkDatabase);
        super.clean();
    }

    @Test
    public void testListPartitionsWithUnpartitionedTable() {
        sql("CREATE TABLE %s (id INT, data VARCHAR) with ('write.format.default'='%s')", tableName, format.name());
        sql("INSERT INTO %s SELECT 1,'a'", tableName);
        ObjectPath objectPath = new ObjectPath(DATABASE, tableName);
        FlinkCatalog flinkCatalog = (FlinkCatalog) getTableEnv().getCatalog(catalogName).get();
        AssertHelpers.assertThrows("Should not list partitions for unpartitioned table.", TableNotPartitionedException.class, () -> flinkCatalog.listPartitions(objectPath));
    }

    @Test
    public void testListPartitionsWithPartitionedTable() throws TableNotExistException, TableNotPartitionedException {
        sql("CREATE TABLE %s (id INT, data VARCHAR) PARTITIONED BY (data) " + "with ('write.format.default'='%s')", tableName, format.name());
        sql("INSERT INTO %s SELECT 1,'a'", tableName);
        sql("INSERT INTO %s SELECT 2,'b'", tableName);
        ObjectPath objectPath = new ObjectPath(DATABASE, tableName);
        FlinkCatalog flinkCatalog = (FlinkCatalog) getTableEnv().getCatalog(catalogName).get();
        List<CatalogPartitionSpec> list = flinkCatalog.listPartitions(objectPath);
        Assert.assertEquals("Should have 2 partitions", 2, list.size());
        List<CatalogPartitionSpec> expected = Lists.newArrayList();
        CatalogPartitionSpec partitionSpec1 = new CatalogPartitionSpec(ImmutableMap.of("data", "a"));
        CatalogPartitionSpec partitionSpec2 = new CatalogPartitionSpec(ImmutableMap.of("data", "b"));
        expected.add(partitionSpec1);
        expected.add(partitionSpec2);
        Assert.assertEquals("Should produce the expected catalog partition specs.", list, expected);
    }
}

19 Source : TestFlinkCatalogTablePartitions.java
with Apache License 2.0
from apache

@Parameterized.Parameters(name = "catalogName={0}, baseNamespace={1}, format={2}, cacheEnabled={3}")
public static Iterable<Object[]> parameters() {
    List<Object[]> parameters = Lists.newArrayList();
    for (FileFormat format : new FileFormat[] { FileFormat.ORC, FileFormat.AVRO, FileFormat.PARQUET }) {
        for (Boolean cacheEnabled : new Boolean[] { true, false }) {
            for (Object[] catalogParams : FlinkCatalogTestBase.parameters()) {
                String catalogName = (String) catalogParams[0];
                Namespace baseNamespace = (Namespace) catalogParams[1];
                parameters.add(new Object[] { catalogName, baseNamespace, format, cacheEnabled });
            }
        }
    }
    return parameters;
}
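Because FileFormat is an ordinary Java enum, a similar parameter matrix can also be built from FileFormat.values(). The sketch below is illustrative only and simply skips METADATA, which describes metadata files rather than data files; the class name is hypothetical.

import java.util.ArrayList;
import java.util.List;

import org.apache.iceberg.FileFormat;

public class FileFormatParameters {

    public static List<Object[]> parameters() {
        List<Object[]> parameters = new ArrayList<>();
        for (FileFormat format : FileFormat.values()) {
            // Skip METADATA: it is not a data file format.
            if (format != FileFormat.METADATA) {
                parameters.add(new Object[] { format });
            }
        }
        return parameters;
    }
}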

18 Source : IcebergWritableTableHandle.java
with Apache License 2.0
from trinodb

public class IcebergWritableTableHandle implements ConnectorInsertTableHandle, ConnectorOutputTableHandle {

    private final String schemaName;

    private final String tableName;

    private final String schemaAsJson;

    private final String partitionSpecAsJson;

    private final List<IcebergColumnHandle> inputColumns;

    private final String outputPath;

    private final FileFormat fileFormat;

    @JsonCreator
    public IcebergWritableTableHandle(@JsonProperty("schemaName") String schemaName, @JsonProperty("tableName") String tableName, @JsonProperty("schemaAsJson") String schemaAsJson, @JsonProperty("partitionSpecAsJson") String partitionSpecAsJson, @JsonProperty("inputColumns") List<IcebergColumnHandle> inputColumns, @JsonProperty("outputPath") String outputPath, @JsonProperty("fileFormat") FileFormat fileFormat) {
        this.schemaName = requireNonNull(schemaName, "schemaName is null");
        this.tableName = requireNonNull(tableName, "tableName is null");
        this.schemaAsJson = requireNonNull(schemaAsJson, "schemaAsJson is null");
        this.partitionSpecAsJson = requireNonNull(partitionSpecAsJson, "partitionSpecAsJson is null");
        this.inputColumns = ImmutableList.copyOf(requireNonNull(inputColumns, "inputColumns is null"));
        this.outputPath = requireNonNull(outputPath, "filePrefix is null");
        this.fileFormat = requireNonNull(fileFormat, "fileFormat is null");
    }

    @JsonProperty
    public String getSchemaName() {
        return schemaName;
    }

    @JsonProperty
    public String getTableName() {
        return tableName;
    }

    @JsonProperty
    public String getSchemaAsJson() {
        return schemaAsJson;
    }

    @JsonProperty
    public String getPartitionSpecAsJson() {
        return partitionSpecAsJson;
    }

    @JsonProperty
    public List<IcebergColumnHandle> getInputColumns() {
        return inputColumns;
    }

    @JsonProperty
    public String getOutputPath() {
        return outputPath;
    }

    @JsonProperty
    public FileFormat getFileFormat() {
        return fileFormat;
    }

    @Override
    public String toString() {
        return schemaName + "." + tableName;
    }
}

18 Source : TestSparkReadProjection.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public abstract class TestSparkReadProjection extends TestReadProjection {

    private static SparkSession spark = null;

    @Parameterized.Parameters(name = "format = {0}, vectorized = {1}")
    public static Object[][] parameters() {
        return new Object[][] { { "parquet", false }, { "parquet", true }, { "avro", false }, { "orc", false }, { "orc", true } };
    }

    private final FileFormat format;

    private final boolean vectorized;

    public TestSparkReadProjection(String format, boolean vectorized) {
        super(format);
        this.format = FileFormat.valueOf(format.toUpperCase(Locale.ROOT));
        this.vectorized = vectorized;
    }

    @BeforeClass
    public static void startSpark() {
        TestSparkReadProjection.spark = SparkSession.builder().master("local[2]").getOrCreate();
        ImmutableMap<String, String> config = ImmutableMap.of("type", "hive", "default-namespace", "default", "parquet-enabled", "true", "cache-enabled", "false");
        spark.conf().set("spark.sql.catalog.spark_catalog", "org.apache.iceberg.spark.source.TestSparkCatalog");
        config.forEach((key, value) -> spark.conf().set("spark.sql.catalog.spark_catalog." + key, value));
    }

    @AfterClass
    public static void stopSpark() {
        SparkSession currentSpark = TestSparkReadProjection.spark;
        TestSparkReadProjection.spark = null;
        currentSpark.stop();
    }

    @Override
    protected Record writeAndRead(String desc, Schema writeSchema, Schema readSchema, Record record) throws IOException {
        File parent = temp.newFolder(desc);
        File location = new File(parent, "test");
        File dataFolder = new File(location, "data");
        Assert.assertTrue("mkdirs should succeed", dataFolder.mkdirs());
        File testFile = new File(dataFolder, format.addExtension(UUID.randomUUID().toString()));
        Table table = TestTables.create(location, desc, writeSchema, PartitionSpec.unpartitioned());
        try {
            // Important: use the table's schema for the rest of the test
            // When tables are created, the column ids are reassigned.
            Schema tableSchema = table.schema();
            try (FileAppender<Record> writer = new GenericAppenderFactory(tableSchema).newAppender(localOutput(testFile), format)) {
                writer.add(record);
            }
            DataFile file = DataFiles.builder(PartitionSpec.unpartitioned()).withRecordCount(100).withFileSizeInBytes(testFile.length()).withPath(testFile.toString()).build();
            table.newAppend().appendFile(file).commit();
            // rewrite the read schema for the table's reassigned ids
            Map<Integer, Integer> idMapping = Maps.newHashMap();
            for (int id : allIds(writeSchema)) {
                // translate each id to the original schema's column name, then to the new schema's id
                String originalName = writeSchema.findColumnName(id);
                idMapping.put(id, tableSchema.findField(originalName).fieldId());
            }
            Schema expectedSchema = reassignIds(readSchema, idMapping);
            // Set the schema to the expected schema directly to simulate the table schema evolving
            TestTables.replaceMetadata(desc, TestTables.readMetadata(desc).updateSchema(expectedSchema, 100));
            Dataset<Row> df = spark.read().format("org.apache.iceberg.spark.source.TestIcebergSource").option("iceberg.table.name", desc).option(SparkReadOptions.VECTORIZATION_ENABLED, String.valueOf(vectorized)).load();
            return SparkValueConverter.convert(readSchema, df.collectAsList().get(0));
        } finally {
            TestTables.clearTables();
        }
    }

    private List<Integer> allIds(Schema schema) {
        List<Integer> ids = Lists.newArrayList();
        TypeUtil.visit(schema, new TypeUtil.SchemaVisitor<Void>() {

            @Override
            public Void field(Types.NestedField field, Void fieldResult) {
                ids.add(field.fieldId());
                return null;
            }

            @Override
            public Void list(Types.ListType list, Void elementResult) {
                ids.add(list.elementId());
                return null;
            }

            @Override
            public Void map(Types.MapType map, Void keyResult, Void valueResult) {
                ids.add(map.keyId());
                ids.add(map.valueId());
                return null;
            }
        });
        return ids;
    }

    private Schema reassignIds(Schema schema, Map<Integer, Integer> idMapping) {
        return new Schema(TypeUtil.visit(schema, new TypeUtil.SchemaVisitor<Type>() {

            private int mapId(int id) {
                if (idMapping.containsKey(id)) {
                    return idMapping.get(id);
                }
                // make sure the new IDs don't conflict with reassignment
                return 1000 + id;
            }

            @Override
            public Type schema(Schema schema, Type structResult) {
                return structResult;
            }

            @Override
            public Type struct(Types.StructType struct, List<Type> fieldResults) {
                List<Types.NestedField> newFields = Lists.newArrayListWithExpectedSize(fieldResults.size());
                List<Types.NestedField> fields = struct.fields();
                for (int i = 0; i < fields.size(); i += 1) {
                    Types.NestedField field = fields.get(i);
                    if (field.isOptional()) {
                        newFields.add(optional(mapId(field.fieldId()), field.name(), fieldResults.get(i)));
                    } else {
                        newFields.add(required(mapId(field.fieldId()), field.name(), fieldResults.get(i)));
                    }
                }
                return Types.StructType.of(newFields);
            }

            @Override
            public Type field(Types.NestedField field, Type fieldResult) {
                return fieldResult;
            }

            @Override
            public Type list(Types.ListType list, Type elementResult) {
                if (list.isElementOptional()) {
                    return Types.ListType.ofOptional(mapId(list.elementId()), elementResult);
                } else {
                    return Types.ListType.ofRequired(mapId(list.elementId()), elementResult);
                }
            }

            @Override
            public Type map(Types.MapType map, Type keyResult, Type valueResult) {
                if (map.isValueOptional()) {
                    return Types.MapType.ofOptional(mapId(map.keyId()), mapId(map.valueId()), keyResult, valueResult);
                } else {
                    return Types.MapType.ofRequired(mapId(map.keyId()), mapId(map.valueId()), keyResult, valueResult);
                }
            }

            @Override
            public Type primitive(Type.PrimitiveType primitive) {
                return primitive;
            }
        }).asNestedType().asStructType().fields());
    }
}

18 Source : SparkAppenderFactory.java
with Apache License 2.0
from apache

@Override
public DataWriter<InternalRow> newDataWriter(EncryptedOutputFile file, FileFormat format, StructLike partition) {
    return new DataWriter<>(newAppender(file.encryptingOutputFile(), format), format, file.encryptingOutputFile().location(), spec, partition, file.keyMetadata());
}

18 Source : TestHiveIcebergStorageHandlerWithEngine.java
with Apache License 2.0
from apache

@Parameters(name = "fileFormat={0}, engine={1}, catalog={2}")
public static Collection<Object[]> parameters() {
    Collection<Object[]> testParams = new ArrayList<>();
    String javaVersion = System.getProperty("java.specification.version");
    // Run tests with every FileFormat for a single Catalog (HiveCatalog)
    for (FileFormat fileFormat : HiveIcebergStorageHandlerTestUtils.FILE_FORMATS) {
        for (String engine : EXECUTION_ENGINES) {
            // include Tez tests only for Java 8
            if (javaVersion.equals("1.8") || "mr".equals(engine)) {
                testParams.add(new Object[] { fileFormat, engine, TestTables.TestTableType.HIVE_CATALOG });
            }
        }
    }
    // Run tests for every Catalog for a single FileFormat (PARQUET) and execution engine (mr)
    // skip HiveCatalog tests as they are added before
    for (TestTables.TestTableType testTableType : TestTables.ALL_TABLE_TYPES) {
        if (!TestTables.TestTableType.HIVE_CATALOG.equals(testTableType)) {
            testParams.add(new Object[] { FileFormat.PARQUET, "mr", testTableType });
        }
    }
    return testParams;
}

18 Source : TestHiveIcebergStorageHandlerLocalScan.java
with Apache License 2.0
from apache

@Parameters(name = "fileFormat={0}, catalog={1}")
public static Collection<Object[]> parameters() {
    Collection<Object[]> testParams = new ArrayList<>();
    // Run tests with every FileFormat for a single Catalog (HiveCatalog)
    for (FileFormat fileFormat : HiveIcebergStorageHandlerTestUtils.FILE_FORMATS) {
        testParams.add(new Object[] { fileFormat, TestTables.TestTableType.HIVE_CATALOG });
    }
    // Run tests for every Catalog for a single FileFormat (PARQUET) - skip HiveCatalog tests as they are added before
    for (TestTables.TestTableType testTableType : TestTables.ALL_TABLE_TYPES) {
        if (!TestTables.TestTableType.HIVE_CATALOG.equals(testTableType)) {
            testParams.add(new Object[] { FileFormat.PARQUET, testTableType });
        }
    }
    return testParams;
}

18 Source : TestStreamScanSql.java
with Apache License 2.0
from apache

public class TestStreamScanSql extends FlinkCatalogTestBase {

    private static final String TABLE = "test_table";

    private static final FileFormat FORMAT = FileFormat.PARQUET;

    private TableEnvironment tEnv;

    public TestStreamScanSql(String catalogName, Namespace baseNamespace) {
        super(catalogName, baseNamespace);
    }

    @Override
    protected TableEnvironment getTableEnv() {
        if (tEnv == null) {
            synchronized (this) {
                if (tEnv == null) {
                    EnvironmentSettings.Builder settingsBuilder = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode();
                    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(MiniClusterResource.DISABLE_CLASSLOADER_CHECK_CONFIG);
                    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
                    env.enableCheckpointing(400);
                    StreamTableEnvironment streamTableEnv = StreamTableEnvironment.create(env, settingsBuilder.build());
                    streamTableEnv.getConfig().getConfiguration().set(TableConfigOptions.TABLE_DYNAMIC_TABLE_OPTIONS_ENABLED, true);
                    tEnv = streamTableEnv;
                }
            }
        }
        return tEnv;
    }

    @Before
    public void before() {
        super.before();
        sql("CREATE DATABASE %s", flinkDatabase);
        sql("USE CATALOG %s", catalogName);
        sql("USE %s", DATABASE);
    }

    @After
    public void clean() {
        sql("DROP TABLE IF EXISTS %s.%s", flinkDatabase, TABLE);
        sql("DROP DATABASE IF EXISTS %s", flinkDatabase);
        super.clean();
    }

    private void insertRows(String partition, Table table, Row... rows) throws IOException {
        GenericAppenderHelper appender = new GenericAppenderHelper(table, FORMAT, TEMPORARY_FOLDER);
        GenericRecord gRecord = GenericRecord.create(table.schema());
        List<Record> records = Lists.newArrayList();
        for (Row row : rows) {
            records.add(gRecord.copy("id", row.getField(0), "data", row.getField(1), "dt", row.getField(2)));
        }
        if (partition != null) {
            appender.appendToTable(TestHelpers.Row.of(partition, 0), records);
        } else {
            appender.appendToTable(records);
        }
    }

    private void insertRows(Table table, Row... rows) throws IOException {
        insertRows(null, table, rows);
    }

    private void assertRows(List<Row> expectedRows, Iterator<Row> iterator) {
        for (Row expectedRow : expectedRows) {
            Assert.assertTrue("Should have more records", iterator.hasNext());
            Row actualRow = iterator.next();
            Assert.assertEquals("Should have expected fields", 3, actualRow.getArity());
            Assert.assertEquals("Should have expected id", expectedRow.getField(0), actualRow.getField(0));
            Assert.assertEquals("Should have expected data", expectedRow.getField(1), actualRow.getField(1));
            Assert.assertEquals("Should have expected dt", expectedRow.getField(2), actualRow.getField(2));
        }
    }

    @Test
    public void testUnPartitionedTable() throws Exception {
        sql("CREATE TABLE %s (id INT, data VARCHAR, dt VARCHAR)", TABLE);
        Table table = validationCatalog.loadTable(TableIdentifier.of(icebergNamespace, TABLE));
        TableResult result = exec("SELECT * FROM %s /*+ OPTIONS('streaming'='true', 'monitor-interval'='1s')*/", TABLE);
        try (CloseableIterator<Row> iterator = result.collect()) {
            Row row1 = Row.of(1, "aaa", "2021-01-01");
            insertRows(table, row1);
            assertRows(ImmutableList.of(row1), iterator);
            Row row2 = Row.of(2, "bbb", "2021-01-01");
            insertRows(table, row2);
            assertRows(ImmutableList.of(row2), iterator);
        }
        result.getJobClient().ifPresent(JobClient::cancel);
    }

    @Test
    public void testPartitionedTable() throws Exception {
        sql("CREATE TABLE %s (id INT, data VARCHAR, dt VARCHAR) PARTITIONED BY (dt)", TABLE);
        Table table = validationCatalog.loadTable(TableIdentifier.of(icebergNamespace, TABLE));
        TableResult result = exec("SELECT * FROM %s /*+ OPTIONS('streaming'='true', 'monitor-interval'='1s')*/", TABLE);
        try (CloseableIterator<Row> iterator = result.collect()) {
            Row row1 = Row.of(1, "aaa", "2021-01-01");
            insertRows("2021-01-01", table, row1);
            assertRows(ImmutableList.of(row1), iterator);
            Row row2 = Row.of(2, "bbb", "2021-01-02");
            insertRows("2021-01-02", table, row2);
            assertRows(ImmutableList.of(row2), iterator);
            Row row3 = Row.of(1, "aaa", "2021-01-02");
            insertRows("2021-01-02", table, row3);
            assertRows(ImmutableList.of(row3), iterator);
            Row row4 = Row.of(2, "bbb", "2021-01-01");
            insertRows("2021-01-01", table, row4);
            assertRows(ImmutableList.of(row4), iterator);
        }
        result.getJobClient().ifPresent(JobClient::cancel);
    }

    @Test
    public void testConsumeFromBeginning() throws Exception {
        sql("CREATE TABLE %s (id INT, data VARCHAR, dt VARCHAR)", TABLE);
        Table table = validationCatalog.loadTable(TableIdentifier.of(icebergNamespace, TABLE));
        Row row1 = Row.of(1, "aaa", "2021-01-01");
        Row row2 = Row.of(2, "bbb", "2021-01-01");
        insertRows(table, row1, row2);
        TableResult result = exec("SELECT * FROM %s /*+ OPTIONS('streaming'='true', 'monitor-interval'='1s')*/", TABLE);
        try (CloseableIterator<Row> iterator = result.collect()) {
            assertRows(ImmutableList.of(row1, row2), iterator);
            Row row3 = Row.of(3, "ccc", "2021-01-01");
            insertRows(table, row3);
            assertRows(ImmutableList.of(row3), iterator);
            Row row4 = Row.of(4, "ddd", "2021-01-01");
            insertRows(table, row4);
            assertRows(ImmutableList.of(row4), iterator);
        }
        result.getJobClient().ifPresent(JobClient::cancel);
    }

    @Test
    public void testConsumeFromStartSnapshotId() throws Exception {
        sql("CREATE TABLE %s (id INT, data VARCHAR, dt VARCHAR)", TABLE);
        Table table = validationCatalog.loadTable(TableIdentifier.of(icebergNamespace, TABLE));
        // Produce two snapshots.
        Row row1 = Row.of(1, "aaa", "2021-01-01");
        Row row2 = Row.of(2, "bbb", "2021-01-01");
        insertRows(table, row1);
        insertRows(table, row2);
        long startSnapshotId = table.currentSnapshot().snapshotId();
        Row row3 = Row.of(3, "ccc", "2021-01-01");
        Row row4 = Row.of(4, "ddd", "2021-01-01");
        insertRows(table, row3, row4);
        TableResult result = exec("SELECT * FROM %s /*+ OPTIONS('streaming'='true', 'monitor-interval'='1s', " + "'start-snapshot-id'='%d')*/", TABLE, startSnapshotId);
        try (CloseableIterator<Row> iterator = result.collect()) {
            // The row2 in start snapshot will be excluded.
            assertRows(ImmutableList.of(row3, row4), iterator);
            Row row5 = Row.of(5, "eee", "2021-01-01");
            Row row6 = Row.of(6, "fff", "2021-01-01");
            insertRows(table, row5, row6);
            assertRows(ImmutableList.of(row5, row6), iterator);
            Row row7 = Row.of(7, "ggg", "2021-01-01");
            insertRows(table, row7);
            assertRows(ImmutableList.of(row7), iterator);
        }
        result.getJobClient().ifPresent(JobClient::cancel);
    }
}

18 Source : TestDeltaTaskWriter.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public class TestDeltaTaskWriter extends TableTestBase {

    private static final int FORMAT_V2 = 2;

    private final FileFormat format;

    @Parameterized.Parameters(name = "FileFormat = {0}")
    public static Object[][] parameters() {
        return new Object[][] { { "avro" }, { "parquet" } };
    }

    public TestDeltaTaskWriter(String fileFormat) {
        super(FORMAT_V2);
        this.format = FileFormat.valueOf(fileFormat.toUpperCase(Locale.ENGLISH));
    }

    @Before
    public void setupTable() throws IOException {
        this.tableDir = temp.newFolder();
        // created by table create
        Assert.assertTrue(tableDir.delete());
        this.metadataDir = new File(tableDir, "metadata");
    }

    private void initTable(boolean partitioned) {
        if (partitioned) {
            this.table = create(SCHEMA, PartitionSpec.builderFor(SCHEMA).identity("data").build());
        } else {
            this.table = create(SCHEMA, PartitionSpec.unpartitioned());
        }
        table.updateProperties().defaultFormat(format).commit();
    }

    private int idFieldId() {
        return table.schema().findField("id").fieldId();
    }

    private int dataFieldId() {
        return table.schema().findField("data").fieldId();
    }

    private void testCdcEvents(boolean partitioned) throws IOException {
        List<Integer> equalityFieldIds = Lists.newArrayList(idFieldId());
        TaskWriterFactory<RowData> taskWriterFactory = createTaskWriterFactory(equalityFieldIds);
        taskWriterFactory.initialize(1, 1);
        // Start the 1st transaction.
        TaskWriter<RowData> writer = taskWriterFactory.create();
        writer.write(createInsert(1, "aaa"));
        writer.write(createInsert(2, "bbb"));
        writer.write(createInsert(3, "ccc"));
        // Update <2, 'bbb'> to <2, 'ddd'>
        // 1 pos-delete and 1 eq-delete.
        writer.write(createUpdateBefore(2, "bbb"));
        writer.write(createUpdateAfter(2, "ddd"));
        // Update <1, 'aaa'> to <1, 'eee'>
        // 1 pos-delete and 1 eq-delete.
        writer.write(createUpdateBefore(1, "aaa"));
        writer.write(createUpdateAfter(1, "eee"));
        // Insert <4, 'fff'>
        writer.write(createInsert(4, "fff"));
        // Insert <5, 'ggg'>
        writer.write(createInsert(5, "ggg"));
        // Delete <3, 'ccc'>
        // 1 pos-delete and 1 eq-delete.
        writer.write(createDelete(3, "ccc"));
        WriteResult result = writer.complete();
        Assert.assertEquals(partitioned ? 7 : 1, result.dataFiles().length);
        Assert.assertEquals(partitioned ? 6 : 2, result.deleteFiles().length);
        commitTransaction(result);
        Assert.assertEquals("Should have expected records.", expectedRowSet(createRecord(1, "eee"), createRecord(2, "ddd"), createRecord(4, "fff"), createRecord(5, "ggg")), actualRowSet("*"));
        // Start the 2nd transaction.
        writer = taskWriterFactory.create();
        // Update <2, 'ddd'> to <6, 'hhh'> - (Update both key and value)
        // 1 eq-delete
        writer.write(createUpdateBefore(2, "ddd"));
        writer.write(createUpdateAfter(6, "hhh"));
        // Update <5, 'ggg'> to <5, 'iii'>
        // 1 eq-delete
        writer.write(createUpdateBefore(5, "ggg"));
        writer.write(createUpdateAfter(5, "iii"));
        // Delete <4, 'fff'>
        // 1 eq-delete.
        writer.write(createDelete(4, "fff"));
        result = writer.complete();
        Assert.assertEquals(partitioned ? 2 : 1, result.dataFiles().length);
        Assert.assertEquals(partitioned ? 3 : 1, result.deleteFiles().length);
        commitTransaction(result);
        Assert.assertEquals("Should have expected records", expectedRowSet(createRecord(1, "eee"), createRecord(5, "iii"), createRecord(6, "hhh")), actualRowSet("*"));
    }

    @Test
    public void testUnpartitioned() throws IOException {
        initTable(false);
        testCdcEvents(false);
    }

    @Test
    public void testPartitioned() throws IOException {
        initTable(true);
        testCdcEvents(true);
    }

    private void testWritePureEqDeletes(boolean partitioned) throws IOException {
        initTable(partitioned);
        List<Integer> equalityFieldIds = Lists.newArrayList(idFieldId());
        TaskWriterFactory<RowData> taskWriterFactory = createTaskWriterFactory(equalityFieldIds);
        taskWriterFactory.initialize(1, 1);
        TaskWriter<RowData> writer = taskWriterFactory.create();
        writer.write(createDelete(1, "aaa"));
        writer.write(createDelete(2, "bbb"));
        writer.write(createDelete(3, "ccc"));
        WriteResult result = writer.complete();
        Assert.assertEquals(0, result.dataFiles().length);
        Assert.assertEquals(partitioned ? 3 : 1, result.deleteFiles().length);
        commitTransaction(result);
        Assert.assertEquals("Should have no record", expectedRowSet(), actualRowSet("*"));
    }

    @Test
    public void testUnpartitionedPureEqDeletes() throws IOException {
        testWritePureEqDeletes(false);
    }

    @Test
    public void testPartitionedPureEqDeletes() throws IOException {
        testWritePureEqDeletes(true);
    }

    private void testAbort(boolean partitioned) throws IOException {
        initTable(partitioned);
        List<Integer> equalityFieldIds = Lists.newArrayList(idFieldId());
        TaskWriterFactory<RowData> taskWriterFactory = createTaskWriterFactory(equalityFieldIds);
        taskWriterFactory.initialize(1, 1);
        TaskWriter<RowData> writer = taskWriterFactory.create();
        writer.write(createUpdateBefore(1, "aaa"));
        writer.write(createUpdateAfter(1, "bbb"));
        writer.write(createUpdateBefore(2, "aaa"));
        writer.write(createUpdateAfter(2, "bbb"));
        // Assert the current data/delete file count.
        List<Path> files = Files.walk(Paths.get(tableDir.getPath(), "data")).filter(p -> p.toFile().isFile()).filter(p -> !p.toString().endsWith(".crc")).collect(Collectors.toList());
        Assert.assertEquals("Should have expected file count, but files are: " + files, partitioned ? 4 : 2, files.size());
        writer.abort();
        for (Path file : files) {
            Assert.assertFalse(Files.exists(file));
        }
    }

    @Test
    public void testUnpartitionedAbort() throws IOException {
        testAbort(false);
    }

    @Test
    public void testPartitionedAbort() throws IOException {
        testAbort(true);
    }

    @Test
    public void testPartitionedTableWithDataAsKey() throws IOException {
        initTable(true);
        List<Integer> equalityFieldIds = Lists.newArrayList(dataFieldId());
        TaskWriterFactory<RowData> taskWriterFactory = createTaskWriterFactory(equalityFieldIds);
        taskWriterFactory.initialize(1, 1);
        // Start the 1st transaction.
        TaskWriter<RowData> writer = taskWriterFactory.create();
        writer.write(createInsert(1, "aaa"));
        writer.write(createInsert(2, "aaa"));
        writer.write(createInsert(3, "bbb"));
        writer.write(createInsert(4, "ccc"));
        WriteResult result = writer.complete();
        Assert.assertEquals(3, result.dataFiles().length);
        Assert.assertEquals(1, result.deleteFiles().length);
        commitTransaction(result);
        Assert.assertEquals("Should have expected records", expectedRowSet(createRecord(2, "aaa"), createRecord(3, "bbb"), createRecord(4, "ccc")), actualRowSet("*"));
        // Start the 2nd transaction.
        writer = taskWriterFactory.create();
        writer.write(createInsert(5, "aaa"));
        writer.write(createInsert(6, "bbb"));
        // 1 eq-delete.
        writer.write(createDelete(7, "ccc"));
        result = writer.complete();
        Assert.assertEquals(2, result.dataFiles().length);
        Assert.assertEquals(1, result.deleteFiles().length);
        commitTransaction(result);
        Assert.assertEquals("Should have expected records", expectedRowSet(createRecord(2, "aaa"), createRecord(5, "aaa"), createRecord(3, "bbb"), createRecord(6, "bbb")), actualRowSet("*"));
    }

    @Test
    public void testPartitionedTableWithDataAndIdAsKey() throws IOException {
        initTable(true);
        List<Integer> equalityFieldIds = Lists.newArrayList(dataFieldId(), idFieldId());
        TaskWriterFactory<RowData> taskWriterFactory = createTaskWriterFactory(equalityFieldIds);
        taskWriterFactory.initialize(1, 1);
        TaskWriter<RowData> writer = taskWriterFactory.create();
        writer.write(createInsert(1, "aaa"));
        writer.write(createInsert(2, "aaa"));
        // 1 pos-delete and 1 eq-delete.
        writer.write(createDelete(2, "aaa"));
        WriteResult result = writer.complete();
        Assert.assertEquals(1, result.dataFiles().length);
        Assert.assertEquals(2, result.deleteFiles().length);
        Assert.assertEquals(Sets.newHashSet(FileContent.EQUALITY_DELETES, FileContent.POSITION_DELETES), Sets.newHashSet(result.deleteFiles()[0].content(), result.deleteFiles()[1].content()));
        commitTransaction(result);
        Assert.assertEquals("Should have expected records", expectedRowSet(createRecord(1, "aaa")), actualRowSet("*"));
    }

    private void commitTransaction(WriteResult result) {
        RowDelta rowDelta = table.newRowDelta();
        Arrays.stream(result.dataFiles()).forEach(rowDelta::addRows);
        Arrays.stream(result.deleteFiles()).forEach(rowDelta::addDeletes);
        rowDelta.validateDeletedFiles().validateDataFilesExist(Lists.newArrayList(result.referencedDataFiles())).commit();
    }

    private StructLikeSet expectedRowSet(Record... records) {
        return SimpleDataUtil.expectedRowSet(table, records);
    }

    private StructLikeSet actualRowSet(String... columns) throws IOException {
        return SimpleDataUtil.actualRowSet(table, columns);
    }

    private TaskWriterFactory<RowData> createTaskWriterFactory(List<Integer> equalityFieldIds) {
        return new RowDataTaskWriterFactory(table.schema(), FlinkSchemaUtil.convert(table.schema()), table.spec(), table.locationProvider(), table.io(), table.encryption(), 128 * 1024 * 1024, format, table.properties(), equalityFieldIds);
    }
}

18 Source : FlinkAppenderFactory.java
with Apache License 2.0
from apache

@Override
public DataWriter<RowData> newDataWriter(EncryptedOutputFile file, FileFormat format, StructLike partition) {
    return new DataWriter<>(newAppender(file.encryptingOutputFile(), format), format, file.encryptingOutputFile().location(), spec, partition, file.keyMetadata());
}

18 Source : GenericAppenderHelper.java
with Apache License 2.0
from apache

/**
 * Helper for appending {@link DataFile} to a table or appending {@link Record}s to a table.
 */
public class GenericAppenderHelper {

    private final Table table;

    private final FileFormat fileFormat;

    private final TemporaryFolder tmp;

    public GenericAppenderHelper(Table table, FileFormat fileFormat, TemporaryFolder tmp) {
        this.table = table;
        this.fileFormat = fileFormat;
        this.tmp = tmp;
    }

    public void appendToTable(DataFile... dataFiles) {
        Preconditions.checkNotNull(table, "table not set");
        AppendFiles append = table.newAppend();
        for (DataFile dataFile : dataFiles) {
            append = append.appendFile(dataFile);
        }
        append.commit();
    }

    public void appendToTable(List<Record> records) throws IOException {
        appendToTable(null, records);
    }

    public void appendToTable(StructLike partition, List<Record> records) throws IOException {
        appendToTable(writeFile(partition, records));
    }

    public DataFile writeFile(StructLike partition, List<Record> records) throws IOException {
        Preconditions.checkNotNull(table, "table not set");
        File file = tmp.newFile();
        Assert.assertTrue(file.delete());
        return appendToLocalFile(table, file, fileFormat, partition, records);
    }

    private static DataFile appendToLocalFile(Table table, File file, FileFormat format, StructLike partition, List<Record> records) throws IOException {
        FileAppender<Record> appender = new GenericAppenderFactory(table.schema()).newAppender(Files.localOutput(file), format);
        try (FileAppender<Record> fileAppender = appender) {
            fileAppender.addAll(records);
        }
        return DataFiles.builder(table.spec()).withRecordCount(records.size()).withFileSizeInBytes(file.length()).withPath(Files.localInput(file).location()).withMetrics(appender.metrics()).withFormat(format).withPartition(partition).build();
    }
}
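
A minimal usage sketch for this helper, assuming a JUnit TemporaryFolder rule and an existing Iceberg Table (the method name and record count below are illustrative, not part of the source above):

@Rule
public TemporaryFolder temp = new TemporaryFolder();

private void appendSampleRecords(Table table) throws IOException {
    // Write 10 random records as a single Parquet data file and commit the append to the table.
    GenericAppenderHelper appender = new GenericAppenderHelper(table, FileFormat.PARQUET, temp);
    List<Record> records = RandomGenericData.generate(table.schema(), 10, 0L);
    appender.appendToTable(records);
}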

18 Source : GenericAppenderFactory.java
with Apache License 2.0
from apache

@Override
public org.apache.iceberg.io.DataWriter<Record> newDataWriter(EncryptedOutputFile file, FileFormat format, StructLike partition) {
    return new org.apache.iceberg.io.DataWriter<>(newAppender(file.encryptingOutputFile(), format), format, file.encryptingOutputFile().location(), spec, partition, file.keyMetadata());
}

17 Source : IcebergSplit.java
with Apache License 2.0
from trinodb

public class IcebergSplit implements ConnectorSplit {

    private final String path;

    private final long start;

    private final long length;

    private final long fileSize;

    private final FileFormat fileFormat;

    private final List<HostAddress> addresses;

    private final Map<Integer, String> partitionKeys;

    @JsonCreator
    public IcebergSplit(@JsonProperty("path") String path, @JsonProperty("start") long start, @JsonProperty("length") long length, @JsonProperty("fileSize") long fileSize, @JsonProperty("fileFormat") FileFormat fileFormat, @JsonProperty("addresses") List<HostAddress> addresses, @JsonProperty("partitionKeys") Map<Integer, String> partitionKeys) {
        this.path = requireNonNull(path, "path is null");
        this.start = start;
        this.length = length;
        this.fileSize = fileSize;
        this.fileFormat = requireNonNull(fileFormat, "fileFormat is null");
        this.addresses = ImmutableList.copyOf(requireNonNull(addresses, "addresses is null"));
        this.partitionKeys = Collections.unmodifiableMap(requireNonNull(partitionKeys, "partitionKeys is null"));
    }

    @Override
    public boolean isRemotelyAccessible() {
        return true;
    }

    @JsonProperty
    @Override
    public List<HostAddress> getAddresses() {
        return addresses;
    }

    @JsonProperty
    public String getPath() {
        return path;
    }

    @JsonProperty
    public long getStart() {
        return start;
    }

    @JsonProperty
    public long getLength() {
        return length;
    }

    @JsonProperty
    public long getFileSize() {
        return fileSize;
    }

    @JsonProperty
    public FileFormat getFileFormat() {
        return fileFormat;
    }

    @JsonProperty
    public Map<Integer, String> getPartitionKeys() {
        return partitionKeys;
    }

    @Override
    public Object getInfo() {
        return ImmutableMap.builder().put("path", path).put("start", start).put("length", length).build();
    }

    @Override
    public String toString() {
        return toStringHelper(this).addValue(path).addValue(start).addValue(length).toString();
    }
}
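
For reference, a hedged construction sketch using the JSON-creator constructor shown above; all values are illustrative, and the Guava ImmutableList/ImmutableMap imports are assumed:

IcebergSplit split = new IcebergSplit(
        "s3://bucket/warehouse/db/table/data/00000-0-data.parquet", // illustrative file path
        0,                     // start offset within the file
        2048,                  // length of this split in bytes
        2048,                  // total file size in bytes
        FileFormat.PARQUET,    // format of the underlying data file
        ImmutableList.of(),    // no preferred hosts; the split is remotely accessible
        ImmutableMap.of());    // unpartitioned file, so no partition keys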

17 Source : IcebergFileWriterFactory.java
with Apache License 2.0
from trinodb

public IcebergFileWriter createFileWriter(Path outputPath, Schema icebergSchema, JobConf jobConf, ConnectorSession session, HdfsContext hdfsContext, FileFormat fileFormat) {
    switch(fileFormat) {
        case PARQUET:
            return createParquetWriter(outputPath, icebergSchema, jobConf, session, hdfsContext);
        case ORC:
            return createOrcWriter(outputPath, icebergSchema, jobConf, session);
        default:
            throw new TrinoException(NOT_SUPPORTED, "File format not supported for Iceberg: " + fileFormat);
    }
}
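
A caller needs a FileFormat value to dispatch on. A common sketch for resolving it from Iceberg table properties (assuming an org.apache.iceberg.Table named table; this is not part of the Trino source above):

// Fall back to Iceberg's default write format when the table does not set one explicitly.
String formatName = table.properties().getOrDefault(
        TableProperties.DEFAULT_FILE_FORMAT, TableProperties.DEFAULT_FILE_FORMAT_DEFAULT);
FileFormat fileFormat = FileFormat.valueOf(formatName.toUpperCase(Locale.ENGLISH));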

17 Source : SparkAppenderFactory.java
with Apache License 2.0
from apache

@Override
public EqualityDeleteWriter<InternalRow> newEqDeleteWriter(EncryptedOutputFile file, FileFormat format, StructLike partition) {
    Preconditions.checkState(equalityFieldIds != null && equalityFieldIds.length > 0, "Equality field ids shouldn't be null or empty when creating equality-delete writer");
    Preconditions.checkNotNull(eqDeleteRowSchema, "Equality delete row schema shouldn't be null when creating equality-delete writer");
    try {
        switch(format) {
            case PARQUET:
                return Parquet.writeDeletes(file.encryptingOutputFile()).createWriterFunc(msgType -> SparkParquetWriters.buildWriter(lazyEqDeleteSparkType(), msgType)).overwrite().rowSchema(eqDeleteRowSchema).withSpec(spec).withPartition(partition).equalityFieldIds(equalityFieldIds).withKeyMetadata(file.keyMetadata()).buildEqualityWriter();
            case AVRO:
                return Avro.writeDeletes(file.encryptingOutputFile()).createWriterFunc(ignored -> new SparkAvroWriter(lazyEqDeleteSparkType())).overwrite().rowSchema(eqDeleteRowSchema).withSpec(spec).withPartition(partition).equalityFieldIds(equalityFieldIds).withKeyMetadata(file.keyMetadata()).buildEqualityWriter();
            default:
                throw new UnsupportedOperationException("Cannot write equality-deletes for unsupported file format: " + format);
        }
    } catch (IOException e) {
        throw new UncheckedIOException("Failed to create new equality delete writer", e);
    }
}
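
The equalityFieldIds validated above are plain Iceberg field ids. A short sketch of how a caller might derive them from a table schema (the "id" column name is illustrative):

// Use the "id" column as the equality key for the delete files.
List<Integer> equalityFieldIds = ImmutableList.of(table.schema().findField("id").fieldId());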

17 Source : TestTables.java
with Apache License 2.0
from apache

/**
 * Creates a Hive test table. Creates the Iceberg table/data and creates the corresponding Hive table as well when
 * needed. The table will be in the 'default' database. The table will be populated with randomly
 * generated {@link Record}s.
 * @param shell The HiveShell used for Hive table creation
 * @param tableName The name of the test table
 * @param schema The schema used for the table creation
 * @param fileFormat The file format used for writing the data
 * @param numRecords The number of records to generate and store in the table
 * @throws IOException If there is an error writing data
 */
public List<Record> createTableWithGeneratedRecords(TestHiveShell shell, String tableName, Schema schema, FileFormat fileFormat, int numRecords) throws IOException {
    List<Record> records = TestHelper.generateRandomRecords(schema, numRecords, 0L);
    createTable(shell, tableName, schema, fileFormat, records);
    return records;
}
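
A hypothetical call site, assuming a TestHiveShell, a TestTables instance and a customer schema set up as in HiveIcebergStorageHandlerTestUtils (names are assumptions, not part of the source above):

// Create a Parquet-backed test table with 5 random records and keep them for later assertions.
List<Record> expected = testTables.createTableWithGeneratedRecords(
        shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, FileFormat.PARQUET, 5);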

17 Source : TestTables.java
with Apache License 2.0
from apache

/**
 * Creates a non-partitioned Hive test table. Creates the Iceberg table/data and creates the corresponding Hive
 * table as well when needed. The table will be in the 'default' database. The table will be populated with the
 * provided List of {@link Record}s.
 * @param shell The HiveShell used for Hive table creation
 * @param tableName The name of the test table
 * @param schema The schema used for the table creation
 * @param fileFormat The file format used for writing the data
 * @param records The records with which the table is populated
 * @return The created table
 * @throws IOException If there is an error writing data
 */
public Table createTable(TestHiveShell shell, String tableName, Schema schema, FileFormat fileFormat, List<Record> records) throws IOException {
    Table table = createIcebergTable(shell.getHiveConf(), tableName, schema, fileFormat, records);
    String createHiveSQL = createHiveTableSQL(TableIdentifier.of("default", tableName), ImmutableMap.of());
    if (createHiveSQL != null) {
        shell.executeStatement(createHiveSQL);
    }
    return table;
}

17 Source : TestTables.java
with Apache License 2.0
from apache

/**
 * Creates a partitioned Hive test table using Hive SQL. The table will be in the 'default' database.
 * The table will be populated with the provided List of {@link Record}s using a Hive insert statement.
 * @param shell The HiveShell used for Hive table creation
 * @param tableName The name of the test table
 * @param schema The schema used for the table creation
 * @param spec The partition specification for the table
 * @param fileFormat The file format used for writing the data
 * @param records The records with which the table is populated
 * @return The created table
 * @throws IOException If there is an error writing data
 */
public Table createTable(TestHiveShell shell, String tableName, Schema schema, PartitionSpec spec, FileFormat fileFormat, List<Record> records) {
    TableIdentifier identifier = TableIdentifier.of("default", tableName);
    shell.executeStatement("CREATE EXTERNAL TABLE " + identifier + " STORED BY '" + HiveIcebergStorageHandler.clreplaced.getName() + "' " + locationForCreateTableSQL(identifier) + "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" + SchemaParser.toJson(schema) + "', " + "'" + InputFormatConfig.PARreplacedION_SPEC + "'='" + ParreplacedionSpecParser.toJson(spec) + "', " + "'" + TableProperties.DEFAULT_FILE_FORMAT + "'='" + fileFormat + "')");
    if (records != null && !records.isEmpty()) {
        StringBuilder query = new StringBuilder().append("INSERT INTO " + identifier + " VALUES ");
        records.forEach(record -> {
            query.append("(");
            query.append(record.struct().fields().stream().map(field -> getStringValueForInsert(record.getField(field.name()), field.type())).collect(Collectors.joining(",")));
            query.append("),");
        });
        query.setLength(query.length() - 1);
        shell.executeStatement(query.toString());
    }
    return loadTable(identifier);
}
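
A hedged usage sketch for this partitioned variant, assuming the same test fixtures as above (table name, partition column and file format are illustrative):

// Partition the table by last_name and populate it through a Hive INSERT statement.
PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA)
        .identity("last_name")
        .build();
Table table = testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
        spec, FileFormat.ORC, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);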

17 Source : HiveIcebergStorageHandlerTestUtils.java
with Apache License 2.0
from apache

public class HiveIcebergStorageHandlerTestUtils {

    static final FileFormat[] FILE_FORMATS = new FileFormat[] { FileFormat.AVRO, FileFormat.ORC, FileFormat.PARQUET };

    static final Schema CUSTOMER_SCHEMA = new Schema(optional(1, "customer_id", Types.LongType.get()), optional(2, "first_name", Types.StringType.get(), "This is first name"), optional(3, "last_name", Types.StringType.get(), "This is last name"));

    static final Schema CUSTOMER_SCHEMA_WITH_UPPERCASE = new Schema(optional(1, "CustomER_Id", Types.LongType.get()), optional(2, "First_name", Types.StringType.get()), optional(3, "Last_name", Types.StringType.get()));

    static final List<Record> CUSTOMER_RECORDS = TestHelper.RecordsBuilder.newInstance(CUSTOMER_SCHEMA).add(0L, "Alice", "Brown").add(1L, "Bob", "Green").add(2L, "Trudy", "Pink").build();

    private HiveIcebergStorageHandlerTestUtils() {
    // Empty constructor for the utility class
    }

    static TestHiveShell shell() {
        TestHiveShell shell = new TestHiveShell();
        shell.setHiveConfValue("hive.notification.event.poll.interval", "-1");
        shell.setHiveConfValue("hive.tez.exec.print.summary", "true");
        // We would like to make sure that ORC reading overrides this config, so reading Iceberg tables could work in
        // systems (like Hive 3.2 and higher) where this value is set to true explicitly.
        shell.setHiveConfValue(OrcConf.FORCE_POSITIONAL_EVOLUTION.getHiveConfName(), "true");
        shell.start();
        return shell;
    }

    static TestTables testTables(TestHiveShell shell, TestTables.TestTableType testTableType, TemporaryFolder temp) throws IOException {
        return testTableType.instance(shell.metastore().hiveConf(), temp);
    }

    static void init(TestHiveShell shell, TestTables testTables, TemporaryFolder temp, String engine) {
        shell.openSession();
        for (Map.Entry<String, String> property : testTables.properties().entrySet()) {
            shell.setHiveSessionValue(property.getKey(), property.getValue());
        }
        shell.setHiveSessionValue("hive.execution.engine", engine);
        shell.setHiveSessionValue("hive.jar.directory", temp.getRoot().getAbsolutePath());
        shell.setHiveSessionValue("tez.staging-dir", temp.getRoot().getAbsolutePath());
        // temporarily disabling vectorization in Tez, since it doesn't work with projection pruning (fix: TEZ-4248)
        // TODO: remove this once TEZ-4248 has been released and the Tez dependencies updated here
        if (engine.equals("tez")) {
            shell.setHiveSessionValue("hive.vectorized.execution.enabled", "false");
        }
    }

    static void close(TestHiveShell shell) throws Exception {
        shell.closeSession();
        shell.metastore().reset();
        // HiveServer2 thread pools are using thread local Hive -> HMSClient objects. These are not cleaned up when the
        // HiveServer2 is stopped. Only Finalizer closes the HMS connections.
        System.gc();
    }
}
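
A sketch of how the FILE_FORMATS constant is typically consumed by a parameterized JUnit test in the same package (the test class name below is hypothetical):

@RunWith(Parameterized.class)
public class HiveIcebergFileFormatTest {

    @Parameterized.Parameters(name = "fileFormat={0}")
    public static Object[][] parameters() {
        // Run every test once per supported file format: AVRO, ORC and PARQUET.
        return Arrays.stream(HiveIcebergStorageHandlerTestUtils.FILE_FORMATS)
                .map(format -> new Object[] { format })
                .toArray(Object[][]::new);
    }

    private final FileFormat fileFormat;

    public HiveIcebergFileFormatTest(FileFormat fileFormat) {
        this.fileFormat = fileFormat;
    }
}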

17 Source : TestFlinkTableSink.java
with Apache License 2.0
from apache

@Parameterized.Parameters(name = "catalogName={0}, baseNamespace={1}, format={2}, isStreaming={3}")
public static Iterable<Object[]> parameters() {
    List<Object[]> parameters = Lists.newArrayList();
    for (FileFormat format : new FileFormat[] { FileFormat.ORC, FileFormat.AVRO, FileFormat.PARQUET }) {
        for (Boolean isStreaming : new Boolean[] { true, false }) {
            for (Object[] catalogParams : FlinkCatalogTestBase.parameters()) {
                String catalogName = (String) catalogParams[0];
                Namespace baseNamespace = (Namespace) catalogParams[1];
                parameters.add(new Object[] { catalogName, baseNamespace, format, isStreaming });
            }
        }
    }
    return parameters;
}

17 Source : TestStreamingReaderOperator.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public class TestStreamingReaderOperator extends TableTestBase {

    private static final Schema SCHEMA = new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get()), Types.NestedField.required(2, "data", Types.StringType.get()));

    private static final FileFormat DEFAULT_FORMAT = FileFormat.PARQUET;

    @Parameterized.Parameters(name = "FormatVersion={0}")
    public static Iterable<Object[]> parameters() {
        return ImmutableList.of(new Object[] { 1 }, new Object[] { 2 });
    }

    public TestStreamingReaderOperator(int formatVersion) {
        super(formatVersion);
    }

    @Before
    @Override
    public void setupTable() throws IOException {
        this.tableDir = temp.newFolder();
        this.metadataDir = new File(tableDir, "metadata");
        Assert.assertTrue(tableDir.delete());
        // Construct the iceberg table.
        table = create(SCHEMA, PartitionSpec.unpartitioned());
    }

    @Test
    public void testProcessAllRecords() throws Exception {
        List<List<Record>> expectedRecords = generateRecordsAndCommitTxn(10);
        List<FlinkInputSplit> splits = generateSplits();
        replacedert.replacedertEquals("Should have 10 splits", 10, splits.size());
        try (OneInputStreamOperatorTestHarness<FlinkInputSplit, RowData> harness = createReader()) {
            harness.setup();
            harness.open();
            SteppingMailboxProcessor processor = createLocalMailbox(harness);
            List<Record> expected = Lists.newArrayList();
            for (int i = 0; i < splits.size(); i++) {
                // Process this element to enqueue the split in the mailbox.
                harness.processElement(splits.get(i), -1);
                // Run the mailbox once to read all records from the given split.
                Assert.assertTrue("Should have processed 1 split", processor.runMailboxStep());
                // Assert that the output has the expected elements.
                expected.addAll(expectedRecords.get(i));
                TestHelpers.assertRecords(readOutputValues(harness), expected, SCHEMA);
            }
        }
    }

    @Test
    public void testTriggerCheckpoint() throws Exception {
        // Received emitted splits: split1, split2, split3, checkpoint request is triggered when reading records from
        // split1.
        List<List<Record>> expectedRecords = generateRecordsAndCommitTxn(3);
        List<FlinkInputSplit> splits = generateSplits();
        replacedert.replacedertEquals("Should have 3 splits", 3, splits.size());
        long timestamp = 0;
        try (OneInputStreamOperatorTestHarness<FlinkInputSplit, RowData> harness = createReader()) {
            harness.setup();
            harness.open();
            SteppingMailboxProcessor processor = createLocalMailbox(harness);
            harness.processElement(splits.get(0), ++timestamp);
            harness.processElement(splits.get(1), ++timestamp);
            harness.processElement(splits.get(2), ++timestamp);
            // Trigger snapshot state, it will start to work once all records from split0 are read.
            processor.getMainMailboxExecutor().execute(() -> harness.snapshot(1, 3), "Trigger snapshot");
            replacedert.replacedertTrue("Should have processed the split0", processor.runMailboxStep());
            replacedert.replacedertTrue("Should have processed the snapshot state action", processor.runMailboxStep());
            TestHelpers.replacedertRecords(readOutputValues(harness), expectedRecords.get(0), SCHEMA);
            // Read records from split1.
            replacedert.replacedertTrue("Should have processed the split1", processor.runMailboxStep());
            // Read records from split2.
            replacedert.replacedertTrue("Should have processed the split2", processor.runMailboxStep());
            TestHelpers.replacedertRecords(readOutputValues(harness), Lists.newArrayList(Iterables.concat(expectedRecords)), SCHEMA);
        }
    }

    @Test
    public void testCheckpointRestore() throws Exception {
        List<List<Record>> expectedRecords = generateRecordsAndCommitTxn(15);
        List<FlinkInputSplit> splits = generateSplits();
        replacedert.replacedertEquals("Should have 10 splits", 15, splits.size());
        OperatorSubtaskState state;
        List<Record> expected = Lists.newArrayList();
        try (OneInputStreamOperatorTestHarness<FlinkInputSplit, RowData> harness = createReader()) {
            harness.setup();
            harness.open();
            // Enqueue all the splits.
            for (FlinkInputSplit split : splits) {
                harness.processElement(split, -1);
            }
            // Read all records from the first five splits.
            SteppingMailboxProcessor localMailbox = createLocalMailbox(harness);
            for (int i = 0; i < 5; i++) {
                expected.addAll(expectedRecords.get(i));
                replacedert.replacedertTrue("Should have processed the split#" + i, localMailbox.runMailboxStep());
                TestHelpers.replacedertRecords(readOutputValues(harness), expected, SCHEMA);
            }
            // Snapshot state now,  there're 10 splits left in the state.
            state = harness.snapshot(1, 1);
        }
        expected.clear();
        try (OneInputStreamOperatorTestHarness<FlinkInputSplit, RowData> harness = createReader()) {
            harness.setup();
            // Recover to process the remaining splits.
            harness.initializeState(state);
            harness.open();
            SteppingMailboxProcessor localMailbox = createLocalMailbox(harness);
            for (int i = 5; i < 10; i++) {
                expected.addAll(expectedRecords.get(i));
                replacedert.replacedertTrue("Should have processed one split#" + i, localMailbox.runMailboxStep());
                TestHelpers.replacedertRecords(readOutputValues(harness), expected, SCHEMA);
            }
            // Let's process the final 5 splits now.
            for (int i = 10; i < 15; i++) {
                expected.addAll(expectedRecords.get(i));
                harness.processElement(splits.get(i), 1);
                replacedert.replacedertTrue("Should have processed the split#" + i, localMailbox.runMailboxStep());
                TestHelpers.replacedertRecords(readOutputValues(harness), expected, SCHEMA);
            }
        }
    }

    private List<Row> readOutputValues(OneInputStreamOperatorTestHarness<FlinkInputSplit, RowData> harness) {
        List<Row> results = Lists.newArrayList();
        for (RowData rowData : harness.extractOutputValues()) {
            results.add(Row.of(rowData.getInt(0), rowData.getString(1).toString()));
        }
        return results;
    }

    private List<List<Record>> generateRecordsAndCommitTxn(int commitTimes) throws IOException {
        List<List<Record>> expectedRecords = Lists.newArrayList();
        for (int i = 0; i < commitTimes; i++) {
            List<Record> records = RandomGenericData.generate(SCHEMA, 100, 0L);
            expectedRecords.add(records);
            // Commit those records to iceberg table.
            writeRecords(records);
        }
        return expectedRecords;
    }

    private void writeRecords(List<Record> records) throws IOException {
        GenericAppenderHelper appender = new GenericAppenderHelper(table, DEFAULT_FORMAT, temp);
        appender.appendToTable(records);
    }

    private List<FlinkInputSplit> generateSplits() {
        List<FlinkInputSplit> inputSplits = Lists.newArrayList();
        List<Long> snapshotIds = SnapshotUtil.currentAncestors(table);
        for (int i = snapshotIds.size() - 1; i >= 0; i--) {
            ScanContext scanContext;
            if (i == snapshotIds.size() - 1) {
                // Generate the splits from the first snapshot.
                scanContext = ScanContext.builder().useSnapshotId(snapshotIds.get(i)).build();
            } else {
                // Generate the splits between the previous snapshot and current snapshot.
                scanContext = ScanContext.builder().startSnapshotId(snapshotIds.get(i + 1)).endSnapshotId(snapshotIds.get(i)).build();
            }
            Collections.addAll(inputSplits, FlinkSplitGenerator.createInputSplits(table, scanContext));
        }
        return inputSplits;
    }

    private OneInputStreamOperatorTestHarness<FlinkInputSplit, RowData> createReader() throws Exception {
        // This input format is used to open the emitted splits.
        FlinkInputFormat inputFormat = FlinkSource.forRowData().tableLoader(TestTableLoader.of(tableDir.getAbsolutePath())).buildFormat();
        OneInputStreamOperatorFactory<FlinkInputSplit, RowData> factory = StreamingReaderOperator.factory(inputFormat);
        OneInputStreamOperatorTestHarness<FlinkInputSplit, RowData> harness = new OneInputStreamOperatorTestHarness<>(factory, 1, 1, 0);
        harness.getStreamConfig().setTimeCharacteristic(TimeCharacteristic.ProcessingTime);
        return harness;
    }

    private SteppingMailboxProcessor createLocalMailbox(OneInputStreamOperatorTestHarness<FlinkInputSplit, RowData> harness) {
        return new SteppingMailboxProcessor(MailboxDefaultAction.Controller::suspendDefaultAction, harness.getTaskMailbox(), StreamTaskActionExecutor.IMMEDIATE);
    }
}

17 Source : TestStreamingMonitorFunction.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public class TestStreamingMonitorFunction extends TableTestBase {

    private static final Schema SCHEMA = new Schema(Types.NestedField.required(1, "id", Types.IntegerType.get()), Types.NestedField.required(2, "data", Types.StringType.get()));

    private static final FileFormat DEFAULT_FORMAT = FileFormat.PARQUET;

    private static final long WAIT_TIME_MILLIS = 10 * 1000L;

    @Parameterized.Parameters(name = "FormatVersion={0}")
    public static Iterable<Object[]> parameters() {
        return ImmutableList.of(new Object[] { 1 }, new Object[] { 2 });
    }

    public TestStreamingMonitorFunction(int formatVersion) {
        super(formatVersion);
    }

    @Before
    @Override
    public void setupTable() throws IOException {
        this.tableDir = temp.newFolder();
        this.metadataDir = new File(tableDir, "metadata");
        Assert.assertTrue(tableDir.delete());
        // Construct the iceberg table.
        table = create(SCHEMA, PartitionSpec.unpartitioned());
    }

    private void runSourceFunctionInTask(TestSourceContext sourceContext, StreamingMonitorFunction function) {
        Thread task = new Thread(() -> {
            try {
                function.run(sourceContext);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        });
        task.start();
    }

    @Test
    public void testConsumeWithoutStartSnapshotId() throws Exception {
        List<List<Record>> recordsList = generateRecordsAndCommitTxn(10);
        ScanContext scanContext = ScanContext.builder().monitorInterval(Duration.ofMillis(100)).build();
        StreamingMonitorFunction function = createFunction(scanContext);
        try (AbstractStreamOperatorTestHarness<FlinkInputSplit> harness = createHarness(function)) {
            harness.setup();
            harness.open();
            CountDownLatch latch = new CountDownLatch(1);
            TestSourceContext sourceContext = new TestSourceContext(latch);
            runSourceFunctionInTask(sourceContext, function);
            replacedert.replacedertTrue("Should have expected elements.", latch.await(WAIT_TIME_MILLIS, TimeUnit.MILLISECONDS));
            Thread.sleep(1000L);
            // Stop the stream task.
            function.close();
            replacedert.replacedertEquals("Should produce the expected splits", 1, sourceContext.splits.size());
            TestHelpers.replacedertRecords(sourceContext.toRows(), Lists.newArrayList(Iterables.concat(recordsList)), SCHEMA);
        }
    }

    @Test
    public void testConsumeFromStartSnapshotId() throws Exception {
        // Commit the first five transactions.
        generateRecordsAndCommitTxn(5);
        long startSnapshotId = table.currentSnapshot().snapshotId();
        // Commit the next five transactions.
        List<List<Record>> recordsList = generateRecordsAndCommitTxn(5);
        ScanContext scanContext = ScanContext.builder().monitorInterval(Duration.ofMillis(100)).startSnapshotId(startSnapshotId).build();
        StreamingMonitorFunction function = createFunction(scanContext);
        try (AbstractStreamOperatorTestHarness<FlinkInputSplit> harness = createHarness(function)) {
            harness.setup();
            harness.open();
            CountDownLatch latch = new CountDownLatch(1);
            TestSourceContext sourceContext = new TestSourceContext(latch);
            runSourceFunctionInTask(sourceContext, function);
            replacedert.replacedertTrue("Should have expected elements.", latch.await(WAIT_TIME_MILLIS, TimeUnit.MILLISECONDS));
            Thread.sleep(1000L);
            // Stop the stream task.
            function.close();
            replacedert.replacedertEquals("Should produce the expected splits", 1, sourceContext.splits.size());
            TestHelpers.replacedertRecords(sourceContext.toRows(), Lists.newArrayList(Iterables.concat(recordsList)), SCHEMA);
        }
    }

    @Test
    public void testCheckpointRestore() throws Exception {
        List<List<Record>> recordsList = generateRecordsAndCommitTxn(10);
        ScanContext scanContext = ScanContext.builder().monitorInterval(Duration.ofMillis(100)).build();
        StreamingMonitorFunction func = createFunction(scanContext);
        OperatorSubtaskState state;
        try (AbstractStreamOperatorTestHarness<FlinkInputSplit> harness = createHarness(func)) {
            harness.setup();
            harness.open();
            CountDownLatch latch = new CountDownLatch(1);
            TestSourceContext sourceContext = new TestSourceContext(latch);
            runSourceFunctionInTask(sourceContext, func);
            replacedert.replacedertTrue("Should have expected elements.", latch.await(WAIT_TIME_MILLIS, TimeUnit.MILLISECONDS));
            Thread.sleep(1000L);
            state = harness.snapshot(1, 1);
            // Stop the stream task.
            func.close();
            replacedert.replacedertEquals("Should produce the expected splits", 1, sourceContext.splits.size());
            TestHelpers.replacedertRecords(sourceContext.toRows(), Lists.newArrayList(Iterables.concat(recordsList)), SCHEMA);
        }
        List<List<Record>> newRecordsList = generateRecordsAndCommitTxn(10);
        StreamingMonitorFunction newFunc = createFunction(scanContext);
        try (AbstractStreamOperatorTestHarness<FlinkInputSplit> harness = createHarness(newFunc)) {
            harness.setup();
            // Recover to process the remaining snapshots.
            harness.initializeState(state);
            harness.open();
            CountDownLatch latch = new CountDownLatch(1);
            TestSourceContext sourceContext = new TestSourceContext(latch);
            runSourceFunctionInTask(sourceContext, newFunc);
            replacedert.replacedertTrue("Should have expected elements.", latch.await(WAIT_TIME_MILLIS, TimeUnit.MILLISECONDS));
            Thread.sleep(1000L);
            // Stop the stream task.
            newFunc.close();
            replacedert.replacedertEquals("Should produce the expected splits", 1, sourceContext.splits.size());
            TestHelpers.replacedertRecords(sourceContext.toRows(), Lists.newArrayList(Iterables.concat(newRecordsList)), SCHEMA);
        }
    }

    private List<List<Record>> generateRecordsAndCommitTxn(int commitTimes) throws IOException {
        List<List<Record>> expectedRecords = Lists.newArrayList();
        for (int i = 0; i < commitTimes; i++) {
            List<Record> records = RandomGenericData.generate(SCHEMA, 100, 0L);
            expectedRecords.add(records);
            // Commit those records to iceberg table.
            writeRecords(records);
        }
        return expectedRecords;
    }

    private void writeRecords(List<Record> records) throws IOException {
        GenericAppenderHelper appender = new GenericAppenderHelper(table, DEFAULT_FORMAT, temp);
        appender.appendToTable(records);
    }

    private StreamingMonitorFunction createFunction(ScanContext scanContext) {
        return new StreamingMonitorFunction(TestTableLoader.of(tableDir.getAbsolutePath()), scanContext);
    }

    private AbstractStreamOperatorTestHarness<FlinkInputSplit> createHarness(StreamingMonitorFunction function) throws Exception {
        StreamSource<FlinkInputSplit, StreamingMonitorFunction> streamSource = new StreamSource<>(function);
        return new AbstractStreamOperatorTestHarness<>(streamSource, 1, 1, 0);
    }

    private class TestSourceContext implements SourceFunction.SourceContext<FlinkInputSplit> {

        private final List<FlinkInputSplit> splits = Lists.newArrayList();

        private final Object checkpointLock = new Object();

        private final CountDownLatch latch;

        TestSourceContext(CountDownLatch latch) {
            this.latch = latch;
        }

        @Override
        public void collect(FlinkInputSplit element) {
            splits.add(element);
            latch.countDown();
        }

        @Override
        public void collectWithTimestamp(FlinkInputSplit element, long timestamp) {
            collect(element);
        }

        @Override
        public void emitWatermark(Watermark mark) {
        }

        @Override
        public void markAsTemporarilyIdle() {
        }

        @Override
        public Object getCheckpointLock() {
            return checkpointLock;
        }

        @Override
        public void close() {
        }

        private List<Row> toRows() throws IOException {
            FlinkInputFormat format = FlinkSource.forRowData().tableLoader(TestTableLoader.of(tableDir.getAbsolutePath())).buildFormat();
            List<Row> rows = Lists.newArrayList();
            for (FlinkInputSplit split : splits) {
                format.open(split);
                RowData element = null;
                try {
                    while (!format.reachedEnd()) {
                        element = format.nextRecord(element);
                        rows.add(Row.of(element.getInt(0), element.getString(1).toString()));
                    }
                } finally {
                    format.close();
                }
            }
            return rows;
        }
    }
}

17 Source : TestIcebergFilesCommitter.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public class TestIcebergFilesCommitter extends TableTestBase {

    private static final Configuration CONF = new Configuration();

    private String tablePath;

    private File flinkManifestFolder;

    private final FileFormat format;

    @Parameterized.Parameters(name = "FileFormat = {0}, FormatVersion={1}")
    public static Object[][] parameters() {
        return new Object[][] { new Object[] { "avro", 1 }, new Object[] { "avro", 2 }, new Object[] { "parquet", 1 }, new Object[] { "parquet", 2 }, new Object[] { "orc", 1 } };
    }

    public TestIcebergFilesCommitter(String format, int formatVersion) {
        super(formatVersion);
        this.format = FileFormat.valueOf(format.toUpperCase(Locale.ENGLISH));
    }

    @Before
    public void setupTable() throws IOException {
        flinkManifestFolder = temp.newFolder();
        this.tableDir = temp.newFolder();
        this.metadataDir = new File(tableDir, "metadata");
        Assert.assertTrue(tableDir.delete());
        tablePath = tableDir.getAbsolutePath();
        // Construct the iceberg table.
        table = create(SimpleDataUtil.SCHEMA, PartitionSpec.unpartitioned());
        table.updateProperties().set(DEFAULT_FILE_FORMAT, format.name()).set(FLINK_MANIFEST_LOCATION, flinkManifestFolder.getAbsolutePath()).commit();
    }

    @Test
    public void testCommitTxnWithoutDataFiles() throws Exception {
        long checkpointId = 0;
        long timestamp = 0;
        JobID jobId = new JobID();
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
            harness.setup();
            harness.open();
            SimpleDataUtil.assertTableRows(table, Lists.newArrayList());
            assertSnapshotSize(0);
            assertMaxCommittedCheckpointId(jobId, -1L);
            // It's better to advance the max-committed-checkpoint-id in the Iceberg snapshot, so that a future Flink
            // job failover won't fail.
            for (int i = 1; i <= 3; i++) {
                harness.snapshot(++checkpointId, ++timestamp);
                assertFlinkManifests(0);
                harness.notifyOfCompletedCheckpoint(checkpointId);
                assertFlinkManifests(0);
                assertSnapshotSize(i);
                assertMaxCommittedCheckpointId(jobId, checkpointId);
            }
        }
    }

    private WriteResult of(DataFile dataFile) {
        return WriteResult.builder().addDataFiles(dataFile).build();
    }

    @Test
    public void testCommitTxn() throws Exception {
        // Test with 3 consecutive checkpoints:
        // 1. snapshotState for checkpoint#1
        // 2. notifyCheckpointComplete for checkpoint#1
        // 3. snapshotState for checkpoint#2
        // 4. notifyCheckpointComplete for checkpoint#2
        // 5. snapshotState for checkpoint#3
        // 6. notifyCheckpointComplete for checkpoint#3
        long timestamp = 0;
        JobID jobID = new JobID();
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobID)) {
            harness.setup();
            harness.open();
            assertSnapshotSize(0);
            List<RowData> rows = Lists.newArrayListWithExpectedSize(3);
            for (int i = 1; i <= 3; i++) {
                RowData rowData = SimpleDataUtil.createRowData(i, "hello" + i);
                DataFile dataFile = writeDataFile("data-" + i, ImmutableList.of(rowData));
                harness.processElement(of(dataFile), ++timestamp);
                rows.add(rowData);
                harness.snapshot(i, ++timestamp);
                assertFlinkManifests(1);
                harness.notifyOfCompletedCheckpoint(i);
                assertFlinkManifests(0);
                SimpleDataUtil.assertTableRows(table, ImmutableList.copyOf(rows));
                assertSnapshotSize(i);
                assertMaxCommittedCheckpointId(jobID, i);
            }
        }
    }

    @Test
    public void testOrderedEventsBetweenCheckpoints() throws Exception {
        // It's possible that two checkpoints happen in the following orders:
        // 1. snapshotState for checkpoint#1;
        // 2. snapshotState for checkpoint#2;
        // 3. notifyCheckpointComplete for checkpoint#1;
        // 4. notifyCheckpointComplete for checkpoint#2;
        long timestamp = 0;
        JobID jobId = new JobID();
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
            harness.setup();
            harness.open();
            assertMaxCommittedCheckpointId(jobId, -1L);
            RowData row1 = SimpleDataUtil.createRowData(1, "hello");
            DataFile dataFile1 = writeDataFile("data-1", ImmutableList.of(row1));
            harness.processElement(of(dataFile1), ++timestamp);
            assertMaxCommittedCheckpointId(jobId, -1L);
            // 1. snapshotState for checkpoint#1
            long firstCheckpointId = 1;
            harness.snapshot(firstCheckpointId, ++timestamp);
            assertFlinkManifests(1);
            RowData row2 = SimpleDataUtil.createRowData(2, "world");
            DataFile dataFile2 = writeDataFile("data-2", ImmutableList.of(row2));
            harness.processElement(of(dataFile2), ++timestamp);
            assertMaxCommittedCheckpointId(jobId, -1L);
            // 2. snapshotState for checkpoint#2
            long secondCheckpointId = 2;
            harness.snapshot(secondCheckpointId, ++timestamp);
            assertFlinkManifests(2);
            // 3. notifyCheckpointComplete for checkpoint#1
            harness.notifyOfCompletedCheckpoint(firstCheckpointId);
            SimpleDataUtil.assertTableRows(table, ImmutableList.of(row1));
            assertMaxCommittedCheckpointId(jobId, firstCheckpointId);
            assertFlinkManifests(1);
            // 4. notifyCheckpointComplete for checkpoint#2
            harness.notifyOfCompletedCheckpoint(secondCheckpointId);
            SimpleDataUtil.assertTableRows(table, ImmutableList.of(row1, row2));
            assertMaxCommittedCheckpointId(jobId, secondCheckpointId);
            assertFlinkManifests(0);
        }
    }

    @Test
    public void testDisorderedEventsBetweenCheckpoints() throws Exception {
        // It's possible that the two checkpoints happen in the following orders:
        // 1. snapshotState for checkpoint#1;
        // 2. snapshotState for checkpoint#2;
        // 3. notifyCheckpointComplete for checkpoint#2;
        // 4. notifyCheckpointComplete for checkpoint#1;
        long timestamp = 0;
        JobID jobId = new JobID();
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
            harness.setup();
            harness.open();
            assertMaxCommittedCheckpointId(jobId, -1L);
            RowData row1 = SimpleDataUtil.createRowData(1, "hello");
            DataFile dataFile1 = writeDataFile("data-1", ImmutableList.of(row1));
            harness.processElement(of(dataFile1), ++timestamp);
            assertMaxCommittedCheckpointId(jobId, -1L);
            // 1. snapshotState for checkpoint#1
            long firstCheckpointId = 1;
            harness.snapshot(firstCheckpointId, ++timestamp);
            assertFlinkManifests(1);
            RowData row2 = SimpleDataUtil.createRowData(2, "world");
            DataFile dataFile2 = writeDataFile("data-2", ImmutableList.of(row2));
            harness.processElement(of(dataFile2), ++timestamp);
            assertMaxCommittedCheckpointId(jobId, -1L);
            // 2. snapshotState for checkpoint#2
            long secondCheckpointId = 2;
            harness.snapshot(secondCheckpointId, ++timestamp);
            assertFlinkManifests(2);
            // 3. notifyCheckpointComplete for checkpoint#2
            harness.notifyOfCompletedCheckpoint(secondCheckpointId);
            SimpleDataUtil.assertTableRows(table, ImmutableList.of(row1, row2));
            assertMaxCommittedCheckpointId(jobId, secondCheckpointId);
            assertFlinkManifests(0);
            // 4. notifyCheckpointComplete for checkpoint#1
            harness.notifyOfCompletedCheckpoint(firstCheckpointId);
            SimpleDataUtil.assertTableRows(table, ImmutableList.of(row1, row2));
            assertMaxCommittedCheckpointId(jobId, secondCheckpointId);
            assertFlinkManifests(0);
        }
    }

    @Test
    public void testRecoveryFromValidSnapshot() throws Exception {
        long checkpointId = 0;
        long timestamp = 0;
        List<RowData> expectedRows = Lists.newArrayList();
        OperatorSubtaskState snapshot;
        JobID jobId = new JobID();
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
            harness.setup();
            harness.open();
            assertSnapshotSize(0);
            assertMaxCommittedCheckpointId(jobId, -1L);
            RowData row = SimpleDataUtil.createRowData(1, "hello");
            expectedRows.add(row);
            DataFile dataFile1 = writeDataFile("data-1", ImmutableList.of(row));
            harness.processElement(of(dataFile1), ++timestamp);
            snapshot = harness.snapshot(++checkpointId, ++timestamp);
            assertFlinkManifests(1);
            harness.notifyOfCompletedCheckpoint(checkpointId);
            assertFlinkManifests(0);
            SimpleDataUtil.assertTableRows(table, ImmutableList.of(row));
            assertSnapshotSize(1);
            assertMaxCommittedCheckpointId(jobId, checkpointId);
        }
        // Restore from the given snapshot
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
            harness.setup();
            harness.initializeState(snapshot);
            harness.open();
            SimpleDataUtil.assertTableRows(table, expectedRows);
            assertSnapshotSize(1);
            assertMaxCommittedCheckpointId(jobId, checkpointId);
            RowData row = SimpleDataUtil.createRowData(2, "world");
            expectedRows.add(row);
            DataFile dataFile = writeDataFile("data-2", ImmutableList.of(row));
            harness.processElement(of(dataFile), ++timestamp);
            harness.snapshot(++checkpointId, ++timestamp);
            assertFlinkManifests(1);
            harness.notifyOfCompletedCheckpoint(checkpointId);
            assertFlinkManifests(0);
            SimpleDataUtil.assertTableRows(table, expectedRows);
            assertSnapshotSize(2);
            assertMaxCommittedCheckpointId(jobId, checkpointId);
        }
    }

    @Test
    public void testRecoveryFromSnapshotWithoutCompletedNotification() throws Exception {
        // A checkpoint has two steps: 1. snapshotState(ckp); 2. notifyCheckpointComplete(ckp). It's possible that the
        // Flink job will restore from a checkpoint with only step #1 finished.
        long checkpointId = 0;
        long timestamp = 0;
        OperatorSubtaskState snapshot;
        List<RowData> expectedRows = Lists.newArrayList();
        JobID jobId = new JobID();
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
            harness.setup();
            harness.open();
            assertSnapshotSize(0);
            assertMaxCommittedCheckpointId(jobId, -1L);
            RowData row = SimpleDataUtil.createRowData(1, "hello");
            expectedRows.add(row);
            DataFile dataFile = writeDataFile("data-1", ImmutableList.of(row));
            harness.processElement(of(dataFile), ++timestamp);
            snapshot = harness.snapshot(++checkpointId, ++timestamp);
            SimpleDataUtil.assertTableRows(table, ImmutableList.of());
            assertMaxCommittedCheckpointId(jobId, -1L);
            assertFlinkManifests(1);
        }
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
            harness.setup();
            harness.initializeState(snapshot);
            harness.open();
            // All Flink manifests should be cleaned up because the unfinished Iceberg transaction has been committed.
            assertFlinkManifests(0);
            SimpleDataUtil.assertTableRows(table, expectedRows);
            assertMaxCommittedCheckpointId(jobId, checkpointId);
            harness.snapshot(++checkpointId, ++timestamp);
            // No new records were written, so no new manifest is generated.
            replacedertFlinkManifests(0);
            harness.notifyOfCompletedCheckpoint(checkpointId);
            replacedertFlinkManifests(0);
            SimpleDataUtil.replacedertTableRows(table, expectedRows);
            replacedertSnapshotSize(2);
            replacedertMaxCommittedCheckpointId(jobId, checkpointId);
            RowData row = SimpleDataUtil.createRowData(2, "world");
            expectedRows.add(row);
            DataFile dataFile = writeDataFile("data-2", ImmutableList.of(row));
            harness.processElement(of(dataFile), ++timestamp);
            snapshot = harness.snapshot(++checkpointId, ++timestamp);
            replacedertFlinkManifests(1);
        }
        // Redeploy the Flink job from an external checkpoint.
        JobID newJobId = new JobID();
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(newJobId)) {
            harness.setup();
            harness.initializeState(snapshot);
            harness.open();
            // All Flink manifests should be cleaned because the unfinished Iceberg transaction has been committed.
            replacedertFlinkManifests(0);
            replacedertMaxCommittedCheckpointId(newJobId, -1);
            replacedertMaxCommittedCheckpointId(jobId, checkpointId);
            SimpleDataUtil.replacedertTableRows(table, expectedRows);
            replacedertSnapshotSize(3);
            RowData row = SimpleDataUtil.createRowData(3, "foo");
            expectedRows.add(row);
            DataFile dataFile = writeDataFile("data-3", ImmutableList.of(row));
            harness.processElement(of(dataFile), ++timestamp);
            harness.snapshot(++checkpointId, ++timestamp);
            replacedertFlinkManifests(1);
            harness.notifyOfCompletedCheckpoint(checkpointId);
            replacedertFlinkManifests(0);
            SimpleDataUtil.replacedertTableRows(table, expectedRows);
            replacedertSnapshotSize(4);
            replacedertMaxCommittedCheckpointId(newJobId, checkpointId);
        }
    }

    @Test
    public void testStartAnotherJobToWriteSameTable() throws Exception {
        long checkpointId = 0;
        long timestamp = 0;
        List<RowData> rows = Lists.newArrayList();
        List<RowData> tableRows = Lists.newArrayList();
        JobID oldJobId = new JobID();
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(oldJobId)) {
            harness.setup();
            harness.open();
            replacedertSnapshotSize(0);
            replacedertMaxCommittedCheckpointId(oldJobId, -1L);
            for (int i = 1; i <= 3; i++) {
                rows.add(SimpleDataUtil.createRowData(i, "hello" + i));
                tableRows.addAll(rows);
                DataFile dataFile = writeDataFile(String.format("data-%d", i), rows);
                harness.processElement(of(dataFile), ++timestamp);
                harness.snapshot(++checkpointId, ++timestamp);
                replacedertFlinkManifests(1);
                harness.notifyOfCompletedCheckpoint(checkpointId);
                replacedertFlinkManifests(0);
                SimpleDataUtil.replacedertTableRows(table, tableRows);
                replacedertSnapshotSize(i);
                replacedertMaxCommittedCheckpointId(oldJobId, checkpointId);
            }
        }
        // The newly started job will start with checkpoint = 1 again.
        checkpointId = 0;
        timestamp = 0;
        JobID newJobId = new JobID();
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(newJobId)) {
            harness.setup();
            harness.open();
            replacedertSnapshotSize(3);
            replacedertMaxCommittedCheckpointId(oldJobId, 3);
            replacedertMaxCommittedCheckpointId(newJobId, -1);
            rows.add(SimpleDataUtil.createRowData(2, "world"));
            tableRows.addAll(rows);
            DataFile dataFile = writeDataFile("data-new-1", rows);
            harness.processElement(of(dataFile), ++timestamp);
            harness.snapshot(++checkpointId, ++timestamp);
            replacedertFlinkManifests(1);
            harness.notifyOfCompletedCheckpoint(checkpointId);
            replacedertFlinkManifests(0);
            SimpleDataUtil.replacedertTableRows(table, tableRows);
            replacedertSnapshotSize(4);
            replacedertMaxCommittedCheckpointId(newJobId, checkpointId);
        }
    }

    @Test
    public void testMultipleJobsWriteSameTable() throws Exception {
        long timestamp = 0;
        List<RowData> tableRows = Lists.newArrayList();
        JobID[] jobs = new JobID[] { new JobID(), new JobID(), new JobID() };
        for (int i = 0; i < 20; i++) {
            int jobIndex = i % 3;
            int checkpointId = i / 3;
            JobID jobId = jobs[jobIndex];
            try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
                harness.setup();
                harness.open();
                replacedertSnapshotSize(i);
                replacedertMaxCommittedCheckpointId(jobId, checkpointId == 0 ? -1 : checkpointId);
                List<RowData> rows = Lists.newArrayList(SimpleDataUtil.createRowData(i, "word-" + i));
                tableRows.addAll(rows);
                DataFile dataFile = writeDataFile(String.format("data-%d", i), rows);
                harness.processElement(of(dataFile), ++timestamp);
                harness.snapshot(checkpointId + 1, ++timestamp);
                replacedertFlinkManifests(1);
                harness.notifyOfCompletedCheckpoint(checkpointId + 1);
                replacedertFlinkManifests(0);
                SimpleDataUtil.replacedertTableRows(table, tableRows);
                replacedertSnapshotSize(i + 1);
                replacedertMaxCommittedCheckpointId(jobId, checkpointId + 1);
            }
        }
    }

    @Test
    public void testBoundedStream() throws Exception {
        JobID jobId = new JobID();
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
            harness.setup();
            harness.open();
            replacedertFlinkManifests(0);
            replacedertSnapshotSize(0);
            replacedertMaxCommittedCheckpointId(jobId, -1L);
            List<RowData> tableRows = Lists.newArrayList(SimpleDataUtil.createRowData(1, "word-1"));
            DataFile dataFile = writeDataFile("data-1", tableRows);
            harness.processElement(of(dataFile), 1);
            ((BoundedOneInput) harness.getOneInputOperator()).endInput();
            replacedertFlinkManifests(0);
            SimpleDataUtil.replacedertTableRows(table, tableRows);
            replacedertSnapshotSize(1);
            replacedertMaxCommittedCheckpointId(jobId, Long.MAX_VALUE);
        }
    }

    @Test
    public void testFlinkManifests() throws Exception {
        long timestamp = 0;
        final long checkpoint = 10;
        JobID jobId = new JobID();
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
            harness.setup();
            harness.open();
            replacedertMaxCommittedCheckpointId(jobId, -1L);
            RowData row1 = SimpleDataUtil.createRowData(1, "hello");
            DataFile dataFile1 = writeDataFile("data-1", ImmutableList.of(row1));
            harness.processElement(of(dataFile1), ++timestamp);
            replacedertMaxCommittedCheckpointId(jobId, -1L);
            // 1. snapshotState for checkpoint#1
            harness.snapshot(checkpoint, ++timestamp);
            List<Path> manifestPaths = replacedertFlinkManifests(1);
            Path manifestPath = manifestPaths.get(0);
            replacedert.replacedertEquals("File name should have the expected pattern.", String.format("%s-%05d-%d-%d-%05d.avro", jobId, 0, 0, checkpoint, 1), manifestPath.getFileName().toString());
            // 2. Read the data files from manifests and assert.
            List<DataFile> dataFiles = FlinkManifestUtil.readDataFiles(createTestingManifestFile(manifestPath), table.io());
            replacedert.replacedertEquals(1, dataFiles.size());
            TestFlinkManifest.checkContentFile(dataFile1, dataFiles.get(0));
            // 3. notifyCheckpointComplete for checkpoint#1
            harness.notifyOfCompletedCheckpoint(checkpoint);
            SimpleDataUtil.replacedertTableRows(table, ImmutableList.of(row1));
            replacedertMaxCommittedCheckpointId(jobId, checkpoint);
            replacedertFlinkManifests(0);
        }
    }

    @Test
    public void testDeleteFiles() throws Exception {
        Assume.assumeFalse("Equality deletes are only supported in format v2.", formatVersion < 2);
        long timestamp = 0;
        long checkpoint = 10;
        JobID jobId = new JobID();
        FileAppenderFactory<RowData> appenderFactory = createDeletableAppenderFactory();
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
            harness.setup();
            harness.open();
            replacedertMaxCommittedCheckpointId(jobId, -1L);
            RowData row1 = SimpleDataUtil.createInsert(1, "aaa");
            DataFile dataFile1 = writeDataFile("data-file-1", ImmutableList.of(row1));
            harness.processElement(of(dataFile1), ++timestamp);
            replacedertMaxCommittedCheckpointId(jobId, -1L);
            // 1. snapshotState for checkpoint#1
            harness.snapshot(checkpoint, ++timestamp);
            List<Path> manifestPaths = replacedertFlinkManifests(1);
            Path manifestPath = manifestPaths.get(0);
            replacedert.replacedertEquals("File name should have the expected pattern.", String.format("%s-%05d-%d-%d-%05d.avro", jobId, 0, 0, checkpoint, 1), manifestPath.getFileName().toString());
            // 2. Read the data files from manifests and assert.
            List<DataFile> dataFiles = FlinkManifestUtil.readDataFiles(createTestingManifestFile(manifestPath), table.io());
            replacedert.replacedertEquals(1, dataFiles.size());
            TestFlinkManifest.checkContentFile(dataFile1, dataFiles.get(0));
            // 3. notifyCheckpointComplete for checkpoint#1
            harness.notifyOfCompletedCheckpoint(checkpoint);
            SimpleDataUtil.replacedertTableRows(table, ImmutableList.of(row1));
            replacedertMaxCommittedCheckpointId(jobId, checkpoint);
            replacedertFlinkManifests(0);
            // 4. process both data files and delete files.
            RowData row2 = SimpleDataUtil.createInsert(2, "bbb");
            DataFile dataFile2 = writeDataFile("data-file-2", ImmutableList.of(row2));
            RowData delete1 = SimpleDataUtil.createDelete(1, "aaa");
            DeleteFile deleteFile1 = writeEqDeleteFile(appenderFactory, "delete-file-1", ImmutableList.of(delete1));
            harness.processElement(WriteResult.builder().addDataFiles(dataFile2).addDeleteFiles(deleteFile1).build(), ++timestamp);
            replacedertMaxCommittedCheckpointId(jobId, checkpoint);
            // 5. snapshotState for checkpoint#2
            harness.snapshot(++checkpoint, ++timestamp);
            replacedertFlinkManifests(2);
            // 6. notifyCheckpointComplete for checkpoint#2
            harness.notifyOfCompletedCheckpoint(checkpoint);
            SimpleDataUtil.replacedertTableRows(table, ImmutableList.of(row2));
            replacedertMaxCommittedCheckpointId(jobId, checkpoint);
            replacedertFlinkManifests(0);
        }
    }

    @Test
    public void testValidateDataFileExist() throws Exception {
        Assume.assumeFalse("Equality deletes are only supported in format v2.", formatVersion < 2);
        long timestamp = 0;
        long checkpoint = 10;
        JobID jobId = new JobID();
        FileAppenderFactory<RowData> appenderFactory = createDeletableAppenderFactory();
        RowData insert1 = SimpleDataUtil.createInsert(1, "aaa");
        DataFile dataFile1 = writeDataFile("data-file-1", ImmutableList.of(insert1));
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
            harness.setup();
            harness.open();
            // Txn#1: insert the row <1, 'aaa'>
            harness.processElement(WriteResult.builder().addDataFiles(dataFile1).build(), ++timestamp);
            harness.snapshot(checkpoint, ++timestamp);
            harness.notifyOfCompletedCheckpoint(checkpoint);
            // Txn#2: Overwrite the committed data-file-1
            RowData insert2 = SimpleDataUtil.createInsert(2, "bbb");
            DataFile dataFile2 = writeDataFile("data-file-2", ImmutableList.of(insert2));
            new TestTableLoader(tablePath).loadTable().newOverwrite().addFile(dataFile2).deleteFile(dataFile1).commit();
        }
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
            harness.setup();
            harness.open();
            // Txn#3: position-delete the <1, 'aaa'> (NOT committed).
            DeleteFile deleteFile1 = writePosDeleteFile(appenderFactory, "pos-delete-file-1", ImmutableList.of(Pair.of(dataFile1.path(), 0L)));
            harness.processElement(WriteResult.builder().addDeleteFiles(deleteFile1).addReferencedDataFiles(dataFile1.path()).build(), ++timestamp);
            harness.snapshot(++checkpoint, ++timestamp);
            // Txn#3: validation will fail when committing.
            final long currentCheckpointId = checkpoint;
            AssertHelpers.assertThrows("Validation should fail because of non-existent data files.", ValidationException.class, "Cannot commit, missing data files", () -> {
                harness.notifyOfCompletedCheckpoint(currentCheckpointId);
                return null;
            });
        }
    }

    @Test
    public void testCommitTwoCheckpointsInSingleTxn() throws Exception {
        Assume.assumeFalse("Equality deletes are only supported in format v2.", formatVersion < 2);
        long timestamp = 0;
        long checkpoint = 10;
        JobID jobId = new JobID();
        FileAppenderFactory<RowData> appenderFactory = createDeletableAppenderFactory();
        try (OneInputStreamOperatorTestHarness<WriteResult, Void> harness = createStreamSink(jobId)) {
            harness.setup();
            harness.open();
            replacedertMaxCommittedCheckpointId(jobId, -1L);
            RowData insert1 = SimpleDataUtil.createInsert(1, "aaa");
            RowData insert2 = SimpleDataUtil.createInsert(2, "bbb");
            RowData delete3 = SimpleDataUtil.createDelete(3, "ccc");
            DataFile dataFile1 = writeDataFile("data-file-1", ImmutableList.of(insert1, insert2));
            DeleteFile deleteFile1 = writeEqDeleteFile(appenderFactory, "delete-file-1", ImmutableList.of(delete3));
            harness.processElement(WriteResult.builder().addDataFiles(dataFile1).addDeleteFiles(deleteFile1).build(), ++timestamp);
            // The 1st snapshotState.
            harness.snapshot(checkpoint, ++timestamp);
            RowData insert4 = SimpleDataUtil.createInsert(4, "ddd");
            RowData delete2 = SimpleDataUtil.createDelete(2, "bbb");
            DataFile dataFile2 = writeDataFile("data-file-2", ImmutableList.of(insert4));
            DeleteFile deleteFile2 = writeEqDeleteFile(appenderFactory, "delete-file-2", ImmutableList.of(delete2));
            harness.processElement(WriteResult.builder().addDataFiles(dataFile2).addDeleteFiles(deleteFile2).build(), ++timestamp);
            // The 2nd snapshotState.
            harness.snapshot(++checkpoint, ++timestamp);
            // Notify the 2nd snapshot to complete.
            harness.notifyOfCompletedCheckpoint(checkpoint);
            SimpleDataUtil.replacedertTableRows(table, ImmutableList.of(insert1, insert4));
            replacedertMaxCommittedCheckpointId(jobId, checkpoint);
            replacedertFlinkManifests(0);
            replacedert.replacedertEquals("Should have committed 2 txn.", 2, ImmutableList.copyOf(table.snapshots()).size());
        }
    }

    private DeleteFile writeEqDeleteFile(FileAppenderFactory<RowData> appenderFactory, String filename, List<RowData> deletes) throws IOException {
        return SimpleDataUtil.writeEqDeleteFile(table, FileFormat.PARQUET, tablePath, filename, appenderFactory, deletes);
    }

    private DeleteFile writePosDeleteFile(FileAppenderFactory<RowData> appenderFactory, String filename, List<Pair<CharSequence, Long>> positions) throws IOException {
        return SimpleDataUtil.writePosDeleteFile(table, FileFormat.PARQUET, tablePath, filename, appenderFactory, positions);
    }

    private FileAppenderFactory<RowData> createDeletableAppenderFactory() {
        int[] equalityFieldIds = new int[] { table.schema().findField("id").fieldId(), table.schema().findField("data").fieldId() };
        return new FlinkAppenderFactory(table.schema(), FlinkSchemaUtil.convert(table.schema()), table.properties(), table.spec(), equalityFieldIds, table.schema(), null);
    }

    private ManifestFile createTestingManifestFile(Path manifestPath) {
        return new GenericManifestFile(manifestPath.toAbsolutePath().toString(), manifestPath.toFile().length(), 0, ManifestContent.DATA, 0, 0, 0L, 0, 0, 0, 0, 0, 0, null);
    }

    private List<Path> replacedertFlinkManifests(int expectedCount) throws IOException {
        List<Path> manifests = Files.list(flinkManifestFolder.toPath()).filter(p -> !p.toString().endsWith(".crc")).collect(Collectors.toList());
        replacedert.replacedertEquals(String.format("Expected %s flink manifests, but the list is: %s", expectedCount, manifests), expectedCount, manifests.size());
        return manifests;
    }

    private DataFile writeDataFile(String filename, List<RowData> rows) throws IOException {
        return SimpleDataUtil.writeFile(table.schema(), table.spec(), CONF, tablePath, format.addExtension(filename), rows);
    }

    private void replacedertMaxCommittedCheckpointId(JobID jobID, long expectedId) {
        table.refresh();
        long actualId = IcebergFilesCommitter.getMaxCommittedCheckpointId(table, jobID.toString());
        replacedert.replacedertEquals(expectedId, actualId);
    }

    private void replacedertSnapshotSize(int expectedSnapshotSize) {
        table.refresh();
        replacedert.replacedertEquals(expectedSnapshotSize, Lists.newArrayList(table.snapshots()).size());
    }

    private OneInputStreamOperatorTestHarness<WriteResult, Void> createStreamSink(JobID jobID) throws Exception {
        TestOperatorFactory factory = TestOperatorFactory.of(tablePath);
        return new OneInputStreamOperatorTestHarness<>(factory, createEnvironment(jobID));
    }

    private static MockEnvironment createEnvironment(JobID jobID) {
        return new MockEnvironmentBuilder().setTaskName("test task").setManagedMemorySize(32 * 1024).setInputSplitProvider(new MockInputSplitProvider()).setBufferSize(256).setTaskConfiguration(new org.apache.flink.configuration.Configuration()).setExecutionConfig(new ExecutionConfig()).setMaxParallelism(16).setJobID(jobID).build();
    }

    private static class TestOperatorFactory extends AbstractStreamOperatorFactory<Void> implements OneInputStreamOperatorFactory<WriteResult, Void> {

        private final String tablePath;

        private TestOperatorFactory(String tablePath) {
            this.tablePath = tablePath;
        }

        private static TestOperatorFactory of(String tablePath) {
            return new TestOperatorFactory(tablePath);
        }

        @Override
        @SuppressWarnings("unchecked")
        public <T extends StreamOperator<Void>> T createStreamOperator(StreamOperatorParameters<Void> param) {
            IcebergFilesCommitter committer = new IcebergFilesCommitter(new TestTableLoader(tablePath), false);
            committer.setup(param.getContainingTask(), param.getStreamConfig(), param.getOutput());
            return (T) committer;
        }

        @Override
        public Class<? extends StreamOperator> getStreamOperatorClass(ClassLoader classLoader) {
            return IcebergFilesCommitter.class;
        }
    }
}
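
The writeDataFile helper above derives file names with format.addExtension(filename), and the delete-file helpers pin FileFormat.PARQUET explicitly. A minimal, self-contained sketch of that extension behavior (the class and file names here are hypothetical, not taken from the test above):

import org.apache.iceberg.FileFormat;

public class AddExtensionSketch {

    public static void main(String[] args) {
        FileFormat format = FileFormat.PARQUET;
        // addExtension appends the format's suffix only when the name does not already end with it.
        System.out.println(format.addExtension("data-1"));         // data-1.parquet
        System.out.println(format.addExtension("data-1.parquet")); // data-1.parquet (unchanged)
    }
}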

17 Source : FlinkSink.java
with Apache License 2.0
from apache

static IcebergStreamWriter<RowData> createStreamWriter(Table table, RowType flinkRowType, List<Integer> equalityFieldIds) {
    Map<String, String> props = table.properties();
    long targetFileSize = getTargetFileSizeBytes(props);
    FileFormat fileFormat = getFileFormat(props);
    TaskWriterFactory<RowData> taskWriterFactory = new RowDataTaskWriterFactory(table.schema(), flinkRowType, table.spec(), table.locationProvider(), table.io(), table.encryption(), targetFileSize, fileFormat, props, equalityFieldIds);
    return new IcebergStreamWriter<>(table.name(), taskWriterFactory);
}
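
The getFileFormat(props) call above is not part of this excerpt. As a rough sketch (not FlinkSink's actual implementation), such a lookup typically reads the write.format.default table property and falls back to the Parquet default:

import java.util.Locale;
import java.util.Map;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.TableProperties;

// Hypothetical helper; the real FlinkSink.getFileFormat may differ.
class FileFormatLookup {

    static FileFormat resolve(Map<String, String> props) {
        // TableProperties.DEFAULT_FILE_FORMAT is "write.format.default"; its default value is "parquet".
        String name = props.getOrDefault(TableProperties.DEFAULT_FILE_FORMAT, TableProperties.DEFAULT_FILE_FORMAT_DEFAULT);
        return FileFormat.valueOf(name.toUpperCase(Locale.ENGLISH));
    }
}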

17 Source : TestTaskEqualityDeltaWriter.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public class TestTaskEqualityDeltaWriter extends TableTestBase {

    private static final int FORMAT_V2 = 2;

    private static final long TARGET_FILE_SIZE = 128 * 1024 * 1024L;

    private final FileFormat format;

    private final GenericRecord gRecord = GenericRecord.create(SCHEMA);

    private final GenericRecord posRecord = GenericRecord.create(DeleteSchemaUtil.pathPosSchema());

    private OutputFileFactory fileFactory = null;

    private int idFieldId;

    private int dataFieldId;

    @Parameterized.Parameters(name = "FileFormat = {0}")
    public static Object[][] parameters() {
        return new Object[][] { { "avro" }, { "parquet" } };
    }

    public TestTaskEqualityDeltaWriter(String fileFormat) {
        super(FORMAT_V2);
        this.format = FileFormat.valueOf(fileFormat.toUpperCase(Locale.ENGLISH));
    }

    @Before
    public void setupTable() throws IOException {
        this.tableDir = temp.newFolder();
        // created by table create
        replacedert.replacedertTrue(tableDir.delete());
        this.metadataDir = new File(tableDir, "metadata");
        this.table = create(SCHEMA, ParreplacedionSpec.unparreplacedioned());
        this.fileFactory = new OutputFileFactory(table.spec(), format, table.locationProvider(), table.io(), table.encryption(), 1, 1);
        this.idFieldId = table.schema().findField("id").fieldId();
        this.dataFieldId = table.schema().findField("data").fieldId();
        table.updateProperties().defaultFormat(format).commit();
    }

    private Record createRecord(Integer id, String data) {
        return gRecord.copy("id", id, "data", data);
    }

    @Test
    public void testPureInsert() throws IOException {
        List<Integer> eqDeleteFieldIds = Lists.newArrayList(idFieldId, dataFieldId);
        Schema eqDeleteRowSchema = table.schema();
        GenericTaskDeltaWriter deltaWriter = createTaskWriter(eqDeleteFieldIds, eqDeleteRowSchema);
        List<Record> expected = Lists.newArrayList();
        for (int i = 0; i < 20; i++) {
            Record record = createRecord(i, String.format("val-%d", i));
            expected.add(record);
            deltaWriter.write(record);
        }
        WriteResult result = deltaWriter.complete();
        replacedert.replacedertEquals("Should only have a data file.", 1, result.dataFiles().length);
        replacedert.replacedertEquals("Should have no delete file", 0, result.deleteFiles().length);
        commitTransaction(result);
        replacedert.replacedertEquals("Should have expected records", expectedRowSet(expected), actualRowSet("*"));
        deltaWriter = createTaskWriter(eqDeleteFieldIds, eqDeleteRowSchema);
        for (int i = 20; i < 30; i++) {
            Record record = createRecord(i, String.format("val-%d", i));
            expected.add(record);
            deltaWriter.write(record);
        }
        result = deltaWriter.complete();
        replacedert.replacedertEquals("Should only have a data file.", 1, result.dataFiles().length);
        replacedert.replacedertEquals("Should have no delete file", 0, result.deleteFiles().length);
        commitTransaction(result);
        replacedert.replacedertEquals("Should have expected records", expectedRowSet(expected), actualRowSet("*"));
    }

    @Test
    public void testInsertDuplicatedKey() throws IOException {
        List<Integer> equalityFieldIds = Lists.newArrayList(idFieldId);
        Schema eqDeleteRowSchema = table.schema();
        GenericTaskDeltaWriter deltaWriter = createTaskWriter(equalityFieldIds, eqDeleteRowSchema);
        deltaWriter.write(createRecord(1, "aaa"));
        deltaWriter.write(createRecord(2, "bbb"));
        deltaWriter.write(createRecord(3, "ccc"));
        deltaWriter.write(createRecord(4, "ddd"));
        deltaWriter.write(createRecord(4, "eee"));
        deltaWriter.write(createRecord(3, "fff"));
        deltaWriter.write(createRecord(2, "ggg"));
        deltaWriter.write(createRecord(1, "hhh"));
        WriteResult result = deltaWriter.complete();
        commitTransaction(result);
        replacedert.replacedertEquals("Should have a data file.", 1, result.dataFiles().length);
        replacedert.replacedertEquals("Should have a pos-delete file", 1, result.deleteFiles().length);
        DeleteFile posDeleteFile = result.deleteFiles()[0];
        replacedert.replacedertEquals("Should be a pos-delete file", FileContent.POSITION_DELETES, posDeleteFile.content());
        replacedert.replacedertEquals(1, result.referencedDataFiles().length);
        replacedert.replacedertEquals("Should have expected records", expectedRowSet(ImmutableList.of(createRecord(4, "eee"), createRecord(3, "fff"), createRecord(2, "ggg"), createRecord(1, "hhh"))), actualRowSet("*"));
        // Check records in the data file.
        DataFile dataFile = result.dataFiles()[0];
        replacedert.replacedertEquals(ImmutableList.of(createRecord(1, "aaa"), createRecord(2, "bbb"), createRecord(3, "ccc"), createRecord(4, "ddd"), createRecord(4, "eee"), createRecord(3, "fff"), createRecord(2, "ggg"), createRecord(1, "hhh")), readRecordsAsList(table.schema(), dataFile.path()));
        // Check records in the pos-delete file.
        Schema posDeleteSchema = DeleteSchemaUtil.pathPosSchema();
        replacedert.replacedertEquals(ImmutableList.of(posRecord.copy("file_path", dataFile.path(), "pos", 0L), posRecord.copy("file_path", dataFile.path(), "pos", 1L), posRecord.copy("file_path", dataFile.path(), "pos", 2L), posRecord.copy("file_path", dataFile.path(), "pos", 3L)), readRecordsAsList(posDeleteSchema, posDeleteFile.path()));
    }

    @Test
    public void testUpsertSameRow() throws IOException {
        List<Integer> eqDeleteFieldIds = Lists.newArrayList(idFieldId, dataFieldId);
        Schema eqDeleteRowSchema = table.schema();
        GenericTaskDeltaWriter deltaWriter = createTaskWriter(eqDeleteFieldIds, eqDeleteRowSchema);
        Record record = createRecord(1, "aaa");
        deltaWriter.write(record);
        // UPSERT <1, 'aaa'> to <1, 'aaa'>
        deltaWriter.delete(record);
        deltaWriter.write(record);
        WriteResult result = deltaWriter.complete();
        replacedert.replacedertEquals("Should have a data file.", 1, result.dataFiles().length);
        replacedert.replacedertEquals("Should have a pos-delete file and an eq-delete file", 2, result.deleteFiles().length);
        commitTransaction(result);
        replacedert.replacedertEquals("Should have an expected record", expectedRowSet(ImmutableList.of(record)), actualRowSet("*"));
        // Check records in the data file.
        DataFile dataFile = result.dataFiles()[0];
        replacedert.replacedertEquals(ImmutableList.of(record, record), readRecordsAsList(table.schema(), dataFile.path()));
        // Check records in the eq-delete file.
        DeleteFile eqDeleteFile = result.deleteFiles()[0];
        replacedert.replacedertEquals(ImmutableList.of(record), readRecordsAsList(eqDeleteRowSchema, eqDeleteFile.path()));
        // Check records in the pos-delete file.
        DeleteFile posDeleteFile = result.deleteFiles()[1];
        replacedert.replacedertEquals(ImmutableList.of(posRecord.copy("file_path", dataFile.path(), "pos", 0L)), readRecordsAsList(DeleteSchemaUtil.pathPosSchema(), posDeleteFile.path()));
        deltaWriter = createTaskWriter(eqDeleteFieldIds, eqDeleteRowSchema);
        deltaWriter.delete(record);
        result = deltaWriter.complete();
        replacedert.replacedertEquals("Should have 0 data file.", 0, result.dataFiles().length);
        replacedert.replacedertEquals("Should have 1 eq-delete file", 1, result.deleteFiles().length);
        commitTransaction(result);
        replacedert.replacedertEquals("Should have no record", expectedRowSet(ImmutableList.of()), actualRowSet("*"));
    }

    @Test
    public void testUpsertData() throws IOException {
        List<Integer> eqDeleteFieldIds = Lists.newArrayList(dataFieldId);
        Schema eqDeleteRowSchema = table.schema().select("data");
        GenericTaskDeltaWriter deltaWriter = createTaskWriter(eqDeleteFieldIds, eqDeleteRowSchema);
        deltaWriter.write(createRecord(1, "aaa"));
        deltaWriter.write(createRecord(2, "bbb"));
        deltaWriter.write(createRecord(3, "aaa"));
        deltaWriter.write(createRecord(3, "ccc"));
        deltaWriter.write(createRecord(4, "ccc"));
        // Commit the 1st transaction.
        WriteResult result = deltaWriter.complete();
        replacedert.replacedertEquals("Should have a data file", 1, result.dataFiles().length);
        replacedert.replacedertEquals("Should have a pos-delete file for deduplication purpose", 1, result.deleteFiles().length);
        replacedert.replacedertEquals("Should be pos-delete file", FileContent.POSITION_DELETES, result.deleteFiles()[0].content());
        replacedert.replacedertEquals(1, result.referencedDataFiles().length);
        commitTransaction(result);
        replacedert.replacedertEquals("Should have expected records", expectedRowSet(ImmutableList.of(createRecord(2, "bbb"), createRecord(3, "aaa"), createRecord(4, "ccc"))), actualRowSet("*"));
        // Start the 2nd transaction.
        deltaWriter = createTaskWriter(eqDeleteFieldIds, eqDeleteRowSchema);
        GenericRecord keyRecord = GenericRecord.create(eqDeleteRowSchema);
        Function<String, Record> keyFunc = data -> keyRecord.copy("data", data);
        // UPSERT <3,'aaa'> to <5,'aaa'> - (by deleting the key)
        deltaWriter.deleteKey(keyFunc.apply("aaa"));
        deltaWriter.write(createRecord(5, "aaa"));
        // UPSERT <5,'aaa'> to <6,'aaa'> - (by deleting the key)
        deltaWriter.deleteKey(keyFunc.apply("aaa"));
        deltaWriter.write(createRecord(6, "aaa"));
        // UPSERT <4,'ccc'> to <7,'ccc'> - (by deleting the key)
        deltaWriter.deleteKey(keyFunc.apply("ccc"));
        deltaWriter.write(createRecord(7, "ccc"));
        // DELETE <2, 'bbb'> - (by deleting the key)
        deltaWriter.deleteKey(keyFunc.apply("bbb"));
        // Commit the 2nd transaction.
        result = deltaWriter.complete();
        replacedert.replacedertEquals(1, result.dataFiles().length);
        replacedert.replacedertEquals(2, result.deleteFiles().length);
        commitTransaction(result);
        replacedert.replacedertEquals("Should have expected records", expectedRowSet(ImmutableList.of(createRecord(6, "aaa"), createRecord(7, "ccc"))), actualRowSet("*"));
        // Check records in the data file.
        DataFile dataFile = result.dataFiles()[0];
        replacedert.replacedertEquals(ImmutableList.of(createRecord(5, "aaa"), createRecord(6, "aaa"), createRecord(7, "ccc")), readRecordsAsList(table.schema(), dataFile.path()));
        // Check records in the eq-delete file.
        DeleteFile eqDeleteFile = result.deleteFiles()[0];
        replacedert.replacedertEquals(FileContent.EQUALITY_DELETES, eqDeleteFile.content());
        replacedert.replacedertEquals(ImmutableList.of(keyFunc.apply("aaa"), keyFunc.apply("aaa"), keyFunc.apply("ccc"), keyFunc.apply("bbb")), readRecordsAsList(eqDeleteRowSchema, eqDeleteFile.path()));
        // Check records in the pos-delete file.
        DeleteFile posDeleteFile = result.deleteFiles()[1];
        Schema posDeleteSchema = DeleteSchemaUtil.pathPosSchema();
        replacedert.replacedertEquals(FileContent.POSITION_DELETES, posDeleteFile.content());
        replacedert.replacedertEquals(ImmutableList.of(posRecord.copy("file_path", dataFile.path(), "pos", 0L)), readRecordsAsList(posDeleteSchema, posDeleteFile.path()));
    }

    @Test
    public void testUpsertDataWithFullRowSchema() throws IOException {
        List<Integer> eqDeleteFieldIds = Lists.newArrayList(dataFieldId);
        Schema eqDeleteRowSchema = table.schema();
        GenericTaskDeltaWriter deltaWriter = createTaskWriter(eqDeleteFieldIds, eqDeleteRowSchema);
        deltaWriter.write(createRecord(1, "aaa"));
        deltaWriter.write(createRecord(2, "bbb"));
        deltaWriter.write(createRecord(3, "aaa"));
        deltaWriter.write(createRecord(3, "ccc"));
        deltaWriter.write(createRecord(4, "ccc"));
        // Commit the 1st transaction.
        WriteResult result = deltaWriter.complete();
        replacedert.replacedertEquals("Should have a data file", 1, result.dataFiles().length);
        replacedert.replacedertEquals("Should have a pos-delete file for deduplication purpose", 1, result.deleteFiles().length);
        replacedert.replacedertEquals("Should be pos-delete file", FileContent.POSITION_DELETES, result.deleteFiles()[0].content());
        replacedert.replacedertEquals(1, result.referencedDataFiles().length);
        commitTransaction(result);
        replacedert.replacedertEquals("Should have expected records", expectedRowSet(ImmutableList.of(createRecord(2, "bbb"), createRecord(3, "aaa"), createRecord(4, "ccc"))), actualRowSet("*"));
        // Start the 2nd transaction.
        deltaWriter = createTaskWriter(eqDeleteFieldIds, eqDeleteRowSchema);
        // UPSERT <3,'aaa'> to <5,'aaa'> - (by deleting the entire row)
        deltaWriter.delete(createRecord(3, "aaa"));
        deltaWriter.write(createRecord(5, "aaa"));
        // UPSERT <5,'aaa'> to <6,'aaa'> - (by deleting the entire row)
        deltaWriter.delete(createRecord(5, "aaa"));
        deltaWriter.write(createRecord(6, "aaa"));
        // UPSERT <4,'ccc'> to <7,'ccc'> - (by deleting the entire row)
        deltaWriter.delete(createRecord(4, "ccc"));
        deltaWriter.write(createRecord(7, "ccc"));
        // DELETE <2, 'bbb'> - (by deleting the entire row)
        deltaWriter.delete(createRecord(2, "bbb"));
        // Commit the 2nd transaction.
        result = deltaWriter.complete();
        replacedert.replacedertEquals(1, result.dataFiles().length);
        replacedert.replacedertEquals(2, result.deleteFiles().length);
        replacedert.replacedertEquals(1, result.referencedDataFiles().length);
        commitTransaction(result);
        replacedert.replacedertEquals("Should have expected records", expectedRowSet(ImmutableList.of(createRecord(6, "aaa"), createRecord(7, "ccc"))), actualRowSet("*"));
        // Check records in the data file.
        DataFile dataFile = result.dataFiles()[0];
        replacedert.replacedertEquals(ImmutableList.of(createRecord(5, "aaa"), createRecord(6, "aaa"), createRecord(7, "ccc")), readRecordsAsList(table.schema(), dataFile.path()));
        // Check records in the eq-delete file.
        DeleteFile eqDeleteFile = result.deleteFiles()[0];
        replacedert.replacedertEquals(FileContent.EQUALITY_DELETES, eqDeleteFile.content());
        replacedert.replacedertEquals(ImmutableList.of(createRecord(3, "aaa"), createRecord(5, "aaa"), createRecord(4, "ccc"), createRecord(2, "bbb")), readRecordsAsList(eqDeleteRowSchema, eqDeleteFile.path()));
        // Check records in the pos-delete file.
        DeleteFile posDeleteFile = result.deleteFiles()[1];
        Schema posDeleteSchema = DeleteSchemaUtil.pathPosSchema();
        replacedert.replacedertEquals(FileContent.POSITION_DELETES, posDeleteFile.content());
        replacedert.replacedertEquals(ImmutableList.of(posRecord.copy("file_path", dataFile.path(), "pos", 0L)), readRecordsAsList(posDeleteSchema, posDeleteFile.path()));
    }

    private void commitTransaction(WriteResult result) {
        RowDelta rowDelta = table.newRowDelta();
        Arrays.stream(result.dataFiles()).forEach(rowDelta::addRows);
        Arrays.stream(result.deleteFiles()).forEach(rowDelta::addDeletes);
        rowDelta.validateDeletedFiles().validateDataFilesExist(Lists.newArrayList(result.referencedDataFiles())).commit();
    }

    private StructLikeSet expectedRowSet(Iterable<Record> records) {
        StructLikeSet set = StructLikeSet.create(table.schema().replacedtruct());
        records.forEach(set::add);
        return set;
    }

    private StructLikeSet actualRowSet(String... columns) throws IOException {
        StructLikeSet set = StructLikeSet.create(table.schema().replacedtruct());
        try (CloseableIterable<Record> reader = IcebergGenerics.read(table).select(columns).build()) {
            reader.forEach(set::add);
        }
        return set;
    }

    /**
     * Create a generic task equality delta writer.
     *
     * @param equalityFieldIds  defines the equality field ids.
     * @param eqDeleteRowSchema defines the schema of rows that the eq-delete writer will write; it may contain all fields
     *                          of the table schema.
     */
    private GenericTaskDeltaWriter createTaskWriter(List<Integer> equalityFieldIds, Schema eqDeleteRowSchema) {
        FileAppenderFactory<Record> appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(), ArrayUtil.toIntArray(equalityFieldIds), eqDeleteRowSchema, null);
        List<String> columns = Lists.newArrayList();
        for (Integer fieldId : equalityFieldIds) {
            columns.add(table.schema().findField(fieldId).name());
        }
        Schema deleteSchema = table.schema().select(columns);
        return new GenericTaskDeltaWriter(table.schema(), deleteSchema, table.spec(), format, appenderFactory, fileFactory, table.io(), TARGET_FILE_SIZE);
    }

    private static class GenericTaskDeltaWriter extends BaseTaskWriter<Record> {

        private final GenericEqualityDeltaWriter deltaWriter;

        private GenericTaskDeltaWriter(Schema schema, Schema deleteSchema, PartitionSpec spec, FileFormat format, FileAppenderFactory<Record> appenderFactory, OutputFileFactory fileFactory, FileIO io, long targetFileSize) {
            super(spec, format, appenderFactory, fileFactory, io, targetFileSize);
            this.deltaWriter = new GenericEqualityDeltaWriter(null, schema, deleteSchema);
        }

        @Override
        public void write(Record row) throws IOException {
            deltaWriter.write(row);
        }

        public void delete(Record row) throws IOException {
            deltaWriter.delete(row);
        }

        public void deleteKey(Record key) throws IOException {
            deltaWriter.deleteKey(key);
        }

        @Override
        public void close() throws IOException {
            deltaWriter.close();
        }

        private class GenericEqualityDeltaWriter extends BaseEqualityDeltaWriter {

            private GenericEqualityDeltaWriter(PartitionKey partition, Schema schema, Schema eqDeleteSchema) {
                super(partition, schema, eqDeleteSchema);
            }

            @Override
            protected StructLike asStructLike(Record row) {
                return row;
            }
        }
    }

    private List<Record> readRecordsAsList(Schema schema, CharSequence path) throws IOException {
        CloseableIterable<Record> iterable;
        InputFile inputFile = Files.localInput(path.toString());
        switch(format) {
            case PARQUET:
                iterable = Parquet.read(inputFile).project(schema).createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema)).build();
                break;
            case AVRO:
                iterable = Avro.read(inputFile).project(schema).createReaderFunc(DataReader::create).build();
                break;
            default:
                throw new UnsupportedOperationException("Unsupported file format: " + format);
        }
        try (CloseableIterable<Record> closeableIterable = iterable) {
            return Lists.newArrayList(closeableIterable);
        }
    }
}
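
The setupTable method above switches the table's default write format with updateProperties().defaultFormat(format). As a sketch (assuming an already loaded Table, named table here only for illustration), the same effect can be had by setting the write.format.default property directly:

import java.util.Locale;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;

class DefaultFormatSketch {

    // Equivalent in effect to table.updateProperties().defaultFormat(format).commit().
    static void setDefaultFormat(Table table, FileFormat format) {
        table.updateProperties()
                .set(TableProperties.DEFAULT_FILE_FORMAT, format.name().toLowerCase(Locale.ENGLISH))
                .commit();
    }
}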

17 Source : TestGenericSortedPosDeleteWriter.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public class TestGenericSortedPosDeleteWriter extends TableTestBase {

    private static final int FORMAT_V2 = 2;

    private final FileFormat format;

    private OutputFileFactory fileFactory;

    private Record gRecord;

    @Parameterized.Parameters(name = "FileFormat={0}")
    public static Object[] parameters() {
        return new Object[][] { new Object[] { "avro" }, new Object[] { "parquet" } };
    }

    public TestGenericSortedPosDeleteWriter(String fileFormat) {
        super(FORMAT_V2);
        this.format = FileFormat.valueOf(fileFormat.toUpperCase(Locale.ENGLISH));
    }

    @Before
    public void setupTable() throws IOException {
        this.tableDir = temp.newFolder();
        replacedert.replacedertTrue(tableDir.delete());
        this.metadataDir = new File(tableDir, "metadata");
        this.table = create(SCHEMA, ParreplacedionSpec.unparreplacedioned());
        this.gRecord = GenericRecord.create(SCHEMA);
        this.fileFactory = new OutputFileFactory(table.spec(), format, table.locationProvider(), table.io(), table.encryption(), 1, 1);
        table.updateProperties().defaultFormat(format).commit();
    }

    private EncryptedOutputFile createEncryptedOutputFile() {
        return fileFactory.newOutputFile();
    }

    private DataFile prepareDataFile(FileAppenderFactory<Record> appenderFactory, List<Record> rowSet) throws IOException {
        DataWriter<Record> writer = appenderFactory.newDataWriter(createEncryptedOutputFile(), format, null);
        try (DataWriter<Record> closeableWriter = writer) {
            for (Record record : rowSet) {
                closeableWriter.add(record);
            }
        }
        return writer.toDataFile();
    }

    private Record createRow(Integer id, String data) {
        Record row = gRecord.copy();
        row.setField("id", id);
        row.setField("data", data);
        return row;
    }

    private StructLikeSet expectedRowSet(Iterable<Record> records) {
        StructLikeSet set = StructLikeSet.create(table.schema().replacedtruct());
        records.forEach(set::add);
        return set;
    }

    private StructLikeSet actualRowSet(String... columns) throws IOException {
        StructLikeSet set = StructLikeSet.create(table.schema().replacedtruct());
        try (CloseableIterable<Record> reader = IcebergGenerics.read(table).select(columns).build()) {
            reader.forEach(set::add);
        }
        return set;
    }

    @Test
    public void testSortedPosDelete() throws IOException {
        List<Record> rowSet = Lists.newArrayList(createRow(0, "aaa"), createRow(1, "bbb"), createRow(2, "ccc"), createRow(3, "ddd"), createRow(4, "eee"));
        FileAppenderFactory<Record> appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(), null, null, null);
        DataFile dataFile = prepareDataFile(appenderFactory, rowSet);
        SortedPosDeleteWriter<Record> writer = new SortedPosDeleteWriter<>(appenderFactory, fileFactory, format, null, 100);
        try (SortedPosDeleteWriter<Record> closeableWriter = writer) {
            for (int index = rowSet.size() - 1; index >= 0; index -= 2) {
                closeableWriter.delete(dataFile.path(), index);
            }
        }
        List<DeleteFile> deleteFiles = writer.complete();
        replacedert.replacedertEquals(1, deleteFiles.size());
        DeleteFile deleteFile = deleteFiles.get(0);
        // Check whether the path-pos pairs are sorted as expected.
        Schema pathPosSchema = DeleteSchemaUtil.pathPosSchema();
        Record record = GenericRecord.create(pathPosSchema);
        List<Record> expectedDeletes = Lists.newArrayList(record.copy("file_path", dataFile.path(), "pos", 0L), record.copy("file_path", dataFile.path(), "pos", 2L), record.copy("file_path", dataFile.path(), "pos", 4L));
        replacedert.replacedertEquals(expectedDeletes, readRecordsAsList(pathPosSchema, deleteFile.path()));
        table.newRowDelta().addRows(dataFile).addDeletes(deleteFiles.get(0)).validateDataFilesExist(writer.referencedDataFiles()).validateDeletedFiles().commit();
        List<Record> expectedData = Lists.newArrayList(createRow(1, "bbb"), createRow(3, "ddd"));
        replacedert.replacedertEquals("Should have the expected records", expectedRowSet(expectedData), actualRowSet("*"));
    }

    @Test
    public void testSortedPosDeleteWithSchemaAndNullRow() throws IOException {
        List<Record> rowSet = Lists.newArrayList(createRow(0, "aaa"), createRow(1, "bbb"), createRow(2, "ccc"));
        // Create a FileAppenderFactory which requires a pos-delete row schema.
        FileAppenderFactory<Record> appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(), null, null, table.schema());
        DataFile dataFile = prepareDataFile(appenderFactory, rowSet);
        SortedPosDeleteWriter<Record> writer = new SortedPosDeleteWriter<>(appenderFactory, fileFactory, format, null, 1);
        boolean caughtError = false;
        try {
            writer.delete(dataFile.path(), 0L);
        } catch (Exception e) {
            caughtError = true;
        }
        replacedert.replacedertTrue("Should fail because the appender are required non-null rows to write", caughtError);
    }

    @Test
    public void testSortedPosDeleteWithRow() throws IOException {
        List<Record> rowSet = Lists.newArrayList(createRow(0, "aaa"), createRow(1, "bbb"), createRow(2, "ccc"), createRow(3, "ddd"), createRow(4, "eee"));
        FileAppenderFactory<Record> appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(), null, null, table.schema());
        DataFile dataFile = prepareDataFile(appenderFactory, rowSet);
        SortedPosDeleteWriter<Record> writer = new SortedPosDeleteWriter<>(appenderFactory, fileFactory, format, null, 100);
        try (SortedPosDeleteWriter<Record> closeableWriter = writer) {
            for (int index = rowSet.size() - 1; index >= 0; index -= 2) {
                // Write deletes with row.
                closeableWriter.delete(dataFile.path(), index, rowSet.get(index));
            }
        }
        List<DeleteFile> deleteFiles = writer.complete();
        replacedert.replacedertEquals(1, deleteFiles.size());
        DeleteFile deleteFile = deleteFiles.get(0);
        // Check whether the path-pos pairs are sorted as expected.
        Schema pathPosSchema = DeleteSchemaUtil.posDeleteSchema(table.schema());
        Record record = GenericRecord.create(pathPosSchema);
        List<Record> expectedDeletes = Lists.newArrayList(record.copy("file_path", dataFile.path(), "pos", 0L, "row", createRow(0, "aaa")), record.copy("file_path", dataFile.path(), "pos", 2L, "row", createRow(2, "ccc")), record.copy("file_path", dataFile.path(), "pos", 4L, "row", createRow(4, "eee")));
        replacedert.replacedertEquals(expectedDeletes, readRecordsAsList(pathPosSchema, deleteFile.path()));
        table.newRowDelta().addRows(dataFile).addDeletes(deleteFiles.get(0)).validateDataFilesExist(writer.referencedDataFiles()).validateDeletedFiles().commit();
        List<Record> expectedData = Lists.newArrayList(createRow(1, "bbb"), createRow(3, "ddd"));
        replacedert.replacedertEquals("Should have the expected records", expectedRowSet(expectedData), actualRowSet("*"));
    }

    @Test
    public void testMultipleFlush() throws IOException {
        FileAppenderFactory<Record> appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(), null, null, null);
        // It will produce 5 record lists; each list will be written into a separate data file:
        // The 1st file has: <0  , val-0>   , <1  , val-1>   , ... , <99 , val-99>
        // The 2nd file has: <100, val-100> , <101, val-101> , ... , <199, val-199>
        // The 3rd file has: <200, val-200> , <201, val-201> , ... , <299, val-299>
        // The 4th file has: <300, val-300> , <301, val-301> , ... , <399, val-399>
        // The 5th file has: <400, val-400> , <401, val-401> , ... , <499, val-499>
        List<DataFile> dataFiles = Lists.newArrayList();
        for (int fileIndex = 0; fileIndex < 5; fileIndex++) {
            List<Record> recordList = Lists.newLinkedList();
            for (int recordIndex = 0; recordIndex < 100; recordIndex++) {
                int id = fileIndex * 100 + recordIndex;
                recordList.add(createRow(id, String.format("val-%s", id)));
            }
            // Write the records and generate the data file.
            dataFiles.add(prepareDataFile(appenderFactory, recordList));
        }
        // Commit those data files to the Iceberg table.
        RowDelta rowDelta = table.newRowDelta();
        dataFiles.forEach(rowDelta::addRows);
        rowDelta.commit();
        SortedPosDeleteWriter<Record> writer = new SortedPosDeleteWriter<>(appenderFactory, fileFactory, format, null, 50);
        try (SortedPosDeleteWriter<Record> closeableWriter = writer) {
            for (int pos = 0; pos < 100; pos++) {
                for (int fileIndex = 4; fileIndex >= 0; fileIndex--) {
                    closeableWriter.delete(dataFiles.get(fileIndex).path(), pos);
                }
            }
        }
        List<DeleteFile> deleteFiles = writer.complete();
        replacedert.replacedertEquals(10, deleteFiles.size());
        Schema pathPosSchema = DeleteSchemaUtil.pathPosSchema();
        Record record = GenericRecord.create(pathPosSchema);
        for (int deleteFileIndex = 0; deleteFileIndex < 10; deleteFileIndex++) {
            List<Record> expectedDeletes = Lists.newArrayList();
            for (int dataFileIndex = 0; dataFileIndex < 5; dataFileIndex++) {
                DataFile dataFile = dataFiles.get(dataFileIndex);
                for (long pos = deleteFileIndex * 10; pos < deleteFileIndex * 10 + 10; pos++) {
                    expectedDeletes.add(record.copy("file_path", dataFile.path(), "pos", pos));
                }
            }
            DeleteFile deleteFile = deleteFiles.get(deleteFileIndex);
            replacedert.replacedertEquals(expectedDeletes, readRecordsAsList(pathPosSchema, deleteFile.path()));
        }
        rowDelta = table.newRowDelta();
        deleteFiles.forEach(rowDelta::addDeletes);
        rowDelta.commit();
        replacedert.replacedertEquals("Should have no record.", expectedRowSet(ImmutableList.of()), actualRowSet("*"));
    }

    private List<Record> readRecordsAsList(Schema schema, CharSequence path) throws IOException {
        CloseableIterable<Record> iterable;
        InputFile inputFile = Files.localInput(path.toString());
        switch(format) {
            case PARQUET:
                iterable = Parquet.read(inputFile).project(schema).createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema)).build();
                break;
            case AVRO:
                iterable = Avro.read(inputFile).project(schema).createReaderFunc(DataReader::create).build();
                break;
            default:
                throw new UnsupportedOperationException("Unsupported file format: " + format);
        }
        try (CloseableIterable<Record> closeableIterable = iterable) {
            return Lists.newArrayList(closeableIterable);
        }
    }
}
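
setupTable above builds an OutputFileFactory directly from the table and the chosen FileFormat (an older constructor form). A compact sketch of the same wiring (assuming a loaded Table; the partition and task ids of 1 are arbitrary, as in the test):

import org.apache.iceberg.FileFormat;
import org.apache.iceberg.Table;
import org.apache.iceberg.encryption.EncryptedOutputFile;
import org.apache.iceberg.io.OutputFileFactory;

class OutputFileFactorySketch {

    // Mirrors the constructor call used in setupTable; generated file names end with the format's extension.
    static EncryptedOutputFile newOutputFile(Table table, FileFormat format) {
        OutputFileFactory fileFactory = new OutputFileFactory(
                table.spec(), format, table.locationProvider(), table.io(), table.encryption(), 1, 1);
        return fileFactory.newOutputFile();
    }
}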

17 Source : TestBaseTaskWriter.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public class TestBaseTaskWriter extends TableTestBase {

    private static final int FORMAT_V2 = 2;

    private final FileFormat format;

    private final GenericRecord gRecord = GenericRecord.create(SCHEMA);

    private OutputFileFactory fileFactory = null;

    private FileAppenderFactory<Record> appenderFactory = null;

    @Parameterized.Parameters(name = "FileFormat = {0}")
    public static Object[][] parameters() {
        return new Object[][] { { "avro" }, { "parquet" } };
    }

    public TestBaseTaskWriter(String fileFormat) {
        super(FORMAT_V2);
        this.format = FileFormat.valueOf(fileFormat.toUpperCase(Locale.ENGLISH));
    }

    @Before
    public void setupTable() throws IOException {
        this.tableDir = temp.newFolder();
        // created by table create
        replacedert.replacedertTrue(tableDir.delete());
        this.metadataDir = new File(tableDir, "metadata");
        this.table = create(SCHEMA, ParreplacedionSpec.unparreplacedioned());
        this.fileFactory = new OutputFileFactory(table.spec(), format, table.locationProvider(), table.io(), table.encryption(), 1, 1);
        int firstFieldId = table.schema().findField("id").fieldId();
        int secondFieldId = table.schema().findField("data").fieldId();
        this.appenderFactory = new GenericAppenderFactory(table.schema(), table.spec(), new int[] { firstFieldId, secondFieldId }, table.schema(), null);
        table.updateProperties().defaultFormat(format).commit();
    }

    private Record createRecord(Integer id, String data) {
        return gRecord.copy("id", id, "data", data);
    }

    @Test
    public void testWriteZeroRecord() throws IOException {
        try (TestTaskWriter writer = createTaskWriter(128 * 1024 * 1024)) {
            writer.close();
            WriteResult result = writer.complete();
            replacedert.replacedertEquals(0, result.dataFiles().length);
            replacedert.replacedertEquals(0, result.deleteFiles().length);
            writer.close();
            result = writer.complete();
            replacedert.replacedertEquals(0, result.dataFiles().length);
            replacedert.replacedertEquals(0, result.deleteFiles().length);
        }
    }

    @Test
    public void testAbort() throws IOException {
        List<Record> records = Lists.newArrayList();
        for (int i = 0; i < 2000; i++) {
            records.add(createRecord(i, "aaa"));
        }
        List<Path> files;
        try (TestTaskWriter taskWriter = createTaskWriter(4)) {
            for (Record record : records) {
                taskWriter.write(record);
                taskWriter.delete(record);
            }
            // Close the currently open files.
            taskWriter.close();
            // Assert the current data file count.
            files = Files.list(Paths.get(tableDir.getPath(), "data")).filter(p -> !p.toString().endsWith(".crc")).collect(Collectors.toList());
            replacedert.replacedertEquals("Should have 4 files but the files are: " + files, 4, files.size());
            // Abort to clean all delete files and data files.
            taskWriter.abort();
        }
        for (Path path : files) {
            replacedert.replacedertFalse(Files.exists(path));
        }
    }

    @Test
    public void testRollIfExceedTargetFileSize() throws IOException {
        List<Record> records = Lists.newArrayListWithCapacity(8000);
        for (int i = 0; i < 2000; i++) {
            records.add(createRecord(i, "aaa"));
            records.add(createRecord(i, "bbb"));
            records.add(createRecord(i, "ccc"));
            records.add(createRecord(i, "ddd"));
        }
        WriteResult result;
        try (TaskWriter<Record> taskWriter = createTaskWriter(4)) {
            for (Record record : records) {
                taskWriter.write(record);
            }
            result = taskWriter.complete();
            replacedert.replacedertEquals(8, result.dataFiles().length);
            replacedert.replacedertEquals(0, result.deleteFiles().length);
        }
        RowDelta rowDelta = table.newRowDelta();
        Arrays.stream(result.dataFiles()).forEach(rowDelta::addRows);
        rowDelta.commit();
        List<Record> expected = Lists.newArrayList();
        try (TestTaskWriter taskWriter = createTaskWriter(3)) {
            for (Record record : records) {
                // ex: UPSERT <0, 'aaa'> to <0, 'AAA'>
                taskWriter.delete(record);
                int id = record.get(0, Integer.class);
                String data = record.get(1, String.class);
                Record newRecord = createRecord(id, data.toUpperCase());
                expected.add(newRecord);
                taskWriter.write(newRecord);
            }
            result = taskWriter.complete();
            Assert.assertEquals(8, result.dataFiles().length);
            Assert.assertEquals(8, result.deleteFiles().length);
        }
        rowDelta = table.newRowDelta();
        Arrays.stream(result.dataFiles()).forEach(rowDelta::addRows);
        Arrays.stream(result.deleteFiles()).forEach(rowDelta::addDeletes);
        rowDelta.commit();
        replacedert.replacedertEquals("Should have expected records", expectedRowSet(expected), actualRowSet("*"));
    }

    private StructLikeSet expectedRowSet(Iterable<Record> records) {
        StructLikeSet set = StructLikeSet.create(table.schema().asStruct());
        records.forEach(set::add);
        return set;
    }

    private StructLikeSet actualRowSet(String... columns) throws IOException {
        StructLikeSet set = StructLikeSet.create(table.schema().asStruct());
        try (CloseableIterable<Record> reader = IcebergGenerics.read(table).select(columns).build()) {
            reader.forEach(set::add);
        }
        return set;
    }

    private TestTaskWriter createTaskWriter(long targetFileSize) {
        return new TestTaskWriter(table.spec(), format, appenderFactory, fileFactory, table.io(), targetFileSize);
    }

    private static class TestTaskWriter extends BaseTaskWriter<Record> {

        private RollingFileWriter dataWriter;

        private RollingEqDeleteWriter deleteWriter;

        private TestTaskWriter(PartitionSpec spec, FileFormat format, FileAppenderFactory<Record> appenderFactory, OutputFileFactory fileFactory, FileIO io, long targetFileSize) {
            super(spec, format, appenderFactory, fileFactory, io, targetFileSize);
            this.dataWriter = new RollingFileWriter(null);
            this.deleteWriter = new RollingEqDeleteWriter(null);
        }

        @Override
        public void write(Record row) throws IOException {
            dataWriter.write(row);
        }

        void delete(Record row) throws IOException {
            deleteWriter.write(row);
        }

        @Override
        public void close() throws IOException {
            if (dataWriter != null) {
                dataWriter.close();
            }
            if (deleteWriter != null) {
                deleteWriter.close();
            }
        }
    }
}

17 Source : TestAppenderFactory.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public abstract class TestAppenderFactory<T> extends TableTestBase {

    private static final int FORMAT_V2 = 2;

    private final FileFormat format;

    private final boolean partitioned;

    private PartitionKey partition = null;

    private OutputFileFactory fileFactory = null;

    @Parameterized.Parameters(name = "FileFormat={0}, Partitioned={1}")
    public static Object[] parameters() {
        return new Object[][] { new Object[] { "avro", false }, new Object[] { "avro", true }, new Object[] { "parquet", false }, new Object[] { "parquet", true } };
    }

    public TestAppenderFactory(String fileFormat, boolean partitioned) {
        super(FORMAT_V2);
        this.format = FileFormat.valueOf(fileFormat.toUpperCase(Locale.ENGLISH));
        this.partitioned = partitioned;
    }

    @Before
    public void setupTable() throws Exception {
        this.tableDir = temp.newFolder();
        // created by table create
        Assert.assertTrue(tableDir.delete());
        this.metadataDir = new File(tableDir, "metadata");
        if (partitioned) {
            this.table = create(SCHEMA, SPEC);
        } else {
            this.table = create(SCHEMA, PartitionSpec.unpartitioned());
        }
        this.partition = createPartitionKey();
        this.fileFactory = new OutputFileFactory(table.spec(), format, table.locationProvider(), table.io(), table.encryption(), 1, 1);
        table.updateProperties().defaultFormat(format).commit();
    }

    protected abstract FileAppenderFactory<T> createAppenderFactory(List<Integer> equalityFieldIds, Schema eqDeleteSchema, Schema posDeleteRowSchema);

    protected abstract T createRow(Integer id, String data);

    protected abstract StructLikeSet expectedRowSet(Iterable<T> records) throws IOException;

    private StructLikeSet actualRowSet(String... columns) throws IOException {
        StructLikeSet set = StructLikeSet.create(table.schema().asStruct());
        try (CloseableIterable<Record> reader = IcebergGenerics.read(table).select(columns).build()) {
            reader.forEach(set::add);
        }
        return set;
    }

    private PartitionKey createPartitionKey() {
        if (table.spec().isUnpartitioned()) {
            return null;
        }
        Record record = GenericRecord.create(table.schema()).copy(ImmutableMap.of("data", "aaa"));
        PartitionKey partitionKey = new PartitionKey(table.spec(), table.schema());
        partitionKey.partition(record);
        return partitionKey;
    }

    private EncryptedOutputFile createEncryptedOutputFile() {
        if (partition == null) {
            return fileFactory.newOutputFile();
        } else {
            return fileFactory.newOutputFile(partition);
        }
    }

    private List<T> testRowSet() {
        return Lists.newArrayList(createRow(1, "aaa"), createRow(2, "bbb"), createRow(3, "ccc"), createRow(4, "ddd"), createRow(5, "eee"));
    }

    private DataFile prepareDataFile(List<T> rowSet, FileAppenderFactory<T> appenderFactory) throws IOException {
        DataWriter<T> writer = appenderFactory.newDataWriter(createEncryptedOutputFile(), format, partition);
        try (DataWriter<T> closeableWriter = writer) {
            for (T row : rowSet) {
                closeableWriter.add(row);
            }
        }
        return writer.toDataFile();
    }

    @Test
    public void testDataWriter() throws IOException {
        FileAppenderFactory<T> appenderFactory = createAppenderFactory(null, null, null);
        List<T> rowSet = testRowSet();
        DataFile dataFile = prepareDataFile(rowSet, appenderFactory);
        table.newRowDelta().addRows(dataFile).commit();
        replacedert.replacedertEquals("Should have the expected records.", expectedRowSet(rowSet), actualRowSet("*"));
    }

    @Test
    public void testEqDeleteWriter() throws IOException {
        List<Integer> equalityFieldIds = Lists.newArrayList(table.schema().findField("id").fieldId());
        Schema eqDeleteRowSchema = table.schema().select("id");
        FileAppenderFactory<T> appenderFactory = createAppenderFactory(equalityFieldIds, eqDeleteRowSchema, null);
        List<T> rowSet = testRowSet();
        DataFile dataFile = prepareDataFile(rowSet, appenderFactory);
        table.newRowDelta().addRows(dataFile).commit();
        // The equality field is 'id'. No matter what the value of the 'data' field is, the 1st, 3rd and 5th
        // rows should be deleted.
        List<T> deletes = Lists.newArrayList(createRow(1, "aaa"), createRow(3, "bbb"), createRow(5, "ccc"));
        EncryptedOutputFile out = createEncryptedOutputFile();
        EqualityDeleteWriter<T> eqDeleteWriter = appenderFactory.newEqDeleteWriter(out, format, partition);
        try (EqualityDeleteWriter<T> closeableWriter = eqDeleteWriter) {
            closeableWriter.deleteAll(deletes);
        }
        // Check that the equality delete file has the expected equality deletes.
        GenericRecord gRecord = GenericRecord.create(eqDeleteRowSchema);
        Set<Record> expectedDeletes = Sets.newHashSet(gRecord.copy("id", 1), gRecord.copy("id", 3), gRecord.copy("id", 5));
        Assert.assertEquals(expectedDeletes, Sets.newHashSet(createReader(eqDeleteRowSchema, out.encryptingOutputFile().toInputFile())));
        table.newRowDelta().addDeletes(eqDeleteWriter.toDeleteFile()).commit();
        List<T> expected = Lists.newArrayList(createRow(2, "bbb"), createRow(4, "ddd"));
        replacedert.replacedertEquals("Should have the expected records", expectedRowSet(expected), actualRowSet("*"));
    }

    @Test
    public void testPosDeleteWriter() throws IOException {
        // Initialize FileAppenderFactory without pos-delete row schema.
        FileAppenderFactory<T> appenderFactory = createAppenderFactory(null, null, null);
        List<T> rowSet = testRowSet();
        DataFile dataFile = prepareDataFile(rowSet, appenderFactory);
        List<Pair<CharSequence, Long>> deletes = Lists.newArrayList(Pair.of(dataFile.path(), 0L), Pair.of(dataFile.path(), 2L), Pair.of(dataFile.path(), 4L));
        EncryptedOutputFile out = createEncryptedOutputFile();
        PositionDeleteWriter<T> eqDeleteWriter = appenderFactory.newPosDeleteWriter(out, format, partition);
        try (PositionDeleteWriter<T> closeableWriter = eqDeleteWriter) {
            for (Pair<CharSequence, Long> delete : deletes) {
                closeableWriter.delete(delete.first(), delete.second());
            }
        }
        // Check that the pos delete file has the expected pos deletes.
        Schema pathPosSchema = DeleteSchemaUtil.pathPosSchema();
        GenericRecord gRecord = GenericRecord.create(pathPosSchema);
        Set<Record> expectedDeletes = Sets.newHashSet(gRecord.copy("file_path", dataFile.path(), "pos", 0L), gRecord.copy("file_path", dataFile.path(), "pos", 2L), gRecord.copy("file_path", dataFile.path(), "pos", 4L));
        Assert.assertEquals(expectedDeletes, Sets.newHashSet(createReader(pathPosSchema, out.encryptingOutputFile().toInputFile())));
        table.newRowDelta().addRows(dataFile).addDeletes(eqDeleteWriter.toDeleteFile()).validateDataFilesExist(eqDeleteWriter.referencedDataFiles()).validateDeletedFiles().commit();
        List<T> expected = Lists.newArrayList(createRow(2, "bbb"), createRow(4, "ddd"));
        replacedert.replacedertEquals("Should have the expected records", expectedRowSet(expected), actualRowSet("*"));
    }

    @Test
    public void testPosDeleteWriterWithRowSchema() throws IOException {
        FileAppenderFactory<T> appenderFactory = createAppenderFactory(null, null, table.schema());
        List<T> rowSet = testRowSet();
        DataFile dataFile = prepareDataFile(rowSet, appenderFactory);
        List<PositionDelete<T>> deletes = Lists.newArrayList(new PositionDelete<T>().set(dataFile.path(), 0, rowSet.get(0)), new PositionDelete<T>().set(dataFile.path(), 2, rowSet.get(2)), new PositionDelete<T>().set(dataFile.path(), 4, rowSet.get(4)));
        EncryptedOutputFile out = createEncryptedOutputFile();
        PositionDeleteWriter<T> eqDeleteWriter = appenderFactory.newPosDeleteWriter(out, format, partition);
        try (PositionDeleteWriter<T> closeableWriter = eqDeleteWriter) {
            for (PositionDelete<T> delete : deletes) {
                closeableWriter.delete(delete.path(), delete.pos(), delete.row());
            }
        }
        // Check that the pos delete file has the expected pos deletes.
        Schema pathPosRowSchema = DeleteSchemaUtil.posDeleteSchema(table.schema());
        GenericRecord gRecord = GenericRecord.create(pathPosRowSchema);
        GenericRecord rowRecord = GenericRecord.create(table.schema());
        Set<Record> expectedDeletes = Sets.newHashSet(gRecord.copy("file_path", dataFile.path(), "pos", 0L, "row", rowRecord.copy("id", 1, "data", "aaa")), gRecord.copy("file_path", dataFile.path(), "pos", 2L, "row", rowRecord.copy("id", 3, "data", "ccc")), gRecord.copy("file_path", dataFile.path(), "pos", 4L, "row", rowRecord.copy("id", 5, "data", "eee")));
        Assert.assertEquals(expectedDeletes, Sets.newHashSet(createReader(pathPosRowSchema, out.encryptingOutputFile().toInputFile())));
        table.newRowDelta().addRows(dataFile).addDeletes(eqDeleteWriter.toDeleteFile()).validateDataFilesExist(eqDeleteWriter.referencedDataFiles()).validateDeletedFiles().commit();
        List<T> expected = Lists.newArrayList(createRow(2, "bbb"), createRow(4, "ddd"));
        replacedert.replacedertEquals("Should have the expected records", expectedRowSet(expected), actualRowSet("*"));
    }

    private CloseableIterable<Record> createReader(Schema schema, InputFile inputFile) {
        switch(format) {
            case PARQUET:
                return Parquet.read(inputFile).project(schema).createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(schema, fileSchema)).build();
            case AVRO:
                return Avro.read(inputFile).project(schema).createReaderFunc(DataReader::create).build();
            default:
                throw new UnsupportedOperationException("Unsupported file format: " + format);
        }
    }
}

17 Source : OutputFileFactory.java
with Apache License 2.0
from apache

/**
 * Factory responsible for generating unique but recognizable data file names.
 */
public class OutputFileFactory {

    private final PartitionSpec spec;

    private final FileFormat format;

    private final LocationProvider locations;

    private final FileIO io;

    private final EncryptionManager encryptionManager;

    private final int partitionId;

    private final long taskId;

    // The purpose of this uuid is to be able to know from two paths that they were written by the same operation.
    // That's useful, for example, if a Spark job dies and leaves files in the file system, you can identify them all
    // with a recursive listing and grep.
    private final String operationId;

    private final AtomicInteger fileCount = new AtomicInteger(0);

    /**
     * Constructor where a generated UUID is used as the operationId to ensure uniqueness.
     * @param spec Partition specification used by the location provider
     * @param format File format used for the extension
     * @param locations Location provider used for generating locations
     * @param io FileIO to store the files
     * @param encryptionManager Encryption manager used for encrypting the files
     * @param partitionId First part of the file name
     * @param taskId Second part of the file name
     */
    public OutputFileFactory(PartitionSpec spec, FileFormat format, LocationProvider locations, FileIO io, EncryptionManager encryptionManager, int partitionId, long taskId) {
        this(spec, format, locations, io, encryptionManager, parreplacedionId, taskId, UUID.randomUUID().toString());
    }

    /**
     * Constructor with specific operationId. The [partitionId, taskId, operationId] triplet has to be unique across JVM
     * instances, otherwise the same file name could be generated by different instances of the OutputFileFactory.
     * @param spec Partition specification used by the location provider
     * @param format File format used for the extension
     * @param locations Location provider used for generating locations
     * @param io FileIO to store the files
     * @param encryptionManager Encryption manager used for encrypting the files
     * @param partitionId First part of the file name
     * @param taskId Second part of the file name
     * @param operationId Third part of the file name
     */
    public OutputFileFactory(PartitionSpec spec, FileFormat format, LocationProvider locations, FileIO io, EncryptionManager encryptionManager, int partitionId, long taskId, String operationId) {
        this.spec = spec;
        this.format = format;
        this.locations = locations;
        this.io = io;
        this.encryptionManager = encryptionManager;
        this.partitionId = partitionId;
        this.taskId = taskId;
        this.operationId = operationId;
    }

    private String generateFilename() {
        return format.addExtension(String.format("%05d-%d-%s-%05d", partitionId, taskId, operationId, fileCount.incrementAndGet()));
    }

    /**
     * Generates EncryptedOutputFile for UnpartitionedWriter.
     */
    public EncryptedOutputFile newOutputFile() {
        OutputFile file = io.newOutputFile(locations.newDataLocation(generateFilename()));
        return encryptionManager.encrypt(file);
    }

    /**
     * Generates EncryptedOutputFile for PartitionedWriter.
     */
    public EncryptedOutputFile newOutputFile(StructLike partition) {
        String newDataLocation = locations.newDataLocation(spec, partition, generateFilename());
        OutputFile rawOutputFile = io.newOutputFile(newDataLocation);
        return encryptionManager.encrypt(rawOutputFile);
    }
}
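
The following is an illustrative sketch (not taken from the project above) of how a writer task might wire up an OutputFileFactory; the table location, partitionId and taskId values are assumptions made for the example. All names generated by one factory share the partitionId-taskId-operationId prefix, which is what makes files written by the same operation easy to find with a recursive listing and grep, as the Javadoc above explains.

// Hypothetical example: load an existing Hadoop-backed table (the path is made up) and
// create a factory for partition 1, task 7. Each newOutputFile() call increments the
// internal counter, yielding names like ".../data/00001-7-<uuid>-00001.parquet".
Table table = new HadoopTables().load("file:/tmp/warehouse/example_table");
OutputFileFactory fileFactory = new OutputFileFactory(table.spec(), FileFormat.PARQUET, table.locationProvider(), table.io(), table.encryption(), 1, 7);
EncryptedOutputFile first = fileFactory.newOutputFile();
EncryptedOutputFile second = fileFactory.newOutputFile();
System.out.println(first.encryptingOutputFile().location());
System.out.println(second.encryptingOutputFile().location());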

16 Source : Spark3Util.java
with Apache License 2.0
from apache

public static boolean isVectorizationEnabled(FileFormat fileFormat, Map<String, String> properties, RuntimeConfig sessionConf, CaseInsensitiveStringMap readOptions) {
    String readOptionValue = readOptions.get(SparkReadOptions.VECTORIZATION_ENABLED);
    if (readOptionValue != null) {
        return Boolean.parseBoolean(readOptionValue);
    }
    String sessionConfValue = sessionConf.get("spark.sql.iceberg.vectorization.enabled", null);
    if (sessionConfValue != null) {
        return Boolean.parseBoolean(sessionConfValue);
    }
    switch(fileFormat) {
        case PARQUET:
            return PropertyUtil.propertyAsBoolean(properties, TableProperties.PARQUET_VECTORIZATION_ENABLED, TableProperties.PARQUET_VECTORIZATION_ENABLED_DEFAULT);
        case ORC:
            return PropertyUtil.propertyAsBoolean(properties, TableProperties.ORC_VECTORIZATION_ENABLED, TableProperties.ORC_VECTORIZATION_ENABLED_DEFAULT);
        default:
            return false;
    }
}
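
A minimal, hypothetical call to the helper above (the option and property values are invented for illustration, and a SparkSession named spark is assumed to be in scope). The precedence is: an explicit read option wins over the Spark session setting, which wins over the table property; formats other than Parquet and ORC always resolve to false.

// The read option is "false", so vectorization is reported as disabled even though the
// table property enables it for Parquet.
Map<String, String> tableProps = ImmutableMap.of(TableProperties.PARQUET_VECTORIZATION_ENABLED, "true");
CaseInsensitiveStringMap readOptions = new CaseInsensitiveStringMap(ImmutableMap.of(SparkReadOptions.VECTORIZATION_ENABLED, "false"));
boolean enabled = Spark3Util.isVectorizationEnabled(FileFormat.PARQUET, tableProps, spark.conf(), readOptions);
// enabled == false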

16 Source : TestInputFormatReaderDeletes.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public class TestInputFormatReaderDeletes extends DeleteReadTests {

    private final Configuration conf = new Configuration();

    private final HadoopTables tables = new HadoopTables(conf);

    private TestHelper helper;

    // parametrized variables
    private final String inputFormat;

    private final FileFormat fileFormat;

    @Parameterized.Parameters(name = "inputFormat = {0}, fileFormat={1}")
    public static Object[][] parameters() {
        return new Object[][] { { "IcebergInputFormat", FileFormat.PARQUET }, { "IcebergInputFormat", FileFormat.AVRO }, { "IcebergInputFormat", FileFormat.ORC }, { "MapredIcebergInputFormat", FileFormat.PARQUET }, { "MapredIcebergInputFormat", FileFormat.AVRO }, { "MapredIcebergInputFormat", FileFormat.ORC } };
    }

    public TestInputFormatReaderDeletes(String inputFormat, FileFormat fileFormat) {
        this.inputFormat = inputFormat;
        this.fileFormat = fileFormat;
    }

    @Override
    protected Table createTable(String name, Schema schema, PartitionSpec spec) throws IOException {
        Table table;
        File location = temp.newFolder(inputFormat, fileFormat.name());
        Assert.assertTrue(location.delete());
        helper = new TestHelper(conf, tables, location.toString(), schema, spec, fileFormat, temp);
        table = helper.createTable();
        TableOperations ops = ((BaseTable) table).operations();
        TableMetadata meta = ops.current();
        ops.commit(meta, meta.upgradeToFormatVersion(2));
        return table;
    }

    @Override
    protected void dropTable(String name) {
        tables.dropTable(helper.table().location());
    }

    @Override
    public StructLikeSet rowSet(String name, Table table, String... columns) {
        InputFormatConfig.ConfigBuilder builder = new InputFormatConfig.ConfigBuilder(conf).readFrom(table.location());
        Schema projected = table.schema().select(columns);
        StructLikeSet set = StructLikeSet.create(projected.asStruct());
        set.addAll(TestIcebergInputFormats.TESTED_INPUT_FORMATS.stream().filter(recordFactory -> recordFactory.name().equals(inputFormat)).map(recordFactory -> recordFactory.create(builder.project(projected).conf()).getRecords()).flatMap(List::stream).collect(Collectors.toList()));
        return set;
    }

    @Override
    protected boolean expectPruned() {
        return false;
    }
}

16 Source : TestIcebergStreamWriter.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public class TestIcebergStreamWriter {

    @Rule
    public TemporaryFolder tempFolder = new TemporaryFolder();

    private String tablePath;

    private Table table;

    private final FileFormat format;

    private final boolean partitioned;

    @Parameterized.Parameters(name = "format = {0}, partitioned = {1}")
    public static Object[][] parameters() {
        return new Object[][] { { "avro", true }, { "avro", false }, { "orc", true }, { "orc", false }, { "parquet", true }, { "parquet", false } };
    }

    public TestIcebergStreamWriter(String format, boolean partitioned) {
        this.format = FileFormat.valueOf(format.toUpperCase(Locale.ENGLISH));
        this.partitioned = partitioned;
    }

    @Before
    public void before() throws IOException {
        File folder = tempFolder.newFolder();
        tablePath = folder.getAbsolutePath();
        // Construct the iceberg table.
        Map<String, String> props = ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, format.name());
        table = SimpleDataUtil.createTable(tablePath, props, partitioned);
    }

    @Test
    public void testWritingTable() throws Exception {
        long checkpointId = 1L;
        try (OneInputStreamOperatorTestHarness<RowData, WriteResult> testHarness = createIcebergStreamWriter()) {
            // The first checkpoint
            testHarness.processElement(SimpleDataUtil.createRowData(1, "hello"), 1);
            testHarness.processElement(SimpleDataUtil.createRowData(2, "world"), 1);
            testHarness.processElement(SimpleDataUtil.createRowData(3, "hello"), 1);
            testHarness.prepareSnapshotPreBarrier(checkpointId);
            long expectedDataFiles = partitioned ? 2 : 1;
            WriteResult result = WriteResult.builder().addAll(testHarness.extractOutputValues()).build();
            Assert.assertEquals(0, result.deleteFiles().length);
            Assert.assertEquals(expectedDataFiles, result.dataFiles().length);
            checkpointId = checkpointId + 1;
            // The second checkpoint
            testHarness.processElement(SimpleDataUtil.createRowData(4, "foo"), 1);
            testHarness.processElement(SimpleDataUtil.createRowData(5, "bar"), 2);
            testHarness.prepareSnapshotPreBarrier(checkpointId);
            expectedDataFiles = partitioned ? 4 : 2;
            result = WriteResult.builder().addAll(testHarness.extractOutputValues()).build();
            Assert.assertEquals(0, result.deleteFiles().length);
            Assert.assertEquals(expectedDataFiles, result.dataFiles().length);
            // Commit the iceberg transaction.
            AppendFiles appendFiles = table.newAppend();
            Arrays.stream(result.dataFiles()).forEach(appendFiles::appendFile);
            appendFiles.commit();
            // Assert the table records.
            SimpleDataUtil.assertTableRecords(tablePath, Lists.newArrayList(SimpleDataUtil.createRecord(1, "hello"), SimpleDataUtil.createRecord(2, "world"), SimpleDataUtil.createRecord(3, "hello"), SimpleDataUtil.createRecord(4, "foo"), SimpleDataUtil.createRecord(5, "bar")));
        }
    }

    @Test
    public void testSnapshotTwice() throws Exception {
        long checkpointId = 1;
        long timestamp = 1;
        try (OneInputStreamOperatorTestHarness<RowData, WriteResult> testHarness = createIcebergStreamWriter()) {
            testHarness.processElement(SimpleDataUtil.createRowData(1, "hello"), timestamp++);
            testHarness.processElement(SimpleDataUtil.createRowData(2, "world"), timestamp);
            testHarness.prepareSnapshotPreBarrier(checkpointId++);
            long expectedDataFiles = partitioned ? 2 : 1;
            WriteResult result = WriteResult.builder().addAll(testHarness.extractOutputValues()).build();
            Assert.assertEquals(0, result.deleteFiles().length);
            Assert.assertEquals(expectedDataFiles, result.dataFiles().length);
            // snapshot again immediately.
            for (int i = 0; i < 5; i++) {
                testHarness.prepareSnapshotPreBarrier(checkpointId++);
                result = WriteResult.builder().addAll(testHarness.extractOutputValues()).build();
                Assert.assertEquals(0, result.deleteFiles().length);
                Assert.assertEquals(expectedDataFiles, result.dataFiles().length);
            }
        }
    }

    @Test
    public void testTableWithoutSnapshot() throws Exception {
        try (OneInputStreamOperatorTestHarness<RowData, WriteResult> testHarness = createIcebergStreamWriter()) {
            Assert.assertEquals(0, testHarness.extractOutputValues().size());
        }
        // Even though we closed the iceberg stream writer, there is no orphan data file.
        Assert.assertEquals(0, scanDataFiles().size());
        try (OneInputStreamOperatorTestHarness<RowData, WriteResult> testHarness = createIcebergStreamWriter()) {
            testHarness.processElement(SimpleDataUtil.createRowData(1, "hello"), 1);
            // Still no data file emitted yet, because there is no checkpoint.
            Assert.assertEquals(0, testHarness.extractOutputValues().size());
        }
        // Once the iceberg stream writer is closed, an orphan data file is left behind.
        Assert.assertEquals(1, scanDataFiles().size());
    }

    private Set<String> scanDataFiles() throws IOException {
        Path dataDir = new Path(tablePath, "data");
        FileSystem fs = FileSystem.get(new Configuration());
        if (!fs.exists(dataDir)) {
            return ImmutableSet.of();
        } else {
            Set<String> paths = Sets.newHashSet();
            RemoteIterator<LocatedFileStatus> iterators = fs.listFiles(dataDir, true);
            while (iterators.hasNext()) {
                LocatedFileStatus status = iterators.next();
                if (status.isFile()) {
                    Path path = status.getPath();
                    if (path.getName().endsWith("." + format.toString().toLowerCase())) {
                        paths.add(path.toString());
                    }
                }
            }
            return paths;
        }
    }

    @Test
    public void testBoundedStreamCloseWithEmittingDataFiles() throws Exception {
        try (OneInputStreamOperatorTestHarness<RowData, WriteResult> testHarness = createIcebergStreamWriter()) {
            testHarness.processElement(SimpleDataUtil.createRowData(1, "hello"), 1);
            testHarness.processElement(SimpleDataUtil.createRowData(2, "world"), 2);
            Assert.assertTrue(testHarness.getOneInputOperator() instanceof BoundedOneInput);
            ((BoundedOneInput) testHarness.getOneInputOperator()).endInput();
            long expectedDataFiles = partitioned ? 2 : 1;
            WriteResult result = WriteResult.builder().addAll(testHarness.extractOutputValues()).build();
            Assert.assertEquals(0, result.deleteFiles().length);
            Assert.assertEquals(expectedDataFiles, result.dataFiles().length);
            // invoke endInput again.
            ((BoundedOneInput) testHarness.getOneInputOperator()).endInput();
            result = WriteResult.builder().addAll(testHarness.extractOutputValues()).build();
            Assert.assertEquals(0, result.deleteFiles().length);
            Assert.assertEquals(expectedDataFiles * 2, result.dataFiles().length);
        }
    }

    @Test
    public void testTableWithTargetFileSize() throws Exception {
        // TODO: ORC file does not support target file size before closed.
        if (format == FileFormat.ORC) {
            return;
        }
        // Adjust the target-file-size in table properties (~4 bytes; low enough to trigger file rolling).
        table.updateProperties().set(TableProperties.WRITE_TARGET_FILE_SIZE_BYTES, "4").commit();
        List<RowData> rows = Lists.newArrayListWithCapacity(8000);
        List<Record> records = Lists.newArrayListWithCapacity(8000);
        for (int i = 0; i < 2000; i++) {
            for (String data : new String[] { "a", "b", "c", "d" }) {
                rows.add(SimpleDataUtil.createRowData(i, data));
                records.add(SimpleDataUtil.createRecord(i, data));
            }
        }
        try (OneInputStreamOperatorTestHarness<RowData, WriteResult> testHarness = createIcebergStreamWriter()) {
            for (RowData row : rows) {
                testHarness.processElement(row, 1);
            }
            // snapshot the operator.
            testHarness.prepareSnapshotPreBarrier(1);
            WriteResult result = WriteResult.builder().addAll(testHarness.extractOutputValues()).build();
            Assert.assertEquals(0, result.deleteFiles().length);
            Assert.assertEquals(8, result.dataFiles().length);
            // Assert that the data files have the expected records.
            for (DataFile dataFile : result.dataFiles()) {
                Assert.assertEquals(1000, dataFile.recordCount());
            }
            // Commit the iceberg transaction.
            AppendFiles appendFiles = table.newAppend();
            Arrays.stream(result.dataFiles()).forEach(appendFiles::appendFile);
            appendFiles.commit();
        }
        // Assert the table records.
        SimpleDataUtil.assertTableRecords(tablePath, records);
    }

    @Test
    public void testPromotedFlinkDataType() throws Exception {
        Schema iSchema = new Schema(Types.NestedField.required(1, "tinyint", Types.IntegerType.get()), Types.NestedField.required(2, "smallint", Types.IntegerType.get()), Types.NestedField.optional(3, "int", Types.IntegerType.get()));
        TableSchema flinkSchema = TableSchema.builder().field("tinyint", DataTypes.TINYINT().notNull()).field("smallint", DataTypes.SMALLINT().notNull()).field("int", DataTypes.INT().nullable()).build();
        PartitionSpec spec;
        if (partitioned) {
            spec = PartitionSpec.builderFor(iSchema).identity("smallint").identity("tinyint").identity("int").build();
        } else {
            spec = PartitionSpec.unpartitioned();
        }
        String location = tempFolder.newFolder().getAbsolutePath();
        Map<String, String> props = ImmutableMap.of(TableProperties.DEFAULT_FILE_FORMAT, format.name());
        Table icebergTable = new HadoopTables().create(iSchema, spec, props, location);
        List<RowData> rows = Lists.newArrayList(GenericRowData.of((byte) 0x01, (short) -32768, 101), GenericRowData.of((byte) 0x02, (short) 0, 102), GenericRowData.of((byte) 0x03, (short) 32767, 103));
        Record record = GenericRecord.create(iSchema);
        List<Record> expected = Lists.newArrayList(record.copy(ImmutableMap.of("tinyint", 1, "smallint", -32768, "int", 101)), record.copy(ImmutableMap.of("tinyint", 2, "smallint", 0, "int", 102)), record.copy(ImmutableMap.of("tinyint", 3, "smallint", 32767, "int", 103)));
        try (OneInputStreamOperatorTestHarness<RowData, WriteResult> testHarness = createIcebergStreamWriter(icebergTable, flinkSchema)) {
            for (RowData row : rows) {
                testHarness.processElement(row, 1);
            }
            testHarness.prepareSnapshotPreBarrier(1);
            WriteResult result = WriteResult.builder().addAll(testHarness.extractOutputValues()).build();
            Assert.assertEquals(0, result.deleteFiles().length);
            Assert.assertEquals(partitioned ? 3 : 1, result.dataFiles().length);
            // Commit the iceberg transaction.
            AppendFiles appendFiles = icebergTable.newAppend();
            Arrays.stream(result.dataFiles()).forEach(appendFiles::appendFile);
            appendFiles.commit();
        }
        SimpleDataUtil.assertTableRecords(location, expected);
    }

    private OneInputStreamOperatorTestHarness<RowData, WriteResult> createIcebergStreamWriter() throws Exception {
        return createIcebergStreamWriter(table, SimpleDataUtil.FLINK_SCHEMA);
    }

    private OneInputStreamOperatorTestHarness<RowData, WriteResult> createIcebergStreamWriter(Table icebergTable, TableSchema flinkSchema) throws Exception {
        RowType flinkRowType = FlinkSink.toFlinkRowType(icebergTable.schema(), flinkSchema);
        IcebergStreamWriter<RowData> streamWriter = FlinkSink.createStreamWriter(icebergTable, flinkRowType, null);
        OneInputStreamOperatorTestHarness<RowData, WriteResult> harness = new OneInputStreamOperatorTestHarness<>(streamWriter, 1, 1, 0);
        harness.setup();
        harness.open();
        return harness;
    }
}

16 Source : TestRewriteDataFilesAction.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public class TestRewriteDataFilesAction extends FlinkCatalogTestBase {

    private static final String TABLE_NAME_UNPARTITIONED = "test_table_unpartitioned";

    private static final String TABLE_NAME_PARTITIONED = "test_table_partitioned";

    private final FileFormat format;

    private Table icebergTableUnPartitioned;

    private Table icebergTablePartitioned;

    public TestRewriteDataFilesAction(String catalogName, Namespace baseNamespace, FileFormat format) {
        super(catalogName, baseNamespace);
        this.format = format;
    }

    @Override
    protected TableEnvironment getTableEnv() {
        super.getTableEnv().getConfig().getConfiguration().set(CoreOptions.DEFAULT_PARALLELISM, 1);
        return super.getTableEnv();
    }

    @Parameterized.Parameters(name = "catalogName={0}, baseNamespace={1}, format={2}")
    public static Iterable<Object[]> parameters() {
        List<Object[]> parameters = Lists.newArrayList();
        for (FileFormat format : new FileFormat[] { FileFormat.AVRO, FileFormat.ORC, FileFormat.PARQUET }) {
            for (Object[] catalogParams : FlinkCatalogTestBase.parameters()) {
                String catalogName = (String) catalogParams[0];
                Namespace baseNamespace = (Namespace) catalogParams[1];
                parameters.add(new Object[] { catalogName, baseNamespace, format });
            }
        }
        return parameters;
    }

    @Rule
    public TemporaryFolder temp = new TemporaryFolder();

    @Before
    public void before() {
        super.before();
        sql("CREATE DATABASE %s", flinkDatabase);
        sql("USE CATALOG %s", catalogName);
        sql("USE %s", DATABASE);
        sql("CREATE TABLE %s (id int, data varchar) with ('write.format.default'='%s')", TABLE_NAME_UNPARreplacedIONED, format.name());
        icebergTableUnParreplacedioned = validationCatalog.loadTable(TableIdentifier.of(icebergNamespace, TABLE_NAME_UNPARreplacedIONED));
        sql("CREATE TABLE %s (id int, data varchar,spec varchar) " + " PARreplacedIONED BY (data,spec) with ('write.format.default'='%s')", TABLE_NAME_PARreplacedIONED, format.name());
        icebergTableParreplacedioned = validationCatalog.loadTable(TableIdentifier.of(icebergNamespace, TABLE_NAME_PARreplacedIONED));
    }

    @After
    public void clean() {
        sql("DROP TABLE IF EXISTS %s.%s", flinkDatabase, TABLE_NAME_UNPARreplacedIONED);
        sql("DROP TABLE IF EXISTS %s.%s", flinkDatabase, TABLE_NAME_PARreplacedIONED);
        sql("DROP DATABASE IF EXISTS %s", flinkDatabase);
        super.clean();
    }

    @Test
    public void testRewriteDataFilesEmptyTable() throws Exception {
        replacedert.replacedertNull("Table must be empty", icebergTableUnParreplacedioned.currentSnapshot());
        Actions.forTable(icebergTableUnParreplacedioned).rewriteDataFiles().execute();
        replacedert.replacedertNull("Table must stay empty", icebergTableUnParreplacedioned.currentSnapshot());
    }

    @Test
    public void testRewriteDataFilesUnpartitionedTable() throws Exception {
        sql("INSERT INTO %s SELECT 1, 'hello'", TABLE_NAME_UNPARTITIONED);
        sql("INSERT INTO %s SELECT 2, 'world'", TABLE_NAME_UNPARTITIONED);
        icebergTableUnPartitioned.refresh();
        CloseableIterable<FileScanTask> tasks = icebergTableUnPartitioned.newScan().planFiles();
        List<DataFile> dataFiles = Lists.newArrayList(CloseableIterable.transform(tasks, FileScanTask::file));
        Assert.assertEquals("Should have 2 data files before rewrite", 2, dataFiles.size());
        RewriteDataFilesActionResult result = Actions.forTable(icebergTableUnPartitioned).rewriteDataFiles().execute();
        Assert.assertEquals("Action should rewrite 2 data files", 2, result.deletedDataFiles().size());
        Assert.assertEquals("Action should add 1 data file", 1, result.addedDataFiles().size());
        icebergTableUnPartitioned.refresh();
        CloseableIterable<FileScanTask> tasks1 = icebergTableUnPartitioned.newScan().planFiles();
        List<DataFile> dataFiles1 = Lists.newArrayList(CloseableIterable.transform(tasks1, FileScanTask::file));
        Assert.assertEquals("Should have 1 data file after rewrite", 1, dataFiles1.size());
        // Assert the table records as expected.
        SimpleDataUtil.assertTableRecords(icebergTableUnPartitioned, Lists.newArrayList(SimpleDataUtil.createRecord(1, "hello"), SimpleDataUtil.createRecord(2, "world")));
    }

    @Test
    public void testRewriteDataFilesPartitionedTable() throws Exception {
        sql("INSERT INTO %s SELECT 1, 'hello' ,'a'", TABLE_NAME_PARTITIONED);
        sql("INSERT INTO %s SELECT 2, 'hello' ,'a'", TABLE_NAME_PARTITIONED);
        sql("INSERT INTO %s SELECT 3, 'world' ,'b'", TABLE_NAME_PARTITIONED);
        sql("INSERT INTO %s SELECT 4, 'world' ,'b'", TABLE_NAME_PARTITIONED);
        icebergTablePartitioned.refresh();
        CloseableIterable<FileScanTask> tasks = icebergTablePartitioned.newScan().planFiles();
        List<DataFile> dataFiles = Lists.newArrayList(CloseableIterable.transform(tasks, FileScanTask::file));
        Assert.assertEquals("Should have 4 data files before rewrite", 4, dataFiles.size());
        RewriteDataFilesActionResult result = Actions.forTable(icebergTablePartitioned).rewriteDataFiles().execute();
        Assert.assertEquals("Action should rewrite 4 data files", 4, result.deletedDataFiles().size());
        Assert.assertEquals("Action should add 2 data files", 2, result.addedDataFiles().size());
        icebergTablePartitioned.refresh();
        CloseableIterable<FileScanTask> tasks1 = icebergTablePartitioned.newScan().planFiles();
        List<DataFile> dataFiles1 = Lists.newArrayList(CloseableIterable.transform(tasks1, FileScanTask::file));
        Assert.assertEquals("Should have 2 data files after rewrite", 2, dataFiles1.size());
        // Assert the table records as expected.
        Schema schema = new Schema(Types.NestedField.optional(1, "id", Types.IntegerType.get()), Types.NestedField.optional(2, "data", Types.StringType.get()), Types.NestedField.optional(3, "spec", Types.StringType.get()));
        Record record = GenericRecord.create(schema);
        SimpleDataUtil.assertTableRecords(icebergTablePartitioned, Lists.newArrayList(record.copy("id", 1, "data", "hello", "spec", "a"), record.copy("id", 2, "data", "hello", "spec", "a"), record.copy("id", 3, "data", "world", "spec", "b"), record.copy("id", 4, "data", "world", "spec", "b")));
    }

    @Test
    public void testRewriteDataFilesWithFilter() throws Exception {
        sql("INSERT INTO %s SELECT 1, 'hello' ,'a'", TABLE_NAME_PARreplacedIONED);
        sql("INSERT INTO %s SELECT 2, 'hello' ,'a'", TABLE_NAME_PARreplacedIONED);
        sql("INSERT INTO %s SELECT 3, 'world' ,'a'", TABLE_NAME_PARreplacedIONED);
        sql("INSERT INTO %s SELECT 4, 'world' ,'b'", TABLE_NAME_PARreplacedIONED);
        sql("INSERT INTO %s SELECT 5, 'world' ,'b'", TABLE_NAME_PARreplacedIONED);
        icebergTableParreplacedioned.refresh();
        CloseableIterable<FileScanTask> tasks = icebergTableParreplacedioned.newScan().planFiles();
        List<DataFile> dataFiles = Lists.newArrayList(CloseableIterable.transform(tasks, FileScanTask::file));
        replacedert.replacedertEquals("Should have 5 data files before rewrite", 5, dataFiles.size());
        RewriteDataFilesActionResult result = Actions.forTable(icebergTableParreplacedioned).rewriteDataFiles().filter(Expressions.equal("spec", "a")).filter(Expressions.startsWith("data", "he")).execute();
        replacedert.replacedertEquals("Action should rewrite 2 data files", 2, result.deletedDataFiles().size());
        replacedert.replacedertEquals("Action should add 1 data file", 1, result.addedDataFiles().size());
        icebergTableParreplacedioned.refresh();
        CloseableIterable<FileScanTask> tasks1 = icebergTableParreplacedioned.newScan().planFiles();
        List<DataFile> dataFiles1 = Lists.newArrayList(CloseableIterable.transform(tasks1, FileScanTask::file));
        replacedert.replacedertEquals("Should have 4 data files after rewrite", 4, dataFiles1.size());
        // replacedert the table records as expected.
        Schema schema = new Schema(Types.NestedField.optional(1, "id", Types.IntegerType.get()), Types.NestedField.optional(2, "data", Types.StringType.get()), Types.NestedField.optional(3, "spec", Types.StringType.get()));
        Record record = GenericRecord.create(schema);
        SimpleDataUtil.assertTableRecords(icebergTablePartitioned, Lists.newArrayList(record.copy("id", 1, "data", "hello", "spec", "a"), record.copy("id", 2, "data", "hello", "spec", "a"), record.copy("id", 3, "data", "world", "spec", "a"), record.copy("id", 4, "data", "world", "spec", "b"), record.copy("id", 5, "data", "world", "spec", "b")));
    }

    @Test
    public void testRewriteLargeTableHasResiduals() throws IOException {
        // all records belong to the same partition
        List<String> records1 = Lists.newArrayList();
        List<String> records2 = Lists.newArrayList();
        List<Record> expected = Lists.newArrayList();
        for (int i = 0; i < 100; i++) {
            int id = i;
            String data = String.valueOf(i % 3);
            if (i % 2 == 0) {
                records1.add("(" + id + ",'" + data + "')");
            } else {
                records2.add("(" + id + ",'" + data + "')");
            }
            Record record = RECORD.copy();
            record.setField("id", id);
            record.setField("data", data);
            expected.add(record);
        }
        sql("INSERT INTO %s values " + StringUtils.join(records1, ","), TABLE_NAME_UNPARreplacedIONED);
        sql("INSERT INTO %s values " + StringUtils.join(records2, ","), TABLE_NAME_UNPARreplacedIONED);
        icebergTableUnParreplacedioned.refresh();
        CloseableIterable<FileScanTask> tasks = icebergTableUnParreplacedioned.newScan().ignoreResiduals().filter(Expressions.equal("data", "0")).planFiles();
        for (FileScanTask task : tasks) {
            replacedert.replacedertEquals("Residuals must be ignored", Expressions.alwaysTrue(), task.residual());
        }
        List<DataFile> dataFiles = Lists.newArrayList(CloseableIterable.transform(tasks, FileScanTask::file));
        replacedert.replacedertEquals("Should have 2 data files before rewrite", 2, dataFiles.size());
        Actions actions = Actions.forTable(icebergTableUnParreplacedioned);
        RewriteDataFilesActionResult result = actions.rewriteDataFiles().filter(Expressions.equal("data", "0")).execute();
        replacedert.replacedertEquals("Action should rewrite 2 data files", 2, result.deletedDataFiles().size());
        replacedert.replacedertEquals("Action should add 1 data file", 1, result.addedDataFiles().size());
        // replacedert the table records as expected.
        SimpleDataUtil.replacedertTableRecords(icebergTableUnParreplacedioned, expected);
    }

    /**
     * A test case to avoid rewriting (compacting) files repeatedly.
     * <p>
     * If a data file cannot be combined with other data files into a CombinedScanTask, the resulting CombinedScanTask
     * list has size 1, so we remove these CombinedScanTasks to avoid compacting the same file repeatedly.
     * <p>
     * In this test case, we generate 3 data files and set targetSizeInBytes greater than the largest file size so that it
     * cannot be combined into a CombinedScanTask with the other data files. The data file with the largest file size will
     * not be rewritten.
     *
     * @throws IOException IOException
     */
    @Test
    public void testRewriteAvoidRepeateCompress() throws IOException {
        Assume.assumeFalse("ORC does not support getting length when file is opening", format.equals(FileFormat.ORC));
        List<Record> expected = Lists.newArrayList();
        Schema schema = icebergTableUnPartitioned.schema();
        GenericAppenderFactory genericAppenderFactory = new GenericAppenderFactory(schema);
        File file = temp.newFile();
        int count = 0;
        try (FileAppender<Record> fileAppender = genericAppenderFactory.newAppender(Files.localOutput(file), format)) {
            long filesize = 20000;
            for (; fileAppender.length() < filesize; count++) {
                Record record = SimpleDataUtil.createRecord(count, "iceberg");
                fileAppender.add(record);
                expected.add(record);
            }
        }
        DataFile dataFile = DataFiles.builder(icebergTableUnPartitioned.spec()).withPath(file.getAbsolutePath()).withFileSizeInBytes(file.length()).withFormat(format).withRecordCount(count).build();
        icebergTableUnPartitioned.newAppend().appendFile(dataFile).commit();
        sql("INSERT INTO %s SELECT 1,'a' ", TABLE_NAME_UNPARTITIONED);
        sql("INSERT INTO %s SELECT 2,'b' ", TABLE_NAME_UNPARTITIONED);
        icebergTableUnPartitioned.refresh();
        CloseableIterable<FileScanTask> tasks = icebergTableUnPartitioned.newScan().planFiles();
        List<DataFile> dataFiles = Lists.newArrayList(CloseableIterable.transform(tasks, FileScanTask::file));
        replacedert.replacedertEquals("Should have 3 data files before rewrite", 3, dataFiles.size());
        Actions actions = Actions.forTable(icebergTableUnParreplacedioned);
        long targetSizeInBytes = file.length() + 10;
        RewriteDataFilesActionResult result = actions.rewriteDataFiles().targetSizeInBytes(targetSizeInBytes).splitOpenFileCost(1).execute();
        replacedert.replacedertEquals("Action should rewrite 2 data files", 2, result.deletedDataFiles().size());
        replacedert.replacedertEquals("Action should add 1 data file", 1, result.addedDataFiles().size());
        icebergTableUnParreplacedioned.refresh();
        CloseableIterable<FileScanTask> tasks1 = icebergTableUnParreplacedioned.newScan().planFiles();
        List<DataFile> dataFilesRewrote = Lists.newArrayList(CloseableIterable.transform(tasks1, FileScanTask::file));
        replacedert.replacedertEquals("Should have 2 data files after rewrite", 2, dataFilesRewrote.size());
        // the biggest file do not be rewrote
        List rewroteDataFileNames = dataFilesRewrote.stream().map(ContentFile::path).collect(Collectors.toList());
        replacedert.replacedertTrue(rewroteDataFileNames.contains(file.getAbsolutePath()));
        // replacedert the table records as expected.
        expected.add(SimpleDataUtil.createRecord(1, "a"));
        expected.add(SimpleDataUtil.createRecord(2, "b"));
        SimpleDataUtil.assertTableRecords(icebergTableUnPartitioned, expected);
    }
}

16 Source : RowDataRewriter.java
with Apache License 2.0
from apache

public class RowDataRewriter {

    private static final Logger LOG = LoggerFactory.getLogger(RowDataRewriter.clreplaced);

    private final Schema schema;

    private final FileFormat format;

    private final String nameMapping;

    private final FileIO io;

    private final boolean caseSensitive;

    private final EncryptionManager encryptionManager;

    private final TaskWriterFactory<RowData> taskWriterFactory;

    private final String tableName;

    public RowDataRewriter(Table table, boolean caseSensitive, FileIO io, EncryptionManager encryptionManager) {
        this.schema = table.schema();
        this.caseSensitive = caseSensitive;
        this.io = io;
        this.encryptionManager = encryptionManager;
        this.nameMapping = PropertyUtil.propertyAsString(table.properties(), DEFAULT_NAME_MAPPING, null);
        this.tableName = table.name();
        String formatString = PropertyUtil.propertyAsString(table.properties(), TableProperties.DEFAULT_FILE_FORMAT, TableProperties.DEFAULT_FILE_FORMAT_DEFAULT);
        this.format = FileFormat.valueOf(formatString.toUpperCase(Locale.ENGLISH));
        RowType flinkSchema = FlinkSchemaUtil.convert(table.schema());
        this.taskWriterFactory = new RowDataTaskWriterFactory(table.schema(), flinkSchema, table.spec(), table.locationProvider(), io, encryptionManager, Long.MAX_VALUE, format, table.properties(), null);
    }

    public List<DataFile> rewriteDataForTasks(DataStream<CombinedScanTask> dataStream, int parallelism) throws Exception {
        RewriteMap map = new RewriteMap(schema, nameMapping, io, caseSensitive, encryptionManager, taskWriterFactory);
        DataStream<List<DataFile>> ds = dataStream.map(map).setParallelism(parallelism);
        return Lists.newArrayList(ds.executeAndCollect("Rewrite table :" + tableName)).stream().flatMap(Collection::stream).collect(Collectors.toList());
    }

    public static class RewriteMap extends RichMapFunction<CombinedScanTask, List<DataFile>> {

        private TaskWriter<RowData> writer;

        private int subTaskId;

        private int attemptId;

        private final Schema schema;

        private final String nameMapping;

        private final FileIO io;

        private final boolean caseSensitive;

        private final EncryptionManager encryptionManager;

        private final TaskWriterFactory<RowData> taskWriterFactory;

        public RewriteMap(Schema schema, String nameMapping, FileIO io, boolean caseSensitive, EncryptionManager encryptionManager, TaskWriterFactory<RowData> taskWriterFactory) {
            this.schema = schema;
            this.nameMapping = nameMapping;
            this.io = io;
            this.caseSensitive = caseSensitive;
            this.encryptionManager = encryptionManager;
            this.taskWriterFactory = taskWriterFactory;
        }

        @Override
        public void open(Configuration parameters) {
            this.subTaskId = getRuntimeContext().getIndexOfThisSubtask();
            this.attemptId = getRuntimeContext().getAttemptNumber();
            // Initialize the task writer factory.
            this.taskWriterFactory.initialize(subTaskId, attemptId);
        }

        @Override
        public List<DataFile> map(CombinedScanTask task) throws Exception {
            // Initialize the task writer.
            this.writer = taskWriterFactory.create();
            try (RowDataIterator iterator = new RowDataIterator(task, io, encryptionManager, schema, schema, nameMapping, caseSensitive)) {
                while (iterator.hasNext()) {
                    RowData rowData = iterator.next();
                    writer.write(rowData);
                }
                return Lists.newArrayList(writer.dataFiles());
            } catch (Throwable originalThrowable) {
                try {
                    LOG.error("Aborting commit for  (subTaskId {}, attemptId {})", subTaskId, attemptId);
                    writer.abort();
                    LOG.error("Aborted commit for  (subTaskId {}, attemptId {})", subTaskId, attemptId);
                } catch (Throwable inner) {
                    if (originalThrowable != inner) {
                        originalThrowable.addSuppressed(inner);
                        LOG.warn("Suppressing exception in catch: {}", inner.getMessage(), inner);
                    }
                }
                if (originalThrowable instanceof Exception) {
                    throw originalThrowable;
                } else {
                    throw new RuntimeException(originalThrowable);
                }
            }
        }
    }
}

16 Source : GenericAppenderFactory.java
with Apache License 2.0
from apache

@Override
public EqualityDeleteWriter<Record> newEqDeleteWriter(EncryptedOutputFile file, FileFormat format, StructLike partition) {
    Preconditions.checkState(equalityFieldIds != null && equalityFieldIds.length > 0, "Equality field ids shouldn't be null or empty when creating equality-delete writer");
    Preconditions.checkNotNull(eqDeleteRowSchema, "Equality delete row schema shouldn't be null when creating equality-delete writer");
    MetricsConfig metricsConfig = MetricsConfig.fromProperties(config);
    try {
        switch(format) {
            case AVRO:
                return Avro.writeDeletes(file.encryptingOutputFile()).createWriterFunc(DataWriter::create).withPartition(partition).overwrite().setAll(config).rowSchema(eqDeleteRowSchema).withSpec(spec).withKeyMetadata(file.keyMetadata()).equalityFieldIds(equalityFieldIds).buildEqualityWriter();
            case PARQUET:
                return Parquet.writeDeletes(file.encryptingOutputFile()).createWriterFunc(GenericParquetWriter::buildWriter).withPartition(partition).overwrite().setAll(config).metricsConfig(metricsConfig).rowSchema(eqDeleteRowSchema).withSpec(spec).withKeyMetadata(file.keyMetadata()).equalityFieldIds(equalityFieldIds).buildEqualityWriter();
            default:
                throw new UnsupportedOperationException("Cannot write equality-deletes for unsupported file format: " + format);
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}

16 Source : GenericAppenderFactory.java
with Apache License 2.0
from apache

@Override
public PositionDeleteWriter<Record> newPosDeleteWriter(EncryptedOutputFile file, FileFormat format, StructLike partition) {
    MetricsConfig metricsConfig = MetricsConfig.fromProperties(config);
    try {
        switch(format) {
            case AVRO:
                return Avro.writeDeletes(file.encryptingOutputFile()).createWriterFunc(DataWriter::create).withPartition(partition).overwrite().setAll(config).rowSchema(posDeleteRowSchema).withSpec(spec).withKeyMetadata(file.keyMetadata()).buildPositionWriter();
            case PARQUET:
                return Parquet.writeDeletes(file.encryptingOutputFile()).createWriterFunc(GenericParquetWriter::buildWriter).withPartition(partition).overwrite().setAll(config).metricsConfig(metricsConfig).rowSchema(posDeleteRowSchema).withSpec(spec).withKeyMetadata(file.keyMetadata()).buildPositionWriter();
            default:
                throw new UnsupportedOperationException("Cannot write pos-deletes for unsupported file format: " + format);
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}

16 Source : DataWriter.java
with Apache License 2.0
from apache

public class DataWriter<T> implements Closeable {

    private final FileAppender<T> appender;

    private final FileFormat format;

    private final String location;

    private final PartitionSpec spec;

    private final StructLike partition;

    private final ByteBuffer keyMetadata;

    private DataFile dataFile = null;

    public DataWriter(FileAppender<T> appender, FileFormat format, String location, PartitionSpec spec, StructLike partition, EncryptionKeyMetadata keyMetadata) {
        this.appender = appender;
        this.format = format;
        this.location = location;
        this.spec = spec;
        this.partition = partition;
        this.keyMetadata = keyMetadata != null ? keyMetadata.buffer() : null;
    }

    public void add(T row) {
        appender.add(row);
    }

    public long length() {
        return appender.length();
    }

    @Override
    public void close() throws IOException {
        if (dataFile == null) {
            appender.close();
            this.dataFile = DataFiles.builder(spec).withFormat(format).withPath(location).withPartition(partition).withEncryptionKeyMetadata(keyMetadata).withFileSizeInBytes(appender.length()).withMetrics(appender.metrics()).withSplitOffsets(appender.splitOffsets()).build();
        }
    }

    public DataFile toDataFile() {
        Preconditions.checkState(dataFile != null, "Cannot create data file from unclosed writer");
        return dataFile;
    }
}
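
The add/close/toDataFile contract above can be exercised directly. A rough sketch, assuming the caller already has a FileAppender, a PartitionSpec, a partition value and an output location (all placeholders here), and writes an unencrypted file (null key metadata):

DataWriter<Record> dataWriter = new DataWriter<>(appender, FileFormat.PARQUET, outputLocation, spec, partition, null);
for (Record record : records) {
    dataWriter.add(record);
}
dataWriter.close();                           // builds the DataFile from the appender's metrics
DataFile dataFile = dataWriter.toDataFile();  // throws if called before close()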

16 Source : BaseTaskWriter.java
with Apache License 2.0
from apache

public abstract class BaseTaskWriter<T> implements TaskWriter<T> {

    private final List<DataFile> completedDataFiles = Lists.newArrayList();

    private final List<DeleteFile> completedDeleteFiles = Lists.newArrayList();

    private final Set<CharSequence> referencedDataFiles = CharSequenceSet.empty();

    private final PartitionSpec spec;

    private final FileFormat format;

    private final FileAppenderFactory<T> appenderFactory;

    private final OutputFileFactory fileFactory;

    private final FileIO io;

    private final long targetFileSize;

    protected BaseTaskWriter(PartitionSpec spec, FileFormat format, FileAppenderFactory<T> appenderFactory, OutputFileFactory fileFactory, FileIO io, long targetFileSize) {
        this.spec = spec;
        this.format = format;
        this.appenderFactory = appenderFactory;
        this.fileFactory = fileFactory;
        this.io = io;
        this.targetFileSize = targetFileSize;
    }

    protected PartitionSpec spec() {
        return spec;
    }

    @Override
    public void abort() throws IOException {
        close();
        // clean up files created by this writer
        Tasks.foreach(Iterables.concat(completedDataFiles, completedDeleteFiles)).throwFailureWhenFinished().noRetry().run(file -> io.deleteFile(file.path().toString()));
    }

    @Override
    public WriteResult complete() throws IOException {
        close();
        return WriteResult.builder().addDataFiles(completedDataFiles).addDeleteFiles(completedDeleteFiles).addReferencedDataFiles(referencedDataFiles).build();
    }

    /**
     * Base equality delta writer to write both insert records and equality-deletes.
     */
    protected abstract class BaseEqualityDeltaWriter implements Closeable {

        private final StructProjection structProjection;

        private RollingFileWriter dataWriter;

        private RollingEqDeleteWriter eqDeleteWriter;

        private SortedPosDeleteWriter<T> posDeleteWriter;

        private Map<StructLike, PathOffset> insertedRowMap;

        protected BaseEqualityDeltaWriter(StructLike partition, Schema schema, Schema deleteSchema) {
            Preconditions.checkNotNull(schema, "Iceberg table schema cannot be null.");
            Preconditions.checkNotNull(deleteSchema, "Equality-delete schema cannot be null.");
            this.structProjection = StructProjection.create(schema, deleteSchema);
            this.dataWriter = new RollingFileWriter(partition);
            this.eqDeleteWriter = new RollingEqDeleteWriter(partition);
            this.posDeleteWriter = new SortedPosDeleteWriter<>(appenderFactory, fileFactory, format, partition);
            this.insertedRowMap = StructLikeMap.create(deleteSchema.asStruct());
        }

        /**
         * Wrap the data as a {@link StructLike}.
         */
        protected abstract StructLike asStructLike(T data);

        public void write(T row) throws IOException {
            PathOffset pathOffset = PathOffset.of(dataWriter.currentPath(), dataWriter.currentRows());
            // Create a copied key from this row.
            StructLike copiedKey = StructCopy.copy(structProjection.wrap(asStructLike(row)));
            // Adding a pos-delete to replace the old path-offset.
            PathOffset previous = insertedRowMap.put(copiedKey, pathOffset);
            if (previous != null) {
                // TODO attach the previous row if has a positional-delete row schema in appender factory.
                posDeleteWriter.delete(previous.path, previous.rowOffset, null);
            }
            dataWriter.write(row);
        }

        /**
         * Write the pos-delete if there's an existing row matching the given key.
         *
         * @param key a projection with the same columns as the equality fields.
         */
        private void internalPosDelete(StructLike key) {
            PathOffset previous = insertedRowMap.remove(key);
            if (previous != null) {
                // TODO attach the previous row if has a positional-delete row schema in appender factory.
                posDeleteWriter.delete(previous.path, previous.rowOffset, null);
            }
        }

        /**
         * Delete rows whose equality fields have the same values as the given row. The entire row is written into
         * the equality-delete file.
         *
         * @param row the given row to delete.
         */
        public void delete(T row) throws IOException {
            internalPosDelete(structProjection.wrap(asStructLike(row)));
            eqDeleteWriter.write(row);
        }

        /**
         * Delete rows matching the given key. Only the values of the equality fields are written into the equality-delete
         * file.
         *
         * @param key the projected data whose columns are the same as the equality fields.
         */
        public void deleteKey(T key) throws IOException {
            internalPosDelete(asStructLike(key));
            eqDeleteWriter.write(key);
        }

        @Override
        public void close() throws IOException {
            // Close data writer and add completed data files.
            if (dataWriter != null) {
                dataWriter.close();
                dataWriter = null;
            }
            // Close eq-delete writer and add completed equality-delete files.
            if (eqDeleteWriter != null) {
                eqDeleteWriter.close();
                eqDeleteWriter = null;
            }
            if (insertedRowMap != null) {
                insertedRowMap.clear();
                insertedRowMap = null;
            }
            // Add the completed pos-delete files.
            if (posDeleteWriter != null) {
                completedDeleteFiles.addAll(posDeleteWriter.complete());
                referencedDataFiles.addAll(posDeleteWriter.referencedDataFiles());
                posDeleteWriter = null;
            }
        }
    }

    private static class PathOffset {

        private final CharSequence path;

        private final long rowOffset;

        private PathOffset(CharSequence path, long rowOffset) {
            this.path = path;
            this.rowOffset = rowOffset;
        }

        private static PathOffset of(CharSequence path, long rowOffset) {
            return new PathOffset(path, rowOffset);
        }

        @Override
        public String toString() {
            return MoreObjects.toStringHelper(this).add("path", path).add("row_offset", rowOffset).toString();
        }
    }

    private abstract class BaseRollingWriter<W extends Closeable> implements Closeable {

        private static final int ROWS_DIVISOR = 1000;

        private final StructLike partitionKey;

        private EncryptedOutputFile currentFile = null;

        private W currentWriter = null;

        private long currentRows = 0;

        private BaseRollingWriter(StructLike partitionKey) {
            this.partitionKey = partitionKey;
            openCurrent();
        }

        abstract W newWriter(EncryptedOutputFile file, StructLike partition);

        abstract long length(W writer);

        abstract void write(W writer, T record);

        abstract void complete(W closedWriter);

        public void write(T record) throws IOException {
            write(currentWriter, record);
            this.currentRows++;
            if (shouldRollToNewFile()) {
                closeCurrent();
                openCurrent();
            }
        }

        public CharSequence currentPath() {
            Preconditions.checkNotNull(currentFile, "The currentFile shouldn't be null");
            return currentFile.encryptingOutputFile().location();
        }

        public long currentRows() {
            return currentRows;
        }

        private void openCurrent() {
            if (partitionKey == null) {
                // unpartitioned
                this.currentFile = fileFactory.newOutputFile();
            } else {
                // partitioned
                this.currentFile = fileFactory.newOutputFile(partitionKey);
            }
            this.currentWriter = newWriter(currentFile, partitionKey);
            this.currentRows = 0;
        }

        private boolean shouldRollToNewFile() {
            // TODO: ORC writers cannot report the target file size before the file is closed
            return !format.equals(FileFormat.ORC) && currentRows % ROWS_DIVISOR == 0 && length(currentWriter) >= targetFileSize;
        }

        private void closeCurrent() throws IOException {
            if (currentWriter != null) {
                currentWriter.close();
                if (currentRows == 0L) {
                    io.deleteFile(currentFile.encryptingOutputFile());
                } else {
                    complete(currentWriter);
                }
                this.currentFile = null;
                this.currentWriter = null;
                this.currentRows = 0;
            }
        }

        @Override
        public void close() throws IOException {
            closeCurrent();
        }
    }

    protected class RollingFileWriter extends BaseRollingWriter<DataWriter<T>> {

        public RollingFileWriter(StructLike partitionKey) {
            super(partitionKey);
        }

        @Override
        DataWriter<T> newWriter(EncryptedOutputFile file, StructLike partitionKey) {
            return appenderFactory.newDataWriter(file, format, partitionKey);
        }

        @Override
        long length(DataWriter<T> writer) {
            return writer.length();
        }

        @Override
        void write(DataWriter<T> writer, T record) {
            writer.add(record);
        }

        @Override
        void complete(DataWriter<T> closedWriter) {
            completedDataFiles.add(closedWriter.toDataFile());
        }
    }

    protected class RollingEqDeleteWriter extends BaseRollingWriter<EqualityDeleteWriter<T>> {

        RollingEqDeleteWriter(StructLike partitionKey) {
            super(partitionKey);
        }

        @Override
        EqualityDeleteWriter<T> newWriter(EncryptedOutputFile file, StructLike partitionKey) {
            return appenderFactory.newEqDeleteWriter(file, format, partitionKey);
        }

        @Override
        long length(EqualityDeleteWriter<T> writer) {
            return writer.length();
        }

        @Override
        void write(EqualityDeleteWriter<T> writer, T record) {
            writer.delete(record);
        }

        @Override
        void complete(EqualityDeleteWriter<T> closedWriter) {
            completedDeleteFiles.add(closedWriter.toDeleteFile());
        }
    }
}
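
Callers of a concrete BaseTaskWriter subclass follow a write/complete/abort protocol. A sketch under the assumption that taskWriter is some TaskWriter<Record> and rows an Iterable<Record> (both placeholders):

void writeAll(TaskWriter<Record> taskWriter, Iterable<Record> rows) throws IOException {
    try {
        for (Record row : rows) {
            taskWriter.write(row);
        }
    } catch (Exception e) {
        // abort() deletes any data and delete files this task already produced
        taskWriter.abort();
        throw e;
    }
    // complete() closes any open files; the returned data and delete files are committed by the caller
    WriteResult result = taskWriter.complete();
    DataFile[] dataFiles = result.dataFiles();
    DeleteFile[] deleteFiles = result.deleteFiles();
}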

16 Source : EqualityDeleteWriter.java
with Apache License 2.0
from apache

public class EqualityDeleteWriter<T> implements Closeable {

    private final FileAppender<T> appender;

    private final FileFormat format;

    private final String location;

    private final PartitionSpec spec;

    private final StructLike partition;

    private final ByteBuffer keyMetadata;

    private final int[] equalityFieldIds;

    private DeleteFile deleteFile = null;

    public EqualityDeleteWriter(FileAppender<T> appender, FileFormat format, String location, PartitionSpec spec, StructLike partition, EncryptionKeyMetadata keyMetadata, int... equalityFieldIds) {
        this.appender = appender;
        this.format = format;
        this.location = location;
        this.spec = spec;
        this.partition = partition;
        this.keyMetadata = keyMetadata != null ? keyMetadata.buffer() : null;
        this.equalityFieldIds = equalityFieldIds;
    }

    public void deleteAll(Iterable<T> rows) {
        appender.addAll(rows);
    }

    public void delete(T row) {
        appender.add(row);
    }

    public long length() {
        return appender.length();
    }

    @Override
    public void close() throws IOException {
        if (deleteFile == null) {
            appender.close();
            this.deleteFile = FileMetadata.deleteFileBuilder(spec).ofEqualityDeletes(equalityFieldIds).withFormat(format).withPath(location).withPartition(partition).withEncryptionKeyMetadata(keyMetadata).withFileSizeInBytes(appender.length()).withMetrics(appender.metrics()).build();
        }
    }

    public DeleteFile toDeleteFile() {
        Preconditions.checkState(deleteFile != null, "Cannot create delete file from unclosed writer");
        return deleteFile;
    }
}

15 Source : IcebergQueryRunner.java
with Apache License 2.0
from trinodb

public static DistributedQueryRunner createIcebergQueryRunner(Map<String, String> extraProperties, FileFormat format, List<TpchTable<?>> tables) throws Exception {
    Session session = testSessionBuilder().setCatalog(ICEBERG_CATALOG).setSchema("tpch").build();
    DistributedQueryRunner queryRunner = DistributedQueryRunner.builder(session).setExtraProperties(extraProperties).build();
    queryRunner.installPlugin(new TpchPlugin());
    queryRunner.createCatalog("tpch", "tpch");
    Path dataDir = queryRunner.getCoordinator().getBaseDataDir().resolve("iceberg_data");
    queryRunner.installPlugin(new IcebergPlugin());
    Map<String, String> icebergProperties = ImmutableMap.<String, String>builder().put("hive.metastore", "file").put("hive.metastore.catalog.dir", dataDir.toString()).put("iceberg.file-format", format.name()).build();
    queryRunner.createCatalog(ICEBERG_CATALOG, "iceberg", icebergProperties);
    queryRunner.execute("CREATE SCHEMA tpch");
    copyTpchTables(queryRunner, "tpch", TINY_SCHEMA_NAME, session, tables);
    return queryRunner;
}

15 Source : TestHelpers.java
with Apache License 2.0
from ExpediaGroup

public static DataFile writeFile(File targetFile, Table table, StructLike partitionData, FileFormat fileFormat, List<Record> records) throws IOException {
    if (targetFile.exists()) {
        if (!targetFile.delete()) {
            throw new IOException("Unable to delete " + targetFile.getAbsolutePath());
        }
    }
    FileAppender<Record> appender;
    switch(fileFormat) {
        case AVRO:
            appender = Avro.write(Files.localOutput(targetFile)).schema(table.schema()).createWriterFunc(DataWriter::create).named(fileFormat.name()).build();
            break;
        case PARQUET:
            appender = Parquet.write(Files.localOutput(targetFile)).schema(table.schema()).createWriterFunc(GenericParquetWriter::buildWriter).named(fileFormat.name()).build();
            break;
        case ORC:
            appender = ORC.write(Files.localOutput(targetFile)).schema(table.schema()).createWriterFunc(GenericOrcWriter::buildWriter).build();
            break;
        default:
            throw new UnsupportedOperationException("Cannot write format: " + fileFormat);
    }
    try {
        appender.addAll(records);
    } finally {
        appender.close();
    }
    DataFiles.Builder builder = DataFiles.builder(table.spec()).withPath(targetFile.toString()).withFormat(fileFormat).withFileSizeInBytes(targetFile.length()).withMetrics(appender.metrics());
    if (partitionData != null) {
        builder.withPartition(partitionData);
    }
    return builder.build();
}
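
The DataFile returned by this helper is not yet visible in the table; a typical follow-up (sketched against the same table instance, with a null partition for an unpartitioned spec) registers it with a fast append:

DataFile dataFile = writeFile(targetFile, table, null, FileFormat.PARQUET, records);
// Until this commit the table's snapshots do not reference the new file.
table.newAppend()
     .appendFile(dataFile)
     .commit();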

15 Source : TestSparkTableUtil.java
with Apache License 2.0
from apache

static void loadData(FileFormat fileFormat) {
    // Create a hive table.
    SQLContext sc = new SQLContext(TestSparkTableUtil.spark);
    sc.sql(String.format("CREATE TABLE %s (\n" + "    id int COMMENT 'unique id'\n" + ")\n" + "PARTITIONED BY (data string)\n" + "STORED AS %s\n" + "LOCATION '%s'", QUALIFIED_TABLE_NAME, fileFormat, TABLE_LOCATION_STR));
    List<SimpleRecord> expected = Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(2, "b"), new SimpleRecord(3, "c"));
    Dataset<Row> df = spark.createDataFrame(expected, SimpleRecord.class);
    df.select("id", "data").orderBy("data").write().mode("append").insertInto(QUALIFIED_TABLE_NAME);
}

15 Source : TestSparkDataWrite.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public abstract class TestSparkDataWrite {

    private static final Configuration CONF = new Configuration();

    private final FileFormat format;

    private static SparkSession spark = null;

    private static final Schema SCHEMA = new Schema(optional(1, "id", Types.IntegerType.get()), optional(2, "data", Types.StringType.get()));

    @Rule
    public TemporaryFolder temp = new TemporaryFolder();

    @Parameterized.Parameters(name = "format = {0}")
    public static Object[] parameters() {
        return new Object[] { "parquet", "avro", "orc" };
    }

    @BeforeClass
    public static void startSpark() {
        TestSparkDataWrite.spark = SparkSession.builder().master("local[2]").getOrCreate();
    }

    @AfterClass
    public static void stopSpark() {
        SparkSession currentSpark = TestSparkDataWrite.spark;
        TestSparkDataWrite.spark = null;
        currentSpark.stop();
    }

    public TestSparkDataWrite(String format) {
        this.format = FileFormat.valueOf(format.toUpperCase(Locale.ENGLISH));
    }

    @Test
    public void testBasicWrite() throws IOException {
        File parent = temp.newFolder(format.toString());
        File location = new File(parent, "test");
        HadoopTables tables = new HadoopTables(CONF);
        PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("data").build();
        Table table = tables.create(SCHEMA, spec, location.toString());
        List<SimpleRecord> expected = Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(2, "b"), new SimpleRecord(3, "c"));
        Dataset<Row> df = spark.createDataFrame(expected, SimpleRecord.class);
        // TODO: incoming columns must be ordered according to the table's schema
        df.select("id", "data").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Append).save(location.toString());
        table.refresh();
        Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
        List<SimpleRecord> actual = result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
        Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
        Assert.assertEquals("Result rows should match", expected, actual);
        for (ManifestFile manifest : table.currentSnapshot().allManifests()) {
            for (DataFile file : ManifestFiles.read(manifest, table.io())) {
                // TODO: Avro does not support split offsets
                if (!format.equals(FileFormat.AVRO)) {
                    Assert.assertNotNull("Split offsets not present", file.splitOffsets());
                }
                Assert.assertEquals("Should have reported record count as 1", 1, file.recordCount());
                // TODO: append more metric info
                if (format.equals(FileFormat.PARQUET)) {
                    Assert.assertNotNull("Column sizes metric not present", file.columnSizes());
                    Assert.assertNotNull("Counts metric not present", file.valueCounts());
                    Assert.assertNotNull("Null value counts metric not present", file.nullValueCounts());
                    Assert.assertNotNull("Lower bounds metric not present", file.lowerBounds());
                    Assert.assertNotNull("Upper bounds metric not present", file.upperBounds());
                }
            }
        }
    }

    @Test
    public void testAppend() throws IOException {
        File parent = temp.newFolder(format.toString());
        File location = new File(parent, "test");
        HadoopTables tables = new HadoopTables(CONF);
        PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("data").build();
        Table table = tables.create(SCHEMA, spec, location.toString());
        List<SimpleRecord> records = Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(2, "b"), new SimpleRecord(3, "c"));
        List<SimpleRecord> expected = Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(2, "b"), new SimpleRecord(3, "c"), new SimpleRecord(4, "a"), new SimpleRecord(5, "b"), new SimpleRecord(6, "c"));
        Dataset<Row> df = spark.createDataFrame(records, SimpleRecord.class);
        df.select("id", "data").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Append).save(location.toString());
        df.withColumn("id", df.col("id").plus(3)).select("id", "data").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Append).save(location.toString());
        table.refresh();
        Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
        List<SimpleRecord> actual = result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
        Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
        Assert.assertEquals("Result rows should match", expected, actual);
    }

    @Test
    public void testOverwrite() throws IOException {
        File parent = temp.newFolder(format.toString());
        File location = new File(parent, "test");
        HadoopTables tables = new HadoopTables(CONF);
        PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("id").build();
        Table table = tables.create(SCHEMA, spec, location.toString());
        List<SimpleRecord> records = Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(2, "b"), new SimpleRecord(3, "c"));
        List<SimpleRecord> expected = Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(2, "a"), new SimpleRecord(3, "c"), new SimpleRecord(4, "b"), new SimpleRecord(6, "c"));
        Dataset<Row> df = spark.createDataFrame(records, SimpleRecord.class);
        df.select("id", "data").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Append).save(location.toString());
        // overwrite with 2*id to replace record 2, append 4 and 6
        df.withColumn("id", df.col("id").multiply(2)).select("id", "data").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Overwrite).option("overwrite-mode", "dynamic").save(location.toString());
        table.refresh();
        Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
        List<SimpleRecord> actual = result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
        Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
        Assert.assertEquals("Result rows should match", expected, actual);
    }

    @Test
    public void testUnpartitionedOverwrite() throws IOException {
        File parent = temp.newFolder(format.toString());
        File location = new File(parent, "test");
        HadoopTables tables = new HadoopTables(CONF);
        PartitionSpec spec = PartitionSpec.unpartitioned();
        Table table = tables.create(SCHEMA, spec, location.toString());
        List<SimpleRecord> expected = Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(2, "b"), new SimpleRecord(3, "c"));
        Dataset<Row> df = spark.createDataFrame(expected, SimpleRecord.class);
        df.select("id", "data").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Append).save(location.toString());
        // overwrite with the same data; should not produce two copies
        df.select("id", "data").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Overwrite).save(location.toString());
        table.refresh();
        Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
        List<SimpleRecord> actual = result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
        Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
        Assert.assertEquals("Result rows should match", expected, actual);
    }

    @Test
    public void testUnpartitionedCreateWithTargetFileSizeViaTableProperties() throws IOException {
        File parent = temp.newFolder(format.toString());
        File location = new File(parent, "test");
        HadoopTables tables = new HadoopTables(CONF);
        PartitionSpec spec = PartitionSpec.unpartitioned();
        Table table = tables.create(SCHEMA, spec, location.toString());
        table.updateProperties().set(TableProperties.WRITE_TARGET_FILE_SIZE_BYTES, // ~4 bytes; low enough to trigger
        "4").commit();
        List<SimpleRecord> expected = Lists.newArrayListWithCapacity(4000);
        for (int i = 0; i < 4000; i++) {
            expected.add(new SimpleRecord(i, "a"));
        }
        Dataset<Row> df = spark.createDataFrame(expected, SimpleRecord.class);
        df.select("id", "data").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Append).save(location.toString());
        table.refresh();
        Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
        List<SimpleRecord> actual = result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
        Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
        Assert.assertEquals("Result rows should match", expected, actual);
        List<DataFile> files = Lists.newArrayList();
        for (ManifestFile manifest : table.currentSnapshot().allManifests()) {
            for (DataFile file : ManifestFiles.read(manifest, table.io())) {
                files.add(file);
            }
        }
        // TODO: ORC does not yet honor the target file size setting
        if (!format.equals(FileFormat.ORC)) {
            Assert.assertEquals("Should have 4 DataFiles", 4, files.size());
            Assert.assertTrue("All DataFiles contain 1000 rows", files.stream().allMatch(d -> d.recordCount() == 1000));
        }
    }

    @Test
    public void testPartitionedCreateWithTargetFileSizeViaOption() throws IOException {
        partitionedCreateWithTargetFileSizeViaOption(IcebergOptionsType.NONE);
    }

    @Test
    public void testPartitionedFanoutCreateWithTargetFileSizeViaOption() throws IOException {
        partitionedCreateWithTargetFileSizeViaOption(IcebergOptionsType.TABLE);
    }

    @Test
    public void testPartitionedFanoutCreateWithTargetFileSizeViaOption2() throws IOException {
        partitionedCreateWithTargetFileSizeViaOption(IcebergOptionsType.JOB);
    }

    @Test
    public void testWriteProjection() throws IOException {
        Assume.assumeTrue("Not supported in Spark 3.0; analysis requires all columns are present", spark.version().startsWith("2"));
        File parent = temp.newFolder(format.toString());
        File location = new File(parent, "test");
        HadoopTables tables = new HadoopTables(CONF);
        PartitionSpec spec = PartitionSpec.unpartitioned();
        Table table = tables.create(SCHEMA, spec, location.toString());
        List<SimpleRecord> expected = Lists.newArrayList(new SimpleRecord(1, null), new SimpleRecord(2, null), new SimpleRecord(3, null));
        Dataset<Row> df = spark.createDataFrame(expected, SimpleRecord.class);
        // select only id column
        df.select("id").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Append).save(location.toString());
        table.refresh();
        Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
        List<SimpleRecord> actual = result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
        Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
        Assert.assertEquals("Result rows should match", expected, actual);
    }

    @Test
    public void testWriteProjectionWithMiddle() throws IOException {
        Assume.assumeTrue("Not supported in Spark 3.0; analysis requires all columns are present", spark.version().startsWith("2"));
        File parent = temp.newFolder(format.toString());
        File location = new File(parent, "test");
        HadoopTables tables = new HadoopTables(CONF);
        PartitionSpec spec = PartitionSpec.unpartitioned();
        Schema schema = new Schema(optional(1, "c1", Types.IntegerType.get()), optional(2, "c2", Types.StringType.get()), optional(3, "c3", Types.StringType.get()));
        Table table = tables.create(schema, spec, location.toString());
        List<ThreeColumnRecord> expected = Lists.newArrayList(new ThreeColumnRecord(1, null, "hello"), new ThreeColumnRecord(2, null, "world"), new ThreeColumnRecord(3, null, null));
        Dataset<Row> df = spark.createDataFrame(expected, ThreeColumnRecord.class);
        df.select("c1", "c3").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Append).save(location.toString());
        table.refresh();
        Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
        List<ThreeColumnRecord> actual = result.orderBy("c1").as(Encoders.bean(ThreeColumnRecord.class)).collectAsList();
        Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
        Assert.assertEquals("Result rows should match", expected, actual);
    }

    @Test
    public void testViewsReturnRecentResults() throws IOException {
        File parent = temp.newFolder(format.toString());
        File location = new File(parent, "test");
        HadoopTables tables = new HadoopTables(CONF);
        PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("data").build();
        tables.create(SCHEMA, spec, location.toString());
        List<SimpleRecord> records = Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(2, "b"), new SimpleRecord(3, "c"));
        Dataset<Row> df = spark.createDataFrame(records, SimpleRecord.class);
        df.select("id", "data").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Append).save(location.toString());
        Dataset<Row> query = spark.read().format("iceberg").load(location.toString()).where("id = 1");
        query.createOrReplaceTempView("tmp");
        List<SimpleRecord> actual1 = spark.table("tmp").as(Encoders.bean(SimpleRecord.class)).collectAsList();
        List<SimpleRecord> expected1 = Lists.newArrayList(new SimpleRecord(1, "a"));
        Assert.assertEquals("Number of rows should match", expected1.size(), actual1.size());
        Assert.assertEquals("Result rows should match", expected1, actual1);
        df.select("id", "data").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Append).save(location.toString());
        List<SimpleRecord> actual2 = spark.table("tmp").as(Encoders.bean(SimpleRecord.class)).collectAsList();
        List<SimpleRecord> expected2 = Lists.newArrayList(new SimpleRecord(1, "a"), new SimpleRecord(1, "a"));
        Assert.assertEquals("Number of rows should match", expected2.size(), actual2.size());
        Assert.assertEquals("Result rows should match", expected2, actual2);
    }

    public void partitionedCreateWithTargetFileSizeViaOption(IcebergOptionsType option) throws IOException {
        File parent = temp.newFolder(format.toString());
        File location = new File(parent, "test");
        HadoopTables tables = new HadoopTables(CONF);
        PartitionSpec spec = PartitionSpec.builderFor(SCHEMA).identity("data").build();
        Table table = tables.create(SCHEMA, spec, location.toString());
        List<SimpleRecord> expected = Lists.newArrayListWithCapacity(8000);
        for (int i = 0; i < 2000; i++) {
            expected.add(new SimpleRecord(i, "a"));
            expected.add(new SimpleRecord(i, "b"));
            expected.add(new SimpleRecord(i, "c"));
            expected.add(new SimpleRecord(i, "d"));
        }
        Dataset<Row> df = spark.createDataFrame(expected, SimpleRecord.class);
        switch(option) {
            case NONE:
                df.select("id", "data").sort("data").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Append).option(SparkWriteOptions.TARGET_FILE_SIZE_BYTES, // ~4 bytes; low enough to trigger
                4).save(location.toString());
                break;
            case TABLE:
                table.updateProperties().set(SPARK_WRITE_PARTITIONED_FANOUT_ENABLED, "true").commit();
                df.select("id", "data").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Append).option(SparkWriteOptions.TARGET_FILE_SIZE_BYTES, // ~4 bytes; low enough to trigger
                4).save(location.toString());
                break;
            case JOB:
                df.select("id", "data").write().format("iceberg").option(SparkWriteOptions.WRITE_FORMAT, format.toString()).mode(SaveMode.Append).option(SparkWriteOptions.TARGET_FILE_SIZE_BYTES, // ~4 bytes; low enough to trigger
                4).option(SparkWriteOptions.FANOUT_ENABLED, true).save(location.toString());
                break;
            default:
                break;
        }
        table.refresh();
        Dataset<Row> result = spark.read().format("iceberg").load(location.toString());
        List<SimpleRecord> actual = result.orderBy("id").as(Encoders.bean(SimpleRecord.class)).collectAsList();
        Assert.assertEquals("Number of rows should match", expected.size(), actual.size());
        Assert.assertEquals("Result rows should match", expected, actual);
        List<DataFile> files = Lists.newArrayList();
        for (ManifestFile manifest : table.currentSnapshot().allManifests()) {
            for (DataFile file : ManifestFiles.read(manifest, table.io())) {
                files.add(file);
            }
        }
        // TODO: ORC does not yet honor the target file size setting
        if (!format.equals(FileFormat.ORC)) {
            Assert.assertEquals("Should have 8 DataFiles", 8, files.size());
            Assert.assertTrue("All DataFiles contain 1000 rows", files.stream().allMatch(d -> d.recordCount() == 1000));
        }
    }

    public enum IcebergOptionsType {

        NONE, TABLE, JOB
    }
}
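
The constructor above resolves its FileFormat from the test parameter string; the same enum also helps when naming files. A small hedged sketch of that round trip (the file name is arbitrary):

FileFormat format = FileFormat.valueOf("parquet".toUpperCase(Locale.ENGLISH));
// addExtension appends the format's extension when it is missing, e.g. "part-00000" -> "part-00000.parquet"
String fileName = format.addExtension("part-00000");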

15 Source : SparkAppenderFactory.java
with Apache License 2.0
from apache

@Override
public PositionDeleteWriter<InternalRow> newPosDeleteWriter(EncryptedOutputFile file, FileFormat format, StructLike partition) {
    try {
        switch(format) {
            case PARQUET:
                StructType sparkPosDeleteSchema = SparkSchemaUtil.convert(DeleteSchemaUtil.posDeleteSchema(posDeleteRowSchema));
                return Parquet.writeDeletes(file.encryptingOutputFile()).createWriterFunc(msgType -> SparkParquetWriters.buildWriter(sparkPosDeleteSchema, msgType)).overwrite().rowSchema(posDeleteRowSchema).withSpec(spec).withPartition(partition).withKeyMetadata(file.keyMetadata()).transformPaths(path -> UTF8String.fromString(path.toString())).buildPositionWriter();
            case AVRO:
                return Avro.writeDeletes(file.encryptingOutputFile()).createWriterFunc(ignored -> new SparkAvroWriter(lazyPosDeleteSparkType())).overwrite().rowSchema(posDeleteRowSchema).withSpec(spec).withPartition(partition).withKeyMetadata(file.keyMetadata()).buildPositionWriter();
            default:
                throw new UnsupportedOperationException("Cannot write pos-deletes for unsupported file format: " + format);
        }
    } catch (IOException e) {
        throw new UncheckedIOException("Failed to create new position delete writer", e);
    }
}

15 Source : TestHiveIcebergStorageHandlerLocalScan.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public class TestHiveIcebergStorageHandlerLocalScan {

    @Parameters(name = "fileFormat={0}, catalog={1}")
    public static Collection<Object[]> parameters() {
        Collection<Object[]> testParams = new ArrayList<>();
        // Run tests with every FileFormat for a single Catalog (HiveCatalog)
        for (FileFormat fileFormat : HiveIcebergStorageHandlerTestUtils.FILE_FORMATS) {
            testParams.add(new Object[] { fileFormat, TestTables.TestTableType.HIVE_CATALOG });
        }
        // Run tests for every Catalog with a single FileFormat (PARQUET); skip HiveCatalog, which was already added above
        for (TestTables.TestTableType testTableType : TestTables.ALL_TABLE_TYPES) {
            if (!TestTables.TestTableType.HIVE_CATALOG.equals(testTableType)) {
                testParams.add(new Object[] { FileFormat.PARQUET, testTableType });
            }
        }
        return testParams;
    }

    private static TestHiveShell shell;

    private TestTables testTables;

    @Parameter(0)
    public FileFormat fileFormat;

    @Parameter(1)
    public TestTables.TestTableType testTableType;

    @Rule
    public TemporaryFolder temp = new TemporaryFolder();

    @BeforeClass
    public static void beforeClass() {
        shell = HiveIcebergStorageHandlerTestUtils.shell();
    }

    @AfterClass
    public static void afterClass() {
        shell.stop();
    }

    @Before
    public void before() throws IOException {
        testTables = HiveIcebergStorageHandlerTestUtils.testTables(shell, testTableType, temp);
        // Uses spark as an engine so we can detect if we unintentionally try to use any execution engines
        HiveIcebergStorageHandlerTestUtils.init(shell, testTables, temp, "spark");
    }

    @After
    public void after() throws Exception {
        HiveIcebergStorageHandlerTestUtils.close(shell);
    }

    @Test
    public void testScanEmptyTable() throws IOException {
        Schema emptySchema = new Schema(required(1, "empty", Types.StringType.get()));
        testTables.createTable(shell, "empty", emptySchema, fileFormat, ImmutableList.of());
        List<Object[]> rows = shell.executeStatement("SELECT * FROM default.empty");
        Assert.assertEquals(0, rows.size());
    }

    @Test
    public void testScanTable() throws IOException {
        testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
        // Single fetch task: no MR job.
        List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers");
        Assert.assertEquals(3, rows.size());
        Assert.assertArrayEquals(new Object[] { 0L, "Alice", "Brown" }, rows.get(0));
        Assert.assertArrayEquals(new Object[] { 1L, "Bob", "Green" }, rows.get(1));
        Assert.assertArrayEquals(new Object[] { 2L, "Trudy", "Pink" }, rows.get(2));
    }

    @Test
    public void testScanTableCaseInsensitive() throws IOException {
        testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA_WITH_UPPERCASE, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
        List<Object[]> rows = shell.executeStatement("SELECT * FROM default.customers");
        Assert.assertEquals(3, rows.size());
        Assert.assertArrayEquals(new Object[] { 0L, "Alice", "Brown" }, rows.get(0));
        Assert.assertArrayEquals(new Object[] { 1L, "Bob", "Green" }, rows.get(1));
        Assert.assertArrayEquals(new Object[] { 2L, "Trudy", "Pink" }, rows.get(2));
        rows = shell.executeStatement("SELECT * FROM default.customers where CustomER_Id < 2 " + "and first_name in ('Alice', 'Bob')");
        Assert.assertEquals(2, rows.size());
        Assert.assertArrayEquals(new Object[] { 0L, "Alice", "Brown" }, rows.get(0));
        Assert.assertArrayEquals(new Object[] { 1L, "Bob", "Green" }, rows.get(1));
    }

    @Test
    public void testDecimalTableWithPredicateLiterals() throws IOException {
        Schema schema = new Schema(required(1, "decimal_field", Types.DecimalType.of(7, 2)));
        List<Record> records = TestHelper.RecordsBuilder.newInstance(schema).add(new BigDecimal("85.00")).add(new BigDecimal("100.56")).add(new BigDecimal("100.57")).build();
        testTables.createTable(shell, "dec_test", schema, fileFormat, records);
        // Use integer literal in predicate
        List<Object[]> rows = shell.executeStatement("SELECT * FROM default.dec_test where decimal_field >= 85");
        Assert.assertEquals(3, rows.size());
        Assert.assertArrayEquals(new Object[] { "85.00" }, rows.get(0));
        Assert.assertArrayEquals(new Object[] { "100.56" }, rows.get(1));
        Assert.assertArrayEquals(new Object[] { "100.57" }, rows.get(2));
        // Use decimal literal in predicate with smaller scale than schema type definition
        rows = shell.executeStatement("SELECT * FROM default.dec_test where decimal_field > 99.1");
        Assert.assertEquals(2, rows.size());
        Assert.assertArrayEquals(new Object[] { "100.56" }, rows.get(0));
        Assert.assertArrayEquals(new Object[] { "100.57" }, rows.get(1));
        // Use decimal literal in predicate with higher scale than schema type definition
        rows = shell.executeStatement("SELECT * FROM default.dec_test where decimal_field > 100.565");
        Assert.assertEquals(1, rows.size());
        Assert.assertArrayEquals(new Object[] { "100.57" }, rows.get(0));
        // Use decimal literal in predicate with the same scale as schema type definition
        rows = shell.executeStatement("SELECT * FROM default.dec_test where decimal_field > 640.34");
        Assert.assertEquals(0, rows.size());
    }

    @Test
    public void testColumnSelection() throws IOException {
        testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
        List<Object[]> outOfOrderColumns = shell.executeStatement("SELECT first_name, customer_id, last_name FROM default.customers");
        Assert.assertEquals(3, outOfOrderColumns.size());
        Assert.assertArrayEquals(new Object[] { "Alice", 0L, "Brown" }, outOfOrderColumns.get(0));
        Assert.assertArrayEquals(new Object[] { "Bob", 1L, "Green" }, outOfOrderColumns.get(1));
        Assert.assertArrayEquals(new Object[] { "Trudy", 2L, "Pink" }, outOfOrderColumns.get(2));
        List<Object[]> allButFirstColumn = shell.executeStatement("SELECT first_name, last_name FROM default.customers");
        Assert.assertEquals(3, allButFirstColumn.size());
        Assert.assertArrayEquals(new Object[] { "Alice", "Brown" }, allButFirstColumn.get(0));
        Assert.assertArrayEquals(new Object[] { "Bob", "Green" }, allButFirstColumn.get(1));
        Assert.assertArrayEquals(new Object[] { "Trudy", "Pink" }, allButFirstColumn.get(2));
        List<Object[]> allButMiddleColumn = shell.executeStatement("SELECT customer_id, last_name FROM default.customers");
        Assert.assertEquals(3, allButMiddleColumn.size());
        Assert.assertArrayEquals(new Object[] { 0L, "Brown" }, allButMiddleColumn.get(0));
        Assert.assertArrayEquals(new Object[] { 1L, "Green" }, allButMiddleColumn.get(1));
        Assert.assertArrayEquals(new Object[] { 2L, "Pink" }, allButMiddleColumn.get(2));
        List<Object[]> allButLastColumn = shell.executeStatement("SELECT customer_id, first_name FROM default.customers");
        Assert.assertEquals(3, allButLastColumn.size());
        Assert.assertArrayEquals(new Object[] { 0L, "Alice" }, allButLastColumn.get(0));
        Assert.assertArrayEquals(new Object[] { 1L, "Bob" }, allButLastColumn.get(1));
        Assert.assertArrayEquals(new Object[] { 2L, "Trudy" }, allButLastColumn.get(2));
    }

    @Test
    public void selectSameColumnTwice() throws IOException {
        testTables.createTable(shell, "customers", HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, fileFormat, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
        List<Object[]> columns = shell.executeStatement("SELECT first_name, first_name FROM default.customers");
        Assert.assertEquals(3, columns.size());
        Assert.assertArrayEquals(new Object[] { "Alice", "Alice" }, columns.get(0));
        Assert.assertArrayEquals(new Object[] { "Bob", "Bob" }, columns.get(1));
        Assert.assertArrayEquals(new Object[] { "Trudy", "Trudy" }, columns.get(2));
    }

    @Test
    public void testCreateTableWithColumnSpecification() throws IOException {
        TableIdentifier identifier = TableIdentifier.of("default", "customers");
        Map<StructLike, List<Record>> data = new HashMap<>(1);
        data.put(null, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
        String createSql = "CREATE EXTERNAL TABLE " + identifier + " (customer_id BIGINT, first_name STRING COMMENT 'This is first name', " + "last_name STRING COMMENT 'This is last name')" + " STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + testTables.locationForCreateTableSQL(identifier);
        runCreateAndReadTest(identifier, createSql, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, PartitionSpec.unpartitioned(), data);
    }

    @Test
    public void testCreateTableWithColumnSpecificationPartitioned() throws IOException {
        TableIdentifier identifier = TableIdentifier.of("default", "customers");
        PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).identity("last_name").build();
        Map<StructLike, List<Record>> data = ImmutableMap.of(Row.of("Brown"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(0)), Row.of("Green"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(1)), Row.of("Pink"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(2)));
        String createSql = "CREATE EXTERNAL TABLE " + identifier + " (customer_id BIGINT, first_name STRING COMMENT 'This is first name') " + "PARTITIONED BY (last_name STRING COMMENT 'This is last name') STORED BY " + "'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + testTables.locationForCreateTableSQL(identifier);
        runCreateAndReadTest(identifier, createSql, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec, data);
    }

    @Test
    public void testCreatePartitionedTableByProperty() throws IOException {
        TableIdentifier identifier = TableIdentifier.of("default", "customers");
        PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).identity("last_name").build();
        Map<StructLike, List<Record>> data = ImmutableMap.of(Row.of("Brown"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(0)), Row.of("Green"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(1)), Row.of("Pink"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(2)));
        String createSql = "CREATE EXTERNAL TABLE " + identifier + " STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + testTables.locationForCreateTableSQL(identifier) + "TBLPROPERTIES ('" + InputFormatConfig.PARTITION_SPEC + "'='" + PartitionSpecParser.toJson(spec) + "', " + "'" + InputFormatConfig.TABLE_SCHEMA + "'='" + SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + "')";
        runCreateAndReadTest(identifier, createSql, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec, data);
    }

    @Test
    public void testCreateTableWithColumnSpecificationMultilevelPartitioned() throws IOException {
        TableIdentifier identifier = TableIdentifier.of("default", "customers");
        PartitionSpec spec = PartitionSpec.builderFor(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA).identity("first_name").identity("last_name").build();
        Map<StructLike, List<Record>> data = ImmutableMap.of(Row.of("Alice", "Brown"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(0)), Row.of("Bob", "Green"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(1)), Row.of("Trudy", "Pink"), Collections.singletonList(HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS.get(2)));
        String createSql = "CREATE EXTERNAL TABLE " + identifier + " (customer_id BIGINT) " + "PARTITIONED BY (first_name STRING COMMENT 'This is first name', " + "last_name STRING COMMENT 'This is last name') " + "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " + testTables.locationForCreateTableSQL(identifier);
        runCreateAndReadTest(identifier, createSql, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA, spec, data);
    }

    @Test
    public void testArrayOfPrimitivesInTable() throws IOException {
        Schema schema = new Schema(required(1, "arrayofprimitives", Types.ListType.ofRequired(2, Types.IntegerType.get())));
        List<Record> records = testTables.createTableWithGeneratedRecords(shell, "arraytable", schema, fileFormat, 1);
        // access a single element from the array
        for (int i = 0; i < records.size(); i++) {
            List<?> expectedList = (List<?>) records.get(i).getField("arrayofprimitives");
            for (int j = 0; j < expectedList.size(); j++) {
                List<Object[]> queryResult = shell.executeStatement(String.format("SELECT arrayofprimitives[%d] FROM default.arraytable " + "LIMIT 1 OFFSET %d", j, i));
                Assert.assertEquals(expectedList.get(j), queryResult.get(0)[0]);
            }
        }
    }

    @Test
    public void testArrayOfArraysInTable() throws IOException {
        Schema schema = new Schema(required(1, "arrayofarrays", Types.ListType.ofRequired(2, Types.ListType.ofRequired(3, Types.DateType.get()))));
        List<Record> records = testTables.createTableWithGeneratedRecords(shell, "arraytable", schema, fileFormat, 1);
        // access an element from a matrix
        for (int i = 0; i < records.size(); i++) {
            List<?> expectedList = (List<?>) records.get(i).getField("arrayofarrays");
            for (int j = 0; j < expectedList.size(); j++) {
                List<?> expectedInnerList = (List<?>) expectedList.get(j);
                for (int k = 0; k < expectedInnerList.size(); k++) {
                    List<Object[]> queryResult = shell.executeStatement(String.format("SELECT arrayofarrays[%d][%d] FROM default.arraytable " + "LIMIT 1 OFFSET %d", j, k, i));
                    Assert.assertEquals(expectedInnerList.get(k).toString(), queryResult.get(0)[0]);
                }
            }
        }
    }

    @Test
    public void testArrayOfMapsInTable() throws IOException {
        Schema schema = new Schema(required(1, "arrayofmaps", Types.ListType.ofRequired(2, Types.MapType.ofRequired(3, 4, Types.StringType.get(), Types.BooleanType.get()))));
        List<Record> records = testTables.createTableWithGeneratedRecords(shell, "arraytable", schema, fileFormat, 1);
        // access an element from a map in an array
        for (int i = 0; i < records.size(); i++) {
            List<?> expectedList = (List<?>) records.get(i).getField("arrayofmaps");
            for (int j = 0; j < expectedList.size(); j++) {
                Map<?, ?> expectedMap = (Map<?, ?>) expectedList.get(j);
                for (Map.Entry<?, ?> entry : expectedMap.entrySet()) {
                    List<Object[]> queryResult = shell.executeStatement(String.format("SELECT arrayofmaps[%d][\"%s\"] FROM default.arraytable LIMIT 1 OFFSET %d", j, entry.getKey(), i));
                    Assert.assertEquals(entry.getValue(), queryResult.get(0)[0]);
                }
            }
        }
    }

    @Test
    public void testArrayOfStructsInTable() throws IOException {
        Schema schema = new Schema(required(1, "arrayofstructs", Types.ListType.ofRequired(2, Types.StructType.of(required(3, "something", Types.DoubleType.get()), required(4, "someone", Types.LongType.get()), required(5, "somewhere", Types.StringType.get())))));
        List<Record> records = testTables.createTableWithGeneratedRecords(shell, "arraytable", schema, fileFormat, 1);
        // access an element from a struct in an array
        for (int i = 0; i < records.size(); i++) {
            List<?> expectedList = (List<?>) records.get(i).getField("arrayofstructs");
            for (int j = 0; j < expectedList.size(); j++) {
                List<Object[]> queryResult = shell.executeStatement(String.format("SELECT arrayofstructs[%d].something, " + "arrayofstructs[%d].someone, arrayofstructs[%d].somewhere FROM default.arraytable LIMIT 1 " + "OFFSET %d", j, j, j, i));
                GenericRecord genericRecord = (GenericRecord) expectedList.get(j);
                Assert.assertEquals(genericRecord.getField("something"), queryResult.get(0)[0]);
                Assert.assertEquals(genericRecord.getField("someone"), queryResult.get(0)[1]);
                Assert.assertEquals(genericRecord.getField("somewhere"), queryResult.get(0)[2]);
            }
        }
    }

    @Test
    public void testMapOfPrimitivesInTable() throws IOException {
        Schema schema = new Schema(required(1, "mapofprimitives", Types.MapType.ofRequired(2, 3, Types.StringType.get(), Types.IntegerType.get())));
        List<Record> records = testTables.createTableWithGeneratedRecords(shell, "maptable", schema, fileFormat, 1);
        // access a single value from the map
        for (int i = 0; i < records.size(); i++) {
            Map<?, ?> expectedMap = (Map<?, ?>) records.get(i).getField("mapofprimitives");
            for (Map.Entry<?, ?> entry : expectedMap.entrySet()) {
                List<Object[]> queryResult = shell.executeStatement(String.format("SELECT mapofprimitives[\"%s\"] " + "FROM default.maptable LIMIT 1 OFFSET %d", entry.getKey(), i));
                Assert.assertEquals(entry.getValue(), queryResult.get(0)[0]);
            }
        }
    }

    @Test
    public void testMapOfArraysInTable() throws IOException {
        Schema schema = new Schema(required(1, "mapofarrays", Types.MapType.ofRequired(2, 3, Types.StringType.get(), Types.ListType.ofRequired(4, Types.DateType.get()))));
        List<Record> records = testTables.createTableWithGeneratedRecords(shell, "maptable", schema, fileFormat, 1);
        // access a single element from a list in a map
        for (int i = 0; i < records.size(); i++) {
            Map<?, ?> expectedMap = (Map<?, ?>) records.get(i).getField("mapofarrays");
            for (Map.Entry<?, ?> entry : expectedMap.entrySet()) {
                List<?> expectedList = (List<?>) entry.getValue();
                for (int j = 0; j < expectedList.size(); j++) {
                    List<Object[]> queryResult = shell.executeStatement(String.format("SELECT mapofarrays[\"%s\"]" + "[%d] FROM maptable LIMIT 1 OFFSET %d", entry.getKey(), j, i));
                    Assert.assertEquals(expectedList.get(j).toString(), queryResult.get(0)[0]);
                }
            }
        }
    }

    @Test
    public void testMapOfMapsInTable() throws IOException {
        Schema schema = new Schema(required(1, "mapofmaps", Types.MapType.ofRequired(2, 3, Types.StringType.get(), Types.MapType.ofRequired(4, 5, Types.StringType.get(), Types.StringType.get()))));
        List<Record> records = testTables.createTableWithGeneratedRecords(shell, "maptable", schema, fileFormat, 1);
        // access a single element from a map in a map
        for (int i = 0; i < records.size(); i++) {
            Map<?, ?> expectedMap = (Map<?, ?>) records.get(i).getField("mapofmaps");
            for (Map.Entry<?, ?> entry : expectedMap.entrySet()) {
                Map<?, ?> expectedInnerMap = (Map<?, ?>) entry.getValue();
                for (Map.Entry<?, ?> innerEntry : expectedInnerMap.entrySet()) {
                    List<Object[]> queryResult = shell.executeStatement(String.format("SELECT mapofmaps[\"%s\"]" + "[\"%s\"] FROM maptable LIMIT 1 OFFSET %d", entry.getKey(), innerEntry.getKey(), i));
                    Assert.assertEquals(innerEntry.getValue(), queryResult.get(0)[0]);
                }
            }
        }
    }

    @Test
    public void testMapOfStructsInTable() throws IOException {
        Schema schema = new Schema(required(1, "mapofstructs", Types.MapType.ofRequired(2, 3, Types.StringType.get(), Types.StructType.of(required(4, "something", Types.DoubleType.get()), required(5, "someone", Types.LongType.get()), required(6, "somewhere", Types.StringType.get())))));
        List<Record> records = testTables.createTableWithGeneratedRecords(shell, "maptable", schema, fileFormat, 1);
        // access a single element from a struct in a map
        for (int i = 0; i < records.size(); i++) {
            Map<?, ?> expectedMap = (Map<?, ?>) records.get(i).getField("mapofstructs");
            for (Map.Entry<?, ?> entry : expectedMap.entrySet()) {
                List<Object[]> queryResult = shell.executeStatement(String.format("SELECT mapofstructs[\"%s\"].something, " + "mapofstructs[\"%s\"].someone, mapofstructs[\"%s\"].somewhere FROM default.maptable LIMIT 1 " + "OFFSET %d", entry.getKey(), entry.getKey(), entry.getKey(), i));
                GenericRecord genericRecord = (GenericRecord) entry.getValue();
                Assert.assertEquals(genericRecord.getField("something"), queryResult.get(0)[0]);
                Assert.assertEquals(genericRecord.getField("someone"), queryResult.get(0)[1]);
                Assert.assertEquals(genericRecord.getField("somewhere"), queryResult.get(0)[2]);
            }
        }
    }

    @Test
    public void testStructOfPrimitivesInTable() throws IOException {
        Schema schema = new Schema(required(1, "structofprimitives", Types.StructType.of(required(2, "key", Types.StringType.get()), required(3, "value", Types.IntegerType.get()))));
        List<Record> records = testTables.createTableWithGeneratedRecords(shell, "structtable", schema, fileFormat, 1);
        // access a single value in a struct
        for (int i = 0; i < records.size(); i++) {
            GenericRecord expectedStruct = (GenericRecord) records.get(i).getField("structofprimitives");
            List<Object[]> queryResult = shell.executeStatement(String.format("SELECT structofprimitives.key, structofprimitives.value FROM default.structtable LIMIT 1 OFFSET %d", i));
            Assert.assertEquals(expectedStruct.getField("key"), queryResult.get(0)[0]);
            Assert.assertEquals(expectedStruct.getField("value"), queryResult.get(0)[1]);
        }
    }

    @Test
    public void testStructOfArraysInTable() throws IOException {
        Schema schema = new Schema(required(1, "structofarrays", Types.StructType.of(required(2, "names", Types.ListType.ofRequired(3, Types.StringType.get())), required(4, "birthdays", Types.ListType.ofRequired(5, Types.DateType.get())))));
        List<Record> records = testTables.createTableWithGeneratedRecords(shell, "structtable", schema, fileFormat, 1);
        // access an element of an array inside a struct
        for (int i = 0; i < records.size(); i++) {
            GenericRecord expectedStruct = (GenericRecord) records.get(i).getField("structofarrays");
            List<?> expectedList = (List<?>) expectedStruct.getField("names");
            for (int j = 0; j < expectedList.size(); j++) {
                List<Object[]> queryResult = shell.executeStatement(String.format("SELECT structofarrays.names[%d] FROM default.structtable LIMIT 1 OFFSET %d", j, i));
                Assert.assertEquals(expectedList.get(j), queryResult.get(0)[0]);
            }
            expectedList = (List<?>) expectedStruct.getField("birthdays");
            for (int j = 0; j < expectedList.size(); j++) {
                List<Object[]> queryResult = shell.executeStatement(String.format("SELECT structofarrays.birthdays[%d] FROM default.structtable LIMIT 1 OFFSET %d", j, i));
                Assert.assertEquals(expectedList.get(j).toString(), queryResult.get(0)[0]);
            }
        }
    }

    @Test
    public void testStructOfMapsInTable() throws IOException {
        Schema schema = new Schema(required(1, "structofmaps", Types.StructType.of(required(2, "map1", Types.MapType.ofRequired(3, 4, Types.StringType.get(), Types.StringType.get())), required(5, "map2", Types.MapType.ofRequired(6, 7, Types.StringType.get(), Types.IntegerType.get())))));
        List<Record> records = testTables.createTableWithGeneratedRecords(shell, "structtable", schema, fileFormat, 1);
        // access a map entry inside a struct
        for (int i = 0; i < records.size(); i++) {
            GenericRecord expectedStruct = (GenericRecord) records.get(i).getField("structofmaps");
            Map<?, ?> expectedMap = (Map<?, ?>) expectedStruct.getField("map1");
            for (Map.Entry<?, ?> entry : expectedMap.entrySet()) {
                List<Object[]> queryResult = shell.executeStatement(String.format("SELECT structofmaps.map1[\"%s\"] from default.structtable LIMIT 1 OFFSET %d", entry.getKey(), i));
                Assert.assertEquals(entry.getValue(), queryResult.get(0)[0]);
            }
            expectedMap = (Map<?, ?>) expectedStruct.getField("map2");
            for (Map.Entry<?, ?> entry : expectedMap.entrySet()) {
                List<Object[]> queryResult = shell.executeStatement(String.format("SELECT structofmaps.map2[\"%s\"] from default.structtable LIMIT 1 OFFSET %d", entry.getKey(), i));
                Assert.assertEquals(entry.getValue(), queryResult.get(0)[0]);
            }
        }
    }

    @Test
    public void testStructOfStructsInTable() throws IOException {
        Schema schema = new Schema(required(1, "structofstructs", Types.StructType.of(required(2, "struct1", Types.StructType.of(required(3, "key", Types.StringType.get()), required(4, "value", Types.IntegerType.get()))))));
        List<Record> records = testTables.createTableWithGeneratedRecords(shell, "structtable", schema, fileFormat, 1);
        // access a struct element inside a struct
        for (int i = 0; i < records.size(); i++) {
            GenericRecord expectedStruct = (GenericRecord) records.get(i).getField("structofstructs");
            GenericRecord expectedInnerStruct = (GenericRecord) expectedStruct.getField("struct1");
            List<Object[]> queryResult = shell.executeStatement(String.format("SELECT structofstructs.struct1.key, structofstructs.struct1.value FROM default.structtable " + "LIMIT 1 OFFSET %d", i));
            Assert.assertEquals(expectedInnerStruct.getField("key"), queryResult.get(0)[0]);
            Assert.assertEquals(expectedInnerStruct.getField("value"), queryResult.get(0)[1]);
        }
    }

    private void runCreateAndReadTest(TableIdentifier identifier, String createSQL, Schema expectedSchema, PartitionSpec expectedSpec, Map<StructLike, List<Record>> data) throws IOException {
        shell.executeStatement(createSQL);
        org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);
        Assert.assertEquals(expectedSchema.asStruct(), icebergTable.schema().asStruct());
        Assert.assertEquals(expectedSpec, icebergTable.spec());
        List<Record> expected = Lists.newArrayList();
        for (StructLike partition : data.keySet()) {
            testTables.appendIcebergTable(shell.getHiveConf(), icebergTable, fileFormat, partition, data.get(partition));
            expected.addAll(data.get(parreplacedion));
        }
        List<Object[]> descRows = shell.executeStatement("SELECT * FROM " + identifier.toString());
        List<Record> records = HiveIcebergTestUtils.valueForRow(icebergTable.schema(), descRows);
        HiveIcebergTestUtils.validateData(expected, records, 0);
    }
}
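
The Hive tests above thread a single FileFormat value (the parameterized fileFormat field) through every helper call, from createTableWithGeneratedRecords to appendIcebergTable. The sketch below is not part of the test suite; the class name FileFormatDemo and the literal "parquet" are illustrative assumptions. It only shows how such a format name is typically resolved into the enum and what the enum is then used for (the default write format property value and the data file extension):

import java.util.Locale;

import org.apache.iceberg.FileFormat;

public class FileFormatDemo {

    // Illustrative helper (not from the snippets above): resolve a format name such as
    // "avro", "parquet" or "orc" into the FileFormat enum, the same way the
    // parameterized tests call FileFormat.valueOf(...).
    static FileFormat resolve(String name) {
        return FileFormat.valueOf(name.toUpperCase(Locale.ENGLISH));
    }

    public static void main(String[] args) {
        FileFormat format = resolve("parquet");
        // format.name() is the value the tests pass for the 'write.format.default' table property
        System.out.println(format.name());
        // addExtension(...) appends the matching file suffix, e.g. data/part-00000.parquet
        System.out.println(format.addExtension("data/part-00000"));
    }
}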

15 Source : TestFlinkScan.java
with Apache License 2.0
from apache

@RunWith(Parameterized.class)
public abstract class TestFlinkScan {

    @ClassRule
    public static final MiniClusterWithClientResource MINI_CLUSTER_RESOURCE = MiniClusterResource.createWithClassloaderCheckDisabled();

    @ClassRule
    public static final TemporaryFolder TEMPORARY_FOLDER = new TemporaryFolder();

    protected HadoopCatalog catalog;

    protected String warehouse;

    protected String location;

    // parametrized variables
    protected final FileFormat fileFormat;

    @Parameterized.Parameters(name = "format={0}")
    public static Object[] parameters() {
        return new Object[] { "avro", "parquet", "orc" };
    }

    TestFlinkScan(String fileFormat) {
        this.fileFormat = FileFormat.valueOf(fileFormat.toUpperCase(Locale.ENGLISH));
    }

    @Before
    public void before() throws IOException {
        File warehouseFile = TEMPORARY_FOLDER.newFolder();
        Assert.assertTrue(warehouseFile.delete());
        // before variables
        warehouse = "file:" + warehouseFile;
        Configuration conf = new Configuration();
        catalog = new HadoopCatalog(conf, warehouse);
        location = String.format("%s/%s/%s", warehouse, TestFixtures.DATABASE, TestFixtures.TABLE);
    }

    @After
    public void after() throws IOException {
    }

    protected TableLoader tableLoader() {
        return TableLoader.fromHadoopTable(location);
    }

    protected abstract List<Row> runWithProjection(String... projected) throws Exception;

    protected abstract List<Row> runWithFilter(Expression filter, String sqlFilter) throws Exception;

    protected abstract List<Row> runWithOptions(Map<String, String> options) throws Exception;

    protected abstract List<Row> run() throws Exception;

    @Test
    public void testUnpartitionedTable() throws Exception {
        Table table = catalog.createTable(TestFixtures.TABLE_IDENTIFIER, TestFixtures.SCHEMA);
        List<Record> expectedRecords = RandomGenericData.generate(TestFixtures.SCHEMA, 2, 0L);
        new GenericAppenderHelper(table, fileFormat, TEMPORARY_FOLDER).appendToTable(expectedRecords);
        TestHelpers.assertRecords(run(), expectedRecords, TestFixtures.SCHEMA);
    }

    @Test
    public void testPartitionedTable() throws Exception {
        Table table = catalog.createTable(TestFixtures.TABLE_IDENTIFIER, TestFixtures.SCHEMA, TestFixtures.SPEC);
        List<Record> expectedRecords = RandomGenericData.generate(TestFixtures.SCHEMA, 1, 0L);
        expectedRecords.get(0).set(2, "2020-03-20");
        new GenericAppenderHelper(table, fileFormat, TEMPORARY_FOLDER).appendToTable(org.apache.iceberg.TestHelpers.Row.of("2020-03-20", 0), expectedRecords);
        TestHelpers.assertRecords(run(), expectedRecords, TestFixtures.SCHEMA);
    }

    @Test
    public void testProjection() throws Exception {
        Table table = catalog.createTable(TestFixtures.TABLE_IDENTIFIER, TestFixtures.SCHEMA, TestFixtures.SPEC);
        List<Record> inputRecords = RandomGenericData.generate(TestFixtures.SCHEMA, 1, 0L);
        new GenericAppenderHelper(table, fileFormat, TEMPORARY_FOLDER).appendToTable(org.apache.iceberg.TestHelpers.Row.of("2020-03-20", 0), inputRecords);
        assertRows(runWithProjection("data"), Row.of(inputRecords.get(0).get(0)));
    }

    @Test
    public void testIdentityPartitionProjections() throws Exception {
        Schema logSchema = new Schema(Types.NestedField.optional(1, "id", Types.IntegerType.get()), Types.NestedField.optional(2, "dt", Types.StringType.get()), Types.NestedField.optional(3, "level", Types.StringType.get()), Types.NestedField.optional(4, "message", Types.StringType.get()));
        PartitionSpec spec = PartitionSpec.builderFor(logSchema).identity("dt").identity("level").build();
        Table table = catalog.createTable(TestFixtures.TABLE_IDENTIFIER, logSchema, spec);
        List<Record> inputRecords = RandomGenericData.generate(logSchema, 10, 0L);
        int idx = 0;
        AppendFiles append = table.newAppend();
        for (Record record : inputRecords) {
            record.set(1, "2020-03-2" + idx);
            record.set(2, Integer.toString(idx));
            append.appendFile(new GenericAppenderHelper(table, fileFormat, TEMPORARY_FOLDER).writeFile(org.apache.iceberg.TestHelpers.Row.of("2020-03-2" + idx, Integer.toString(idx)), ImmutableList.of(record)));
            idx += 1;
        }
        append.commit();
        // individual fields
        validateIdentityPartitionProjections(table, Collections.singletonList("dt"), inputRecords);
        validateIdentityPartitionProjections(table, Collections.singletonList("level"), inputRecords);
        validateIdentityPartitionProjections(table, Collections.singletonList("message"), inputRecords);
        validateIdentityPartitionProjections(table, Collections.singletonList("id"), inputRecords);
        // field pairs
        validateIdentityPartitionProjections(table, Arrays.asList("dt", "message"), inputRecords);
        validateIdentityPartitionProjections(table, Arrays.asList("level", "message"), inputRecords);
        validateIdentityPartitionProjections(table, Arrays.asList("dt", "level"), inputRecords);
        // out-of-order pairs
        validateIdentityPartitionProjections(table, Arrays.asList("message", "dt"), inputRecords);
        validateIdentityPartitionProjections(table, Arrays.asList("message", "level"), inputRecords);
        validateIdentityPartitionProjections(table, Arrays.asList("level", "dt"), inputRecords);
        // out-of-order triplets
        validateIdentityPartitionProjections(table, Arrays.asList("dt", "level", "message"), inputRecords);
        validateIdentityPartitionProjections(table, Arrays.asList("level", "dt", "message"), inputRecords);
        validateIdentityPartitionProjections(table, Arrays.asList("dt", "message", "level"), inputRecords);
        validateIdentityPartitionProjections(table, Arrays.asList("level", "message", "dt"), inputRecords);
        validateIdentityPartitionProjections(table, Arrays.asList("message", "dt", "level"), inputRecords);
        validateIdentityPartitionProjections(table, Arrays.asList("message", "level", "dt"), inputRecords);
    }

    private void validateIdentityPartitionProjections(Table table, List<String> projectedFields, List<Record> inputRecords) throws Exception {
        List<Row> rows = runWithProjection(projectedFields.toArray(new String[0]));
        for (int pos = 0; pos < inputRecords.size(); pos++) {
            Record inputRecord = inputRecords.get(pos);
            Row actualRecord = rows.get(pos);
            for (int i = 0; i < projectedFields.size(); i++) {
                String name = projectedFields.get(i);
                replacedert.replacedertEquals("Projected field " + name + " should match", inputRecord.getField(name), actualRecord.getField(i));
            }
        }
    }

    @Test
    public void testSnapshotReads() throws Exception {
        Table table = catalog.createTable(TestFixtures.TABLE_IDENTIFIER, TestFixtures.SCHEMA);
        GenericAppenderHelper helper = new GenericAppenderHelper(table, fileFormat, TEMPORARY_FOLDER);
        List<Record> expectedRecords = RandomGenericData.generate(TestFixtures.SCHEMA, 1, 0L);
        helper.appendToTable(expectedRecords);
        long snapshotId = table.currentSnapshot().snapshotId();
        long timestampMillis = table.currentSnapshot().timestampMillis();
        // produce another timestamp
        waitUntilAfter(timestampMillis);
        helper.appendToTable(RandomGenericData.generate(TestFixtures.SCHEMA, 1, 0L));
        TestHelpers.assertRecords(runWithOptions(ImmutableMap.of("snapshot-id", Long.toString(snapshotId))), expectedRecords, TestFixtures.SCHEMA);
        TestHelpers.assertRecords(runWithOptions(ImmutableMap.of("as-of-timestamp", Long.toString(timestampMillis))), expectedRecords, TestFixtures.SCHEMA);
    }

    @Test
    public void testIncrementalRead() throws Exception {
        Table table = catalog.createTable(TestFixtures.TABLE_IDENTIFIER, TestFixtures.SCHEMA);
        GenericAppenderHelper helper = new GenericAppenderHelper(table, fileFormat, TEMPORARY_FOLDER);
        List<Record> records1 = RandomGenericData.generate(TestFixtures.SCHEMA, 1, 0L);
        helper.appendToTable(records1);
        long snapshotId1 = table.currentSnapshot().snapshotId();
        // snapshot 2
        List<Record> records2 = RandomGenericData.generate(TestFixtures.SCHEMA, 1, 0L);
        helper.appendToTable(records2);
        List<Record> records3 = RandomGenericData.generate(TestFixtures.SCHEMA, 1, 0L);
        helper.appendToTable(records3);
        long snapshotId3 = table.currentSnapshot().snapshotId();
        // snapshot 4
        helper.appendToTable(RandomGenericData.generate(TestFixtures.SCHEMA, 1, 0L));
        List<Record> expected2 = Lists.newArrayList();
        expected2.addAll(records2);
        expected2.addAll(records3);
        TestHelpers.assertRecords(runWithOptions(ImmutableMap.<String, String>builder().put("start-snapshot-id", Long.toString(snapshotId1)).put("end-snapshot-id", Long.toString(snapshotId3)).build()), expected2, TestFixtures.SCHEMA);
    }

    @Test
    public void testFilterExp() throws Exception {
        Table table = catalog.createTable(TestFixtures.TABLE_IDENTIFIER, TestFixtures.SCHEMA, TestFixtures.SPEC);
        List<Record> expectedRecords = RandomGenericData.generate(TestFixtures.SCHEMA, 2, 0L);
        expectedRecords.get(0).set(2, "2020-03-20");
        expectedRecords.get(1).set(2, "2020-03-20");
        GenericAppenderHelper helper = new GenericAppenderHelper(table, fileFormat, TEMPORARY_FOLDER);
        DataFile dataFile1 = helper.writeFile(org.apache.iceberg.TestHelpers.Row.of("2020-03-20", 0), expectedRecords);
        DataFile dataFile2 = helper.writeFile(org.apache.iceberg.TestHelpers.Row.of("2020-03-21", 0), RandomGenericData.generate(TestFixtures.SCHEMA, 2, 0L));
        helper.appendToTable(dataFile1, dataFile2);
        TestHelpers.assertRecords(runWithFilter(Expressions.equal("dt", "2020-03-20"), "where dt='2020-03-20'"), expectedRecords, TestFixtures.SCHEMA);
    }

    @Test
    public void testPartitionTypes() throws Exception {
        Schema typesSchema = new Schema(Types.NestedField.optional(1, "id", Types.IntegerType.get()), Types.NestedField.optional(2, "decimal", Types.DecimalType.of(38, 18)), Types.NestedField.optional(3, "str", Types.StringType.get()), Types.NestedField.optional(4, "binary", Types.BinaryType.get()), Types.NestedField.optional(5, "date", Types.DateType.get()), Types.NestedField.optional(6, "time", Types.TimeType.get()), Types.NestedField.optional(7, "timestamp", Types.TimestampType.withoutZone()));
        PartitionSpec spec = PartitionSpec.builderFor(typesSchema).identity("decimal").identity("str").identity("binary").identity("date").identity("time").identity("timestamp").build();
        Table table = catalog.createTable(TestFixtures.TABLE_IDENTIFIER, typesSchema, spec);
        List<Record> records = RandomGenericData.generate(typesSchema, 10, 0L);
        GenericAppenderHelper appender = new GenericAppenderHelper(table, fileFormat, TEMPORARY_FOLDER);
        for (Record record : records) {
            org.apache.iceberg.TestHelpers.Row partition = org.apache.iceberg.TestHelpers.Row.of(record.get(1), record.get(2), record.get(3), record.get(4) == null ? null : DateTimeUtil.daysFromDate((LocalDate) record.get(4)), record.get(5) == null ? null : DateTimeUtil.microsFromTime((LocalTime) record.get(5)), record.get(6) == null ? null : DateTimeUtil.microsFromTimestamp((LocalDateTime) record.get(6)));
            appender.appendToTable(partition, Collections.singletonList(record));
        }
        TestHelpers.assertRecords(run(), records, typesSchema);
    }

    private static void assertRows(List<Row> results, Row... expected) {
        TestHelpers.assertRows(results, Arrays.asList(expected));
    }

    private static void waitUntilAfter(long timestampMillis) {
        long current = System.currentTimeMillis();
        while (current <= timestampMillis) {
            current = System.currentTimeMillis();
        }
    }
}
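
TestFlinkScan resolves its parameterized format string with FileFormat.valueOf(fileFormat.toUpperCase(Locale.ENGLISH)) and hands the enum to GenericAppenderHelper, which writes the test data in that format. As a small sketch of the reverse direction, the snippet below recovers the format from a data file path with FileFormat.fromFileName; the path string is an invented example, not a file produced by these tests:

import org.apache.iceberg.FileFormat;

public class FileFormatFromPath {

    public static void main(String[] args) {
        // Hypothetical data file path used only for illustration.
        String path = "warehouse/db/table/data/00000-0-abc.orc";

        // fromFileName(...) inspects the extension and returns the matching
        // FileFormat, or null when the extension is not recognized.
        FileFormat format = FileFormat.fromFileName(path);
        System.out.println(format);
    }
}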
