Here are examples of the Java API org.apache.iceberg.PartitionSpec.unpartitioned(), taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
119 Examples
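Before the voted examples, here is a minimal, self-contained sketch of what the method returns; the schema, field names, and class name are illustrative and not taken from any of the projects below.

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

import static org.apache.iceberg.types.Types.NestedField.required;

public class UnpartitionedSpecSketch {
  public static void main(String[] args) {
    // An illustrative schema to build partition specs against.
    Schema schema = new Schema(
        required(1, "id", Types.LongType.get()),
        required(2, "data", Types.StringType.get()));

    // PartitionSpec.unpartitioned() returns a spec with no partition fields.
    PartitionSpec unpartitioned = PartitionSpec.unpartitioned();
    System.out.println(unpartitioned.isUnpartitioned()); // true
    System.out.println(unpartitioned.fields().isEmpty()); // true

    // Contrast with a spec that partitions by identity on "data".
    PartitionSpec byData = PartitionSpec.builderFor(schema).identity("data").build();
    System.out.println(byData.isUnpartitioned()); // false
  }
}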
19
Source : TestHelper.java
with Apache License 2.0
from apache
public Table createUnpartitionedTable() {
  return createTable(schema, PartitionSpec.unpartitioned());
}
17
Source : IcebergCodecsTest.java
with Apache License 2.0
from Netflix
@Test
void shouldEncodeAndDecodeDataFile() {
  PartitionSpec spec = PartitionSpec.unpartitioned();
  DataFile expected = DataFiles.builder(spec)
      .withPath("/path/filename.parquet")
      .withFileSizeInBytes(1)
      .withPartition(null)
      .withMetrics(mock(Metrics.class))
      .withSplitOffsets(Collections.singletonList(1L))
      .build();
  byte[] encoded = dataFileCodec.encode(expected);
  DataFile actual = dataFileCodec.decode(encoded);
  assertEquals(expected.path(), actual.path());
  assertEquals(expected.fileSizeInBytes(), actual.fileSizeInBytes());
  assertEquals(expected.partition(), actual.partition());
  assertEquals(expected.splitOffsets(), actual.splitOffsets());
}
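The DataFiles.builder pattern above recurs throughout these examples. A reduced sketch of it with an unpartitioned spec; the path and counts are placeholders, and the record count is set directly rather than via a mocked Metrics object:

import org.apache.iceberg.DataFile;
import org.apache.iceberg.DataFiles;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.PartitionSpec;

// With an unpartitioned spec, no partition tuple needs to be supplied.
DataFile file = DataFiles.builder(PartitionSpec.unpartitioned())
    .withPath("/tmp/data/file-000.parquet") // placeholder path
    .withFormat(FileFormat.PARQUET)
    .withFileSizeInBytes(1024L)             // placeholder size
    .withRecordCount(100L)                  // placeholder count
    .build();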
17
Source : Spark3Util.java
with Apache License 2.0
from apache
/**
 * Converts Spark transforms into a {@link PartitionSpec}.
 *
 * @param schema the table schema
 * @param partitioning Spark Transforms
 * @return a PartitionSpec
 */
public static PartitionSpec toPartitionSpec(Schema schema, Transform[] partitioning) {
  if (partitioning == null || partitioning.length == 0) {
    return PartitionSpec.unpartitioned();
  }
  PartitionSpec.Builder builder = PartitionSpec.builderFor(schema);
  for (Transform transform : partitioning) {
    Preconditions.checkArgument(transform.references().length == 1,
        "Cannot convert transform with more than one column reference: %s", transform);
    String colName = DOT.join(transform.references()[0].fieldNames());
    switch (transform.name()) {
      case "identity":
        builder.identity(colName);
        break;
      case "bucket":
        builder.bucket(colName, findWidth(transform));
        break;
      case "years":
        builder.year(colName);
        break;
      case "months":
        builder.month(colName);
        break;
      case "date":
      case "days":
        builder.day(colName);
        break;
      case "date_hour":
      case "hours":
        builder.hour(colName);
        break;
      case "truncate":
        builder.truncate(colName, findWidth(transform));
        break;
      default:
        throw new UnsupportedOperationException("Transform is not supported: " + transform);
    }
  }
  return builder.build();
}
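For context, a hedged usage sketch of the converter above; the schema variable and column names are assumptions, and Expressions here is Spark's org.apache.spark.sql.connector.expressions.Expressions:

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.spark.Spark3Util;
import org.apache.spark.sql.connector.expressions.Expressions;
import org.apache.spark.sql.connector.expressions.Transform;

// Given an Iceberg Schema with "category" and "id" columns (hypothetical):
Transform[] transforms = new Transform[] {
    Expressions.identity("category"),
    Expressions.bucket(16, "id")
};
PartitionSpec spec = Spark3Util.toPartitionSpec(schema, transforms);

// With null or an empty transform array, the method falls back to the unpartitioned spec:
PartitionSpec unpartitioned = Spark3Util.toPartitionSpec(schema, new Transform[0]);
// unpartitioned.isUnpartitioned() == true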
17
Source : VectorizedReadFlatParquetDataBenchmark.java
with Apache License 2.0
from apache
@Override
protected Table initTable() {
  Schema schema = new Schema(
      optional(1, "longCol", Types.LongType.get()),
      optional(2, "intCol", Types.IntegerType.get()),
      optional(3, "floatCol", Types.FloatType.get()),
      optional(4, "doubleCol", Types.DoubleType.get()),
      optional(5, "decimalCol", Types.DecimalType.of(20, 5)),
      optional(6, "dateCol", Types.DateType.get()),
      optional(7, "timestampCol", Types.TimestampType.withZone()),
      optional(8, "stringCol", Types.StringType.get()));
  PartitionSpec partitionSpec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables(hadoopConf());
  Map<String, String> properties = parquetWriteProps();
  return tables.create(schema, partitionSpec, properties, newTableLocation());
}
17
Source : HiveTableTest.java
with Apache License 2.0
from apache
@Test
public void testEngineHiveEnabledConfig() throws TException {
  // Drop the previously created table to make place for the new one
  catalog.dropTable(TABLE_IDENTIFIER);
  // Enable by hive-conf
  catalog.getConf().set(ConfigProperties.ENGINE_HIVE_ENABLED, "true");
  catalog.createTable(TABLE_IDENTIFIER, schema, PartitionSpec.unpartitioned());
  org.apache.hadoop.hive.metastore.api.Table hmsTable = metastoreClient.getTable(DB_NAME, TABLE_NAME);
  assertHiveEnabled(hmsTable, true);
  catalog.dropTable(TABLE_IDENTIFIER);
  // Disable by hive-conf
  catalog.getConf().set(ConfigProperties.ENGINE_HIVE_ENABLED, "false");
  catalog.createTable(TABLE_IDENTIFIER, schema, PartitionSpec.unpartitioned());
  hmsTable = metastoreClient.getTable(DB_NAME, TABLE_NAME);
  assertHiveEnabled(hmsTable, false);
}
17
Source : HiveTableTest.java
with Apache License 2.0
from apache
@Test
public void testEngineHiveEnabledDefault() throws TException {
  // Drop the previously created table to make place for the new one
  catalog.dropTable(TABLE_IDENTIFIER);
  // Unset in hive-conf
  catalog.getConf().unset(ConfigProperties.ENGINE_HIVE_ENABLED);
  catalog.createTable(TABLE_IDENTIFIER, schema, PartitionSpec.unpartitioned());
  org.apache.hadoop.hive.metastore.api.Table hmsTable = metastoreClient.getTable(DB_NAME, TABLE_NAME);
  assertHiveEnabled(hmsTable, false);
}
16
Source : HiveIcebergMetaHook.java
with Apache License 2.0
from apache
private static PartitionSpec spec(Schema schema, Properties properties,
    org.apache.hadoop.hive.metastore.api.Table hmsTable) {
  if (hmsTable.getParameters().get(InputFormatConfig.PARTITION_SPEC) != null) {
    Preconditions.checkArgument(!hmsTable.isSetPartitionKeys() || hmsTable.getPartitionKeys().isEmpty(),
        "Provide only one of the following: Hive partition specification, or the " +
            InputFormatConfig.PARTITION_SPEC + " property");
    return PartitionSpecParser.fromJson(schema, hmsTable.getParameters().get(InputFormatConfig.PARTITION_SPEC));
  } else if (hmsTable.isSetPartitionKeys() && !hmsTable.getPartitionKeys().isEmpty()) {
    // If the table is partitioned then generate the identity partition definitions for the Iceberg table
    return HiveSchemaUtil.spec(schema, hmsTable.getPartitionKeys());
  } else {
    return PartitionSpec.unpartitioned();
  }
}
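The InputFormatConfig.PARTITION_SPEC property read above holds a JSON form of the spec. A minimal sketch of that round trip with PartitionSpecParser, using an illustrative schema:

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.types.Types;

import static org.apache.iceberg.types.Types.NestedField.required;

// An illustrative schema and spec:
Schema schema = new Schema(required(1, "id", Types.LongType.get()));
PartitionSpec spec = PartitionSpec.unpartitioned();

// Serialize the spec to JSON and read it back against the same schema.
String json = PartitionSpecParser.toJson(spec);
PartitionSpec roundTripped = PartitionSpecParser.fromJson(schema, json);
// roundTripped.isUnpartitioned() == true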
16
Source : TestResiduals.java
with Apache License 2.0
from apache
@Test
public void testUnpartitionedResiduals() {
  Expression[] expressions = new Expression[] {
      Expressions.alwaysTrue(), Expressions.alwaysFalse(),
      Expressions.lessThan("a", 5), Expressions.greaterThanOrEqual("b", 16),
      Expressions.notNull("c"), Expressions.isNull("d"),
      Expressions.in("e", 1, 2, 3), Expressions.notIn("f", 1, 2, 3),
      Expressions.notNaN("g"), Expressions.isNaN("h") };
  for (Expression expr : expressions) {
    ResidualEvaluator residualEvaluator = ResidualEvaluator.of(PartitionSpec.unpartitioned(), expr, true);
    Assert.assertEquals("Should return expression", expr, residualEvaluator.residualFor(Row.of()));
  }
}
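A minimal sketch of the behavior this test asserts, assuming Iceberg's expressions API: with an unpartitioned spec there is no partition data to evaluate against, so the residual is the original expression.

import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.ResidualEvaluator;

Expression expr = Expressions.lessThan("a", 5); // illustrative predicate
ResidualEvaluator residuals = ResidualEvaluator.of(PartitionSpec.unpartitioned(), expr, true);
// With no partition fields there is nothing to bind against partition data,
// so residualFor(...) returns expr itself for any partition tuple
// (Row.of() in the test is an Iceberg test helper producing an empty struct).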
15
Source : TestSparkDataFile.java
with Apache License 2.0
from apache
@Test
public void testValueConversion() throws IOException {
  Table table = TABLES.create(SCHEMA, PartitionSpec.unpartitioned(), Maps.newHashMap(), tableLocation);
  checkSparkDataFile(table);
}
15
Source : TestDataFrameWrites.java
with Apache License 2.0
from apache
private Table createTable(Schema schema, File location) {
  HadoopTables tables = new HadoopTables(CONF);
  return tables.create(schema, PartitionSpec.unpartitioned(), location.toString());
}
15
Source : TestHiveIcebergStorageHandlerNoScan.java
with Apache License 2.0
from apache
@Test
public void testCreateTableWithoutSpec() {
  TableIdentifier identifier = TableIdentifier.of("default", "customers");
  shell.executeStatement("CREATE EXTERNAL TABLE customers " +
      "STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " +
      testTables.locationForCreateTableSQL(identifier) +
      "TBLPROPERTIES ('" + InputFormatConfig.TABLE_SCHEMA + "'='" +
      SchemaParser.toJson(HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA) + "')");
  // Check the Iceberg table partition data
  org.apache.iceberg.Table icebergTable = testTables.loadTable(identifier);
  Assert.assertEquals(PartitionSpec.unpartitioned(), icebergTable.spec());
}
14
Source : IcebergCommitterStageTest.java
with Apache License 2.0
from Netflix
@BeforeEach
void setUp() {
  this.scheduler = new TestScheduler();
  this.subscriber = new TestSubscriber<>();
  Parameters parameters = StageOverrideParameters.newParameters();
  CommitterConfig config = new CommitterConfig(parameters);
  CommitterMetrics metrics = new CommitterMetrics();
  this.committer = mock(IcebergCommitter.class);
  transformer = new IcebergCommitterStage.Transformer(config, metrics, committer, scheduler);
  ServiceLocator serviceLocator = mock(ServiceLocator.class);
  when(serviceLocator.service(Configuration.class)).thenReturn(mock(Configuration.class));
  this.catalog = mock(Catalog.class);
  Table table = mock(Table.class);
  when(table.spec()).thenReturn(PartitionSpec.unpartitioned());
  when(this.catalog.loadTable(any())).thenReturn(table);
  when(serviceLocator.service(Catalog.class)).thenReturn(this.catalog);
  this.context = mock(Context.class);
  when(this.context.getParameters()).thenReturn(parameters);
  when(this.context.getServiceLocator()).thenReturn(serviceLocator);
}
14
Source : TestPredicatePushdown.java
with Apache License 2.0
from ExpediaGroup
@Before
public void before() throws IOException {
  tableLocation = temp.newFolder();
  Schema schema = new Schema(
      required(1, "id", Types.LongType.get()),
      optional(2, "data", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Configuration conf = new Configuration();
  HadoopCatalog catalog = new HadoopCatalog(conf, tableLocation.getAbsolutePath());
  TableIdentifier id = TableIdentifier.parse("source_db.table_a");
  table = catalog.createTable(id, schema, spec);
}
14
Source : TestInputFormatWithEmptyTable.java
with Apache License 2.0
from ExpediaGroup
@Before
public void before() throws IOException {
  tableLocation = temp.newFolder();
  Schema schema = new Schema(
      required(1, "id", Types.LongType.get()),
      optional(2, "data", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables();
  Table table = tables.create(schema, spec, tableLocation.getAbsolutePath());
}
14
Source : TestHiveIcebergStorageHandlerLocalScan.java
with Apache License 2.0
from apache
@Test
public void testCreateTableWithColumnSpecification() throws IOException {
  TableIdentifier identifier = TableIdentifier.of("default", "customers");
  Map<StructLike, List<Record>> data = new HashMap<>(1);
  data.put(null, HiveIcebergStorageHandlerTestUtils.CUSTOMER_RECORDS);
  String createSql = "CREATE EXTERNAL TABLE " + identifier +
      " (customer_id BIGINT, first_name STRING COMMENT 'This is first name', " +
      "last_name STRING COMMENT 'This is last name')" +
      " STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' " +
      testTables.locationForCreateTableSQL(identifier);
  runCreateAndReadTest(identifier, createSql, HiveIcebergStorageHandlerTestUtils.CUSTOMER_SCHEMA,
      PartitionSpec.unpartitioned(), data);
}
14
Source : HiveTableTest.java
with Apache License 2.0
from apache
@Test
public void testEngineHiveEnabledTableProperty() throws TException {
  // Drop the previously created table to make place for the new one
  catalog.dropTable(TABLE_IDENTIFIER);
  // Enabled by table property - also check that the hive-conf is ignored
  Map<String, String> tableProperties = new HashMap<>();
  tableProperties.put(TableProperties.ENGINE_HIVE_ENABLED, "true");
  catalog.getConf().set(ConfigProperties.ENGINE_HIVE_ENABLED, "false");
  catalog.createTable(TABLE_IDENTIFIER, schema, PartitionSpec.unpartitioned(), tableProperties);
  org.apache.hadoop.hive.metastore.api.Table hmsTable = metastoreClient.getTable(DB_NAME, TABLE_NAME);
  assertHiveEnabled(hmsTable, true);
  catalog.dropTable(TABLE_IDENTIFIER);
  // Disabled by table property - also check that the hive-conf is ignored
  tableProperties.put(TableProperties.ENGINE_HIVE_ENABLED, "false");
  catalog.getConf().set(ConfigProperties.ENGINE_HIVE_ENABLED, "true");
  catalog.createTable(TABLE_IDENTIFIER, schema, PartitionSpec.unpartitioned(), tableProperties);
  hmsTable = metastoreClient.getTable(DB_NAME, TABLE_NAME);
  assertHiveEnabled(hmsTable, false);
}
14
Source : TestSortOrderUtil.java
with Apache License 2.0
from apache
@Test
public void testEmptySpecs() {
  PartitionSpec spec = PartitionSpec.unpartitioned();
  SortOrder order = SortOrder.builderFor(SCHEMA).withOrderId(1).asc("id", NULLS_LAST).build();
  TestTables.TestTable table = TestTables.create(tableDir, "test", SCHEMA, spec, order, formatVersion);
  // pass PartitionSpec.unpartitioned() on purpose as it has an empty schema
  SortOrder actualOrder = SortOrderUtil.buildSortOrder(table.schema(), spec, table.sortOrder());
  Assert.assertEquals("Order ID must be fresh", 1, actualOrder.orderId());
  Assert.assertEquals("Order must have 1 field", 1, actualOrder.fields().size());
  Assert.assertEquals("Field id must be fresh", 1, actualOrder.fields().get(0).sourceId());
  Assert.assertEquals("Direction must match", ASC, actualOrder.fields().get(0).direction());
  Assert.assertEquals("Null order must match", NULLS_LAST, actualOrder.fields().get(0).nullOrder());
}
14
Source : TestHadoopCatalog.java
with Apache License 2.0
from apache
@Test
public void testRenameTable() throws Exception {
  Configuration conf = new Configuration();
  String warehousePath = temp.newFolder().getAbsolutePath();
  HadoopCatalog catalog = new HadoopCatalog(conf, warehousePath);
  TableIdentifier testTable = TableIdentifier.of("db", "tbl1");
  catalog.createTable(testTable, SCHEMA, PartitionSpec.unpartitioned());
  AssertHelpers.assertThrows("should throw exception", UnsupportedOperationException.class,
      "Cannot rename Hadoop tables", () -> {
        catalog.renameTable(testTable, TableIdentifier.of("db", "tbl2"));
      });
}
13
Source : TestReadSnapshotTable.java
with Apache License 2.0
from ExpediaGroup
@Before
public void before() throws IOException {
  tableLocation = temp.newFolder();
  catalog = new HadoopCatalog(conf, tableLocation.getAbsolutePath());
  PartitionSpec spec = PartitionSpec.unpartitioned();
  TableIdentifier id = TableIdentifier.parse("source_db.table_a");
  Table table = catalog.createTable(id, schema, spec);
  List<Record> data = new ArrayList<>();
  data.add(TestHelpers.createSimpleRecord(1L, "Michael"));
  data.add(TestHelpers.createSimpleRecord(2L, "Andy"));
  data.add(TestHelpers.createSimpleRecord(3L, "Berta"));
  DataFile fileA = TestHelpers.writeFile(temp.newFile(), table, null, FileFormat.PARQUET, data);
  DataFile fileB = TestHelpers.writeFile(temp.newFile(), table, null, FileFormat.PARQUET, data);
  DataFile fileC = TestHelpers.writeFile(temp.newFile(), table, null, FileFormat.PARQUET, data);
  table.newAppend().appendFile(fileA).commit();
  table.newAppend().appendFile(fileB).commit();
  table.newAppend().appendFile(fileC).commit();
  List<Snapshot> snapshots = Lists.newArrayList(table.snapshots().iterator());
  snapshotId = snapshots.get(0).snapshotId();
}
13
Source : TestInputFormatWithMultipleTasks.java
with Apache License 2.0
from ExpediaGroup
@Before
public void before() throws IOException {
  tableLocation = temp.newFolder();
  Schema schema = new Schema(
      required(1, "id", Types.LongType.get()),
      optional(2, "data", Types.StringType.get()));
  PartitionSpec spec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables();
  Table table = tables.create(schema, spec, tableLocation.getAbsolutePath());
  List<Record> data = new ArrayList<>();
  data.add(TestHelpers.createSimpleRecord(1L, "Michael"));
  data.add(TestHelpers.createSimpleRecord(2L, "Andy"));
  DataFile fileA = TestHelpers.writeFile(temp.newFile(), table, null, FileFormat.PARQUET, data);
  table.newAppend().appendFile(fileA).commit();
  DataFile fileB = TestHelpers.writeFile(temp.newFile(), table, null, FileFormat.PARQUET, data);
  table.newAppend().appendFile(fileB).commit();
  snapshotId = table.currentSnapshot().snapshotId();
}
13
Source : TestCustomCatalog.java
with Apache License 2.0
from apache
@Before
public void setupTable() throws Exception {
  SparkConf sparkConf = spark.sparkContext().conf();
  sparkConf.set(String.format("%s.%s", CustomCatalogs.ICEBERG_CATALOG_PREFIX, CustomCatalogs.ICEBERG_DEFAULT_CATALOG), "placeholder");
  sparkConf.set(String.format("%s.%s", CustomCatalogs.ICEBERG_CATALOG_PREFIX, TEST_CATALOG), "placeholder");
  this.tables = new HadoopTables(spark.sessionState().newHadoopConf());
  this.tableDir = temp.newFolder();
  // created by table create
  tableDir.delete();
  this.tableLocation = tableDir.toURI().toString();
  tables.create(SCHEMA, PartitionSpec.unpartitioned(), String.format("%s/%s", tableLocation, TABLE.name()));
}
13
Source : TestSnapshotSelection.java
with Apache License 2.0
from apache
@Test(expected = IllegalArgumentException.class)
public void testSnapshotSelectionByInvalidTimestamp() throws IOException {
  long timestamp = System.currentTimeMillis();
  String tableLocation = temp.newFolder("iceberg-table").toString();
  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  tables.create(SCHEMA, spec, tableLocation);
  Dataset<Row> df = spark.read().format("iceberg")
      .option(SparkReadOptions.AS_OF_TIMESTAMP, timestamp)
      .load(tableLocation);
  df.collectAsList();
}
13
Source : TestSnapshotSelection.java
with Apache License 2.0
from apache
@Test(expected = IllegalArgumentException.class)
public void testSnapshotSelectionByInvalidSnapshotId() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();
  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  tables.create(SCHEMA, spec, tableLocation);
  Dataset<Row> df = spark.read().format("iceberg").option("snapshot-id", -10).load(tableLocation);
  df.collectAsList();
}
13
Source : TestIcebergSourceHadoopTables.java
with Apache License 2.0
from apache
@Override
public Table createTable(TableIdentifier ident, Schema schema, PartitionSpec spec) {
  if (spec.equals(PartitionSpec.unpartitioned())) {
    return TABLES.create(schema, tableLocation);
  }
  return TABLES.create(schema, spec, tableLocation);
}
13
Source : TestHiveIcebergStorageHandlerWithEngine.java
with Apache License 2.0
from apache
@Test
public void testInsertSupportedTypes() throws IOException {
  Assume.assumeTrue("Tez write is not implemented yet", executionEngine.equals("mr"));
  for (int i = 0; i < SUPPORTED_TYPES.size(); i++) {
    Type type = SUPPORTED_TYPES.get(i);
    // TODO: remove this filter when issue #1881 is resolved
    if (type == Types.UUIDType.get() && fileFormat == FileFormat.PARQUET) {
      continue;
    }
    // TODO: remove this filter when we figure out how we could test binary types
    if (type.equals(Types.BinaryType.get()) || type.equals(Types.FixedType.ofLength(5))) {
      continue;
    }
    String columnName = type.typeId().toString().toLowerCase() + "_column";
    Schema schema = new Schema(required(1, "id", Types.LongType.get()), required(2, columnName, type));
    List<Record> expected = TestHelper.generateRandomRecords(schema, 5, 0L);
    Table table = testTables.createTable(shell, type.typeId().toString().toLowerCase() + "_table_" + i,
        schema, PartitionSpec.unpartitioned(), fileFormat, expected);
    HiveIcebergTestUtils.validateData(table, expected, 0);
  }
}
13
Source : HiveCreateReplaceTableTest.java
with Apache License 2.0
from apache
@Test
public void testCreateOrReplaceTableTxnTableDeletedConcurrently() {
  Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));
  catalog.createTable(TABLE_IDENTIFIER, SCHEMA, SPEC);
  Assert.assertTrue("Table should be created", catalog.tableExists(TABLE_IDENTIFIER));
  Transaction txn = catalog.newReplaceTableTransaction(
      TABLE_IDENTIFIER, SCHEMA, PartitionSpec.unpartitioned(), tableLocation, Maps.newHashMap(), true);
  txn.updateProperties().set("prop", "value").commit();
  // drop the table concurrently
  catalog.dropTable(TABLE_IDENTIFIER);
  // expect the transaction to succeed anyway
  txn.commitTransaction();
  Table table = catalog.loadTable(TABLE_IDENTIFIER);
  Assert.assertEquals("Table props should match", "value", table.properties().get("prop"));
}
13
Source : TestHadoopCatalog.java
with Apache License 2.0
from apache
@Test
public void testDropTable() throws Exception {
  Configuration conf = new Configuration();
  String warehousePath = temp.newFolder().getAbsolutePath();
  HadoopCatalog catalog = new HadoopCatalog(conf, warehousePath);
  TableIdentifier testTable = TableIdentifier.of("db", "ns1", "ns2", "tbl");
  catalog.createTable(testTable, SCHEMA, PartitionSpec.unpartitioned());
  String metaLocation = catalog.defaultWarehouseLocation(testTable);
  FileSystem fs = Util.getFs(new Path(metaLocation), conf);
  Assert.assertTrue(fs.isDirectory(new Path(metaLocation)));
  catalog.dropTable(testTable);
  Assert.assertFalse(fs.isDirectory(new Path(metaLocation)));
}
13
Source : TestHadoopCatalog.java
with Apache License 2.0
from apache
@Test
public void testNamespaceExists() throws IOException {
  Configuration conf = new Configuration();
  String warehousePath = temp.newFolder().getAbsolutePath();
  HadoopCatalog catalog = new HadoopCatalog(conf, warehousePath);
  TableIdentifier tbl1 = TableIdentifier.of("db", "ns1", "ns2", "metadata");
  TableIdentifier tbl2 = TableIdentifier.of("db", "ns2", "ns3", "tbl2");
  TableIdentifier tbl3 = TableIdentifier.of("db", "ns3", "tbl4");
  TableIdentifier tbl4 = TableIdentifier.of("db", "metadata");
  Lists.newArrayList(tbl1, tbl2, tbl3, tbl4)
      .forEach(t -> catalog.createTable(t, SCHEMA, PartitionSpec.unpartitioned()));
  Assert.assertTrue("Should true to namespace exist",
      catalog.namespaceExists(Namespace.of("db", "ns1", "ns2")));
  Assert.assertTrue("Should false to namespace doesn't exist",
      !catalog.namespaceExists(Namespace.of("db", "db2", "ns2")));
}
13
Source : TestHadoopCatalog.java
with Apache License 2.0
from apache
@Test
public void testBasicCatalog() throws Exception {
  Configuration conf = new Configuration();
  String warehousePath = temp.newFolder().getAbsolutePath();
  HadoopCatalog catalog = new HadoopCatalog(conf, warehousePath);
  TableIdentifier testTable = TableIdentifier.of("db", "ns1", "ns2", "tbl");
  catalog.createTable(testTable, SCHEMA, PartitionSpec.unpartitioned());
  String metaLocation = catalog.defaultWarehouseLocation(testTable);
  FileSystem fs = Util.getFs(new Path(metaLocation), conf);
  Assert.assertTrue(fs.isDirectory(new Path(metaLocation)));
  catalog.dropTable(testTable);
  Assert.assertFalse(fs.isDirectory(new Path(metaLocation)));
}
12
Source : ITIcebergAssetKeyReader.java
with Apache License 2.0
from projectnessie
@Test
void testAssetKeyReader() {
  Table table = catalog.createTable(TableIdentifier.of("test", "table"), SCHEMA);
  table.newAppend().appendFile(DataFiles.builder(PartitionSpec.unpartitioned())
      .withPath("file:/x/y/z")
      .withFormat(FileFormat.PARQUET)
      .withFileSizeInBytes(12L)
      .withRecordCount(12)
      .build()).commit();
  IcebergAssetKeyConverter akr = new IcebergAssetKeyConverter(new SerializableConfiguration(hadoopConfig));
  // 1 of each as a single commit was checked
  ImmutableMap<String, Long> expected = ImmutableMap.of(
      "TABLE", 1L,
      "ICEBERG_MANIFEST", 1L,
      "ICEBERG_MANIFEST_LIST", 1L,
      "ICEBERG_METADATA", 1L,
      "DATA_FILE", 1L);
  check(akr, table, expected);
  table.newAppend().appendFile(DataFiles.builder(PartitionSpec.unpartitioned())
      .withPath("file:/x/y/zz")
      .withFormat(FileFormat.PARQUET)
      .withFileSizeInBytes(12L)
      .withRecordCount(12)
      .build()).commit();
  expected = ImmutableMap.of(
      // still 1 table
      "TABLE", 1L,
      // 1 manifest from first commit, 1 from second
      "ICEBERG_MANIFEST", 2L,
      // always one manifest list per snapshot, 2 snapshots currently
      "ICEBERG_MANIFEST_LIST", 2L,
      // always 1 metadata file per commit
      "ICEBERG_METADATA", 1L,
      // 2 data files, 1 for each append
      "DATA_FILE", 2L);
  check(akr, table, expected);
  long commitTime = System.currentTimeMillis();
  table.expireSnapshots().expireOlderThan(commitTime).deleteWith(NOOP).cleanExpiredFiles(false).commit();
  expected = ImmutableMap.of(
      // still 1 table
      "TABLE", 1L,
      // 1 manifest from first commit, 1 from second
      "ICEBERG_MANIFEST", 2L,
      // always one manifest list per snapshot, all snapshots have been removed
      "ICEBERG_MANIFEST_LIST", 1L,
      // always 1 metadata file per commit
      "ICEBERG_METADATA", 1L,
      // 2 data files, 1 for each append
      "DATA_FILE", 2L);
  check(akr, table, expected);
}
12
Source : TestReadSnapshotTable.java
with Apache License 2.0
from ExpediaGroup
@Test
public void testCreateRegularTableEndingWithSnapshots() throws IOException {
  TableIdentifier id = TableIdentifier.parse("source_db.table_a__snapshots");
  Table table = catalog.createTable(id, schema, PartitionSpec.unpartitioned());
  List<Record> data = new ArrayList<>();
  data.add(TestHelpers.createSimpleRecord(1L, "Michael"));
  DataFile fileA = TestHelpers.writeFile(temp.newFile(), table, null, FileFormat.PARQUET, data);
  table.newAppend().appendFile(fileA).commit();
  shell.execute("CREATE DATABASE source_db");
  shell.execute(new StringBuilder()
      .append("CREATE TABLE source_db.table_a__snapshots ")
      .append("STORED BY 'com.expediagroup.hiveberg.IcebergStorageHandler' ")
      .append("LOCATION '")
      .append(tableLocation.getAbsolutePath() + "/source_db/table_a__snapshots")
      .append("' TBLPROPERTIES ('iceberg.catalog'='hadoop.catalog','iceberg.snapshots.table'='false')")
      .toString());
  List<Object[]> result = shell.executeStatement("SELECT * FROM source_db.table_a__snapshots");
  assertEquals(1, result.size());
}
12
Source : TestReadSnapshotTable.java
with Apache License 2.0
from ExpediaGroup
@Test
public void testCreateTableWithSnapshotIDColumnInSchema() throws IOException {
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Schema schema = new Schema(
      required(1, "snapshot__id", Types.LongType.get()),
      optional(2, "data", Types.StringType.get()));
  TableIdentifier id = TableIdentifier.parse("source_db.table_b");
  Table table = catalog.createTable(id, schema, spec);
  List<Record> data = new ArrayList<>();
  data.add(TestHelpers.createSimpleRecord(1L, "Michael"));
  DataFile fileA = TestHelpers.writeFile(temp.newFile(), table, null, FileFormat.PARQUET, data);
  table.newAppend().appendFile(fileA).commit();
  shell.execute("CREATE DATABASE source_db");
  shell.execute(new StringBuilder()
      .append("CREATE TABLE source_db.table_b ")
      .append("STORED BY 'com.expediagroup.hiveberg.IcebergStorageHandler' ")
      .append("LOCATION '")
      .append(tableLocation.getAbsolutePath() + "/source_db/table_b")
      .append("' TBLPROPERTIES ('iceberg.catalog'='hadoop.catalog', 'iceberg.hive.snapshot.virtual.column.name' = 'metadata_snapshot_id')")
      .toString());
  List<Object[]> resultLatestTable = shell.executeStatement("SELECT * FROM source_db.table_b");
  assertEquals(1, resultLatestTable.size());
}
12
Source : SnapshotFunctionalityTest.java
with Apache License 2.0
from apache
@Before
public void before() throws IOException {
  Schema schema = new Schema(
      optional(1, "id", Types.IntegerType.get()),
      optional(2, "data", Types.StringType.get()));
  spark = SparkSession.builder().master("local[2]").getOrCreate();
  tableLocation = Files.createTempDirectory("temp").toFile();
  HadoopTables tables = new HadoopTables(spark.sessionState().newHadoopConf());
  PartitionSpec spec = PartitionSpec.unpartitioned();
  table = tables.create(schema, spec, tableLocation.toString());
  List<SimpleRecord> expected = Lists.newArrayList(
      new SimpleRecord(1, "a"), new SimpleRecord(2, "b"), new SimpleRecord(3, "c"));
  Dataset<Row> df = spark.createDataFrame(expected, SimpleRecord.class);
  for (int i = 0; i < 5; i++) {
    df.select("id", "data").write().format("iceberg").mode("append").save(tableLocation.toString());
  }
  table.refresh();
}
12
Source : TestRewriteDataFilesAction.java
with Apache License 2.0
from apache
@Test
public void testRewriteDataFilesEmptyTable() {
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Map<String, String> options = Maps.newHashMap();
  Table table = TABLES.create(SCHEMA, spec, options, tableLocation);
  Assert.assertNull("Table must be empty", table.currentSnapshot());
  Actions actions = Actions.forTable(table);
  actions.rewriteDataFiles().execute();
  Assert.assertNull("Table must stay empty", table.currentSnapshot());
}
12
Source : HiveCreateReplaceTableTest.java
with Apache License 2.0
from apache
@Test
public void testCreateOrReplaceTableTxnTableCreatedConcurrently() {
  Assert.assertFalse("Table should not exist", catalog.tableExists(TABLE_IDENTIFIER));
  Transaction txn = catalog.newReplaceTableTransaction(
      TABLE_IDENTIFIER, SCHEMA, PartitionSpec.unpartitioned(), tableLocation, Maps.newHashMap(), true);
  txn.updateProperties().set("prop", "value").commit();
  // create the table concurrently
  catalog.createTable(TABLE_IDENTIFIER, SCHEMA, SPEC);
  Assert.assertTrue("Table should be created", catalog.tableExists(TABLE_IDENTIFIER));
  // expect the transaction to succeed anyway
  txn.commitTransaction();
  Table table = catalog.loadTable(TABLE_IDENTIFIER);
  Assert.assertEquals("Partition spec should match", PartitionSpec.unpartitioned(), table.spec());
  Assert.assertEquals("Table props should match", "value", table.properties().get("prop"));
}
12
Source : TestHadoopCatalog.java
with Apache License 2.0
from apache
@Test
public void testLoadNamespaceMeta() throws IOException {
  Configuration conf = new Configuration();
  String warehousePath = temp.newFolder().getAbsolutePath();
  HadoopCatalog catalog = new HadoopCatalog(conf, warehousePath);
  TableIdentifier tbl1 = TableIdentifier.of("db", "ns1", "ns2", "metadata");
  TableIdentifier tbl2 = TableIdentifier.of("db", "ns2", "ns3", "tbl2");
  TableIdentifier tbl3 = TableIdentifier.of("db", "ns3", "tbl4");
  TableIdentifier tbl4 = TableIdentifier.of("db", "metadata");
  Lists.newArrayList(tbl1, tbl2, tbl3, tbl4)
      .forEach(t -> catalog.createTable(t, SCHEMA, PartitionSpec.unpartitioned()));
  catalog.loadNamespaceMetadata(Namespace.of("db"));
  AssertHelpers.assertThrows("Should fail to load namespace doesn't exist",
      NoSuchNamespaceException.class, "Namespace does not exist: ", () -> {
        catalog.loadNamespaceMetadata(Namespace.of("db", "db2", "ns2"));
      });
}
11
Source : TestJoinTablesWithHadoopTables.java
with Apache License 2.0
from ExpediaGroup
@Before
public void before() throws IOException {
  tableLocationA = temp.newFolder("table_a");
  tableLocationB = temp.newFolder("table_b");
  Schema schemaA = new Schema(
      optional(1, "first_name", Types.StringType.get()),
      optional(2, "salary", Types.LongType.get()),
      optional(3, "id", Types.LongType.get()));
  Schema schemaB = new Schema(
      optional(1, "name", Types.StringType.get()),
      optional(2, "salary", Types.LongType.get()));
  PartitionSpec spec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables();
  Table tableA = tables.create(schemaA, spec, tableLocationA.getAbsolutePath());
  Table tableB = tables.create(schemaB, spec, tableLocationB.getAbsolutePath());
  List<Record> tableAData = new ArrayList<>();
  tableAData.add(TestHelpers.createCustomRecord(schemaA, Arrays.asList("Ella", 3000L, 1L)));
  tableAData.add(TestHelpers.createCustomRecord(schemaA, Arrays.asList("Jean", 5000L, 2L)));
  tableAData.add(TestHelpers.createCustomRecord(schemaA, Arrays.asList("Joe", 2000L, 3L)));
  DataFile fileA = TestHelpers.writeFile(temp.newFile(), tableA, null, FileFormat.PARQUET, tableAData);
  List<Record> tableBData = new ArrayList<>();
  tableBData.add(TestHelpers.createCustomRecord(schemaB, Arrays.asList("Michael", 3000L)));
  tableBData.add(TestHelpers.createCustomRecord(schemaB, Arrays.asList("Andy", 3000L)));
  tableBData.add(TestHelpers.createCustomRecord(schemaB, Arrays.asList("Berta", 4000L)));
  DataFile fileB = TestHelpers.writeFile(temp.newFile(), tableB, null, FileFormat.PARQUET, tableBData);
  tableA.newAppend().appendFile(fileA).commit();
  tableB.newAppend().appendFile(fileB).commit();
  shell.start();
}
11
Source : TestWriteMetricsConfig.java
with Apache License 2.0
from apache
@Test
public void testBadCustomMetricCollectionForParquet() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();
  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Map<String, String> properties = Maps.newHashMap();
  properties.put(TableProperties.DEFAULT_WRITE_METRICS_MODE, "counts");
  properties.put("write.metadata.metrics.column.ids", "full");
  AssertHelpers.assertThrows("Creating a table with invalid metrics should fail",
      ValidationException.class, null,
      () -> tables.create(SIMPLE_SCHEMA, spec, properties, tableLocation));
}
11
Source : TestSparkSchema.java
with Apache License 2.0
from apache
@Test
public void testFailSparkReadSchemaCombinedWithProjectionWhenSchemaDoesNotContainProjection() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();
  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  tables.create(SCHEMA, spec, null, tableLocation);
  List<SimpleRecord> expectedRecords = Lists.newArrayList(new SimpleRecord(1, "a"));
  Dataset<Row> originalDf = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  originalDf.select("id", "data").write().format("iceberg").mode("append").save(tableLocation);
  StructType sparkReadSchema = new StructType(new StructField[] {
      new StructField("data", DataTypes.StringType, true, Metadata.empty()) });
  AssertHelpers.assertThrows("Spark should not allow a projection that is not included in the read schema",
      org.apache.spark.sql.AnalysisException.class,
      "cannot resolve '`id`' given input columns: [data]",
      () -> spark.read().schema(sparkReadSchema).format("iceberg").load(tableLocation).select("id"));
}
11
Source : TestSparkSchema.java
with Apache License 2.0
from apache
@Test
public void testFailIfSparkReadSchemaIsOff() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();
  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  tables.create(SCHEMA, spec, null, tableLocation);
  List<SimpleRecord> expectedRecords = Lists.newArrayList(new SimpleRecord(1, "a"));
  Dataset<Row> originalDf = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  originalDf.select("id", "data").write().format("iceberg").mode("append").save(tableLocation);
  StructType sparkReadSchema = new StructType(new StructField[] {
      // wrong field name
      new StructField("idd", DataTypes.IntegerType, true, Metadata.empty()) });
  AssertHelpers.assertThrows("Iceberg should not allow a projection that contain unknown fields",
      java.lang.IllegalArgumentException.class, "Field idd not found in source schema",
      () -> spark.read().schema(sparkReadSchema).format("iceberg").load(tableLocation));
}
11
Source : TestIcebergSourceTablesBase.java
with Apache License 2.0
from apache
@Test
public void testCountEntriesTable() {
  TableIdentifier tableIdentifier = TableIdentifier.of("db", "count_entries_test");
  createTable(tableIdentifier, SCHEMA, PartitionSpec.unpartitioned());
  // init load
  List<SimpleRecord> records = Lists.newArrayList(new SimpleRecord(1, "1"));
  Dataset<Row> inputDf = spark.createDataFrame(records, SimpleRecord.class);
  inputDf.select("id", "data").write().format("iceberg").mode("append").save(loadLocation(tableIdentifier));
  final int expectedEntryCount = 1;
  // count entries
  Assert.assertEquals("Count should return " + expectedEntryCount, expectedEntryCount,
      spark.read().format("iceberg").load(loadLocation(tableIdentifier, "entries")).count());
  // count all_entries
  Assert.assertEquals("Count should return " + expectedEntryCount, expectedEntryCount,
      spark.read().format("iceberg").load(loadLocation(tableIdentifier, "all_entries")).count());
}
10
Source : IcebergSourceFlatORCDataBenchmark.java
with Apache License 2.0
from apache
@Override
protected final Table initTable() {
  Schema schema = new Schema(
      required(1, "longCol", Types.LongType.get()),
      required(2, "intCol", Types.IntegerType.get()),
      required(3, "floatCol", Types.FloatType.get()),
      optional(4, "doubleCol", Types.DoubleType.get()),
      optional(5, "decimalCol", Types.DecimalType.of(20, 5)),
      optional(6, "dateCol", Types.DateType.get()),
      // Disable timestamp column for ORC performance tests as Spark native reader does not support
      // ORC's TIMESTAMP_INSTANT type
      // optional(7, "timestampCol", Types.TimestampType.withZone()),
      optional(8, "stringCol", Types.StringType.get()));
  PartitionSpec partitionSpec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables(hadoopConf());
  Map<String, String> properties = Maps.newHashMap();
  properties.put(TableProperties.METADATA_COMPRESSION, "gzip");
  return tables.create(schema, partitionSpec, properties, newTableLocation());
}
10
Source : IcebergSourceNestedListDataBenchmark.java
with Apache License 2.0
from apache
@Override
protected final Table initTable() {
  Schema schema = new Schema(
      required(0, "id", Types.LongType.get()),
      optional(1, "outerlist", Types.ListType.ofOptional(2,
          Types.StructType.of(required(3, "innerlist", Types.ListType.ofRequired(4, Types.StringType.get()))))));
  PartitionSpec partitionSpec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables(hadoopConf());
  Map<String, String> properties = Maps.newHashMap();
  properties.put(TableProperties.METADATA_COMPRESSION, "gzip");
  return tables.create(schema, partitionSpec, properties, newTableLocation());
}
10
Source : IcebergSourceNestedDataBenchmark.java
with Apache License 2.0
from apache
@Override
protected final Table initTable() {
  Schema schema = new Schema(
      required(0, "id", Types.LongType.get()),
      optional(4, "nested", Types.StructType.of(
          required(1, "col1", Types.StringType.get()),
          required(2, "col2", Types.DoubleType.get()),
          required(3, "col3", Types.LongType.get()))));
  PartitionSpec partitionSpec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables(hadoopConf());
  Map<String, String> properties = Maps.newHashMap();
  properties.put(TableProperties.METADATA_COMPRESSION, "gzip");
  return tables.create(schema, partitionSpec, properties, newTableLocation());
}
10
Source : IcebergSourceFlatDataBenchmark.java
with Apache License 2.0
from apache
@Override
protected final Table initTable() {
  Schema schema = new Schema(
      required(1, "longCol", Types.LongType.get()),
      required(2, "intCol", Types.IntegerType.get()),
      required(3, "floatCol", Types.FloatType.get()),
      optional(4, "doubleCol", Types.DoubleType.get()),
      optional(5, "decimalCol", Types.DecimalType.of(20, 5)),
      optional(6, "dateCol", Types.DateType.get()),
      optional(7, "timestampCol", Types.TimestampType.withZone()),
      optional(8, "stringCol", Types.StringType.get()));
  PartitionSpec partitionSpec = PartitionSpec.unpartitioned();
  HadoopTables tables = new HadoopTables(hadoopConf());
  Map<String, String> properties = Maps.newHashMap();
  properties.put(TableProperties.METADATA_COMPRESSION, "gzip");
  return tables.create(schema, partitionSpec, properties, newTableLocation());
}
10
Source : TestSnapshotSelection.java
with Apache License 2.0
from apache
@Test(expected = IllegalArgumentException.class)
public void testSnapshotSelectionBySnapshotIdAndTimestamp() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();
  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Table table = tables.create(SCHEMA, spec, tableLocation);
  List<SimpleRecord> firstBatchRecords = Lists.newArrayList(
      new SimpleRecord(1, "a"), new SimpleRecord(2, "b"), new SimpleRecord(3, "c"));
  Dataset<Row> firstDf = spark.createDataFrame(firstBatchRecords, SimpleRecord.class);
  firstDf.select("id", "data").write().format("iceberg").mode("append").save(tableLocation);
  long timestamp = System.currentTimeMillis();
  long snapshotId = table.currentSnapshot().snapshotId();
  Dataset<Row> df = spark.read().format("iceberg")
      .option(SparkReadOptions.SNAPSHOT_ID, snapshotId)
      .option(SparkReadOptions.AS_OF_TIMESTAMP, timestamp)
      .load(tableLocation);
  df.collectAsList();
}
10
Source : TestSparkParquetReader.java
with Apache License 2.0
from apache
protected Table tableFromInputFile(InputFile inputFile, Schema schema) throws IOException {
  HadoopTables tables = new HadoopTables();
  Table table = tables.create(schema, PartitionSpec.unpartitioned(), ImmutableMap.of(),
      temp.newFolder().getCanonicalPath());
  table.newAppend().appendFile(DataFiles.builder(PartitionSpec.unpartitioned())
      .withFormat(FileFormat.PARQUET)
      .withInputFile(inputFile)
      .withMetrics(ParquetUtil.fileMetrics(inputFile, MetricsConfig.getDefault()))
      .withFileSizeInBytes(inputFile.getLength())
      .build()).commit();
  return table;
}
10
Source : TestLocalScan.java
with Apache License 2.0
from apache
private DataFile writeFile(String location, String filename, Schema schema, List<Record> records) throws IOException {
  Path path = new Path(location, filename);
  FileFormat fileFormat = FileFormat.fromFileName(filename);
  Preconditions.checkNotNull(fileFormat, "Cannot determine format for file: %s", filename);
  FileAppender<Record> fileAppender = new GenericAppenderFactory(schema).newAppender(fromPath(path, CONF), fileFormat);
  try (FileAppender<Record> appender = fileAppender) {
    appender.addAll(records);
  }
  return DataFiles.builder(PartitionSpec.unpartitioned())
      .withInputFile(HadoopInputFile.fromPath(path, CONF))
      .withMetrics(fileAppender.metrics())
      .build();
}
9
Source : TestJoinTablesWithHadoopCatalog.java
with Apache License 2.0
from ExpediaGroup
@Before
public void before() throws IOException {
  tableLocation = temp.newFolder("table_a");
  Schema schemaA = new Schema(
      optional(1, "first_name", Types.StringType.get()),
      optional(2, "salary", Types.LongType.get()),
      optional(3, "id", Types.LongType.get()));
  Schema schemaB = new Schema(
      optional(1, "name", Types.StringType.get()),
      optional(2, "salary", Types.LongType.get()));
  PartitionSpec spec = PartitionSpec.unpartitioned();
  Configuration conf = new Configuration();
  HadoopCatalog catalog = new HadoopCatalog(conf, tableLocation.getAbsolutePath());
  TableIdentifier idA = TableIdentifier.parse("source_db.table_a");
  Table tableA = catalog.createTable(idA, schemaA, spec);
  TableIdentifier idB = TableIdentifier.parse("source_db.table_b");
  Table tableB = catalog.createTable(idB, schemaB, spec);
  List<Record> tableAData = new ArrayList<>();
  tableAData.add(TestHelpers.createCustomRecord(schemaA, Arrays.asList("Ella", 3000L, 1L)));
  tableAData.add(TestHelpers.createCustomRecord(schemaA, Arrays.asList("Jean", 5000L, 2L)));
  tableAData.add(TestHelpers.createCustomRecord(schemaA, Arrays.asList("Joe", 2000L, 3L)));
  DataFile fileA = TestHelpers.writeFile(temp.newFile(), tableA, null, FileFormat.PARQUET, tableAData);
  List<Record> tableBData = new ArrayList<>();
  tableBData.add(TestHelpers.createCustomRecord(schemaB, Arrays.asList("Michael", 3000L)));
  tableBData.add(TestHelpers.createCustomRecord(schemaB, Arrays.asList("Andy", 3000L)));
  tableBData.add(TestHelpers.createCustomRecord(schemaB, Arrays.asList("Berta", 4000L)));
  DataFile fileB = TestHelpers.writeFile(temp.newFile(), tableB, null, FileFormat.PARQUET, tableBData);
  tableA.newAppend().appendFile(fileA).commit();
  tableB.newAppend().appendFile(fileB).commit();
  shell.start();
}
9
Source : TestSparkSchema.java
with Apache License 2.0
from apache
@Test
public void testSparkReadSchemaCombinedWithProjection() throws IOException {
  String tableLocation = temp.newFolder("iceberg-table").toString();
  HadoopTables tables = new HadoopTables(CONF);
  PartitionSpec spec = PartitionSpec.unpartitioned();
  tables.create(SCHEMA, spec, null, tableLocation);
  List<SimpleRecord> expectedRecords = Lists.newArrayList(new SimpleRecord(1, "a"));
  Dataset<Row> originalDf = spark.createDataFrame(expectedRecords, SimpleRecord.class);
  originalDf.select("id", "data").write().format("iceberg").mode("append").save(tableLocation);
  StructType sparkReadSchema = new StructType(new StructField[] {
      new StructField("id", DataTypes.IntegerType, true, Metadata.empty()),
      new StructField("data", DataTypes.StringType, true, Metadata.empty()) });
  Dataset<Row> resultDf = spark.read().schema(sparkReadSchema).format("iceberg").load(tableLocation).select("id");
  Row[] results = (Row[]) resultDf.collect();
  Assert.assertEquals("Result size matches", 1, results.length);
  Assert.assertEquals("Row length matches with sparkReadSchema", 1, results[0].length());
  Assert.assertEquals("Row content matches data", 1, results[0].getInt(0));
}