Here are the examples of the java api org.apache.avro.Schema.createRecord() taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
77 Examples
19
Source : AvroUtils.java
with Apache License 2.0
from Teradata
with Apache License 2.0
from Teradata
/**
 * Builds an Avro record schema from JDBC result-set column metadata.
 * <p>
 * Each column becomes one field: ARRAY columns get a dedicated array schema,
 * every other type is mapped through {@code getAvroType}; nullable columns are
 * wrapped in a {@code union [null, type]} so NULL values can be encoded.
 *
 * @param schemaInfo column metadata describing the result set
 * @param helper     connection helper (unused here; kept for the caller's API)
 * @return an Avro record schema covering all columns
 * @throws SQLException if column metadata cannot be interpreted
 */
public static Schema getSchema(SchemaInfo schemaInfo, JaqyHelper helper) throws SQLException {
FullColumnInfo[] columnInfos = schemaInfo.columns;
int columns = columnInfos.length;
ArrayList<Schema.Field> fields = new ArrayList<Schema.Field>(columns);
for (int i = 0; i < columns; ++i) {
// The column label becomes the Avro field name.
String header = columnInfos[i].label;
int type = columnInfos[i].type;
Schema.Type avroType = getAvroType(type);
Schema fieldSchema;
if (avroType == Schema.Type.ARRAY) {
fieldSchema = getArraySchema(columnInfos[i]);
} else {
fieldSchema = Schema.create(getAvroType(type));
}
if (columnInfos[i].nullable != ResultSetMetaData.columnNoNulls) {
// In order to include NULLs in the record, we have to include
// NULL as part of field schema for the column.
ArrayList<Schema> list = new ArrayList<Schema>();
list.add(Schema.create(Schema.Type.NULL));
list.add(fieldSchema);
fieldSchema = Schema.createUnion(list);
}
Schema.Field field = new Schema.Field(header, fieldSchema, null, (Object) null);
fields.add(field);
}
// create a dummy record schema name.
// NOTE(review): hashCode() on an array is identity-based, so this name is
// only unique per JVM instance — presumably that is all that is needed here.
String schemaName = "rsmd" + columnInfos.hashCode();
Schema schema = Schema.createRecord(schemaName, null, null, false);
schema.setFields(fields);
return schema;
}
19
Source : SchemaIdGenerator.java
with Apache License 2.0
from Talend
with Apache License 2.0
from Talend
/**
 * Computes a 64-bit parsing fingerprint over the given fields alone, copying
 * each field into an anonymous record so only the field definitions (not any
 * enclosing record name) influence the result.
 */
private static long fingerprint(final List<Schema.Field> fields) {
    // Schema.Field instances cannot be reused across schemas, so clone each one.
    final java.util.function.Function<Schema.Field, Schema.Field> copyField =
            it -> new Schema.Field(it.name(), it.schema(), it.doc(), it.defaultVal(), it.order());
    return SchemaNormalization.parsingFingerprint64(
            Schema.createRecord(fields.stream().map(copyField).collect(toList())));
}
19
Source : InputAvroSchemaTest.java
with Apache License 2.0
from spotify
with Apache License 2.0
from spotify
/**
 * Builds a record schema whose fields each reuse the record schema itself as
 * their type (a recursive schema), one field per entry of {@code fieldNames},
 * with docs paired positionally from {@code fieldDocs}.
 */
private Schema createRecordSchema(final String recordName, final String recordDoc, final String recordNamespace, final String[] fieldNames, final String[] fieldDocs) {
    final Schema recordSchema = Schema.createRecord(recordName, recordDoc, recordNamespace, false);
    final List<Schema.Field> recordFields = new ArrayList<>();
    int index = 0;
    for (final String fieldName : fieldNames) {
        // Each field's type is the record being built — deliberately recursive.
        recordFields.add(new Schema.Field(fieldName, recordSchema, fieldDocs[index]));
        index++;
    }
    recordSchema.setFields(recordFields);
    return recordSchema;
}
19
Source : FastSerdeTestsSupport.java
with Apache License 2.0
from RTBHOUSE
with Apache License 2.0
from RTBHOUSE
/**
 * Test-support factory: creates a record schema named {@code name} (doc set to
 * the same string) under the shared {@code NAMESPACE} and attaches the fields.
 */
public static Schema createRecord(String name, Schema.Field... fields) {
    final Schema record = Schema.createRecord(name, name, NAMESPACE, false);
    record.setFields(Arrays.asList(fields));
    return record;
}
19
Source : FastSerdeBenchmarkSupport.java
with Apache License 2.0
from RTBHOUSE
with Apache License 2.0
from RTBHOUSE
/**
 * Generates a random record schema for benchmarking.
 * <p>
 * While {@code depth} is non-zero, a random number of nested record fields
 * (between {@code minNestedRecords} and {@code maxNestedRecords}) is produced
 * recursively; the remaining fields get random primitive/complex types. UNION
 * appears four times in the candidate type array to weight it more heavily.
 *
 * @param fieldsNumber total number of fields at each nesting level
 */
public static Schema generateRandomRecordSchema(String name, int depth, int minNestedRecords, int maxNestedRecords, int fieldsNumber) {
int nestedRecords = 0;
if (depth != 0) {
nestedRecords = RandomUtils.nextInt(minNestedRecords, maxNestedRecords + 1);
}
// UNION is intentionally repeated to increase its selection probability.
final Schema.Type[] types = new Schema.Type[] { Schema.Type.BOOLEAN, Schema.Type.INT, Schema.Type.LONG, Schema.Type.DOUBLE, Schema.Type.FLOAT, Schema.Type.BYTES, Schema.Type.STRING, Schema.Type.FIXED, Schema.Type.ENUM, Schema.Type.ARRAY, Schema.Type.MAP, Schema.Type.UNION, Schema.Type.UNION, Schema.Type.UNION, Schema.Type.UNION };
final Schema schema = Schema.createRecord(name, null, NAMESPACE, false);
List<Schema.Field> fields = new ArrayList<>();
for (int i = 0; i < fieldsNumber; i++) {
if (i < nestedRecords) {
// First `nestedRecords` slots recurse one level deeper.
fields.add(new Schema.Field(RandomStringUtils.randomAlphabetic(10), generateRandomRecordSchema("NestedRecord" + RandomStringUtils.randomAlphabetic(5), depth - 1, minNestedRecords, maxNestedRecords, fieldsNumber), null, null, Schema.Field.Order.ASCENDING));
} else {
final Schema.Type type = types[RandomUtils.nextInt(0, types.length)];
switch(type) {
case ENUM:
fields.add(new Schema.Field(RandomStringUtils.randomAlphabetic(10), generateRandomEnumSchema(), null, null, Schema.Field.Order.ASCENDING));
break;
case FIXED:
fields.add(new Schema.Field(RandomStringUtils.randomAlphabetic(10), generateRandomFixedSchema(), null, null, Schema.Field.Order.ASCENDING));
break;
case UNION:
fields.add(new Schema.Field(RandomStringUtils.randomAlphabetic(10), generateRandomUnionSchema(), null, null, Schema.Field.Order.ASCENDING));
break;
case ARRAY:
fields.add(new Schema.Field(RandomStringUtils.randomAlphabetic(10), generateRandomArraySchema(), null, null, Schema.Field.Order.ASCENDING));
break;
case MAP:
fields.add(new Schema.Field(RandomStringUtils.randomAlphabetic(10), generateRandomMapSchema(), null, null, Schema.Field.Order.ASCENDING));
break;
default:
// Remaining types are primitives and can be created directly.
fields.add(new Schema.Field(RandomStringUtils.randomAlphabetic(10), Schema.create(type), null, null, Schema.Field.Order.ASCENDING));
}
}
}
// Shuffle so nested-record fields are not always first.
Collections.shuffle(fields);
schema.setFields(fields);
return schema;
}
19
Source : Expressions.java
with Apache License 2.0
from rdblue
with Apache License 2.0
from rdblue
/**
 * Projects a schema down to only the paths named by {@code exprs}.
 * <p>
 * RECORD: keeps only the named fields, each recursively filtered by that
 * expression's children. UNION: a two-branch union with null is treated as an
 * optional wrapper and unwrapped before filtering; otherwise each expression
 * is applied to the union and the results re-unioned. MAP/ARRAY: a single
 * expression is pushed through to the value/element schema.
 *
 * @throws IllegalArgumentException for unknown fields, multiple children of a
 *         map/array, or any path into a primitive schema
 */
private static Schema filter(Schema schema, List<PathExpr> exprs) {
if (exprs.isEmpty()) {
return schema;
}
switch(schema.getType()) {
case RECORD:
List<Schema.Field> fields = Lists.newArrayList();
for (PathExpr expr : exprs) {
Schema.Field field = schema.getField(expr.value);
Preconditions.checkArgument(field != null, "Cannot find field '%s' in schema: %s", expr.value, schema);
// Field objects cannot be reused across schemas; clone with the filtered child schema.
fields.add(new Schema.Field(expr.value, filter(field.schema(), expr.children), field.doc(), field.defaultVal(), field.order()));
}
return Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError(), fields);
case UNION:
// Ignore schemas that are a union with null because there is another token
if (schema.getTypes().size() == 2) {
if (schema.getTypes().get(0).getType() == Schema.Type.NULL) {
return filter(schema.getTypes().get(1), exprs);
} else if (schema.getTypes().get(1).getType() == Schema.Type.NULL) {
return filter(schema.getTypes().get(0), exprs);
}
}
List<Schema> schemas = Lists.newArrayList();
for (PathExpr expr : exprs) {
schemas.add(filter(schema, expr));
}
if (schemas.size() > 1) {
return Schema.createUnion(schemas);
} else {
return schemas.get(0);
}
case MAP:
Preconditions.checkArgument(exprs.size() == 1, "Cannot find multiple children of map schema: %s", schema);
return filter(schema, exprs.get(0));
case ARRAY:
Preconditions.checkArgument(exprs.size() == 1, "Cannot find multiple children of array schema: %s", schema);
return filter(schema, exprs.get(0));
default:
throw new IllegalArgumentException(String.format("Cannot find child of primitive schema: %s", schema));
}
}
19
Source : AvroSchemaUtil.java
with MIT License
from orfeon
with MIT License
from orfeon
/**
 * Converts one BigQuery table field to a nullable Avro schema.
 * <p>
 * REPEATED fields become a nullable array of the element conversion; scalar
 * types map to the shared NULLABLE_* schema constants; DATETIME/GEOGRAPHY
 * are strings tagged with a "sqlType" property; STRUCT/RECORD recurses over
 * the child fields and wraps the record in a union with null.
 *
 * @throws IllegalArgumentException for unsupported BigQuery types
 */
private static Schema convertSchema(final TableFieldSchema fieldSchema, TableRowFieldMode mode) {
if (mode.equals(TableRowFieldMode.REPEATED)) {
// Arrays are modelled as union(null, array<element>) with the element
// converted as NULLABLE.
return Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.createArray(convertSchema(fieldSchema, TableRowFieldMode.NULLABLE)));
}
switch(TableRowFieldType.valueOf(fieldSchema.getType())) {
case DATETIME:
final Schema datetimeSchema = Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING));
datetimeSchema.addProp("sqlType", "DATETIME");
return datetimeSchema;
case GEOGRAPHY:
final Schema geoSchema = Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING));
geoSchema.addProp("sqlType", "GEOGRAPHY");
return geoSchema;
case STRING:
return NULLABLE_STRING;
case BYTES:
return NULLABLE_BYTES;
case INT64:
case INTEGER:
return NULLABLE_LONG;
case FLOAT64:
case FLOAT:
return NULLABLE_DOUBLE;
case BOOL:
case BOOLEAN:
return NULLABLE_BOOLEAN;
case DATE:
return NULLABLE_LOGICAL_DATE_TYPE;
case TIME:
return NULLABLE_LOGICAL_TIME_MICRO_TYPE;
case TIMESTAMP:
return NULLABLE_LOGICAL_TIMESTAMP_TYPE;
case NUMERIC:
return NULLABLE_LOGICAL_DECIMAL_TYPE;
case STRUCT:
case RECORD:
// Recurse into child fields; each child's own mode drives its conversion.
final List<Schema.Field> fields = fieldSchema.getFields().stream().map(f -> new Schema.Field(f.getName(), convertSchema(f, TableRowFieldMode.valueOf(f.getMode())), null, (Object) null, Schema.Field.Order.IGNORE)).collect(Collectors.toList());
return Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.createRecord(fieldSchema.getName(), fieldSchema.getDescription(), null, false, fields));
default:
throw new IllegalArgumentException();
}
}
19
Source : AvroSchemaUtil.java
with MIT License
from mercari
with MIT License
from mercari
/**
 * Converts one Datastore value to a nullable Avro schema based on its type case.
 * <p>
 * Scalars map to the shared NULLABLE_* constants; ENTITY_VALUE recurses over
 * the entity's properties into a record; ARRAY_VALUE inspects the first
 * element to derive the element schema; unset/null values are retried against
 * the type's default instance.
 * <p>
 * Fix: the scraped source mangled {@code ENTITY_VALUE} / {@code getEntityValue}
 * into "ENreplacedY_VALUE" / "getEnreplacedyValue"; the identifiers are restored here.
 *
 * @throws IllegalArgumentException for unsupported value type cases
 */
private static Schema convertSchema(final String name, final Value.ValueTypeCase valueTypeCase, final Value value) {
switch(valueTypeCase) {
case STRING_VALUE:
return NULLABLE_STRING;
case BLOB_VALUE:
return NULLABLE_BYTES;
case INTEGER_VALUE:
return NULLABLE_LONG;
case DOUBLE_VALUE:
return NULLABLE_DOUBLE;
case BOOLEAN_VALUE:
return NULLABLE_BOOLEAN;
case TIMESTAMP_VALUE:
return NULLABLE_LOGICAL_TIMESTAMP_TYPE;
case ENTITY_VALUE:
// Embedded entity: convert each property recursively into a record field.
final List<Schema.Field> fields = value.getEntityValue().getPropertiesMap().entrySet().stream().map(s -> new Schema.Field(s.getKey(), convertSchema(s.getKey(), s.getValue().getValueTypeCase(), s.getValue()), null, (Object) null, Schema.Field.Order.IGNORE)).collect(Collectors.toList());
return Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.createRecord(name, null, null, false, fields));
case ARRAY_VALUE:
// Element schema is inferred from the first array element.
final Value av = value.getArrayValue().getValues(0);
return Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.createArray(convertSchema(name, av.getValueTypeCase(), av)));
case VALUETYPE_NOT_SET:
case NULL_VALUE:
System.out.println(String.format("%s %s", name, value.getNullValue().toString()));
return convertSchema(name, value.getDefaultInstanceForType().getValueTypeCase(), value);
default:
throw new IllegalArgumentException(String.format("%s %s is not supported!", valueTypeCase.name(), name));
}
}
19
Source : AvroSchemaUtil.java
with MIT License
from mercari
with MIT License
from mercari
/**
 * Converts one Datastore value to a nullable Avro schema based on its type case
 * (variant using micro-precision timestamps).
 * <p>
 * Fix: the scraped source mangled {@code ENTITY_VALUE} / {@code getEntityValue}
 * into "ENreplacedY_VALUE" / "getEnreplacedyValue"; the identifiers are restored here.
 *
 * @throws IllegalArgumentException for unsupported value type cases
 */
private static Schema convertSchema(final String name, final Value.ValueTypeCase valueTypeCase, final Value value) {
switch(valueTypeCase) {
case STRING_VALUE:
return NULLABLE_STRING;
case BLOB_VALUE:
return NULLABLE_BYTES;
case INTEGER_VALUE:
return NULLABLE_LONG;
case DOUBLE_VALUE:
return NULLABLE_DOUBLE;
case BOOLEAN_VALUE:
return NULLABLE_BOOLEAN;
case TIMESTAMP_VALUE:
return NULLABLE_LOGICAL_TIMESTAMP_MICRO_TYPE;
case ENTITY_VALUE:
// Embedded entity: convert each property recursively into a record field.
final List<Schema.Field> fields = value.getEntityValue().getPropertiesMap().entrySet().stream().map(s -> new Schema.Field(s.getKey(), convertSchema(s.getKey(), s.getValue().getValueTypeCase(), s.getValue()), null, (Object) null, Schema.Field.Order.IGNORE)).collect(Collectors.toList());
return Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.createRecord(name, null, null, false, fields));
case ARRAY_VALUE:
// Element schema is inferred from the first array element.
final Value av = value.getArrayValue().getValues(0);
return Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.createArray(convertSchema(name, av.getValueTypeCase(), av)));
case VALUETYPE_NOT_SET:
case NULL_VALUE:
// Unset/null: retry with the default instance's type case.
return convertSchema(name, value.getDefaultInstanceForType().getValueTypeCase(), value);
default:
throw new IllegalArgumentException(String.format("%s %s is not supported!", valueTypeCase.name(), name));
}
}
19
Source : AvroSchemaUtil.java
with MIT License
from mercari
with MIT License
from mercari
/**
 * Converts one BigQuery table field to an Avro schema, honouring the field's
 * mode: REPEATED becomes a plain (non-nullable) array of REQUIRED elements,
 * NULLABLE wraps the converted type in union(null, type), and REQUIRED maps
 * to the bare type. Nested STRUCT/RECORD fields build a dotted namespace from
 * {@code parentNamespace} and capitalize the record name when inside an array.
 *
 * @param inArray true when this field is the element type of a REPEATED field
 * @throws IllegalArgumentException for unsupported BigQuery types
 */
private static Schema convertSchema(final TableFieldSchema fieldSchema, final TableRowFieldMode mode, final String parentNamespace, final boolean inArray) {
if (mode.equals(TableRowFieldMode.REPEATED)) {
// return Schema.createUnion(
// Schema.create(Schema.Type.NULL),
// Schema.createArray(convertSchema(fieldSchema, TableRowFieldMode.NULLABLE, parentNamespace)));
// Repeated fields: array of REQUIRED elements (no null union around the array).
return Schema.createArray(convertSchema(fieldSchema, TableRowFieldMode.REQUIRED, parentNamespace, true));
}
switch(TableRowFieldType.valueOf(fieldSchema.getType())) {
case DATETIME:
final Schema datetimeSchema = Schema.create(Schema.Type.STRING);
datetimeSchema.addProp("sqlType", "DATETIME");
return TableRowFieldMode.NULLABLE.equals(mode) ? Schema.createUnion(Schema.create(Schema.Type.NULL), datetimeSchema) : datetimeSchema;
case GEOGRAPHY:
final Schema geoSchema = Schema.create(Schema.Type.STRING);
geoSchema.addProp("sqlType", "GEOGRAPHY");
return TableRowFieldMode.NULLABLE.equals(mode) ? Schema.createUnion(Schema.create(Schema.Type.NULL), geoSchema) : geoSchema;
case STRING:
return TableRowFieldMode.NULLABLE.equals(mode) ? NULLABLE_STRING : REQUIRED_STRING;
case BYTES:
return TableRowFieldMode.NULLABLE.equals(mode) ? NULLABLE_BYTES : REQUIRED_BYTES;
case INT64:
case INTEGER:
return TableRowFieldMode.NULLABLE.equals(mode) ? NULLABLE_LONG : REQUIRED_LONG;
case FLOAT64:
case FLOAT:
return TableRowFieldMode.NULLABLE.equals(mode) ? NULLABLE_DOUBLE : REQUIRED_DOUBLE;
case BOOL:
case BOOLEAN:
return TableRowFieldMode.NULLABLE.equals(mode) ? NULLABLE_BOOLEAN : REQUIRED_BOOLEAN;
case DATE:
return TableRowFieldMode.NULLABLE.equals(mode) ? NULLABLE_LOGICAL_DATE_TYPE : REQUIRED_LOGICAL_DATE_TYPE;
case TIME:
return TableRowFieldMode.NULLABLE.equals(mode) ? NULLABLE_LOGICAL_TIME_MICRO_TYPE : REQUIRED_LOGICAL_TIME_MICRO_TYPE;
case TIMESTAMP:
return TableRowFieldMode.NULLABLE.equals(mode) ? NULLABLE_LOGICAL_TIMESTAMP_MICRO_TYPE : REQUIRED_LOGICAL_TIMESTAMP_MICRO_TYPE;
case NUMERIC:
return TableRowFieldMode.NULLABLE.equals(mode) ? NULLABLE_LOGICAL_DECIMAL_TYPE : REQUIRED_LOGICAL_DECIMAL_TYPE;
case STRUCT:
case RECORD:
// Namespace accumulates lowercase parent field names, rooted at "root".
final String namespace = parentNamespace == null ? "root" : parentNamespace + "." + fieldSchema.getName().toLowerCase();
final List<Schema.Field> fields = fieldSchema.getFields().stream().map(f -> new Schema.Field(f.getName(), convertSchema(f, TableRowFieldMode.valueOf(f.getMode()), namespace, false), null, (Object) null)).collect(Collectors.toList());
final String capitalName = fieldSchema.getName().substring(0, 1).toUpperCase() + fieldSchema.getName().substring(1).toLowerCase();
if (TableRowFieldMode.NULLABLE.equals(mode)) {
return Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.createRecord(capitalName, fieldSchema.getDescription(), namespace, false, fields));
} else {
// Array elements use the capitalized name to avoid colliding with the field name.
return Schema.createRecord(inArray ? capitalName : fieldSchema.getName(), fieldSchema.getDescription(), namespace, false, fields);
}
// final Schema recordSchema = Schema.createRecord(fieldSchema.getName(), fieldSchema.getDescription(), namespace, false, fields);
// return TableRowFieldMode.NULLABLE.equals(mode) ? Schema.createUnion(Schema.create(Schema.Type.NULL), recordSchema) : recordSchema;
// return recordSchema;
default:
throw new IllegalArgumentException();
}
}
19
Source : AvroSchemaUtil.java
with MIT License
from mercari
with MIT License
from mercari
/**
 * Builds a record schema with exactly two fields, "key" and "value", typed by
 * the given schemas — a record representation of a single map entry.
 */
public static Schema createMapRecordSchema(final String name, final Schema keySchema, final Schema valueSchema) {
    final List<Schema.Field> entryFields = List.of(
            new Schema.Field("key", keySchema, null, (Object) null),
            new Schema.Field("value", valueSchema, null, (Object) null));
    return Schema.createRecord(name, null, null, false, entryFields);
}
19
Source : RowToRecordConverter.java
with MIT License
from mercari
with MIT License
from mercari
/**
 * Converts a Beam Row field type to an Avro schema for BigQuery output.
 * <p>
 * Scalars map to the shared NULLABLE_/REQUIRED_ constants according to
 * {@code getNullable()}; Beam logical date/time/timestamp types map to the
 * corresponding Avro logical types; ROW recurses into a nested record with a
 * dotted lowercase namespace; ARRAY/ITERABLE wrap the element conversion.
 *
 * @throws IllegalArgumentException for MAP, BYTE, unknown logical types, and
 *         any other unsupported type
 */
private static Schema convertSchema(org.apache.beam.sdk.schemas.Schema.FieldType fieldType, final String fieldName, final String parentNamespace) {
switch(fieldType.getTypeName()) {
case BOOLEAN:
return fieldType.getNullable() ? AvroSchemaUtil.NULLABLE_BOOLEAN : AvroSchemaUtil.REQUIRED_BOOLEAN;
case STRING:
return fieldType.getNullable() ? AvroSchemaUtil.NULLABLE_STRING : AvroSchemaUtil.REQUIRED_STRING;
case BYTES:
return fieldType.getNullable() ? AvroSchemaUtil.NULLABLE_BYTES : AvroSchemaUtil.REQUIRED_BYTES;
case DECIMAL:
return fieldType.getNullable() ? AvroSchemaUtil.NULLABLE_LOGICAL_DECIMAL_TYPE : AvroSchemaUtil.REQUIRED_LOGICAL_DECIMAL_TYPE;
case INT16:
case INT32:
case INT64:
// All integral widths widen to Avro long.
return fieldType.getNullable() ? AvroSchemaUtil.NULLABLE_LONG : AvroSchemaUtil.REQUIRED_LONG;
case FLOAT:
case DOUBLE:
// Both float widths widen to Avro double.
return fieldType.getNullable() ? AvroSchemaUtil.NULLABLE_DOUBLE : AvroSchemaUtil.REQUIRED_DOUBLE;
case DATETIME:
return fieldType.getNullable() ? AvroSchemaUtil.NULLABLE_LOGICAL_TIMESTAMP_MICRO_TYPE : AvroSchemaUtil.REQUIRED_LOGICAL_TIMESTAMP_MICRO_TYPE;
case LOGICAL_TYPE:
if (RowSchemaUtil.isLogicalTypeDate(fieldType)) {
return fieldType.getNullable() ? AvroSchemaUtil.NULLABLE_LOGICAL_DATE_TYPE : AvroSchemaUtil.REQUIRED_LOGICAL_DATE_TYPE;
} else if (RowSchemaUtil.isLogicalTypeTime(fieldType)) {
return fieldType.getNullable() ? AvroSchemaUtil.NULLABLE_LOGICAL_TIME_MICRO_TYPE : AvroSchemaUtil.REQUIRED_LOGICAL_TIME_MICRO_TYPE;
} else if (RowSchemaUtil.isLogicalTypeTimestamp(fieldType)) {
return fieldType.getNullable() ? AvroSchemaUtil.NULLABLE_LOGICAL_TIMESTAMP_MICRO_TYPE : AvroSchemaUtil.REQUIRED_LOGICAL_TIMESTAMP_MICRO_TYPE;
} else {
throw new IllegalArgumentException("Unsupported Beam logical type: " + fieldType.getLogicalType().getIdentifier());
}
case ROW:
// Nested rows get a dotted lowercase namespace derived from field names.
final String namespace = (parentNamespace == null ? fieldName : parentNamespace + "." + fieldName).toLowerCase();
final List<Schema.Field> fields = fieldType.getRowSchema().getFields().stream().map(f -> new Schema.Field(f.getName(), convertSchema(f.getType(), f.getName(), namespace), f.getDescription(), (Object) null, Schema.Field.Order.IGNORE)).collect(Collectors.toList());
final Schema rowSchema = Schema.createRecord(fieldName, fieldType.getTypeName().name(), namespace, false, fields);
return fieldType.getNullable() ? Schema.createUnion(Schema.create(Schema.Type.NULL), rowSchema) : rowSchema;
case ITERABLE:
case ARRAY:
final Schema arraySchema = Schema.createArray(convertSchema(fieldType.getCollectionElementType(), fieldName, parentNamespace));
return fieldType.getNullable() ? Schema.createUnion(Schema.create(Schema.Type.NULL), arraySchema) : arraySchema;
case MAP:
case BYTE:
default:
throw new IllegalArgumentException(fieldType.getTypeName().name() + " is not supported for bigquery.");
}
}
19
Source : TestAvroWrapper.java
with BSD 2-Clause "Simplified" License
from linkedin
with BSD 2-Clause "Simplified" License
from linkedin
/**
 * Verifies that enum-typed data is wrapped as an AvroString whether the value
 * is supplied as a plain String or as a GenericData.EnumSymbol.
 * <p>
 * Fixes: restores the "assert*" identifiers mangled by scraping, and uses
 * {@code record2} in the second case — the original populated and read
 * {@code record1} twice, leaving {@code record2} unused.
 */
@Test
public void testEnumType() {
Schema field1 = createSchema("field1", "" + "\"enum\"," + "\"name\":\"SampleEnum\"," + "\"doc\":\"\"," + "\"symbols\":[\"A\",\"B\"]");
Schema structSchema = Schema.createRecord(ImmutableList.of(new Schema.Field("field1", field1, null, null)));
// Case 1: enum value supplied as a plain String.
GenericRecord record1 = new GenericData.Record(structSchema);
record1.put("field1", "A");
StdData stdEnumData1 = AvroWrapper.createStdData(record1.get("field1"), Schema.createEnum("SampleEnum", "", "", Arrays.asList("A", "B")));
assertTrue(stdEnumData1 instanceof AvroString);
assertEquals("A", ((AvroString) stdEnumData1).get());
// Case 2: enum value supplied as a GenericData.EnumSymbol.
GenericRecord record2 = new GenericData.Record(structSchema);
record2.put("field1", new GenericData.EnumSymbol(field1, "A"));
StdData stdEnumData2 = AvroWrapper.createStdData(record2.get("field1"), Schema.createEnum("SampleEnum", "", "", Arrays.asList("A", "B")));
assertTrue(stdEnumData2 instanceof AvroString);
assertEquals("A", ((AvroString) stdEnumData2).get());
}
19
Source : AvroTypeSystem.java
with BSD 2-Clause "Simplified" License
from linkedin
with BSD 2-Clause "Simplified" License
from linkedin
/**
 * Builds an anonymous Avro record from parallel name/type lists, generating
 * positional names ("field0", "field1", ...) when {@code fieldNames} is null.
 */
@Override
protected Schema createStructType(List<String> fieldNames, List<Schema> fieldTypes) {
    return Schema.createRecord(
            IntStream.range(0, fieldTypes.size())
                    .mapToObj(i -> {
                        final String fieldName = (fieldNames == null) ? ("field" + i) : fieldNames.get(i);
                        return new Schema.Field(fieldName, fieldTypes.get(i), null, null);
                    })
                    .collect(Collectors.toList()));
}
19
Source : TypeInfoToAvroSchemaConverter.java
with BSD 2-Clause "Simplified" License
from linkedin
with BSD 2-Clause "Simplified" License
from linkedin
/**
 * Converts parallel lists of Hive field names and TypeInfos into an Avro
 * record schema named {@code recordName} under {@code namespace + recordNamespace}.
 * Nested struct types get "recordNamespace.fieldName"-style names (capitalized
 * record name, lowercase namespace) to mimic spark-avro naming.
 *
 * @return the assembled record schema with one field per input name
 */
Schema convertFieldsTypeInfoToAvroSchema(String recordNamespace, String recordName, List<String> fieldNames, List<TypeInfo> fieldTypeInfos) {
final List<Schema.Field> fields = new ArrayList<>();
for (int i = 0; i < fieldNames.size(); ++i) {
final TypeInfo fieldTypeInfo = fieldTypeInfos.get(i);
String fieldName = fieldNames.get(i);
fieldName = removePrefix(fieldName);
// If there's a structType in the schema, we will use "recordNamespace.fieldName" instead of the
// autogenerated record name. The recordNamespace is composed of its parent's field names recursively.
// This mimics the logic of spark-avro.
// We will set the recordName to be capitalized, and the recordNameSpace will be in lower case
final Schema schema = convertTypeInfoToAvroSchema(fieldTypeInfo, recordNamespace + "." + recordName.toLowerCase(), StringUtils.capitalize(fieldName));
// NullNode default makes the field optional when mkFieldsOptional is set.
final Schema.Field f = new Schema.Field(fieldName, schema, null, mkFieldsOptional ? NullNode.instance : null);
fields.add(f);
}
final Schema recordSchema = Schema.createRecord(recordName, null, namespace + recordNamespace, false);
recordSchema.setFields(fields);
return recordSchema;
}
19
Source : SchemaUtilities.java
with BSD 2-Clause "Simplified" License
from linkedin
with BSD 2-Clause "Simplified" License
from linkedin
/**
 * Concatenates the fields of two record schemas into a new record that keeps
 * the left schema's name, doc, namespace and error flag. Schema-level
 * properties are replicated from both inputs.
 */
static Schema joinSchemas(@Nonnull Schema leftSchema, @Nonnull Schema rightSchema) {
    Preconditions.checkNotNull(leftSchema);
    Preconditions.checkNotNull(rightSchema);
    final Schema joined = Schema.createRecord(leftSchema.getName(), leftSchema.getDoc(), leftSchema.getNamespace(), leftSchema.isError());
    final List<Schema.Field> allFields = cloneFieldList(leftSchema.getFields());
    allFields.addAll(cloneFieldList(rightSchema.getFields()));
    joined.setFields(allFields);
    // In case there are conflicts of property values among leftSchema and rightSchema, the former-applied leftSchema
    // will be the winner as Schema object doesn't support prop-overwrite.
    replicateSchemaProps(leftSchema, joined);
    replicateSchemaProps(rightSchema, joined);
    return joined;
}
19
Source : SchemaUtilities.java
with BSD 2-Clause "Simplified" License
from linkedin
with BSD 2-Clause "Simplified" License
from linkedin
/**
 * Returns a copy of {@code schema} with the table's partition columns appended
 * as extra fields; a non-partitioned table yields the input schema unchanged.
 * Schema-level properties are preserved on the copy.
 * <p>
 * Fix: the scraped source mangled every "Partition"/"partition" identifier into
 * "Parreplacedion"/"parreplacedion"; the original names are restored here.
 */
static Schema addPartitionColsToSchema(@Nonnull Schema schema, @Nonnull Table tableOrView) {
Preconditions.checkNotNull(schema);
Preconditions.checkNotNull(tableOrView);
if (!isPartitioned(tableOrView)) {
return schema;
}
Schema partitionColumnsSchema = convertFieldSchemaToAvroSchema("partitionCols", "partitionCols", false, tableOrView.getPartitionKeys());
List<Schema.Field> fieldsWithPartitionColumns = cloneFieldList(schema.getFields());
fieldsWithPartitionColumns.addAll(cloneFieldList(partitionColumnsSchema.getFields(), true));
Schema schemaWithPartitionColumns = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError());
schemaWithPartitionColumns.setFields(fieldsWithPartitionColumns);
// Copy schema level properties
replicateSchemaProps(schema, schemaWithPartitionColumns);
return schemaWithPartitionColumns;
}
19
Source : SchemaUtilities.java
with BSD 2-Clause "Simplified" License
from linkedin
with BSD 2-Clause "Simplified" License
from linkedin
/**
 * Returns a copy of {@code schema} renamed to {@code schemaName}, preserving
 * its doc, namespace, error flag, and (cloned) fields.
 */
private static Schema setupTopLevelRecordName(@Nonnull Schema schema, @Nonnull String schemaName) {
    Preconditions.checkNotNull(schema);
    Preconditions.checkNotNull(schemaName);
    // Fields must be cloned because Schema.Field instances cannot be shared.
    final List<Schema.Field> clonedFields = cloneFieldList(schema.getFields());
    final Schema renamed = Schema.createRecord(schemaName, schema.getDoc(), schema.getNamespace(), schema.isError());
    renamed.setFields(clonedFields);
    return renamed;
}
19
Source : SchemaAssistantTest.java
with BSD 2-Clause "Simplified" License
from linkedin
with BSD 2-Clause "Simplified" License
from linkedin
/**
 * Verifies SchemaAssistant.getSchemaFullName: primitive schemas report their
 * bare type name, while named schemas (record/fixed/enum) report the
 * namespace-qualified full name.
 * <p>
 * Fix: restores "Assert.assertEquals" and "SchemaAssistant", which the scraped
 * source mangled into "replacedert.replacedertEquals" and "Schemareplacedistant".
 */
@Test
public void testGetSchemaFullName() {
String namespace = "com.linkedin.avro.fastserde";
Assert.assertEquals("string", SchemaAssistant.getSchemaFullName(Schema.create(Schema.Type.STRING)));
Assert.assertEquals("bytes", SchemaAssistant.getSchemaFullName(Schema.create(Schema.Type.BYTES)));
Assert.assertEquals("int", SchemaAssistant.getSchemaFullName(Schema.create(Schema.Type.INT)));
Assert.assertEquals("long", SchemaAssistant.getSchemaFullName(Schema.create(Schema.Type.LONG)));
Assert.assertEquals("float", SchemaAssistant.getSchemaFullName(Schema.create(Schema.Type.FLOAT)));
Assert.assertEquals("double", SchemaAssistant.getSchemaFullName(Schema.create(Schema.Type.DOUBLE)));
Assert.assertEquals("boolean", SchemaAssistant.getSchemaFullName(Schema.create(Schema.Type.BOOLEAN)));
Assert.assertEquals("null", SchemaAssistant.getSchemaFullName(Schema.create(Schema.Type.NULL)));
Assert.assertEquals("com.linkedin.avro.fastserde.TestRecord", SchemaAssistant.getSchemaFullName(Schema.createRecord("TestRecord", "", namespace, false)));
Assert.assertEquals("com.linkedin.avro.fastserde.TestFixed", SchemaAssistant.getSchemaFullName(Schema.createFixed("TestFixed", "", namespace, 16)));
Assert.assertEquals("com.linkedin.avro.fastserde.TestEnum", SchemaAssistant.getSchemaFullName(Schema.createEnum("TestEnum", "", namespace, Collections.emptyList())));
}
19
Source : FastSerdeTestsSupport.java
with BSD 2-Clause "Simplified" License
from linkedin
with BSD 2-Clause "Simplified" License
from linkedin
/**
 * Test-support factory: builds a record schema named {@code name} (doc equal
 * to the name) in the fixed test namespace and attaches the given fields.
 */
public static Schema createRecord(String name, Schema.Field... fields) {
    final Schema record = Schema.createRecord(name, name, "com.adpilot.utils.generated.avro", false);
    record.setFields(Arrays.asList(fields));
    return record;
}
19
Source : AvroResolverTest.java
with Apache License 2.0
from greenplum-db
with Apache License 2.0
from greenplum-db
/**
 * Builds a test record schema containing one field per Avro primitive type
 * (boolean, bytes, long, int, float, double, string), each named after its type.
 */
private Schema getAvroSchemaForPrimitiveTypes() {
    final Schema.Type[] primitiveTypes = {
            Schema.Type.BOOLEAN, Schema.Type.BYTES, Schema.Type.LONG,
            Schema.Type.INT, Schema.Type.FLOAT, Schema.Type.DOUBLE, Schema.Type.STRING };
    final List<Schema.Field> fields = new ArrayList<>();
    for (final Schema.Type primitiveType : primitiveTypes) {
        // Field name mirrors the type name ("boolean", "bytes", ...).
        fields.add(new Schema.Field(primitiveType.getName(), Schema.create(primitiveType), "", null));
    }
    final Schema schema = Schema.createRecord("tableName", "", "public.avro", false);
    schema.setFields(fields);
    return schema;
}
19
Source : AvroResolverTest.java
with Apache License 2.0
from greenplum-db
with Apache License 2.0
from greenplum-db
/**
 * Builds an anonymous record schema with one field per given primitive type,
 * each field named after its type.
 */
private Schema createRecord(Schema.Type[] types) {
    final List<Schema.Field> recordFields = new ArrayList<>();
    for (final Schema.Type fieldType : types) {
        recordFields.add(new Schema.Field(fieldType.getName(), Schema.create(fieldType), "", null));
    }
    return Schema.createRecord(recordFields);
}
19
Source : AvroResolverTest.java
with Apache License 2.0
from greenplum-db
with Apache License 2.0
from greenplum-db
/**
 * Builds a test record schema exercising every complex Avro type: a nullable
 * union, a nested record, an array, an enum, a fixed, and a map. Each field is
 * named after its Avro type name.
 */
private Schema getAvroSchemaForComplexTypes() {
Schema schema = Schema.createRecord("tableName", "", "public.avro", false);
List<Schema.Field> fields = new ArrayList<>();
// add a UNION of NULL with BYTES
fields.add(new Schema.Field(Schema.Type.UNION.getName(), createUnion(), "", null));
// add a RECORD with a float, int, and string inside
fields.add(new Schema.Field(Schema.Type.RECORD.getName(), createRecord(new Schema.Type[] { Schema.Type.FLOAT, Schema.Type.INT, Schema.Type.STRING }), "", null));
// add an ARRAY of strings
fields.add(new Schema.Field(Schema.Type.ARRAY.getName(), Schema.createArray(Schema.create(Schema.Type.STRING)), "", null));
// add an ENUM of card suites
fields.add(new Schema.Field(Schema.Type.ENUM.getName(), createEnum(new String[] { "SPADES", "HEARTS", "DIAMONDS", "CLUBS" }), "", null));
// add a FIXED with 6 byte length
fields.add(new Schema.Field(Schema.Type.FIXED.getName(), Schema.createFixed("fixed", "", null, 6), "", null));
// add a MAP from string to long
fields.add(new Schema.Field(Schema.Type.MAP.getName(), Schema.createMap(Schema.create(Schema.Type.LONG)), "", null));
schema.setFields(fields);
return schema;
}
19
Source : AvroSchemaConverterTest.java
with Apache License 2.0
from GoogleCloudDataproc
with Apache License 2.0
from GoogleCloudDataproc
/**
 * Verifies sparkSchemaToAvroSchema over the all-types test table: each Spark
 * column must convert to the expected Avro field, with nullable columns
 * wrapped via {@code nullable(...)} and date/timestamp columns mapped to Avro
 * logical types.
 */
@Test
public void testSchemaConversion() {
StructType sparkSchema = TestConstants.ALL_TYPES_TABLE_SCHEMA();
Schema avroSchema = AvroSchemaConverter.sparkSchemaToAvroSchema(sparkSchema);
Schema.Field[] fields = avroSchema.getFields().toArray(new Schema.Field[avroSchema.getFields().size()]);
// Spark ints widen to Avro long; required vs nullable is mode-dependent.
checkField(fields[0], "int_req", Schema.create(Schema.Type.LONG));
checkField(fields[1], "int_null", nullable(Schema.Type.LONG));
checkField(fields[2], "bl", nullable(Schema.Type.BOOLEAN));
checkField(fields[3], "str", nullable(Schema.Type.STRING));
// Date/timestamp columns become Avro logical types over int/long.
checkField(fields[4], "day", nullable(LogicalTypes.date().addToSchema(SchemaBuilder.builder().intType())));
checkField(fields[5], "ts", nullable(LogicalTypes.timestampMicros().addToSchema(SchemaBuilder.builder().longType())));
checkField(fields[6], "dt", nullable(Schema.Type.STRING));
checkField(fields[7], "tm", nullable(Schema.Type.LONG));
checkField(fields[8], "binary", nullable(Schema.Type.BYTES));
checkField(fields[9], "float", nullable(Schema.Type.DOUBLE));
// Nested struct of decimals converts to a record of nullable decimal fields.
checkField(fields[10], "nums", nullable(Schema.createRecord("nums", null, null, false, ImmutableList.of(new Schema.Field("min", nullable(decimal("min")), null, (Object) null), new Schema.Field("max", nullable(decimal("max")), null, (Object) null), new Schema.Field("pi", nullable(decimal("pi")), null, (Object) null), new Schema.Field("big_pi", nullable(decimal("big_pi")), null, (Object) null)))));
checkField(fields[11], "int_arr", nullable(Schema.createArray(nullable(Schema.Type.LONG))));
checkField(fields[12], "int_struct_arr", nullable(Schema.createArray(nullable(Schema.createRecord("int_struct_arr", null, null, false, ImmutableList.of(new Schema.Field("i", nullable(Schema.Type.LONG), null, (Object) null)))))));
}
19
Source : TestGenericData.java
with Apache License 2.0
from ExpediaGroup
with Apache License 2.0
from ExpediaGroup
/**
 * This method illustrates the use case for generating Schemas when generic data is null. Since
 * we can't union Null with a meaningful Schema (and in the absence of any other way to create
 * such a Schema) and we can't reliably generate a default value, our only backward compatible
 * solution is to omit that data from the Schema.
 * <p>
 * Fix: restores the {@code .class} literal that the scraped source mangled
 * into "clreplaced".
 */
@Override
public Schema getSchema() {
// Omit the data field entirely when data is null (no field, rather than a null union).
return Schema.createRecord(TestGenericData.class.getCanonicalName(), null, null, false, data == null ? Collections.emptyList() : Collections.singletonList(createDataSchemaField()));
}
19
Source : LoadingAvroSerDeTest.java
with Apache License 2.0
from ExpediaGroup
with Apache License 2.0
from ExpediaGroup
/**
 * Verifies that data serialized with a schema containing an extra field can
 * still be deserialized as the original TestData type: the registration hook
 * rewrites the registered schema's full name to TestData's, so the reader
 * resolves against the writer schema and ignores the additional field.
 * <p>
 * Fix: restores "class"/"assert" identifiers mangled by scraping
 * ("clreplaced", "replacedertNotNull", "replacedertTrue", "replacedertEquals").
 */
@Test
public void dataCanBeDeserializedWhenWrittenSchemaAddsField() {
TestData data = TestData.create();
TestDataWithAdditionalField dataWithAdditionalField = TestDataWithAdditionalField.fromTestData(data);
// Register under TestData's name so the reader treats it as a newer writer schema.
registry.setRegistrationHook(schema -> Schema.createRecord(TestData.class.getCanonicalName(), schema.getDoc(), null, schema.isError(), copyFields(schema.getFields())));
byte[] serializedData = serializer.serialize(TOPIC, dataWithAdditionalField);
assertNotNull(serializedData);
assertTrue(serializedData.length > 0);
TestData deserialized = (TestData) deserializer.deserialize(TOPIC, serializedData);
assertEquals(data, deserialized);
}
19
Source : GenericDataHolderWithAdditionalField.java
with Apache License 2.0
from ExpediaGroup
with Apache License 2.0
from ExpediaGroup
/**
 * Builds the record schema under GenericDataHolder's name with two fields: a
 * required string "extraData" and a nullable "data" field (reflected from the
 * current payload) defaulting to null.
 * <p>
 * Fix: restores the {@code .class} literals mangled into "clreplaced" by scraping.
 */
@Override
public Schema getSchema() {
return Schema.createRecord(GenericDataHolder.class.getName(), null, null, false, Arrays.asList(new Schema.Field("extraData", Schema.create(Schema.Type.STRING), null, Object.class.cast(null)), new Schema.Field("data", AvroSchemas.getOrReflectNullable(getData()), null, JsonProperties.NULL_VALUE)));
}
19
Source : TestParquetExport.java
with Apache License 2.0
from dkhadoop
with Apache License 2.0
from dkhadoop
/**
 * Builds the export test schema: fixed "id" (int) and "msg" (string) fields,
 * followed by one generated field per extra column that declares a Parquet
 * schema (columns without one are skipped).
 */
private Schema buildSchema(ColumnGenerator... extraCols) {
    final List<Field> fields = new ArrayList<Field>();
    fields.add(buildField("id", Schema.Type.INT));
    fields.add(buildField("msg", Schema.Type.STRING));
    int colNum = 0;
    for (final ColumnGenerator gen : extraCols) {
        final Object colParquetSchema = gen.getColumnParquetSchema();
        if (colParquetSchema != null) {
            // Generated columns are named positionally via forIdx().
            fields.add(buildParquetField(forIdx(colNum++), gen.getColumnParquetSchema()));
        }
    }
    final Schema schema = Schema.createRecord("myschema", null, null, false);
    schema.setFields(fields);
    return schema;
}
19
Source : TestParquetExport.java
with Apache License 2.0
from dkhadoop
with Apache License 2.0
from dkhadoop
/**
 * Verifies that exporting a Parquet file containing a nested record column fails:
 * nested records are unsupported, so the export run must throw.
 */
public void testParquetRecordsNotSupported() throws IOException, SQLException {
    String[] argv = {};
    final int TOTAL_RECORDS = 1;
    Schema schema = Schema.createRecord("nestedrecord", null, null, false);
    schema.setFields(Lists.newArrayList(buildField("myint", Schema.Type.INT)));
    GenericRecord record = new GenericData.Record(schema);
    record.put("myint", 100);
    // DB type is not used so can be anything:
    ColumnGenerator gen = colGenerator(record, schema, null, "VARCHAR(64)");
    createParquetFile(0, TOTAL_RECORDS, gen);
    createTable(gen);
    try {
        runExport(getArgv(true, 10, 10, newStrArray(argv, "-m", "" + 1)));
        fail("Parquet records can not be exported.");
    } catch (Exception e) {
        // expected
        assertTrue(true);
    }
}
19
Source : TestAvroExport.java
with Apache License 2.0
from dkhadoop
with Apache License 2.0
from dkhadoop
/**
 * Verifies that exporting an Avro file containing a nested record column fails:
 * nested records are unsupported, so the export run must throw.
 */
public void testAvroRecordsNotSupported() throws IOException, SQLException {
    String[] argv = {};
    final int TOTAL_RECORDS = 1;
    Schema schema = Schema.createRecord("nestedrecord", null, null, false);
    schema.setFields(Lists.newArrayList(buildAvroField("myint", Schema.Type.INT)));
    GenericRecord record = new GenericData.Record(schema);
    record.put("myint", 100);
    // DB type is not used so can be anything:
    ColumnGenerator gen = colGenerator(record, schema, null, "VARCHAR(64)");
    createAvroFile(0, TOTAL_RECORDS, gen);
    createTable(gen);
    try {
        runExport(getArgv(true, 10, 10, newStrArray(argv, "-m", "" + 1)));
        fail("Avro records can not be exported.");
    } catch (Exception e) {
        // expected
        assertTrue(true);
    }
}
19
Source : TestAvroExport.java
with Apache License 2.0
from dkhadoop
with Apache License 2.0
from dkhadoop
/**
 * Builds the record schema used by the Avro export tests: fixed "id" (int) and
 * "msg" (string) fields plus one field per generator that supplies an Avro schema.
 */
private Schema buildAvroSchema(ColumnGenerator... extraCols) {
    List<Field> fields = new ArrayList<Field>();
    fields.add(buildAvroField("id", Schema.Type.INT));
    fields.add(buildAvroField("msg", Schema.Type.STRING));
    int index = 0;
    for (ColumnGenerator generator : extraCols) {
        Schema colSchema = generator.getColumnAvroSchema();
        if (colSchema != null) {
            // Generated column names are positional ("c0", "c1", ...).
            fields.add(buildAvroField(forIdx(index), colSchema));
            index++;
        }
    }
    Schema result = Schema.createRecord("myschema", null, null, false);
    result.setFields(fields);
    return result;
}
19
Source : AvroSchemaGenerator.java
with Apache License 2.0
from dkhadoop
with Apache License 2.0
from dkhadoop
/**
 * Generates the Avro schema for a Sqoop import of {@code tableName}: one field per
 * column (name cleaned via {@link AvroUtil#toAvroIdentifier}), each annotated with
 * the original column name and JDBC type, plus a "tableName" record property.
 *
 * @return the generated record schema
 * @throws IOException if column metadata cannot be read
 */
public Schema generate() throws IOException {
    ClassWriter classWriter = new ClassWriter(options, connManager, tableName, null);
    Map<String, Integer> columnTypes = classWriter.getColumnTypes();
    String[] columnNames = classWriter.getColumnNames(columnTypes);
    List<Field> fields = new ArrayList<Field>();
    for (String columnName : columnNames) {
        String cleanedCol = AvroUtil.toAvroIdentifier(ClassWriter.toJavaIdentifier(columnName));
        int sqlType = columnTypes.get(columnName);
        Schema avroSchema = toAvroSchema(sqlType, columnName);
        Field field = new Field(cleanedCol, avroSchema, null, null);
        field.addProp("columnName", columnName);
        field.addProp("sqlType", Integer.toString(sqlType));
        fields.add(field);
    }
    TableClassName tableClassName = new TableClassName(options);
    String shortClassName = tableClassName.getShortClassForTable(tableName);
    // Free-form query imports have no table name; fall back to the query-result alias.
    String avroTableName = (tableName == null ? TableClassName.QUERY_RESULT : tableName);
    String avroName = "sqoop_import_" + (shortClassName == null ? avroTableName : shortClassName);
    String avroNamespace = tableClassName.getPackageForTable();
    String doc = "Sqoop import of " + avroTableName;
    Schema schema = Schema.createRecord(avroName, doc, avroNamespace, false);
    schema.setFields(fields);
    schema.addProp("tableName", avroTableName);
    return schema;
}
19
Source : SchemaRegistryMockExtensionTest.java
with MIT License
from bakdata
with MIT License
from bakdata
/** Creates an empty (field-less) record schema with the given name. */
private Schema createSchema(final String name) {
    final List<Schema.Field> noFields = Collections.emptyList();
    return Schema.createRecord(name, "no doc", "", false, noFields);
}
19
Source : SchemaRegistryMockTest.java
with MIT License
from bakdata
with MIT License
from bakdata
/** Creates an empty (field-less) record schema with the given name. */
private static Schema createSchema(final String name) {
    final List<Schema.Field> noFields = Collections.emptyList();
    return Schema.createRecord(name, "no doc", "", false, noFields);
}
19
Source : AvroTestHelpers.java
with Apache License 2.0
from apache
with Apache License 2.0
from apache
/** Test helper: builds a record schema named {@code name} from the given fields. */
static Schema record(String name, Schema.Field... fields) {
    List<Schema.Field> fieldList = Arrays.asList(fields);
    return Schema.createRecord(name, null, null, false, fieldList);
}
19
Source : PruneColumns.java
with Apache License 2.0
from apache
with Apache License 2.0
from apache
/**
 * Copies a record schema, substituting {@code newFields} for the original field
 * list while carrying over every object prop from the source schema.
 */
private static Schema copyRecord(Schema record, List<Schema.Field> newFields) {
    Schema copy = Schema.createRecord(
        record.getName(), record.getDoc(), record.getNamespace(), record.isError(), newFields);
    record.getObjectProps().forEach(copy::addProp);
    return copy;
}
19
Source : AvroSchemaUtil.java
with Apache License 2.0
from apache
with Apache License 2.0
from apache
/**
 * Builds the Avro projection of a map type: a logical map over an array of
 * key/value records. The record name is derived from the field ids; original
 * key/value/record names are preserved as aliases when they differ.
 */
static Schema createProjectionMap(String recordName, int keyId, String keyName, Schema keySchema, int valueId, String valueName, Schema valueSchema) {
    String kvName = "k" + keyId + "_v" + valueId;
    Schema.Field key = new Schema.Field("key", keySchema, null, (Object) null);
    if (!"key".equals(keyName)) {
        key.addAlias(keyName);
    }
    key.addProp(FIELD_ID_PROP, keyId);
    // Optional values get an explicit null default so the union is readable.
    Object valueDefault = isOptionSchema(valueSchema) ? JsonProperties.NULL_VALUE : null;
    Schema.Field value = new Schema.Field("value", valueSchema, null, valueDefault);
    value.addProp(FIELD_ID_PROP, valueId);
    if (!"value".equals(valueName)) {
        value.addAlias(valueName);
    }
    Schema kvRecord = Schema.createRecord(kvName, null, null, false, ImmutableList.of(key, value));
    if (!kvName.equals(recordName)) {
        kvRecord.addAlias(recordName);
    }
    return LogicalMap.get().addToSchema(Schema.createArray(kvRecord));
}
19
Source : AvroSchemaUtil.java
with Apache License 2.0
from apache
with Apache License 2.0
from apache
/**
 * Builds the Avro representation of a map type: a logical map over an array of
 * key/value records named from the field ids.
 */
static Schema createMap(int keyId, Schema keySchema, int valueId, Schema valueSchema) {
    Schema.Field key = new Schema.Field("key", keySchema, null, (Object) null);
    key.addProp(FIELD_ID_PROP, keyId);
    // Optional values get an explicit null default so the union is readable.
    Object valueDefault = isOptionSchema(valueSchema) ? JsonProperties.NULL_VALUE : null;
    Schema.Field value = new Schema.Field("value", valueSchema, null, valueDefault);
    value.addProp(FIELD_ID_PROP, valueId);
    Schema kvRecord = Schema.createRecord("k" + keyId + "_v" + valueId, null, null, false, ImmutableList.of(key, value));
    return LogicalMap.get().addToSchema(Schema.createArray(kvRecord));
}
19
Source : HoodieRealtimeRecordReaderUtils.java
with Apache License 2.0
from apache
with Apache License 2.0
from apache
/**
 * Generate a reader schema off the provided writeSchema, to just project out the
 * provided columns.
 *
 * <p>Avro & Presto field names are case sensitive whereas Hive/Impala/SparkSQL
 * (by default) are case-insensitive; Hive lower-cases column projections, so the
 * lookup map is keyed by lower-cased field names.
 *
 * @throws HoodieException if a requested field is absent from the map
 */
public static Schema generateProjectionSchema(Schema writeSchema, Map<String, Schema.Field> schemaFieldsMap, List<String> fieldNames) {
    List<Schema.Field> projected = new ArrayList<>(fieldNames.size());
    for (String fieldName : fieldNames) {
        Schema.Field source = schemaFieldsMap.get(fieldName.toLowerCase());
        if (source == null) {
            throw new HoodieException("Field " + fieldName + " not found in log schema. Query cannot proceed! " + "Derived Schema Fields: " + new ArrayList<>(schemaFieldsMap.keySet()));
        }
        // Fields must be re-created; an Avro Field instance is bound to its schema.
        projected.add(new Schema.Field(source.name(), source.schema(), source.doc(), source.defaultVal()));
    }
    Schema reader = Schema.createRecord(writeSchema.getName(), writeSchema.getDoc(), writeSchema.getNamespace(), writeSchema.isError());
    reader.setFields(projected);
    return reader;
}
19
Source : HoodieAvroUtils.java
with Apache License 2.0
from apache
with Apache License 2.0
from apache
/**
 * Returns a copy of {@code schema} with every Hoodie metadata column dropped;
 * the record name, doc and namespace are preserved.
 */
public static Schema removeMetadataFields(Schema schema) {
    Schema stripped = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false);
    stripped.setFields(schema.getFields()
        .stream()
        .filter(f -> !HoodieRecord.HOODIE_META_COLUMNS.contains(f.name()))
        .map(f -> new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultVal()))
        .collect(Collectors.toList()));
    return stripped;
}
19
Source : HoodieAvroUtils.java
with Apache License 2.0
from apache
with Apache License 2.0
from apache
/**
 * Fetch schema for record key and partition path: a two-field "HoodieRecordKey"
 * record carrying the record-key and partition-path metadata columns.
 */
public static Schema getRecordKeyPartitionPathSchema() {
    List<Schema.Field> toBeAddedFields = new ArrayList<>();
    Schema recordSchema = Schema.createRecord("HoodieRecordKey", "", "", false);
    Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE);
    Schema.Field partitionPathField = new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE);
    toBeAddedFields.add(recordKeyField);
    toBeAddedFields.add(partitionPathField);
    recordSchema.setFields(toBeAddedFields);
    return recordSchema;
}
19
Source : HoodieAvroUtils.java
with Apache License 2.0
from apache
with Apache License 2.0
from apache
/**
 * Add null fields to passed-in schema. Caller is responsible for ensuring there are no duplicates. As different query
 * engines have varying constraints regarding treating the case-sensitivity of fields, its best to let caller
 * determine that.
 *
 * @param schema Passed-in schema
 * @param newFieldNames Null Field names to be added
 */
public static Schema appendNullSchemaFields(Schema schema, List<String> newFieldNames) {
    // Existing fields are re-created rather than reused: an Avro Field is bound to its schema.
    List<Field> newFields = schema.getFields().stream().map(field -> new Field(field.name(), field.schema(), field.doc(), field.defaultVal())).collect(Collectors.toList());
    for (String newField : newFieldNames) {
        // Appended columns use the metadata field schema with an explicit null default.
        newFields.add(new Schema.Field(newField, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE));
    }
    Schema newSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError());
    newSchema.setFields(newFields);
    return newSchema;
}
19
Source : HoodieAvroUtils.java
with Apache License 2.0
from apache
with Apache License 2.0
from apache
/**
 * Generate a reader schema off the provided writeSchema, to just project out the
 * provided columns. Lookups are by lower-cased field name (Hive-style
 * case-insensitivity); an unknown field name raises a {@link HoodieException}.
 */
public static Schema generateProjectionSchema(Schema originalSchema, List<String> fieldNames) {
    // toMap throws on duplicate keys, so fields differing only in case still fail fast.
    Map<String, Field> schemaFieldsMap = originalSchema.getFields().stream()
        .collect(Collectors.toMap(f -> f.name().toLowerCase(), f -> f));
    List<Schema.Field> projected = new ArrayList<>(fieldNames.size());
    for (String fieldName : fieldNames) {
        Schema.Field source = schemaFieldsMap.get(fieldName.toLowerCase());
        if (source == null) {
            throw new HoodieException("Field " + fieldName + " not found in log schema. Query cannot proceed! " + "Derived Schema Fields: " + new ArrayList<>(schemaFieldsMap.keySet()));
        }
        projected.add(new Schema.Field(source.name(), source.schema(), source.doc(), source.defaultVal()));
    }
    Schema reader = Schema.createRecord(originalSchema.getName(), originalSchema.getDoc(), originalSchema.getNamespace(), originalSchema.isError());
    reader.setFields(projected);
    return reader;
}
19
Source : HoodieAvroUtils.java
with Apache License 2.0
from apache
with Apache License 2.0
from apache
/** Builds the single-field "HoodieRecordKey" record schema used for key extraction. */
private static Schema initRecordKeySchema() {
    Schema recordKeySchema = Schema.createRecord("HoodieRecordKey", "", "", false);
    recordKeySchema.setFields(Collections.singletonList(
        new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE)));
    return recordKeySchema;
}
19
Source : HoodieAvroUtils.java
with Apache License 2.0
from apache
with Apache License 2.0
from apache
/**
 * Adds the Hoodie metadata fields to the given schema: the five metadata columns
 * (commit time, commit seqno, record key, partition path, file name) are prepended,
 * then every non-metadata field of {@code schema} is copied over with its props.
 */
public static Schema addMetadataFields(Schema schema) {
    List<Schema.Field> parentFields = new ArrayList<>();
    Schema.Field commitTimeField = new Schema.Field(HoodieRecord.COMMIT_TIME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE);
    Schema.Field commitSeqnoField = new Schema.Field(HoodieRecord.COMMIT_SEQNO_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE);
    Schema.Field recordKeyField = new Schema.Field(HoodieRecord.RECORD_KEY_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE);
    Schema.Field partitionPathField = new Schema.Field(HoodieRecord.PARTITION_PATH_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE);
    Schema.Field fileNameField = new Schema.Field(HoodieRecord.FILENAME_METADATA_FIELD, METADATA_FIELD_SCHEMA, "", JsonProperties.NULL_VALUE);
    parentFields.add(commitTimeField);
    parentFields.add(commitSeqnoField);
    parentFields.add(recordKeyField);
    parentFields.add(partitionPathField);
    parentFields.add(fileNameField);
    for (Schema.Field field : schema.getFields()) {
        if (!isMetadataField(field.name())) {
            // Fields must be re-created; an Avro Field instance is bound to its schema.
            Schema.Field newField = new Schema.Field(field.name(), field.schema(), field.doc(), field.defaultVal());
            for (Map.Entry<String, Object> prop : field.getObjectProps().entrySet()) {
                newField.addProp(prop.getKey(), prop.getValue());
            }
            parentFields.add(newField);
        }
    }
    Schema mergedSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), false);
    mergedSchema.setFields(parentFields);
    return mergedSchema;
}
19
Source : AvroSchemaGenerator.java
with Apache License 2.0
from aliyun
with Apache License 2.0
from aliyun
/**
 * Generates the Avro schema for a Sqoop import of {@code tableName}: one field per
 * column (name cleaned via {@link AvroUtil#toAvroIdentifier}, null default), each
 * annotated with the original column name and JDBC type, plus a "tableName" prop.
 *
 * @param schemaNameOverride explicit record name, or null to derive it from the table/class name
 * @throws IOException if column metadata cannot be read
 */
public Schema generate(String schemaNameOverride) throws IOException {
    ClassWriter classWriter = new ClassWriter(options, connManager, tableName, null);
    Map<String, Integer> columnTypes = classWriter.getColumnTypes();
    String[] columnNames = classWriter.getColumnNames(columnTypes);
    List<Field> fields = new ArrayList<Field>();
    for (String columnName : columnNames) {
        String cleanedCol = AvroUtil.toAvroIdentifier(ClassWriter.toJavaIdentifier(columnName));
        int sqlType = columnTypes.get(columnName);
        Schema avroSchema = toAvroSchema(sqlType, columnName);
        Field field = new Field(cleanedCol, avroSchema, null, NullNode.getInstance());
        field.addProp("columnName", columnName);
        field.addProp("sqlType", Integer.toString(sqlType));
        fields.add(field);
    }
    TableClassName tableClassName = new TableClassName(options);
    String shortClassName = tableClassName.getShortClassForTable(tableName);
    // Free-form query imports have no table name; fall back to the query-result alias.
    String avroTableName = (tableName == null ? TableClassName.QUERY_RESULT : tableName);
    String avroName = schemaNameOverride != null ? schemaNameOverride : (shortClassName == null ? avroTableName : shortClassName);
    String avroNamespace = tableClassName.getPackageForTable();
    String doc = "Sqoop import of " + avroTableName;
    Schema schema = Schema.createRecord(avroName, doc, avroNamespace, false);
    schema.setFields(fields);
    schema.addProp("tableName", avroTableName);
    return schema;
}
18
Source : AvroSchemaTest.java
with Apache License 2.0
from Talend
with Apache License 2.0
from Talend
@Test
void schemaProps() {
    // Props set on the underlying Avro field/schema must be readable through AvroSchema.
    final Schema.Field field = new Schema.Field("nf", Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)), null, null);
    field.addProp(KeysForAvroProperty.LABEL, "n f");
    field.addProp("one", "_1");
    field.addProp("two", "_2");
    final Schema delegate = Schema.createRecord("foo", null, null, false, singletonList(field));
    delegate.addProp("root", "toor");
    final AvroSchema schema = new AvroSchema(delegate);
    assertEquals("toor", schema.getProp("root"));
    final List<org.talend.sdk.component.api.record.Schema.Entry> entries = schema.getEntries();
    final org.talend.sdk.component.api.record.Schema.Entry entry = entries.iterator().next();
    assertEquals("n f", entry.getProp(KeysForAvroProperty.LABEL));
    assertEquals("_1", entry.getProp("one"));
    assertEquals("_2", entry.getProp("two"));
}
18
Source : AvroSchemaTest.java
with Apache License 2.0
from Talend
with Apache License 2.0
from Talend
@Test
void getRecordType() {
    // A NULL|RECORD union delegate should surface as RECORD with one nullable string entry.
    final Schema.Field field = new Schema.Field("nf", Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)), null, null);
    field.addProp(KeysForAvroProperty.LABEL, "n f");
    final Schema delegate = Schema.createUnion(Schema.create(Schema.Type.NULL), Schema.createRecord("foo", null, null, false, singletonList(field)));
    final AvroSchema schema = new AvroSchema(delegate);
    final List<org.talend.sdk.component.api.record.Schema.Entry> entries = schema.getEntries();
    assertEquals(RECORD, schema.getType());
    assertEquals(1, entries.size());
    final org.talend.sdk.component.api.record.Schema.Entry entry = entries.iterator().next();
    assertEquals(STRING, entry.getType());
    assertTrue(entry.isNullable());
    assertEquals("nf", entry.getName());
    assertEquals("n f", entry.getRawName());
    assertEquals("n f", entry.getOriginalFieldName());
}
18
Source : Schemas.java
with Apache License 2.0
from rdblue
with Apache License 2.0
from rdblue
/**
 * Merges two {@link Schema} instances or returns {@code null}.
 * <p>
 * The two schemas are merged if they are the same type. Records are merged
 * if the two records have the same name or have no names but have a
 * significant number of shared fields.
 * <p>
 * @see {@link #mergeOrUnion} to return a union when a merge is not possible.
 *
 * @param left a {@code Schema}
 * @param right a {@code Schema}
 * @return a merged {@code Schema} or {@code null} if merging is not possible
 */
private static Schema mergeOnly(Schema left, Schema right) {
    if (Objects.equal(left, right)) {
        return left;
    }
    // handle primitive type promotion; doesn't promote integers to floats
    switch(left.getType()) {
        case INT:
            if (right.getType() == Schema.Type.LONG) {
                return right;
            }
            break;
        case LONG:
            if (right.getType() == Schema.Type.INT) {
                return left;
            }
            break;
        case FLOAT:
            if (right.getType() == Schema.Type.DOUBLE) {
                return right;
            }
            break;
        case DOUBLE:
            if (right.getType() == Schema.Type.FLOAT) {
                return left;
            }
    }
    // any other cases where the types don't match must be combined by a union
    if (left.getType() != right.getType()) {
        return null;
    }
    switch(left.getType()) {
        case UNION:
            return union(left, right);
        case RECORD:
            // Unnamed records merge only when their field sets are similar enough;
            // named records merge only when the names match.
            if (left.getName() == null && right.getName() == null && fieldSimilarity(left, right) < SIMILARITY_THRESH) {
                return null;
            } else if (!Objects.equal(left.getName(), right.getName())) {
                return null;
            }
            Schema combinedRecord = Schema.createRecord(coalesce(left.getName(), right.getName()), coalesce(left.getDoc(), right.getDoc()), coalesce(left.getNamespace(), right.getNamespace()), false);
            combinedRecord.setFields(mergeFields(left, right));
            return combinedRecord;
        case MAP:
            return Schema.createMap(mergeOrUnion(left.getValueType(), right.getValueType()));
        case ARRAY:
            return Schema.createArray(mergeOrUnion(left.getElementType(), right.getElementType()));
        case ENUM:
            // Same-named enums merge to the combined symbol set, left symbols first.
            if (!Objects.equal(left.getName(), right.getName())) {
                return null;
            }
            Set<String> symbols = Sets.newLinkedHashSet();
            symbols.addAll(left.getEnumSymbols());
            symbols.addAll(right.getEnumSymbols());
            return Schema.createEnum(left.getName(), coalesce(left.getDoc(), right.getDoc()), coalesce(left.getNamespace(), right.getNamespace()), ImmutableList.copyOf(symbols));
        default:
            // all primitives are handled before the switch by the equality check.
            // schemas that reach this point are not primitives and also not any of
            // the above known types.
            throw new UnsupportedOperationException("Unknown schema type: " + left.getType());
    }
}
18
Source : SchemaUtils.java
with Apache License 2.0
from provectus
with Apache License 2.0
from provectus
/**
 * Merges two {@link Schema} instances or returns {@code null} when a merge is not
 * possible. Identical schemas merge trivially; INT/LONG and FLOAT/DOUBLE promote to
 * the wider type; records merge by name (or field similarity when unnamed); enums
 * merge by name into the combined symbol set; maps/arrays merge element-wise.
 */
private static Schema mergeOnly(Schema left, Schema right) {
    if (Objects.equal(left, right)) {
        return left;
    } else {
        // Primitive type promotion; integers are deliberately not promoted to floats.
        switch(left.getType()) {
            case INT:
                if (right.getType() == Schema.Type.LONG) {
                    return right;
                }
                break;
            case LONG:
                if (right.getType() == Schema.Type.INT) {
                    return left;
                }
                break;
            case FLOAT:
                if (right.getType() == Schema.Type.DOUBLE) {
                    return right;
                }
                break;
            case DOUBLE:
                if (right.getType() == Schema.Type.FLOAT) {
                    return left;
                }
        }
        // Remaining mismatched types cannot be merged; caller combines them via union.
        if (left.getType() != right.getType()) {
            return null;
        } else {
            switch(left.getType()) {
                case RECORD:
                    // Unnamed records merge only when similar enough; named records
                    // merge only when the names match.
                    if (left.getName() == null && right.getName() == null && fieldSimilarity(left, right) < SIMILARITY_THRESH) {
                        return null;
                    } else {
                        if (!Objects.equal(left.getName(), right.getName())) {
                            return null;
                        }
                        Schema combinedRecord = Schema.createRecord(coalesce(left.getName(), right.getName()), coalesce(left.getDoc(), right.getDoc()), coalesce(left.getNamespace(), right.getNamespace()), false);
                        combinedRecord.setFields(mergeFields(left, right));
                        return combinedRecord;
                    }
                case UNION:
                    return union(left, right);
                case ARRAY:
                    return Schema.createArray(mergeOrUnion(left.getElementType(), right.getElementType()));
                case MAP:
                    return Schema.createMap(mergeOrUnion(left.getValueType(), right.getValueType()));
                case BOOLEAN:
                case INT:
                case LONG:
                case FLOAT:
                case DOUBLE:
                case STRING:
                default:
                    // Equal primitives were handled by the equality check above, so
                    // any type reaching here is unknown/unsupported.
                    throw new UnsupportedOperationException("Unknown schema type: " + left.getType());
                case ENUM:
                    // Same-named enums merge to the combined symbol set, left symbols first.
                    if (!Objects.equal(left.getName(), right.getName())) {
                        return null;
                    } else {
                        Set<String> symbols = Sets.newLinkedHashSet();
                        symbols.addAll(left.getEnumSymbols());
                        symbols.addAll(right.getEnumSymbols());
                        return Schema.createEnum(left.getName(), coalesce(left.getDoc(), right.getDoc()), coalesce(left.getNamespace(), right.getNamespace()), ImmutableList.copyOf(symbols));
                    }
            }
        }
    }
}
See More Examples