Here are examples of the Java API class org.apache.avro.Schema, taken from open-source projects.
1. AvroSchemaFieldRemoverTest#testRemoveFields()
Project: gobblin
File: AvroSchemaFieldRemoverTest.java
/**
 * Verifies that AvroSchemaFieldRemover drops the named dotted-path fields from
 * recursive schemas, and leaves a schema untouched when no path matches.
 */
@Test
public void testRemoveFields() throws IllegalArgumentException, IOException {
  // Removing a single nested field from a recursive schema.
  Schema converted1 = convertSchema("/converter/recursive_schema_1.avsc", "YwchQiH.OjuzrLOtmqLW");
  Schema expected1 = parseSchema("/converter/recursive_schema_1_converted.avsc");
  Assert.assertEquals(converted1, expected1);
  // Removing several comma-separated field paths at once.
  Schema converted2 = convertSchema("/converter/recursive_schema_2.avsc", "FBuKC.wIINqII.lvaerUEKxBQUWg,eFQjDj.TzuYZajb");
  Schema expected2 = parseSchema("/converter/recursive_schema_2_converted.avsc");
  Assert.assertEquals(converted2, expected2);
  // A non-existent path must leave the schema unchanged.
  Schema converted3 = convertSchema("/converter/recursive_schema_2.avsc", "field.that.does.not.exist");
  Schema expected3 = parseSchema("/converter/recursive_schema_2_not_converted.avsc");
  Assert.assertEquals(converted3, expected3);
}
2. TestAvroSchemaConverter#testArrayOfOptionalRecordsOldBehavior()
Project: parquet-mr
File: TestAvroSchemaConverter.java
// Old (pre-3-level) list behavior: an Avro array of optional inner records converts to Parquet
// with the optional wrapper on the element removed, so only the Avro->Parquet direction is asserted.
@Test public void testArrayOfOptionalRecordsOldBehavior() throws Exception { Schema innerRecord = Schema.createRecord("InnerRecord", null, null, false); Schema optionalString = optional(Schema.create(Schema.Type.STRING)); innerRecord.setFields(Lists.newArrayList(new Schema.Field("s1", optionalString, null, NullNode.getInstance()), new Schema.Field("s2", optionalString, null, NullNode.getInstance()))); Schema schema = Schema.createRecord("HasArray", null, null, false); schema.setFields(Lists.newArrayList(new Schema.Field("myarray", Schema.createArray(optional(innerRecord)), null, null))); System.err.println("Avro schema: " + schema.toString(true)); // Cannot use round-trip assertion because InnerRecord optional is removed testAvroToParquetConversion(schema, "message HasArray {\n" + " required group myarray (LIST) {\n" + " repeated group array {\n" + " optional binary s1 (UTF8);\n" + " optional binary s2 (UTF8);\n" + " }\n" + " }\n" + "}\n"); }
3. TestTableConversion#testConvertTableWithRequiredFields()
Project: kite
File: TestTableConversion.java
@Test public void testConvertTableWithRequiredFields() { Schema recordSchema = Schema.createRecord("inner", null, null, false); recordSchema.setFields(Lists.newArrayList(new Schema.Field("a", Schema.create(Schema.Type.INT), null, null), new Schema.Field("b", optional(Schema.create(Schema.Type.BYTES)), null, NULL_DEFAULT))); Schema structOfStructsSchema = Schema.createRecord("test", null, null, false); structOfStructsSchema.setFields(Lists.newArrayList(new Schema.Field("str", Schema.create(Schema.Type.STRING), null, null), new Schema.Field("inner", recordSchema, null, null))); PartitionStrategy strategy = new PartitionStrategy.Builder().provided("not_present", "int").hash("inner.a", // requires both inner and inner.a 16).identity("str").build(); Assert.assertEquals("Should convert table named test", structOfStructsSchema, HiveSchemaConverter.convertTable("test", TABLE, strategy)); }
4. TestTableConversion#testConvertTable()
Project: kite
File: TestTableConversion.java
/**
 * Without a partition strategy, every converted Hive column becomes an
 * optional (nullable) Avro field with a null default.
 */
@Test
public void testConvertTable() {
  Schema innerSchema = Schema.createRecord("inner", null, null, false);
  innerSchema.setFields(Lists.newArrayList(
      new Schema.Field("a", optional(Schema.create(Schema.Type.INT)), null, NULL_DEFAULT),
      new Schema.Field("b", optional(Schema.create(Schema.Type.BYTES)), null, NULL_DEFAULT)));
  Schema expected = Schema.createRecord("test", null, null, false);
  expected.setFields(Lists.newArrayList(
      new Schema.Field("str", optional(Schema.create(Schema.Type.STRING)), null, NULL_DEFAULT),
      new Schema.Field("inner", optional(innerSchema), null, NULL_DEFAULT)));
  Assert.assertEquals("Should convert table named test",
      expected, HiveSchemaConverter.convertTable("test", TABLE, null));
}
5. TestTableConversion#testConvertStructWithRequiredFields()
Project: kite
File: TestTableConversion.java
/**
 * Column paths listed as required should be converted as non-optional fields;
 * everything else stays nullable with a null default.
 */
@Test
public void testConvertStructWithRequiredFields() {
  Schema innerSchema = Schema.createRecord("inner", null, null, false);
  innerSchema.setFields(Lists.newArrayList(
      new Schema.Field("a", Schema.create(Schema.Type.INT), null, null),
      new Schema.Field("b", optional(Schema.create(Schema.Type.BYTES)), null, NULL_DEFAULT)));
  Schema expected = Schema.createRecord("test", null, null, false);
  expected.setFields(Lists.newArrayList(
      new Schema.Field("str", Schema.create(Schema.Type.STRING), null, null),
      new Schema.Field("inner", innerSchema, null, null)));
  Assert.assertEquals("Should convert struct of structs",
      expected,
      HiveSchemaConverter.convert(startPath, "test", STRUCT_OF_STRUCTS_TYPE,
          // required paths: test.str and test.inner.a
          Lists.newArrayList(new String[] { "test", "str" }, new String[] { "test", "inner", "a" })));
}
6. TestTableConversion#testConvertStructs()
Project: kite
File: TestTableConversion.java
/**
 * With no required fields, a struct-of-structs Hive type converts to a record
 * whose fields are all optional with null defaults.
 */
@Test
public void testConvertStructs() {
  Schema innerSchema = Schema.createRecord("inner", null, null, false);
  innerSchema.setFields(Lists.newArrayList(
      new Schema.Field("a", optional(Schema.create(Schema.Type.INT)), null, NULL_DEFAULT),
      new Schema.Field("b", optional(Schema.create(Schema.Type.BYTES)), null, NULL_DEFAULT)));
  Schema expected = Schema.createRecord("test", null, null, false);
  expected.setFields(Lists.newArrayList(
      new Schema.Field("str", optional(Schema.create(Schema.Type.STRING)), null, NULL_DEFAULT),
      new Schema.Field("inner", optional(innerSchema), null, NULL_DEFAULT)));
  Assert.assertEquals("Should convert struct of structs",
      expected,
      HiveSchemaConverter.convert(startPath, "test", STRUCT_OF_STRUCTS_TYPE, NO_REQUIRED_FIELDS));
}
7. TestGenericLogicalTypes#testWriteDecimalBytes()
Project: parquet-mr
File: TestGenericLogicalTypes.java
@Test public void testWriteDecimalBytes() throws IOException { Schema bytesSchema = Schema.create(Schema.Type.BYTES); Schema bytesRecord = record("R", field("dec", bytesSchema)); Schema decimalSchema = DECIMAL_9_2.addToSchema(Schema.create(Schema.Type.BYTES)); Schema decimalRecord = record("R", field("dec", decimalSchema)); GenericRecord r1 = instance(decimalRecord, "dec", D1); GenericRecord r2 = instance(decimalRecord, "dec", D2); Conversion<BigDecimal> conversion = new Conversions.DecimalConversion(); // use the conversion directly instead of relying on the write side GenericRecord r1bytes = instance(bytesRecord, "dec", conversion.toBytes(D1, bytesSchema, DECIMAL_9_2)); GenericRecord r2bytes = instance(bytesRecord, "dec", conversion.toBytes(D2, bytesSchema, DECIMAL_9_2)); List<GenericRecord> expected = Arrays.asList(r1bytes, r2bytes); File test = write(GENERIC, decimalRecord, r1, r2); Assert.assertEquals("Should read BigDecimals as bytes", expected, read(GENERIC, bytesRecord, test)); }
8. TestGenericLogicalTypes#testReadDecimalBytes()
Project: parquet-mr
File: TestGenericLogicalTypes.java
@Test public void testReadDecimalBytes() throws IOException { Schema bytesSchema = Schema.create(Schema.Type.BYTES); Schema bytesRecord = record("R", field("dec", bytesSchema)); Schema decimalSchema = DECIMAL_9_2.addToSchema(Schema.create(Schema.Type.BYTES)); Schema decimalRecord = record("R", field("dec", decimalSchema)); GenericRecord r1 = instance(decimalRecord, "dec", D1); GenericRecord r2 = instance(decimalRecord, "dec", D2); List<GenericRecord> expected = Arrays.asList(r1, r2); Conversion<BigDecimal> conversion = new Conversions.DecimalConversion(); // use the conversion directly instead of relying on the write side GenericRecord r1bytes = instance(bytesRecord, "dec", conversion.toBytes(D1, bytesSchema, DECIMAL_9_2)); GenericRecord r2bytes = instance(bytesRecord, "dec", conversion.toBytes(D2, bytesSchema, DECIMAL_9_2)); File test = write(bytesRecord, r1bytes, r2bytes); Assert.assertEquals("Should convert bytes to BigDecimals", expected, read(GENERIC, decimalRecord, test)); }
9. TestGenericLogicalTypes#testWriteDecimalFixed()
Project: parquet-mr
File: TestGenericLogicalTypes.java
@Test public void testWriteDecimalFixed() throws IOException { Schema fixedSchema = Schema.createFixed("aFixed", null, null, 4); Schema fixedRecord = record("R", field("dec", fixedSchema)); Schema decimalSchema = DECIMAL_9_2.addToSchema(Schema.createFixed("aFixed", null, null, 4)); Schema decimalRecord = record("R", field("dec", decimalSchema)); GenericRecord r1 = instance(decimalRecord, "dec", D1); GenericRecord r2 = instance(decimalRecord, "dec", D2); Conversion<BigDecimal> conversion = new Conversions.DecimalConversion(); // use the conversion directly instead of relying on the write side GenericRecord r1fixed = instance(fixedRecord, "dec", conversion.toFixed(D1, fixedSchema, DECIMAL_9_2)); GenericRecord r2fixed = instance(fixedRecord, "dec", conversion.toFixed(D2, fixedSchema, DECIMAL_9_2)); List<GenericRecord> expected = Arrays.asList(r1fixed, r2fixed); File test = write(GENERIC, decimalRecord, r1, r2); Assert.assertEquals("Should read BigDecimals as fixed", expected, read(GENERIC, fixedRecord, test)); }
10. TestGenericLogicalTypes#testReadDecimalFixed()
Project: parquet-mr
File: TestGenericLogicalTypes.java
@Test public void testReadDecimalFixed() throws IOException { Schema fixedSchema = Schema.createFixed("aFixed", null, null, 4); Schema fixedRecord = record("R", field("dec", fixedSchema)); Schema decimalSchema = DECIMAL_9_2.addToSchema(Schema.createFixed("aFixed", null, null, 4)); Schema decimalRecord = record("R", field("dec", decimalSchema)); GenericRecord r1 = instance(decimalRecord, "dec", D1); GenericRecord r2 = instance(decimalRecord, "dec", D2); List<GenericRecord> expected = Arrays.asList(r1, r2); Conversion<BigDecimal> conversion = new Conversions.DecimalConversion(); // use the conversion directly instead of relying on the write side GenericRecord r1fixed = instance(fixedRecord, "dec", conversion.toFixed(D1, fixedSchema, DECIMAL_9_2)); GenericRecord r2fixed = instance(fixedRecord, "dec", conversion.toFixed(D2, fixedSchema, DECIMAL_9_2)); File test = write(fixedRecord, r1fixed, r2fixed); Assert.assertEquals("Should convert fixed to BigDecimals", expected, read(GENERIC, decimalRecord, test)); }
11. TestGenericLogicalTypes#testWriteNullableUUID()
Project: parquet-mr
File: TestGenericLogicalTypes.java
/**
 * Nullable UUID logical-type fields written as GENERIC data should read back
 * as their string form under a plain java-String reader schema.
 */
@Test
public void testWriteNullableUUID() throws IOException {
  Schema nullableUuidSchema = record("R",
      optionalField("uuid", LogicalTypes.uuid().addToSchema(Schema.create(STRING))));
  GenericRecord u1 = instance(nullableUuidSchema, "uuid", UUID.randomUUID());
  GenericRecord u2 = instance(nullableUuidSchema, "uuid", UUID.randomUUID());
  // Reader schema: the same nullable field, typed as java.lang.String.
  Schema stringUuidSchema = Schema.create(STRING);
  stringUuidSchema.addProp(GenericData.STRING_PROP, "String");
  Schema nullableStringSchema = record("R", optionalField("uuid", stringUuidSchema));
  GenericRecord s1 = instance(nullableStringSchema, "uuid", u1.get("uuid").toString());
  GenericRecord s2 = instance(nullableStringSchema, "uuid", u2.get("uuid").toString());
  File test = write(GENERIC, nullableUuidSchema, u1, u2);
  Assert.assertEquals("Should read UUIDs as Strings",
      Arrays.asList(s1, s2), read(GENERIC, nullableStringSchema, test));
}
12. TestGenericLogicalTypes#testWriteUUIDReadStringSchema()
Project: parquet-mr
File: TestGenericLogicalTypes.java
/**
 * UUID logical-type fields written as GENERIC data should read back as their
 * string form under a plain java-String reader schema.
 */
@Test
public void testWriteUUIDReadStringSchema() throws IOException {
  Schema uuidSchema = record("R",
      field("uuid", LogicalTypes.uuid().addToSchema(Schema.create(STRING))));
  GenericRecord u1 = instance(uuidSchema, "uuid", UUID.randomUUID());
  GenericRecord u2 = instance(uuidSchema, "uuid", UUID.randomUUID());
  // Reader schema: the same field, typed as java.lang.String.
  Schema stringUuidSchema = Schema.create(STRING);
  stringUuidSchema.addProp(GenericData.STRING_PROP, "String");
  Schema stringSchema = record("R", field("uuid", stringUuidSchema));
  GenericRecord s1 = instance(stringSchema, "uuid", u1.get("uuid").toString());
  GenericRecord s2 = instance(stringSchema, "uuid", u2.get("uuid").toString());
  File test = write(GENERIC, uuidSchema, u1, u2);
  Assert.assertEquals("Should read UUIDs as Strings",
      Arrays.asList(s1, s2), read(GENERIC, stringSchema, test));
}
13. TestAvroSchemaConverter#testDecimalFixedType()
Project: parquet-mr
File: TestAvroSchemaConverter.java
/**
 * decimal(9,2) over an 8-byte fixed should round-trip to a Parquet
 * fixed_len_byte_array(8) column annotated DECIMAL(9,2).
 */
@Test
public void testDecimalFixedType() throws Exception {
  Schema schema = Schema.createRecord("myrecord", null, null, false);
  Schema decimal = LogicalTypes.decimal(9, 2).addToSchema(Schema.createFixed("dec", null, null, 8));
  schema.setFields(Collections.singletonList(new Schema.Field("dec", decimal, null, null)));
  testRoundTripConversion(schema,
      "message myrecord {\n" +
      " required fixed_len_byte_array(8) dec (DECIMAL(9,2));\n" +
      "}\n");
}
14. TestAvroSchemaConverter#testDecimalBytesType()
Project: parquet-mr
File: TestAvroSchemaConverter.java
/**
 * decimal(9,2) over BYTES should round-trip to a Parquet binary column
 * annotated DECIMAL(9,2).
 */
@Test
public void testDecimalBytesType() throws Exception {
  Schema schema = Schema.createRecord("myrecord", null, null, false);
  Schema decimal = LogicalTypes.decimal(9, 2).addToSchema(Schema.create(Schema.Type.BYTES));
  schema.setFields(Collections.singletonList(new Schema.Field("dec", decimal, null, null)));
  testRoundTripConversion(schema,
      "message myrecord {\n" +
      " required binary dec (DECIMAL(9,2));\n" +
      "}\n");
}
15. TestAvroSchemaConverter#testParquetMapWithoutMapKeyValueAnnotation()
Project: parquet-mr
File: TestAvroSchemaConverter.java
// A Parquet MAP whose repeated group lacks the MAP_KEY_VALUE annotation should still convert to
// an Avro map of int, under both the default and the NEW_BEHAVIOR converter settings.
@Test public void testParquetMapWithoutMapKeyValueAnnotation() throws Exception { Schema schema = Schema.createRecord("myrecord", null, null, false); Schema map = Schema.createMap(Schema.create(INT)); schema.setFields(Collections.singletonList(new Schema.Field("mymap", map, null, null))); String parquetSchema = "message myrecord {\n" + " required group mymap (MAP) {\n" + " repeated group map {\n" + " required binary key (UTF8);\n" + " required int32 value;\n" + " }\n" + " }\n" + "}\n"; testParquetToAvroConversion(schema, parquetSchema); testParquetToAvroConversion(NEW_BEHAVIOR, schema, parquetSchema); }
16. TestAvroSchemaConverter#testUnionOfTwoTypes()
Project: parquet-mr
File: TestAvroSchemaConverter.java
// A union of null/int/float maps to an optional group with one optional member per non-null
// branch; this representation does not convert back to an Avro union, so no round trip.
@Test public void testUnionOfTwoTypes() throws Exception { Schema schema = Schema.createRecord("record2", null, null, false); Schema multipleTypes = Schema.createUnion(Arrays.asList(Schema.create(Schema.Type.NULL), Schema.create(INT), Schema.create(Schema.Type.FLOAT))); schema.setFields(Arrays.asList(new Schema.Field("myunion", multipleTypes, null, NullNode.getInstance()))); // Avro union is modelled using optional data members of the different // types. This does not translate back into an Avro union testAvroToParquetConversion(schema, "message record2 {\n" + " optional group myunion {\n" + " optional int32 member0;\n" + " optional float member1;\n" + " }\n" + "}\n"); }
17. TestAvroSchemaConverter#testOptionalArrayElement()
Project: parquet-mr
File: TestAvroSchemaConverter.java
// New-behavior round trip: an Avro array with optional (nullable-union) elements becomes the
// 3-level Parquet LIST encoding with an optional "element" field.
@Test public void testOptionalArrayElement() throws Exception { Schema schema = Schema.createRecord("record1", null, null, false); Schema optionalIntArray = Schema.createArray(optional(Schema.create(INT))); schema.setFields(Arrays.asList(new Schema.Field("myintarray", optionalIntArray, null, null))); testRoundTripConversion(NEW_BEHAVIOR, schema, "message record1 {\n" + " required group myintarray (LIST) {\n" + " repeated group list {\n" + " optional int32 element;\n" + " }\n" + " }\n" + "}\n"); }
18. TestAvroSchemaConverter#testOptionalMapValue()
Project: parquet-mr
File: TestAvroSchemaConverter.java
// A map with optional values round-trips to a MAP_KEY_VALUE group with required UTF8 keys and
// optional int32 values.
@Test public void testOptionalMapValue() throws Exception { Schema schema = Schema.createRecord("record1", null, null, false); Schema optionalIntMap = Schema.createMap(optional(Schema.create(INT))); schema.setFields(Arrays.asList(new Schema.Field("myintmap", optionalIntMap, null, null))); testRoundTripConversion(schema, "message record1 {\n" + " required group myintmap (MAP) {\n" + " repeated group map (MAP_KEY_VALUE) {\n" + " required binary key (UTF8);\n" + " optional int32 value;\n" + " }\n" + " }\n" + "}\n"); }
19. TestAvroSchemaConverter#testOptionalFields()
Project: parquet-mr
File: TestAvroSchemaConverter.java
/**
 * An optional (nullable-union) int field should round-trip to an optional
 * int32 Parquet column.
 */
@Test
public void testOptionalFields() throws Exception {
  Schema schema = Schema.createRecord("record1", null, null, false);
  Schema optionalInt = optional(Schema.create(INT));
  schema.setFields(Arrays.asList(
      new Schema.Field("myint", optionalInt, null, NullNode.getInstance())));
  testRoundTripConversion(schema,
      "message record1 {\n" +
      " optional int32 myint;\n" +
      "}\n");
}
20. TestCopyCommandClusterChangedNameWithPartitioning#getEvolvedSchema()
Project: kite
File: TestCopyCommandClusterChangedNameWithPartitioning.java
/**
 * Builds a renamed copy of the original schema: fields are cloned as-is, the
 * record is renamed to "NewUser", and an alias to the old "User" name is added
 * so data written with the original schema stays readable.
 */
@Override
public Schema getEvolvedSchema(Schema original) {
  List<Schema.Field> copiedFields = Lists.newArrayList();
  for (Schema.Field f : original.getFields()) {
    // Field objects cannot be shared between schemas, so clone each one.
    copiedFields.add(new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultValue()));
  }
  Schema evolved = Schema.createRecord("NewUser", original.getDoc(), original.getNamespace(), false);
  evolved.addAlias("User");
  evolved.setFields(copiedFields);
  return evolved;
}
21. AvroMorphlineTest#processAndVerifyUnion()
Project: kite
File: AvroMorphlineTest.java
/**
 * Builds a one-field record whose "price" is a union of the given types, runs
 * the morphline on the raw input, and verifies the emitted attachment holds
 * the expected (converted) value.
 */
private void processAndVerifyUnion(Object input, Object expected, List<Schema> types) {
  Schema documentSchema = Schema.createRecord("Doc", "adoc", null, false);
  Schema unionSchema = Schema.createUnion(types);
  documentSchema.setFields(Arrays.asList(new Field("price", unionSchema, null, null)));
  GenericData.Record doc = new GenericData.Record(documentSchema);
  doc.put("price", expected);
  Record inputRecord = new Record();
  inputRecord.put("_dataset_descriptor_schema", documentSchema);
  inputRecord.put("price", input);
  Record expectedRecord = inputRecord.copy();
  expectedRecord.put(Fields.ATTACHMENT_BODY, doc);
  processAndVerifySuccess(inputRecord, expectedRecord, false);
}
22. AvroUtilsTest#testSwitchName()
Project: gobblin
File: AvroUtilsTest.java
/**
 * switchName should produce a schema identical to the input except for the
 * record name, and applying it twice must restore the original schema.
 */
@Test
public void testSwitchName() {
  String originalName = "originalName";
  String newName = "newName";
  Schema schema = SchemaBuilder.record(originalName).fields()
      .requiredDouble("double")
      .optionalFloat("float")
      .endRecord();
  Schema renamed = AvroUtils.switchName(schema, newName);
  Assert.assertEquals(renamed.getName(), newName);
  // Every field must be carried over unchanged.
  for (Schema.Field f : renamed.getFields()) {
    Assert.assertEquals(f, schema.getField(f.name()));
  }
  Assert.assertEquals(newName, AvroUtils.switchName(schema, newName).getName());
  // Renaming back yields a schema equal to the original.
  Assert.assertEquals(schema,
      AvroUtils.switchName(AvroUtils.switchName(schema, newName), schema.getName()));
}
23. AvroUtilsTest#testNullifyFieldWhenOldSchemaNotRecord()
Project: gobblin
File: AvroUtilsTest.java
// When the old schema is not a record (here: an array), the merge ignores it entirely and the
// result is exactly the new record schema.
/** * Test nullifying fields when one schema is not record type. */ @Test public void testNullifyFieldWhenOldSchemaNotRecord() { Schema oldSchema = new Schema.Parser().parse("{\"type\": \"array\", \"items\": \"string\"}"); Schema newSchema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"test\", " + "\"fields\":[" + "{\"name\": \"name\", \"type\": \"string\"}" + "]}"); Schema expectedOutputSchema = newSchema; Assert.assertEquals(expectedOutputSchema, AvroUtils.nullifyFieldsForSchemaMerge(oldSchema, newSchema)); }
24. AvroUtilsTest#testNullifyFieldForMultipleFieldsRemoved()
Project: gobblin
File: AvroUtilsTest.java
// Fields present only in the old schema ("color", "number") reappear in the merged schema as
// nullable unions with "null" prepended as the first branch.
/** * Test nullifying fields when more than one field is removed in the new schema. */ @Test public void testNullifyFieldForMultipleFieldsRemoved() { Schema oldSchema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"test\", " + "\"fields\":[" + "{\"name\": \"name\", \"type\": \"string\"}, " + "{\"name\": \"color\", \"type\": \"string\"}, " + "{\"name\": \"number\", \"type\": [{\"type\": \"string\"}, {\"type\": \"array\", \"items\": \"string\"}]}" + "]}"); Schema newSchema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"test\", " + "\"fields\":[" + "{\"name\": \"name\", \"type\": \"string\"}" + "]}"); Schema expectedOutputSchema = new Schema.Parser().parse("{\"type\":\"record\", \"name\":\"test\", " + "\"fields\":[" + "{\"name\": \"name\", \"type\": \"string\"}, " + "{\"name\": \"color\", \"type\": [\"null\", \"string\"]}, " + "{\"name\": \"number\", \"type\": [\"null\", {\"type\": \"string\"}, {\"type\": \"array\", \"items\": \"string\"}]}" + "]}"); Assert.assertEquals(expectedOutputSchema, AvroUtils.nullifyFieldsForSchemaMerge(oldSchema, newSchema)); }
25. AvroMorphlineTest#processAndVerifyUnion()
Project: cdk
File: AvroMorphlineTest.java
/**
 * Builds a one-field record whose "price" is a union of the given types, runs
 * the morphline on the raw input, and verifies the emitted attachment holds
 * the expected (converted) value.
 */
private void processAndVerifyUnion(Object input, Object expected, List<Schema> types) {
  Schema documentSchema = Schema.createRecord("Doc", "adoc", null, false);
  Schema unionSchema = Schema.createUnion(types);
  documentSchema.setFields(Arrays.asList(new Field("price", unionSchema, null, null)));
  GenericData.Record doc = new GenericData.Record(documentSchema);
  doc.put("price", expected);
  Record inputRecord = new Record();
  inputRecord.put("_dataset_descriptor_schema", documentSchema);
  inputRecord.put("price", input);
  Record expectedRecord = inputRecord.copy();
  expectedRecord.put(Fields.ATTACHMENT_BODY, doc);
  processAndVerifySuccess(inputRecord, expectedRecord, false);
}
26. InferredType#computeAvroSchema()
Project: RecordBreaker
File: InferredType.java
/**
 * Assembles an Avro record schema from this type's struct members, skipping
 * any member that has no Avro representation.
 */
Schema computeAvroSchema() {
  List<Schema.Field> fields = new ArrayList<Schema.Field>();
  for (InferredType member : structTypes) {
    Schema memberSchema = member.getAvroSchema();
    if (memberSchema == null) {
      // No Avro equivalent for this member; leave it out of the record.
      continue;
    }
    fields.add(new Schema.Field(member.getName(), member.getAvroSchema(),
        member.getDocString(), member.getDefaultValue()));
  }
  Schema s = Schema.createRecord(name, "RECORD", "", false);
  s.setFields(fields);
  return s;
}
27. TestReflectLogicalTypes#testReflectedSchema()
Project: parquet-mr
File: TestReflectLogicalTypes.java
/**
 * Reflecting RecordWithUUIDList should yield an array-of-string schema whose
 * elements carry the UUID logical type and whose java-class prop is List.
 */
@Test
public void testReflectedSchema() {
  Schema expected = SchemaBuilder.record(RecordWithUUIDList.class.getName())
      .fields()
      .name("uuids").type().array().items().stringType().noDefault()
      .endRecord();
  // Reflect data records the concrete Java collection class on the field.
  expected.getField("uuids").schema().addProp(SpecificData.CLASS_PROP, List.class.getName());
  LogicalTypes.uuid().addToSchema(expected.getField("uuids").schema().getElementType());
  Schema actual = REFLECT.getSchema(RecordWithUUIDList.class);
  Assert.assertEquals("Should use the UUID logical type", expected, actual);
}
28. TestReadWriteOldListBehavior#testMapRequiredValueWithNull()
Project: parquet-mr
File: TestReadWriteOldListBehavior.java
// A map of required int values must reject nulls: writing a record whose map contains a null
// value is expected to fail with a RuntimeException.
@Test(expected = RuntimeException.class) public void testMapRequiredValueWithNull() throws Exception { Schema schema = Schema.createRecord("record1", null, null, false); schema.setFields(Lists.newArrayList(new Schema.Field("mymap", Schema.createMap(Schema.create(Schema.Type.INT)), null, null))); File tmp = File.createTempFile(getClass().getSimpleName(), ".tmp"); tmp.deleteOnExit(); tmp.delete(); Path file = new Path(tmp.getPath()); AvroParquetWriter<GenericRecord> writer = new AvroParquetWriter<GenericRecord>(file, schema); // Write a record with a null value Map<String, Integer> map = new HashMap<String, Integer>(); map.put("thirty-four", 34); map.put("eleventy-one", null); map.put("one-hundred", 100); GenericData.Record record = new GenericRecordBuilder(schema).set("mymap", map).build(); writer.write(record); }
29. TestColumnMappingParser#testAddEmbeddedColumnMapping()
Project: kite
File: TestColumnMappingParser.java
// Embedding a column mapping into a schema should be detectable afterwards and must parse back
// to a mapping equal to the one that was embedded.
@Test public void testAddEmbeddedColumnMapping() { ColumnMapping mapping = new ColumnMapping.Builder().key("id").column("username", "u", "username").column("real_name", "u", "name").build(); Schema original = new Schema.Parser().parse("{" + " \"type\": \"record\"," + " \"name\": \"User\"," + " \"partitions\": [" + " {\"type\": \"identity\", \"source\": \"id\", \"name\": \"id_copy\"}" + " ]," + " \"fields\": [" + " {\"name\": \"id\", \"type\": \"long\"}," + " {\"name\": \"username\", \"type\": \"string\"}," + " {\"name\": \"real_name\", \"type\": \"string\"}" + " ]" + "}"); Schema embedded = ColumnMappingParser.embedColumnMapping(original, mapping); junit.framework.Assert.assertTrue(ColumnMappingParser.hasEmbeddedColumnMapping(embedded)); junit.framework.Assert.assertEquals(mapping, ColumnMappingParser.parseFromSchema(embedded)); }
30. TestSchemaManager#testUpdateSchema()
Project: kite
File: TestSchemaManager.java
@Test public void testUpdateSchema() throws IOException { SchemaManager manager = SchemaManager.create(getConfiguration(), testDirectory); manager.writeSchema(DatasetTestUtilities.USER_SCHEMA); Schema schema = manager.getNewestSchema(); Assert.assertEquals(DatasetTestUtilities.USER_SCHEMA, schema); // Create an updated schema and ensure it can be written. Schema updatedSchema = SchemaBuilder.record(schema.getName()).fields().requiredString("username").requiredString("email").optionalBoolean("extra_field").endRecord(); manager.writeSchema(updatedSchema); Assert.assertEquals(updatedSchema, manager.getNewestSchema()); }
31. Compatibility#checkCompatible()
Project: kite
File: Compatibility.java
// Compatibility here means: same format, same partitioned-ness (plus a strategy-update check when
// partitioned), and the new schema must be able to read data written with the existing schema.
/** * Checks that the {@code existing} {@link DatasetDescriptor} is compatible * with {@code test}. * * @param existing the current {@code DatasetDescriptor} for a dataset * @param test a new {@code DatasetDescriptor} for the same dataset */ public static void checkCompatible(DatasetDescriptor existing, DatasetDescriptor test) { checkNotChanged("format", existing.getFormat(), test.getFormat()); checkNotChanged("partitioning", existing.isPartitioned(), test.isPartitioned()); if (existing.isPartitioned()) { checkStrategyUpdate(existing.getPartitionStrategy(), test.getPartitionStrategy(), test.getSchema()); } // check can read records written with old schema using new schema Schema oldSchema = existing.getSchema(); Schema testSchema = test.getSchema(); if (!SchemaValidationUtil.canRead(oldSchema, testSchema)) { throw new IncompatibleSchemaException("Schema cannot read data " + "written using existing schema. Schema: " + testSchema.toString(true) + "\nExisting schema: " + oldSchema.toString(true)); } }
32. SchemaUtil#getUniqueSchemasMap()
Project: kaa
File: SchemaUtil.java
// Indexes every child schema of the given schemas by full name, throwing if the same full name
// appears with two structurally different definitions.
public static Map<String, Schema> getUniqueSchemasMap(Collection<Schema> schemas) throws Exception { Map<String, Schema> map = new HashMap<String, Schema>(); List<Schema> allPossible = new LinkedList<Schema>(); for (Schema schema : schemas) { allPossible.addAll(getChildSchemas(schema)); } for (Schema schema : allPossible) { String key = schema.getFullName(); if (!map.containsKey(key)) { map.put(key, schema); } else { if (!SchemaUtil.isEqualSchemas(schema, map.get(key))) { LOG.debug("classes {} are not the same: \n{}\n\n{}", key, schema.toString(), map.get(key).toString()); throw new IllegalArgumentException("multiple occurrences of " + key + " with different fields"); } } } return map; }
33. SchemaUtil#isEqualUnions()
Project: kaa
File: SchemaUtil.java
/**
 * Compares two union schemas for equality irrespective of branch order by
 * indexing each union's branches by name before comparing the maps.
 */
private static boolean isEqualUnions(Schema s1, Schema s2) {
  SortedMap<String, Schema> branches1 = new TreeMap<String, Schema>();
  SortedMap<String, Schema> branches2 = new TreeMap<String, Schema>();
  for (Schema branch : s1.getTypes()) {
    branches1.put(branch.getName(), branch);
  }
  for (Schema branch : s2.getTypes()) {
    branches2.put(branch.getName(), branch);
  }
  return isEqualSchemaMaps(branches1, branches2);
}
34. AvroDataCanonizationUtils#removeUuid()
Project: kaa
File: AvroDataCanonizationUtils.java
// Only GenericFixed fields are treated as UUIDs (cleared via clearUuid); GenericRecord fields and
// records inside GenericArray fields are traversed recursively. Non-record array elements and
// null fields are left untouched.
/** * Recursively removes UUIDs from the record. * * @param baseRecord The record containing UUID fields. */ public static void removeUuid(GenericRecord baseRecord) { Schema recordSchema = baseRecord.getSchema(); for (Schema.Field fieldSchema : recordSchema.getFields()) { if (baseRecord.get(fieldSchema.name()) != null) { Object field = baseRecord.get(fieldSchema.name()); if (field instanceof GenericFixed) { baseRecord.put(fieldSchema.name(), clearUuid((GenericFixed) field, fieldSchema)); } else if (field instanceof GenericRecord) { removeUuid((GenericRecord) field); } else if (field instanceof GenericArray) { GenericArray arrayField = (GenericArray) field; for (Object obj : arrayField) { if (obj instanceof GenericRecord) { removeUuid((GenericRecord) obj); } } } } } }
35. ProtocolSchemaStrategy#onSchemaProcessed()
Project: kaa
File: ProtocolSchemaStrategy.java
/**
 * Wraps the root schema plus all addressable records into a single "delta"
 * union field of a deltaT record, and returns an array of that record.
 */
@Override
public Schema onSchemaProcessed(Schema rootSchema, Set<Schema> addressableRecords) {
  // Union branches: root first, then every addressable record.
  List<Schema> unionBranches = new ArrayList<Schema>(addressableRecords.size() + 1);
  unionBranches.add(rootSchema);
  unionBranches.addAll(addressableRecords);
  Field deltaField = new Field(DELTA, Schema.createUnion(unionBranches), null, null);
  List<Field> fields = new ArrayList<Field>();
  fields.add(deltaField);
  Schema delta = Schema.createRecord(DELTA + "T", null, KAA_NAMESPACE, false);
  delta.setFields(fields);
  return Schema.createArray(delta);
}
36. DefaultDeltaCalculationAlgorithm#fillDeltaWithoutMerge()
Project: kaa
File: DefaultDeltaCalculationAlgorithm.java
// NOTE(review): list values are copied element-wise into a GenericArray sized from the source
// list; GenericContainer elements/values are delegated to addComplexItemToArray /
// processComplexField, and plain values are copied through unchanged.
/** * Fill delta without merge. * * @param delta the delta * @param root the root * @throws DeltaCalculatorException the delta calculator exception */ private void fillDeltaWithoutMerge(GenericRecord delta, GenericRecord root) throws DeltaCalculatorException { Schema rootSchema = root.getSchema(); for (Field field : rootSchema.getFields()) { Object value = root.get(field.name()); if (value instanceof List) { List<Object> values = (List<Object>) value; Schema arraySchema = getArraySchema(delta, field.name()); GenericArray deltaArray = new GenericData.Array(values.size(), arraySchema); for (Object item : values) { if (item instanceof GenericContainer) { GenericContainer record = (GenericContainer) item; addComplexItemToArray(record, deltaArray); } else { deltaArray.add(item); } } delta.put(field.name(), deltaArray); } else if (value instanceof GenericContainer) { processComplexField(delta, field.name(), (GenericContainer) value, null, null); } else { delta.put(field.name(), value); } } }
37. AvroCoderTest#testAvroCoderForGenerics()
Project: incubator-beam
File: AvroCoderTest.java
// A generic record schema embedding the reflected Foo schema is expected to be flagged
// non-deterministic, with erasure on the "foo" field reported as the reason.
@Test public void testAvroCoderForGenerics() throws Exception { Schema fooSchema = AvroCoder.of(Foo.class).getSchema(); Schema schema = new Schema.Parser().parse("{" + "\"type\":\"record\"," + "\"name\":\"SomeGeneric\"," + "\"namespace\":\"ns\"," + "\"fields\":[" + " {\"name\":\"foo\", \"type\":" + fooSchema.toString() + "}" + "]}"); @SuppressWarnings("rawtypes") AvroCoder<SomeGeneric> coder = AvroCoder.of(SomeGeneric.class, schema); assertNonDeterministic(coder, reasonField(SomeGeneric.class, "foo", "erasure")); }
38. DataSourceParquet#read()
Project: hydra
File: DataSourceParquet.java
public Bundle read() throws IOException { GenericRecord nextRecord = parquetReader.read(); if (nextRecord == null) { return null; } GenericData genericData = GenericData.get(); Bundle bundle = factory.createBundle(); Schema inputSchema = nextRecord.getSchema(); for (Schema.Field field : inputSchema.getFields()) { ValueObject value = DataSourceAvro.getValueObject(nextRecord, field, genericData); if (value != null) { bundle.setValue(bundle.getFormat().getField(field.name()), value); } } return bundle; }
39. AvroUtils#switchName()
Project: gobblin
File: AvroUtils.java
/** * Copies the input {@link org.apache.avro.Schema} but changes the schema name. * @param schema {@link org.apache.avro.Schema} to copy. * @param newName name for the copied {@link org.apache.avro.Schema}. * @return A {@link org.apache.avro.Schema} that is a copy of schema, but has the name newName. */ public static Schema switchName(Schema schema, String newName) { if (schema.getName().equals(newName)) { return schema; } Schema newSchema = Schema.createRecord(newName, schema.getDoc(), schema.getNamespace(), schema.isError()); List<Field> fields = schema.getFields(); Iterable<Field> fieldsNew = Iterables.transform(fields, new Function<Field, Field>() { @Override public Schema.Field apply(Field input) { //this should never happen but the API has marked input as Nullable if (null == input) { return null; } Field field = new Field(input.name(), input.schema(), input.doc(), input.defaultValue(), input.order()); return field; } }); newSchema.setFields(Lists.newArrayList(fieldsNew)); return newSchema; }
40. AvroUtils#removeUncomparableFieldsFromRecord()
Project: gobblin
File: AvroUtils.java
private static Optional<Schema> removeUncomparableFieldsFromRecord(Schema record, Set<Schema> processed) { Preconditions.checkArgument(record.getType() == Schema.Type.RECORD); if (processed.contains(record)) { return Optional.absent(); } processed.add(record); List<Field> fields = Lists.newArrayList(); for (Field field : record.getFields()) { Optional<Schema> newFieldSchema = removeUncomparableFields(field.schema(), processed); if (newFieldSchema.isPresent()) { fields.add(new Field(field.name(), newFieldSchema.get(), field.doc(), field.defaultValue())); } } Schema newSchema = Schema.createRecord(record.getName(), record.getDoc(), record.getNamespace(), false); newSchema.setFields(fields); return Optional.of(newSchema); }
41. AvroFlattener#flattenRecord()
Project: gobblin
File: AvroFlattener.java
/*** * Flatten Record schema * @param schema Record Schema to flatten * @param shouldPopulateLineage If lineage information should be tagged in the field, this is true when we are * un-nesting fields * @param flattenComplexTypes Flatten complex types recursively other than Record and Option * @return Flattened Record Schema */ private Schema flattenRecord(Schema schema, boolean shouldPopulateLineage, boolean flattenComplexTypes) { Preconditions.checkNotNull(schema); Preconditions.checkArgument(Schema.Type.RECORD.equals(schema.getType())); Schema flattenedSchema; List<Schema.Field> flattenedFields = new ArrayList<>(); if (schema.getFields().size() > 0) { for (Schema.Field oldField : schema.getFields()) { List<Schema.Field> newFields = flattenField(oldField, ImmutableList.<String>of(), shouldPopulateLineage, flattenComplexTypes, Optional.<Schema>absent()); if (null != newFields && newFields.size() > 0) { flattenedFields.addAll(newFields); } } } flattenedSchema = Schema.createRecord(schema.getName(), schema.getDoc(), schema.getNamespace(), schema.isError()); flattenedSchema.setFields(flattenedFields); return flattenedSchema; }
42. HiveAvroORCQueryUtilsTest#testRecordWithinRecordWithinRecordFlattenedDML()
Project: gobblin
File: HiveAvroORCQueryUtilsTest.java
/*** * Test DML generation * @throws IOException */ @Test public void testRecordWithinRecordWithinRecordFlattenedDML() throws IOException { String schemaName = "testRecordWithinRecordWithinRecordDDL"; Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir, "recordWithinRecordWithinRecord_nested.json"); AvroFlattener avroFlattener = new AvroFlattener(); Schema flattenedSchema = avroFlattener.flatten(schema, true); String q = HiveAvroORCQueryUtils.generateTableMappingDML(schema, flattenedSchema, schemaName, schemaName + "_orc", Optional.<String>absent(), Optional.<String>absent(), Optional.<Map<String, String>>absent(), Optional.<Boolean>absent(), Optional.<Boolean>absent()); Assert.assertEquals(q.trim().replaceAll(" ", ""), ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord.dml").replaceAll(" ", "")); }
43. HiveAvroORCQueryUtilsTest#testRecordWithinRecordWithinRecordFlattenedDDL()
Project: gobblin
File: HiveAvroORCQueryUtilsTest.java
/*** * Test DDL generation for schema structured as: record within record within record after flattening * @throws IOException */ @Test public void testRecordWithinRecordWithinRecordFlattenedDDL() throws IOException { String schemaName = "testRecordWithinRecordWithinRecordDDL"; Schema schema = ConversionHiveTestUtils.readSchemaFromJsonFile(resourceDir, "recordWithinRecordWithinRecord_nested.json"); AvroFlattener avroFlattener = new AvroFlattener(); Schema flattenedSchema = avroFlattener.flatten(schema, true); String q = HiveAvroORCQueryUtils.generateCreateTableDDL(flattenedSchema, schemaName, "file:/user/hive/warehouse/" + schemaName, Optional.<String>absent(), Optional.<Map<String, String>>absent(), Optional.<List<String>>absent(), Optional.<Map<String, HiveAvroORCQueryUtils.COLUMN_SORT_ORDER>>absent(), Optional.<Integer>absent(), Optional.<String>absent(), Optional.<String>absent(), Optional.<String>absent(), Optional.<Map<String, String>>absent()); Assert.assertEquals(q.trim().replaceAll(" ", ""), ConversionHiveTestUtils.readQueryFromFile(resourceDir, "recordWithinRecordWithinRecord_flattened.ddl").replaceAll(" ", "")); }
44. AvroFieldsPickConverter#createSchemaHelper()
Project: gobblin
File: AvroFieldsPickConverter.java
private static Schema createSchemaHelper(Schema inputSchema, TrieNode node) { Schema newRecord = Schema.createRecord(inputSchema.getName(), inputSchema.getDoc(), inputSchema.getNamespace(), inputSchema.isError()); List<Field> newFields = Lists.newArrayList(); for (TrieNode child : node.children.values()) { Field innerSrcField = inputSchema.getField(child.val); Preconditions.checkNotNull(innerSrcField, child.val + " does not exist under " + inputSchema); if (child.children.isEmpty()) { //Leaf newFields.add(new Field(innerSrcField.name(), innerSrcField.schema(), innerSrcField.doc(), innerSrcField.defaultValue())); } else { Schema innerSrcSchema = innerSrcField.schema(); //Recurse of schema Schema innerDestSchema = createSchemaHelper(innerSrcSchema, child); Field innerDestField = new Field(innerSrcField.name(), innerDestSchema, innerSrcField.doc(), innerSrcField.defaultValue()); newFields.add(innerDestField); } } newRecord.setFields(newFields); return newRecord; }
45. AvroDrillTable#getRowType()
Project: drill
File: AvroDrillTable.java
@Override public RelDataType getRowType(RelDataTypeFactory typeFactory) { List<RelDataType> typeList = Lists.newArrayList(); List<String> fieldNameList = Lists.newArrayList(); Schema schema = reader.getSchema(); for (Field field : schema.getFields()) { fieldNameList.add(field.name()); typeList.add(getNullableRelDataTypeFromAvroType(typeFactory, field.schema())); } return typeFactory.createStructType(typeList, fieldNameList); }
46. AvroSource#createDatumReader()
Project: DataflowJavaSDK
File: AvroSource.java
private DatumReader<T> createDatumReader() { Schema readSchema = getReadSchema(); Schema fileSchema = getFileSchema(); Preconditions.checkNotNull(readSchema, "No read schema has been initialized for source %s", this); Preconditions.checkNotNull(fileSchema, "No file schema has been initialized for source %s", this); if (type == GenericRecord.class) { return new GenericDatumReader<>(fileSchema, readSchema); } else { return new ReflectDatumReader<>(fileSchema, readSchema); } }
47. AvroHiveSchemaGenerator#inferSchema()
Project: datacollector
File: AvroHiveSchemaGenerator.java
/** * It takes a record structure in <String, HiveTypeInfo> format. * Generate a schema and return in String. * @param record : record structure * @return String representation of Avro schema. * @throws StageException: If record contains unsupported type */ @Override public String inferSchema(Map<String, HiveTypeInfo> record) throws StageException { Map<String, Schema> fields = new LinkedHashMap(); for (Map.Entry<String, HiveTypeInfo> pair : record.entrySet()) { if (!HiveMetastoreUtil.validateColumnName(pair.getKey())) { throw new HiveStageCheckedException(Errors.HIVE_30, pair.getKey()); } Schema columnSchema = Schema.createUnion(ImmutableList.of(Schema.create(Schema.Type.NULL), traverse(pair))); // We always set default value to null columnSchema.addProp("default", NullNode.getInstance()); fields.put(pair.getKey(), columnSchema); } Schema schema = buildSchema(fields); return schema.toString(); }
48. AvroTypeUtil#bestEffortResolve()
Project: datacollector
File: AvroTypeUtil.java
public static Schema bestEffortResolve(Schema schema, Field field, Object value) { // Go over the types in the union one by one and try to match the field type with the schema. // First schema type which is a match is considered as the target schema. Schema match = null; for (Schema unionType : schema.getTypes()) { if (schemaMatch(unionType, field, value)) { match = unionType; break; } } return match; }
49. AvroSchemaGenerator#buildSchema()
Project: datacollector
File: AvroSchemaGenerator.java
// Build a schema with type "record". This will be the top level schema and contains fields public static Schema buildSchema(Map<String, Schema> fields, Object... levels) { List<Schema.Field> recordFields = new ArrayList<>(fields.size()); for (Map.Entry<String, Schema> entry : fields.entrySet()) { recordFields.add(new Schema.Field(entry.getKey(), entry.getValue(), // Avro's Schema.Field constructor requires doc. null, entry.getValue().getJsonProp("default"))); } Schema recordSchema; if (levels.length == 0) { recordSchema = Schema.createRecord(schemaName, null, null, false); } else { LinkedList<String> lvl = (LinkedList<String>) levels[0]; recordSchema = Schema.createRecord(joiner.join(lvl), null, null, false); } recordSchema.setFields(recordFields); return recordSchema; }
50. UnionFieldGenerate#getUnionFieldFiller()
Project: databus
File: UnionFieldGenerate.java
public SchemaFiller getUnionFieldFiller() throws UnknownTypeException { List<Schema> schemas = field.schema().getTypes(); Schema schema = null; for (Schema s : schemas) { schema = s; if (schema.getType() != Schema.Type.NULL) break; } Field tempField = new Field(field.name(), schema, null, null); return SchemaFiller.createRandomField(tempField); }
51. AvroBackwardsCompatibilityTest#testAvroSchemaEvolutionWrite()
Project: voldemort
File: AvroBackwardsCompatibilityTest.java
/* * This tests if a client tries to serialize an object created using an old * schema is successful or not */ @Test public void testAvroSchemaEvolutionWrite() throws IOException { String versionZero = "{\"type\": \"record\", \"name\": \"myrec\",\"fields\": [{ \"name\": \"original\", \"type\": \"string\" }]}"; String versionOne = "{\"type\": \"record\", \"name\": \"myrec\",\"fields\": [{ \"name\": \"original\", \"type\": \"string\" } ," + "{ \"name\": \"new-field\", \"type\": \"string\", \"default\":\"\" }]}"; Schema s0 = Schema.parse(versionZero); Schema s1 = Schema.parse(versionOne); Map<Integer, String> versions = new HashMap<Integer, String>(); versions.put(0, versionZero); versions.put(1, versionOne); byte[] versionZeroBytes = writeVersion0with1Present(versions, s0); }
52. AvroBackwardsCompatibilityTest#testAvroSchemaEvolution()
Project: voldemort
File: AvroBackwardsCompatibilityTest.java
/* * This tests if a client tries to deserialize an object created using an * old schema is successful or not */ @Test public void testAvroSchemaEvolution() throws IOException { String versionZero = "{\"type\": \"record\", \"name\": \"myrec\",\"fields\": [{ \"name\": \"original\", \"type\": \"string\" }]}"; String versionOne = "{\"type\": \"record\", \"name\": \"myrec\",\"fields\": [{ \"name\": \"original\", \"type\": \"string\" } ," + "{ \"name\": \"new-field\", \"type\": \"string\", \"default\":\"\" }]}"; Schema s0 = Schema.parse(versionZero); Schema s1 = Schema.parse(versionOne); Map<Integer, String> versions = new HashMap<Integer, String>(); versions.put(0, versionZero); versions.put(1, versionOne); byte[] versionZeroBytes = writeVersion0(s0); GenericData.Record record = (Record) readVersion0(versions, versionZeroBytes); }
53. TestParquetExport#testParquetRecordsNotSupported()
Project: sqoop
File: TestParquetExport.java
public void testParquetRecordsNotSupported() throws IOException, SQLException { String[] argv = {}; final int TOTAL_RECORDS = 1; Schema schema = Schema.createRecord("nestedrecord", null, null, false); schema.setFields(Lists.newArrayList(buildField("myint", Schema.Type.INT))); GenericRecord record = new GenericData.Record(schema); record.put("myint", 100); // DB type is not used so can be anything: ColumnGenerator gen = colGenerator(record, schema, null, "VARCHAR(64)"); createParquetFile(0, TOTAL_RECORDS, gen); createTable(gen); try { runExport(getArgv(true, 10, 10, newStrArray(argv, "-m", "" + 1))); fail("Parquet records can not be exported."); } catch (Exception e) { assertTrue(true); } }
54. TestParquetExport#buildSchema()
Project: sqoop
File: TestParquetExport.java
private Schema buildSchema(ColumnGenerator... extraCols) { List<Field> fields = new ArrayList<Field>(); fields.add(buildField("id", Schema.Type.INT)); fields.add(buildField("msg", Schema.Type.STRING)); int colNum = 0; if (null != extraCols) { for (ColumnGenerator gen : extraCols) { if (gen.getColumnParquetSchema() != null) { fields.add(buildParquetField(forIdx(colNum++), gen.getColumnParquetSchema())); } } } Schema schema = Schema.createRecord("myschema", null, null, false); schema.setFields(fields); return schema; }
55. TestAvroExport#testAvroRecordsNotSupported()
Project: sqoop
File: TestAvroExport.java
public void testAvroRecordsNotSupported() throws IOException, SQLException { String[] argv = {}; final int TOTAL_RECORDS = 1; Schema schema = Schema.createRecord("nestedrecord", null, null, false); schema.setFields(Lists.newArrayList(buildAvroField("myint", Schema.Type.INT))); GenericRecord record = new GenericData.Record(schema); record.put("myint", 100); // DB type is not used so can be anything: ColumnGenerator gen = colGenerator(record, schema, null, "VARCHAR(64)"); createAvroFile(0, TOTAL_RECORDS, gen); createTable(gen); try { runExport(getArgv(true, 10, 10, newStrArray(argv, "-m", "" + 1))); fail("Avro records can not be exported."); } catch (Exception e) { assertTrue(true); } }
56. TestAvroExport#buildAvroSchema()
Project: sqoop
File: TestAvroExport.java
private Schema buildAvroSchema(ColumnGenerator... extraCols) { List<Field> fields = new ArrayList<Field>(); fields.add(buildAvroField("id", Schema.Type.INT)); fields.add(buildAvroField("msg", Schema.Type.STRING)); int colNum = 0; // Issue [SQOOP-2846] if (null != extraCols) { for (ColumnGenerator gen : extraCols) { if (gen.getColumnAvroSchema() != null) { fields.add(buildAvroField(forIdx(colNum++), gen.getColumnAvroSchema())); } } } Schema schema = Schema.createRecord("myschema", null, null, false); schema.setFields(fields); return schema; }
57. MergeJob#configueAvroMergeJob()
Project: sqoop
File: MergeJob.java
private void configueAvroMergeJob(Configuration conf, Job job, Path oldPath, Path newPath) throws IOException { LOG.info("Trying to merge avro files"); final Schema oldPathSchema = AvroUtil.getAvroSchema(oldPath, conf); final Schema newPathSchema = AvroUtil.getAvroSchema(newPath, conf); if (oldPathSchema == null || newPathSchema == null || !oldPathSchema.equals(newPathSchema)) { throw new IOException("Invalid schema for input directories. Schema for old data: [" + oldPathSchema + "]. Schema for new data: [" + newPathSchema + "]"); } LOG.debug("Avro Schema:" + oldPathSchema); job.setInputFormatClass(AvroInputFormat.class); job.setOutputFormatClass(AvroOutputFormat.class); job.setMapperClass(MergeAvroMapper.class); job.setReducerClass(MergeAvroReducer.class); AvroJob.setOutputSchema(job.getConfiguration(), oldPathSchema); }
58. MergeAvroMapper#toSqoopRecord()
Project: sqoop
File: MergeAvroMapper.java
private SqoopRecord toSqoopRecord(GenericRecord genericRecord) throws IOException { Schema avroSchema = genericRecord.getSchema(); for (Schema.Field field : avroSchema.getFields()) { Pair<String, String> sqoopRecordField = sqoopRecordFields.get(field.name().toLowerCase()); if (null == sqoopRecordField) { throw new IOException("Cannot find field '" + field.name() + "' in fields of user class" + sqoopRecordImpl.getClass().getName() + ". Fields are: " + Arrays.deepToString(sqoopRecordFields.values().toArray())); } Object avroObject = genericRecord.get(field.name()); Object fieldVal = AvroUtil.fromAvro(avroObject, field.schema(), sqoopRecordField.value()); sqoopRecordImpl.setField(sqoopRecordField.key(), fieldVal); } return sqoopRecordImpl; }
59. GenerateDictionary#getSchema()
Project: Cubert
File: GenerateDictionary.java
public static Schema getSchema() { Field[] fields = { new Schema.Field("colname", Schema.create(Type.STRING), null, null), new Schema.Field("colvalue", Schema.create(Type.STRING), null, null), new Schema.Field("code", Schema.create(Type.INT), null, null) }; Schema schema = Schema.createRecord("dictionary", null, null, false); schema.setFields(Arrays.asList(fields)); return schema; }
60. Avros#createTupleSchema()
Project: crunch
File: Avros.java
private static Schema createTupleSchema(PType<?>... ptypes) { // Guarantee each tuple schema has a globally unique name String tupleName = "tuple" + UUID.randomUUID().toString().replace('-', 'x'); Schema schema = Schema.createRecord(tupleName, "", "crunch", false); List<Schema.Field> fields = Lists.newArrayList(); for (int i = 0; i < ptypes.length; i++) { AvroType atype = (AvroType) ptypes[i]; Schema fieldSchema = allowNulls(atype.getSchema()); fields.add(new Schema.Field("v" + i, fieldSchema, "", null)); } schema.setFields(fields); return schema; }
61. SpecificAvroDao#buildCompositeDaoWithEntityManager()
Project: cdk
File: SpecificAvroDao.java
/** * Create a CompositeDao, which will return SpecificRecord instances * represented by the entitySchemaString avro schema. This avro schema must be * a composition of the schemas in the subEntitySchemaStrings list. * * @param tablePool * An HTabePool instance to use for connecting to HBase. * @param tableName * The table name of the managed schema. * @param entityClass * The class that is the composite record, which is made up of fields * referencing the sub records. * @param schemaManager * The SchemaManager which will use to create the entity mapper that * will power this dao. * @return The CompositeDao instance. * @throws SchemaNotFoundException */ public static <K extends SpecificRecord, E extends SpecificRecord, S extends SpecificRecord> Dao<E> buildCompositeDaoWithEntityManager(HTablePool tablePool, String tableName, Class<E> entityClass, SchemaManager schemaManager) { Schema entitySchema = getSchemaFromEntityClass(entityClass); List<EntityMapper<S>> entityMappers = new ArrayList<EntityMapper<S>>(); for (Schema.Field field : entitySchema.getFields()) { entityMappers.add(new VersionedAvroEntityMapper.Builder().setSchemaManager(schemaManager).setTableName(tableName).setEntityName(getSchemaName(field.schema())).setSpecific(true).<S>build()); } return new SpecificCompositeAvroDao<E, S>(tablePool, tableName, entityMappers, entityClass); }
62. TestFileSystemDatasetReader#testEvolvedSchema()
Project: cdk
File: TestFileSystemDatasetReader.java
@Test public void testEvolvedSchema() throws IOException { Schema schema = Schema.createRecord("mystring", null, null, false); schema.setFields(Lists.newArrayList(new Field("text", Schema.create(Type.STRING), null, null), new Field("text2", Schema.create(Type.STRING), null, JsonNodeFactory.instance.textNode("N/A")))); FileSystemDatasetReader<Record> reader = new FileSystemDatasetReader<Record>(fileSystem, new Path(Resources.getResource("data/strings-100.avro").getFile()), schema); checkReaderBehavior(reader, 100, new RecordValidator<Record>() { @Override public void validate(Record record, int recordNum) { Assert.assertEquals(String.valueOf(recordNum), record.get("text")); Assert.assertEquals("N/A", record.get("text2")); } }); }
63. FileWriterHelper#generateAvroFile()
Project: cdap
File: FileWriterHelper.java
/** * Generate an Avro file of schema (key String, value String) containing the records ("<prefix>i", "#i") * for start <= i < end. The file is written using the passed-in output stream. */ public static void generateAvroFile(OutputStream out, String prefix, int start, int end) throws IOException { Schema schema = Schema.createRecord("kv", null, null, false); schema.setFields(ImmutableList.of(new Schema.Field("key", Schema.create(Schema.Type.STRING), null, null), new Schema.Field("value", Schema.create(Schema.Type.STRING), null, null))); DatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<>(schema); DataFileWriter<GenericRecord> dataFileWriter = new DataFileWriter<>(datumWriter); dataFileWriter.create(schema, out); try { for (int i = start; i < end; i++) { GenericRecord kv = new GenericData.Record(schema); kv.put("key", prefix + i); kv.put("value", "#" + i); dataFileWriter.append(kv); } } finally { Closeables.closeQuietly(dataFileWriter); Closeables.closeQuietly(out); } }
64. AvroStreamBodyConsumerTest#generateAvroFile()
Project: cdap
File: AvroStreamBodyConsumerTest.java
private File generateAvroFile(File file, int recordCount) throws IOException { Schema schema = Schema.createRecord("Record", null, null, false); schema.setFields(ImmutableList.of(new Schema.Field("id", Schema.create(Schema.Type.INT), null, null), new Schema.Field("name", Schema.createUnion(ImmutableList.of(Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING))), null, null))); DataFileWriter<Record> writer = new DataFileWriter<>(new ReflectDatumWriter<>(Record.class)); try { writer.setCodec(CodecFactory.snappyCodec()); writer.create(schema, file); for (int i = 0; i < recordCount; i++) { writer.append(new Record(i, "Record number " + i)); } } finally { writer.close(); } return file; }
65. TestGenericLogicalTypes#testReadUUID()
Project: parquet-mr
File: TestGenericLogicalTypes.java
@Test public void testReadUUID() throws IOException { Schema uuidSchema = record("R", field("uuid", LogicalTypes.uuid().addToSchema(Schema.create(STRING)))); GenericRecord u1 = instance(uuidSchema, "uuid", UUID.randomUUID()); GenericRecord u2 = instance(uuidSchema, "uuid", UUID.randomUUID()); Schema stringSchema = record("R", field("uuid", Schema.create(STRING))); GenericRecord s1 = instance(stringSchema, "uuid", u1.get("uuid").toString()); GenericRecord s2 = instance(stringSchema, "uuid", u2.get("uuid").toString()); File test = write(stringSchema, s1, s2); Assert.assertEquals("Should convert Strings to UUIDs", Arrays.asList(u1, u2), read(GENERIC, uuidSchema, test)); }
66. AvroSchemaConverter#convertFields()
Project: parquet-mr
File: AvroSchemaConverter.java
private Schema convertFields(String name, List<Type> parquetFields) { List<Schema.Field> fields = new ArrayList<Schema.Field>(); for (Type parquetType : parquetFields) { Schema fieldSchema = convertField(parquetType); if (parquetType.isRepetition(REPEATED)) { throw new UnsupportedOperationException("REPEATED not supported outside LIST or MAP. Type: " + parquetType); } else if (parquetType.isRepetition(Type.Repetition.OPTIONAL)) { fields.add(new Schema.Field(parquetType.getName(), optional(fieldSchema), null, NULL_VALUE)); } else { // REQUIRED fields.add(new Schema.Field(parquetType.getName(), fieldSchema, null, (Object) null)); } } Schema schema = Schema.createRecord(name, null, null, false); schema.setFields(fields); return schema; }
67. Utility#createAddressSchema()
Project: krati
File: Utility.java
public static Schema createAddressSchema() { List<Field> fields = new ArrayList<Field>(); fields.add(new Field("id", Schema.create(Type.INT), null, null)); fields.add(new Field("city", Schema.create(Type.STRING), null, null)); fields.add(new Field("state", Schema.create(Type.STRING), null, null)); fields.add(new Field("country", Schema.create(Type.STRING), null, null)); fields.add(new Field("postal_code", Schema.create(Type.STRING), null, null)); Schema schema = Schema.createRecord("Address", null, "avro.test", false); schema.setFields(fields); return schema; }
68. Utility#createPersonSchema()
Project: krati
File: Utility.java
public static Schema createPersonSchema() { List<Field> fields = new ArrayList<Field>(); fields.add(new Field("id", Schema.create(Type.INT), null, null)); fields.add(new Field("age", Schema.create(Type.INT), null, null)); fields.add(new Field("fname", Schema.create(Type.STRING), null, null)); fields.add(new Field("lname", Schema.create(Type.STRING), null, null)); Schema schema = Schema.createRecord("Person", null, "avro.test", false); schema.setFields(fields); return schema; }
69. TestGenericRecordSerializer#createSchemaV2()
Project: krati
File: TestGenericRecordSerializer.java
static Schema createSchemaV2() { List<Field> fields = new ArrayList<Field>(); fields.add(new Field("id", Schema.create(Type.INT), null, null)); fields.add(new Field("age", Schema.create(Type.INT), null, null)); fields.add(new Field("fname", Schema.create(Type.STRING), null, null)); fields.add(new Field("lname", Schema.create(Type.STRING), null, null)); fields.add(new Field("title", Schema.create(Type.STRING), null, new TextNode(""))); Schema schema = Schema.createRecord("Person", null, "avro.test", false); schema.setFields(fields); return schema; }
70. TestGenericRecordSerializer#createSchemaV1()
Project: krati
File: TestGenericRecordSerializer.java
static Schema createSchemaV1() { List<Field> fields = new ArrayList<Field>(); fields.add(new Field("id", Schema.create(Type.INT), null, null)); fields.add(new Field("age", Schema.create(Type.INT), null, null)); fields.add(new Field("fname", Schema.create(Type.STRING), null, null)); fields.add(new Field("lname", Schema.create(Type.STRING), null, null)); Schema schema = Schema.createRecord("Person", null, "avro.test", false); schema.setFields(fields); return schema; }
71. AbstractTestAvroStoreJoiner#createAddressSchema()
Project: krati
File: AbstractTestAvroStoreJoiner.java
/** * Creates the Avro schema for the Address store. */ static Schema createAddressSchema() { List<Field> fields = new ArrayList<Field>(); fields.add(new Field("id", Schema.create(Type.INT), null, null)); fields.add(new Field("city", Schema.create(Type.STRING), null, null)); fields.add(new Field("state", Schema.create(Type.STRING), null, null)); fields.add(new Field("country", Schema.create(Type.STRING), null, null)); fields.add(new Field("postal_code", Schema.create(Type.STRING), null, null)); Schema schema = Schema.createRecord("Address", null, "avro.test", false); schema.setFields(fields); return schema; }
72. AbstractTestAvroStoreJoiner#createPersonSchema()
Project: krati
File: AbstractTestAvroStoreJoiner.java
/** * Creates the Avro schema for the Person store. */ static Schema createPersonSchema() { List<Field> fields = new ArrayList<Field>(); fields.add(new Field("id", Schema.create(Type.INT), null, null)); fields.add(new Field("age", Schema.create(Type.INT), null, null)); fields.add(new Field("fname", Schema.create(Type.STRING), null, null)); fields.add(new Field("lname", Schema.create(Type.STRING), null, null)); Schema schema = Schema.createRecord("Person", null, "avro.test", false); schema.setFields(fields); return schema; }
73. AvroStoreJoiner#initJoinerSchema()
Project: krati
File: AvroStoreJoiner.java
protected Schema initJoinerSchema() { List<String> sourceList = new ArrayList<String>(sources()); Collections.sort(sourceList); List<Field> fields = new ArrayList<Field>(); for (String source : sourceList) { List<Schema> l = new ArrayList<Schema>(); l.add(Schema.create(Type.NULL)); l.add(getSchema(source)); fields.add(new Field(source, Schema.createUnion(l), null, null)); } Schema schema = Schema.createRecord(getName(), null, getNamespace(), false); schema.setFields(fields); return schema; }
74. MultiTenantStoreHttpClientPerson#createSchema()
Project: krati
File: MultiTenantStoreHttpClientPerson.java
static Schema createSchema() { List<Field> fields = new ArrayList<Field>(); fields.add(new Field("id", Schema.create(Type.INT), null, null)); fields.add(new Field("age", Schema.create(Type.INT), null, null)); fields.add(new Field("fname", Schema.create(Type.STRING), null, null)); fields.add(new Field("lname", Schema.create(Type.STRING), null, null)); Schema schema = Schema.createRecord("Person", null, "avro.test", false); schema.setFields(fields); return schema; }
75. MultiTenantStoreHttpClientAddress#createSchema()
Project: krati
File: MultiTenantStoreHttpClientAddress.java
static Schema createSchema() { List<Field> fields = new ArrayList<Field>(); fields.add(new Field("id", Schema.create(Type.INT), null, null)); fields.add(new Field("city", Schema.create(Type.STRING), null, null)); fields.add(new Field("state", Schema.create(Type.STRING), null, null)); fields.add(new Field("country", Schema.create(Type.STRING), null, null)); fields.add(new Field("postal_code", Schema.create(Type.STRING), null, null)); Schema schema = Schema.createRecord("Address", null, "avro.test", false); schema.setFields(fields); return schema; }
76. AvroStoreJoinerHttpServer#createAddressSchema()
Project: krati
File: AvroStoreJoinerHttpServer.java
/** * Creates the Avro schema for the Address store. */ static Schema createAddressSchema() { List<Field> fields = new ArrayList<Field>(); fields.add(new Field("id", Schema.create(Type.INT), null, null)); fields.add(new Field("city", Schema.create(Type.STRING), null, null)); fields.add(new Field("state", Schema.create(Type.STRING), null, null)); fields.add(new Field("country", Schema.create(Type.STRING), null, null)); fields.add(new Field("postal_code", Schema.create(Type.STRING), null, null)); Schema schema = Schema.createRecord("Address", null, "avro.test", false); schema.setFields(fields); return schema; }
77. AvroStoreJoinerHttpServer#createPersonSchema()
Project: krati
File: AvroStoreJoinerHttpServer.java
/** * Creates the Avro schema for the Person store. */ static Schema createPersonSchema() { List<Field> fields = new ArrayList<Field>(); fields.add(new Field("id", Schema.create(Type.INT), null, null)); fields.add(new Field("age", Schema.create(Type.INT), null, null)); fields.add(new Field("fname", Schema.create(Type.STRING), null, null)); fields.add(new Field("lname", Schema.create(Type.STRING), null, null)); Schema schema = Schema.createRecord("Person", null, "avro.test", false); schema.setFields(fields); return schema; }
78. SingleAvroStoreHttpServer#createSchema()
Project: krati
File: SingleAvroStoreHttpServer.java
/** Builds the Avro record schema for a Person record in the "avro.test" namespace. */
static Schema createSchema() {
    List<Field> personFields = new ArrayList<Field>();
    personFields.add(new Field("id", Schema.create(Type.INT), null, null));
    personFields.add(new Field("age", Schema.create(Type.INT), null, null));
    // First and last name are the two string-typed columns.
    for (String fieldName : new String[] { "fname", "lname" }) {
        personFields.add(new Field(fieldName, Schema.create(Type.STRING), null, null));
    }
    Schema personSchema = Schema.createRecord("Person", null, "avro.test", false);
    personSchema.setFields(personFields);
    return personSchema;
}
79. AvroStoreBus2HttpServer#createAddressSchema()
Project: krati
File: AvroStoreBus2HttpServer.java
/** Creates the Avro schema for the Address store ("avro.test" namespace). */
static Schema createAddressSchema() {
    List<Field> addressFields = new ArrayList<Field>();
    // id first, then the string-typed address columns, preserving positional order.
    addressFields.add(new Field("id", Schema.create(Type.INT), null, null));
    for (String fieldName : new String[] { "city", "state", "country", "postal_code" }) {
        addressFields.add(new Field(fieldName, Schema.create(Type.STRING), null, null));
    }
    Schema addressSchema = Schema.createRecord("Address", null, "avro.test", false);
    addressSchema.setFields(addressFields);
    return addressSchema;
}
80. AvroStoreBus2HttpServer#createPersonSchema()
Project: krati
File: AvroStoreBus2HttpServer.java
/** Creates the Avro schema for the Person store ("avro.test" namespace). */
static Schema createPersonSchema() {
    List<Field> personFields = new ArrayList<Field>();
    personFields.add(new Field("id", Schema.create(Type.INT), null, null));
    personFields.add(new Field("age", Schema.create(Type.INT), null, null));
    // First and last name are the two string-typed columns.
    for (String fieldName : new String[] { "fname", "lname" }) {
        personFields.add(new Field(fieldName, Schema.create(Type.STRING), null, null));
    }
    Schema personSchema = Schema.createRecord("Person", null, "avro.test", false);
    personSchema.setFields(personFields);
    return personSchema;
}
81. AvroStoreBusHttpServer#createAddressSchema()
Project: krati
File: AvroStoreBusHttpServer.java
/** Creates the Avro schema for the Address store ("avro.test" namespace). */
static Schema createAddressSchema() {
    List<Field> addressFields = new ArrayList<Field>();
    // id first, then the string-typed address columns, preserving positional order.
    addressFields.add(new Field("id", Schema.create(Type.INT), null, null));
    for (String fieldName : new String[] { "city", "state", "country", "postal_code" }) {
        addressFields.add(new Field(fieldName, Schema.create(Type.STRING), null, null));
    }
    Schema addressSchema = Schema.createRecord("Address", null, "avro.test", false);
    addressSchema.setFields(addressFields);
    return addressSchema;
}
82. AvroStoreBusHttpServer#createPersonSchema()
Project: krati
File: AvroStoreBusHttpServer.java
/** Creates the Avro schema for the Person store ("avro.test" namespace). */
static Schema createPersonSchema() {
    List<Field> personFields = new ArrayList<Field>();
    personFields.add(new Field("id", Schema.create(Type.INT), null, null));
    personFields.add(new Field("age", Schema.create(Type.INT), null, null));
    // First and last name are the two string-typed columns.
    for (String fieldName : new String[] { "fname", "lname" }) {
        personFields.add(new Field(fieldName, Schema.create(Type.STRING), null, null));
    }
    Schema personSchema = Schema.createRecord("Person", null, "avro.test", false);
    personSchema.setFields(personFields);
    return personSchema;
}
83. TestCopyCommandClusterSchemaEvolution#createDestination()
Project: kite
File: TestCopyCommandClusterSchemaEvolution.java
/**
 * Reads the original schema from {@code avsc}, writes the evolved schema to
 * {@code evolvedAvsc}, then recreates the destination dataset with it and
 * wires up a fresh mocked console and CopyCommand.
 *
 * @throws Exception if the schema cannot be read/written or the CLI runs fail
 */
@Override
public void createDestination() throws Exception {
    // try-with-resources guarantees both streams are closed even when parsing
    // or writing throws; the original code leaked them on exception.
    Schema original;
    try (FileInputStream schemaIn = new FileInputStream(avsc)) {
        original = new Schema.Parser().parse(schemaIn);
    }
    Schema evolved = getEvolvedSchema(original);
    try (FileOutputStream schemaOut = new FileOutputStream(evolvedAvsc)) {
        schemaOut.write(evolved.toString(true).getBytes());
    }
    List<String> createArgs = Lists.newArrayList(
        "create", dest, "-s", evolvedAvsc, "-r", repoUri, "-d", "target/data");
    createArgs.addAll(getExtraCreateArgs());
    // Delete first so the create starts from a clean slate.
    TestUtil.run(LoggerFactory.getLogger(this.getClass()),
        "delete", dest, "-r", repoUri, "-d", "target/data");
    TestUtil.run(LoggerFactory.getLogger(this.getClass()),
        createArgs.toArray(new String[createArgs.size()]));
    this.console = mock(Logger.class);
    this.command = new CopyCommand(console);
    command.setConf(new Configuration());
}
84. TestCopyCommandClusterNewField#getEvolvedSchema()
Project: kite
File: TestCopyCommandClusterNewField.java
/**
 * Returns a copy of {@code original} with a new optional string field named
 * "new" prepended; all existing fields are preserved in order.
 */
@Override
public Schema getEvolvedSchema(Schema original) {
    Schema optionalString = Schema.createUnion(ImmutableList.of(
        Schema.create(Schema.Type.NULL), Schema.create(Schema.Type.STRING)));
    List<Schema.Field> evolvedFields = Lists.newArrayList();
    evolvedFields.add(new Schema.Field("new", optionalString, "New field", NullNode.getInstance()));
    // Schema.Field instances cannot be reused across schemas, so rebuild each one.
    for (Schema.Field f : original.getFields()) {
        evolvedFields.add(new Schema.Field(f.name(), f.schema(), f.doc(), f.defaultValue()));
    }
    Schema evolved = Schema.createRecord(
        original.getName(), original.getDoc(), original.getNamespace(), false);
    evolved.setFields(evolvedFields);
    return evolved;
}
85. ToUpperCase#map()
Project: kite
File: ToUpperCase.java
@Override public GenericRecord map(GenericRecord input) { Schema schema = input.getSchema(); for (Schema.Field field : schema.getFields()) { Object value = input.get(field.name()); if (value instanceof String || value instanceof Utf8) { // replace with upper case input.put(field.name(), value.toString().toUpperCase()); } } return input; }
86. HiveSchemaConverter#convert()
Project: kite
File: HiveSchemaConverter.java
/**
 * Converts a Hive struct type into an Avro record schema, recursing into
 * each member via {@code convertField}.
 */
private static Schema convert(LinkedList<String> path, String name, StructTypeInfo type, Collection<String[]> required) {
    List<String> memberNames = type.getAllStructFieldNames();
    List<TypeInfo> memberTypes = type.getAllStructFieldTypeInfos();
    Preconditions.checkArgument(memberNames.size() == memberTypes.size(),
        "Cannot convert struct: %s names != %s types",
        memberNames.size(), memberTypes.size());
    List<Schema.Field> avroFields = Lists.newArrayList();
    for (int i = 0; i < memberNames.size(); i += 1) {
        // Push this struct's name so nested members see their full path.
        path.addLast(name);
        avroFields.add(convertField(path, memberNames.get(i), memberTypes.get(i), required));
        path.removeLast();
    }
    Schema recordSchema = Schema.createRecord(name, doc(type), null, false);
    recordSchema.setFields(avroFields);
    return recordSchema;
}
87. HiveSchemaConverter#convertTable()
Project: kite
File: HiveSchemaConverter.java
public static Schema convertTable(String table, Collection<FieldSchema> columns, @Nullable PartitionStrategy strategy) { ArrayList<String> fieldNames = Lists.newArrayList(); ArrayList<TypeInfo> fieldTypes = Lists.newArrayList(); LinkedList<String> start = Lists.newLinkedList(); Collection<String[]> requiredFields = requiredFields(strategy); List<Schema.Field> fields = Lists.newArrayList(); for (FieldSchema column : columns) { // pass null for the initial path to exclude the table name TypeInfo type = parseTypeInfo(column.getType()); fieldNames.add(column.getName()); fieldTypes.add(type); fields.add(convertField(start, column.getName(), type, requiredFields)); } StructTypeInfo struct = new StructTypeInfo(); struct.setAllStructFieldNames(fieldNames); struct.setAllStructFieldTypeInfos(fieldTypes); Schema recordSchema = Schema.createRecord(table, doc(struct), null, false); recordSchema.setFields(fields); return recordSchema; }
88. SpecificAvroDao#buildCompositeDaoWithEntityManager()
Project: kite
File: SpecificAvroDao.java
/**
 * Create a CompositeDao which will return SpecificRecord instances of the
 * composite entity class. The entity class's Avro schema must be a composition
 * of the sub-entity schemas; one entity mapper is built per composite field.
 *
 * @param tablePool An HTablePool instance to use for connecting to HBase.
 * @param tableName The table name of the managed schema.
 * @param entityClass The class that is the composite record, which is made up
 *     of fields referencing the sub records.
 * @param schemaManager The SchemaManager which will use to create the entity
 *     mapper that will power this dao.
 * @return The CompositeDao instance.
 * @throws SchemaNotFoundException
 */
public static <K extends SpecificRecord, E extends SpecificRecord, S extends SpecificRecord> Dao<E> buildCompositeDaoWithEntityManager(
    HTablePool tablePool, String tableName, Class<E> entityClass, SchemaManager schemaManager) {
  Schema entitySchema = getSchemaFromEntityClass(entityClass);
  List<EntityMapper<S>> subEntityMappers = new ArrayList<EntityMapper<S>>();
  // Each field of the composite schema references a managed sub-entity;
  // build a versioned mapper for every one of them.
  for (Schema.Field field : entitySchema.getFields()) {
    subEntityMappers.add(new VersionedAvroEntityMapper.Builder()
        .setSchemaManager(schemaManager)
        .setTableName(tableName)
        .setEntityName(getSchemaName(field.schema()))
        .setSpecific(true)
        .<S>build());
  }
  return new SpecificCompositeAvroDao<E, S>(tablePool, tableName, subEntityMappers, entityClass);
}
89. TestPartitionStrategyParser#testAddEmbeddedPartitionStrategy()
Project: kite
File: TestPartitionStrategyParser.java
/** Verifies that an embedded partition strategy round-trips through the schema. */
@Test
public void testAddEmbeddedPartitionStrategy() {
    PartitionStrategy strategy = new PartitionStrategy.Builder()
        .hash("username", 16)
        .identity("username", "u")
        .build();
    String userSchemaJson = "{"
        + " \"type\": \"record\","
        + " \"name\": \"User\","
        + " \"fields\": ["
        + " {\"name\": \"id\", \"type\": \"long\"},"
        + " {\"name\": \"username\", \"type\": \"string\"},"
        + " {\"name\": \"real_name\", \"type\": \"string\"}"
        + " ]"
        + "}";
    Schema original = new Schema.Parser().parse(userSchemaJson);
    Schema embedded = PartitionStrategyParser.embedPartitionStrategy(original, strategy);
    Assert.assertTrue(PartitionStrategyParser.hasEmbeddedStrategy(embedded));
    Assert.assertEquals(strategy, PartitionStrategyParser.parseFromSchema(embedded));
}
90. CassandraStore#put()
Project: gora
File: CassandraStore.java
/** * When doing the * {@link org.apache.gora.cassandra.store.CassandraStore#put(Object, PersistentBase)} * operation, the logic is as follows: * <ol> * <li>Obtain the Avro {@link org.apache.avro.Schema} for the object.</li> * <li>Create a new duplicate instance of the object (explained in more detail below) **.</li> * <li>Obtain a {@link java.util.List} of the {@link org.apache.avro.Schema} * {@link org.apache.avro.Schema.Field}'s.</li> * <li>Iterate through the field {@link java.util.List}. This allows us to * consequently process each item.</li> * <li>Check to see if the {@link org.apache.avro.Schema.Field} is NOT dirty. * If this condition is true then we DO NOT process this field.</li> * <li>Obtain the element at the specified position in this list so we can * directly operate on it.</li> * <li>Obtain the {@link org.apache.avro.Schema.Type} of the element obtained * above and process it accordingly. N.B. For nested type ARRAY, MAP * RECORD or UNION, we shadow the checks in bullet point 5 above to infer that the * {@link org.apache.avro.Schema.Field} is either at * position 0 OR it is NOT dirty. If one of these conditions is true then we DO NOT * process this field. This is carried out in * {@link org.apache.gora.cassandra.store.CassandraStore#getFieldValue(Schema, Type, Object)}</li> * <li>We then insert the Key and Object into the {@link java.util.LinkedHashMap} buffer * before being flushed. This performs a structural modification of the map.</li> * </ol> * ** We create a duplicate instance of the object to be persisted and insert processed * objects into a synchronized {@link java.util.LinkedHashMap}. This allows * us to keep all the objects in memory till flushing. * * @param key for the Avro Record (object). 
* @param value Record object to be persisted in Cassandra * @see org.apache.gora.store.DataStore#put(java.lang.Object,org.apache.gora.persistency.Persistent) */ @Override public void put(K key, T value) { Schema schema = value.getSchema(); @SuppressWarnings("unchecked") T p = (T) SpecificData.get().newRecord(value, schema); List<Field> fields = schema.getFields(); for (int i = 1; i < fields.size(); i++) { if (!value.isDirty(i)) { continue; } Field field = fields.get(i); Type type = field.schema().getType(); Object fieldValue = value.get(field.pos()); Schema fieldSchema = field.schema(); // check if field has a nested structure (array, map, record or union) fieldValue = getFieldValue(fieldSchema, type, fieldValue); p.put(field.pos(), fieldValue); } // this performs a structural modification of the map this.buffer.put(key, p); }
91. CassandraClient#getReverseMap()
Project: gora
File: CassandraClient.java
/**
 * Maps fully qualified column names ("family:column") back to the query
 * fields they come from.
 *
 * @param query the query whose fields are mapped
 * @return a map whose keys are the fully qualified column names and whose
 *         values are the corresponding query fields
 */
public Map<String, String> getReverseMap(Query<K, T> query) {
    Map<String, String> reverse = new HashMap<>();
    Schema persistentSchema = query.getDataStore().newPersistent().getSchema();
    for (String field : query.getFields()) {
        String family = this.getMappingFamily(field);
        String column = this.getMappingColumn(field);
        // Union-typed fields carry an extra companion column.
        boolean isUnion = persistentSchema.getField(field).schema().getType() == Type.UNION;
        if (isUnion) {
            reverse.put(family + ":" + field + CassandraStore.UNION_COL_SUFIX,
                field + CassandraStore.UNION_COL_SUFIX);
        }
        reverse.put(family + ":" + column, field);
    }
    return reverse;
}
92. CassandraClient#getFamilyMap()
Project: gora
File: CassandraClient.java
/** * Select the families that contain at least one column mapped to a query field. * @param query indicates the columns to select * @return a map which keys are the family names and values the * corresponding column names required to get all the query fields. */ public Map<String, List<String>> getFamilyMap(Query<K, T> query) { Map<String, List<String>> map = new HashMap<>(); Schema persistentSchema = query.getDataStore().newPersistent().getSchema(); for (String field : query.getFields()) { String family = this.getMappingFamily(field); String column = this.getMappingColumn(field); // check if the family value was already initialized List<String> list = map.get(family); if (list == null) { list = new ArrayList<>(); map.put(family, list); } if (persistentSchema.getField(field).schema().getType() == Type.UNION) list.add(field + CassandraStore.UNION_COL_SUFIX); if (column != null) { list.add(column); } } return map; }
93. CassandraSubColumn#getValue()
Project: gora
File: CassandraSubColumn.java
/**
 * Deserializes this column's raw bytes into a typed Object according to the
 * field schema; returns null when the column carries no value.
 * @see org.apache.gora.cassandra.query.CassandraColumn#getValue()
 */
public Object getValue() {
    Schema fieldSchema = getField().schema();
    ByteBuffer rawBytes = hColumn.getValue();
    if (rawBytes == null) {
        return null;
    }
    return getFieldValue(fieldSchema.getType(), fieldSchema, rawBytes);
}
94. AccumuloStore#fromBytes()
Project: gora
File: AccumuloStore.java
/**
 * Decodes {@code data} into an Object using {@code schema}. For a UNION
 * schema the leading varint index selects the member schema: a NULL member
 * yields null, otherwise the remaining bytes are decoded with the selected
 * member. Non-union schemas are decoded directly.
 *
 * @throws IOException if the union envelope cannot be decoded
 */
public Object fromBytes(Schema schema, byte[] data) throws IOException {
    // Default: decode with the given schema; unions override this below.
    // (The original kept two identical variables, fromSchema and
    // effectiveSchema, both reading possibleTypes.get(unionIndex).)
    Schema fromSchema = schema;
    if (schema.getType() == Type.UNION) {
        try {
            Decoder decoder = DecoderFactory.get().binaryDecoder(data, null);
            int unionIndex = decoder.readIndex();
            fromSchema = schema.getTypes().get(unionIndex);
            if (fromSchema.getType() == Type.NULL) {
                decoder.readNull();
                return null;
            }
            data = decoder.readBytes(null).array();
        } catch (IOException e) {
            // Log the full exception so the stack trace is preserved
            // (e.getMessage() alone loses it).
            LOG.error("Error decoding union type: ", e);
            throw new GoraException("Error decoding union type: ", e);
        }
    }
    return fromBytes(encoder, fromSchema, data);
}
95. AvroUtilsTest#testSerializeAsPath()
Project: gobblin
File: AvroUtilsTest.java
/** Checks path serialization of a record with and without sanitization / field names. */
@Test
public void testSerializeAsPath() throws Exception {
    String schemaJson = "{\"type\":\"record\", \"name\":\"test\", "
        + "\"fields\":["
        + "{\"name\": \"name\", \"type\": \"string\"}, "
        + "{\"name\": \"title\", \"type\": \"string\"}"
        + "]}";
    Schema schema = new Schema.Parser().parse(schemaJson);
    GenericRecord partition = new GenericData.Record(schema);
    // The name value contains every character class that serialization replaces.
    partition.put("name", "a/b:c\\d e");
    partition.put("title", "title");
    Assert.assertEquals(AvroUtils.serializeAsPath(partition, true, true),
        new Path("name=a_b_c_d_e/title=title"));
    Assert.assertEquals(AvroUtils.serializeAsPath(partition, false, true),
        new Path("a_b_c_d_e/title"));
    Assert.assertEquals(AvroUtils.serializeAsPath(partition, false, false),
        new Path("a/b_c_d_e/title"));
}
96. AvroUtils#removeUncomparableFieldsFromUnion()
Project: gobblin
File: AvroUtils.java
private static Optional<Schema> removeUncomparableFieldsFromUnion(Schema union, Set<Schema> processed) { Preconditions.checkArgument(union.getType() == Schema.Type.UNION); if (processed.contains(union)) { return Optional.absent(); } processed.add(union); List<Schema> newUnion = Lists.newArrayList(); for (Schema unionType : union.getTypes()) { Optional<Schema> newType = removeUncomparableFields(unionType, processed); if (newType.isPresent()) { newUnion.add(newType.get()); } } // Discard the union field if one or more types are removed from the union. if (newUnion.size() != union.getTypes().size()) { return Optional.absent(); } return Optional.of(Schema.createUnion(newUnion)); }
97. AvroUtils#getDirectorySchema()
Project: gobblin
File: AvroUtils.java
/**
 * Gets the latest (or oldest) Avro schema for a directory of avro files.
 *
 * @param directory the input dir that contains avro files
 * @param fs the {@link FileSystem} for the given directory.
 * @param latest true to return latest schema, false to return oldest schema
 * @return the latest/oldest schema in the directory, or null when the
 *         directory contains no avro files
 * @throws IOException if the schema cannot be read
 */
public static Schema getDirectorySchema(Path directory, FileSystem fs, boolean latest) throws IOException {
    Schema schema = null;
    try (Closer closer = Closer.create()) {
        List<FileStatus> avroFiles = getDirectorySchemaHelper(directory, fs);
        if (avroFiles == null || avroFiles.size() == 0) {
            LOG.warn("There is no previous avro file in the directory: " + directory);
        } else {
            // First element for "latest", last for "oldest" — ordering comes
            // from getDirectorySchemaHelper (presumably newest-first; confirm).
            FileStatus chosen = latest ? avroFiles.get(0) : avroFiles.get(avroFiles.size() - 1);
            LOG.debug("Path to get the avro schema: " + chosen);
            FsInput input = new FsInput(chosen.getPath(), fs.getConf());
            GenericDatumReader<GenericRecord> reader = new GenericDatumReader<>();
            // Registering with the Closer ensures the file reader is closed.
            schema = closer.register(new DataFileReader<>(input, reader)).getSchema();
        }
    } catch (IOException ioe) {
        throw new IOException("Cannot get the schema for directory " + directory, ioe);
    }
    return schema;
}
98. AvroFlattener#flattenUnion()
Project: gobblin
File: AvroFlattener.java
/*** * Flatten Union Schema * @param schema Union Schema to flatten * @param shouldPopulateLineage If lineage information should be tagged in the field, this is true when we are * un-nesting fields * @param flattenComplexTypes Flatten complex types recursively other than Record and Option * @return Flattened Union Schema */ private Schema flattenUnion(Schema schema, boolean shouldPopulateLineage, boolean flattenComplexTypes) { Preconditions.checkNotNull(schema); Preconditions.checkArgument(Schema.Type.UNION.equals(schema.getType())); Schema flattenedSchema; List<Schema> flattenedUnionMembers = new ArrayList<>(); if (null != schema.getTypes() && schema.getTypes().size() > 0) { for (Schema oldUnionMember : schema.getTypes()) { if (flattenComplexTypes) { // It's member might still recursively contain records flattenedUnionMembers.add(flatten(oldUnionMember, shouldPopulateLineage, flattenComplexTypes)); } else { flattenedUnionMembers.add(oldUnionMember); } } } flattenedSchema = Schema.createUnion(flattenedUnionMembers); return flattenedSchema; }
99. AvroFlattener#flatten()
Project: gobblin
File: AvroFlattener.java
/*** * Flatten the Schema to un-nest recursive Records (to make it optimal for ORC) * @param schema Avro Schema to flatten * @param flattenComplexTypes Flatten complex types recursively other than Record and Option * @return Flattened Avro Schema */ public Schema flatten(Schema schema, boolean flattenComplexTypes) { Preconditions.checkNotNull(schema); // To help make it configurable later this.flattenedNameJoiner = FLATTENED_NAME_JOINER; this.flattenedSourceJoiner = FLATTENED_SOURCE_JOINER; Schema flattenedSchema = flatten(schema, false, flattenComplexTypes); LOG.debug("Original Schema : " + schema); LOG.debug("Flattened Schema: " + flattenedSchema); return flattenedSchema; }
100. MultiConverterTest#testConversionWithMultiplicity()
Project: gobblin
File: MultiConverterTest.java
/** Two 2-way identity converters in the chain should fan one record out into 4. */
@Test
public void testConversionWithMultiplicity() throws Exception {
    MultiConverter multiConverter = new MultiConverter(Lists.newArrayList(
        new SchemaSimplificationConverter(),
        new MultiIdentityConverter(2),
        new MultiIdentityConverter(2),
        new TestConverter()));
    WorkUnitState workUnitState = new WorkUnitState();
    Schema schema = (Schema) multiConverter.convertSchema(TEST_SCHEMA, workUnitState);
    Iterable<Object> converted = multiConverter.convertRecord(schema, TEST_RECORD, workUnitState);
    Assert.assertEquals(Iterables.size(converted), 4);
    for (Object record : converted) {
        checkConvertedAvroData(schema, (GenericRecord) record);
    }
}