Here are examples of the Java API org.apache.flink.core.fs.Path taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
536 Examples
19
Source : FlinkKeyStoreManager.java
with Apache License 2.0
from ZuInnoTe
with Apache License 2.0
from ZuInnoTe
/**
 * Opens a keystore on any Hadoop compatible filesystem.
 *
 * <p>The stream used to read an existing keystore is closed before this method returns,
 * whether loading succeeds or fails.
 *
 * @param path path to key store, if null then a new keystore is created
 * @param keyStoreType keystore type handed to {@link KeyStore#getInstance(String)}
 * @param keyStorePassword password used to load the keystore; must not be null
 * @throws IOException if the keystore file cannot be opened or read
 * @throws NoSuchAlgorithmException if thrown while loading the keystore
 * @throws CertificateException if thrown while loading the keystore
 * @throws KeyStoreException if no keystore of the requested type can be instantiated
 */
public void openKeyStore(Path path, String keyStoreType, String keyStorePassword) throws IOException, NoSuchAlgorithmException, CertificateException, KeyStoreException {
    this.keystore = KeyStore.getInstance(keyStoreType);
    final char[] password = keyStorePassword.toCharArray();
    if (path == null) {
        // no backing file: initialise a new, empty keystore
        this.keystore.load(null, password);
        return;
    }
    // KeyStore.load does not close the stream it reads from, so close it here
    try (InputStream keyStoreInputStream = ffr.openFile(path)) {
        this.keystore.load(keyStoreInputStream, password);
    }
}
19
Source : FlinkFileReader.java
with Apache License 2.0
from ZuInnoTe
with Apache License 2.0
from ZuInnoTe
/*
 * Loads linked workbooks as InputStreams
 *
 * @param fileNames List of filenames (full URI/path) to load; may be null
 *
 * @return a Map of filenames (without path!) with associated InputStreams;
 *         empty if fileNames is null. The caller is responsible for closing the streams.
 *
 * @throws java.io.IOException in case of issues loading a file; streams that were
 *         already opened by this call are closed before the exception propagates
 *
 */
public Map<String, InputStream> loadLinkedWorkbooks(String[] fileNames) throws IOException {
    Map<String, InputStream> result = new HashMap<>();
    if (fileNames == null) {
        return result;
    }
    try {
        for (String currentFile : fileNames) {
            Path currentPath = new Path(currentFile);
            result.put(currentPath.getName(), openFile(currentPath));
        }
    } catch (IOException | RuntimeException e) {
        // do not leak the streams opened before the failure
        for (InputStream opened : result.values()) {
            try {
                opened.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
        }
        throw e;
    }
    return result;
}
19
Source : FlinkFileReader.java
with Apache License 2.0
from ZuInnoTe
with Apache License 2.0
from ZuInnoTe
/*
 * Opens a template file and hands back its stream.
 *
 * @param fileName filename of the template (full URI/path) to load
 *
 * @return InputStream of the template, as produced by openFile
 *
 * @throws java.io.IOException in case of issues loading the file
 *
 */
public InputStream loadTemplate(String fileName) throws IOException {
    return openFile(new Path(fileName));
}
19
Source : FunctionsStateBootstrapOperator.java
with Apache License 2.0
from ververica
with Apache License 2.0
from ververica
/**
* An operator used to bootstrap function state for the {@link FunctionGroupOperator}.
*/
public final clreplaced FunctionsStateBootstrapOperator extends AbstractStreamOperator<TaggedOperatorSubtaskState> implements OneInputStreamOperator<TaggedBootstrapData, TaggedOperatorSubtaskState>, BoundedOneInput {
private static final long serialVersionUID = 1L;
private final StateBootstrapFunctionRegistry stateBootstrapFunctionRegistry;
private final boolean disableMultiplexState;
private final long snapshotTimestamp;
private final Path snapshotPath;
private transient StateBootstrapper stateBootstrapper;
public FunctionsStateBootstrapOperator(StateBootstrapFunctionRegistry stateBootstrapFunctionRegistry, boolean disableMultiplexState, long snapshotTimestamp, Path snapshotPath) {
this.stateBootstrapFunctionRegistry = stateBootstrapFunctionRegistry;
this.disableMultiplexState = disableMultiplexState;
this.snapshotTimestamp = snapshotTimestamp;
this.snapshotPath = snapshotPath;
}
@Override
public void initializeState(StateInitializationContext context) throws Exception {
super.initializeState(context);
final State stateAccessor = createStateAccessor(getRuntimeContext(), getKeyedStateBackend(), disableMultiplexState);
this.stateBootstrapper = new StateBootstrapper(stateBootstrapFunctionRegistry, stateAccessor);
}
@Override
public void processElement(StreamRecord<TaggedBootstrapData> streamRecord) throws Exception {
stateBootstrapper.apply(streamRecord.getValue());
}
@Override
public void endInput() throws Exception {
// bootstrap dataset is now completely processed;
// take a snapshot of the function states
final TaggedOperatorSubtaskState state = SnapshotUtils.snapshot(this, getRuntimeContext().getIndexOfThisSubtask(), snapshotTimestamp, getContainingTask().getCheckpointStorage(), snapshotPath);
output.collect(new StreamRecord<>(state));
}
private static State createStateAccessor(RuntimeContext runtimeContext, KeyedStateBackend<Object> keyedStateBackend, boolean disableMultiplexState) {
if (disableMultiplexState) {
return new FlinkState(runtimeContext, keyedStateBackend, new DynamicallyRegisteredTypes(new StaticallyRegisteredTypes(MessageFactoryType.WITH_RAW_PAYLOADS)));
} else {
return new MultiplexedState(runtimeContext, keyedStateBackend, new DynamicallyRegisteredTypes(new StaticallyRegisteredTypes(MessageFactoryType.WITH_RAW_PAYLOADS)));
}
}
}
19
Source : RobustTsvOutputFormat.java
with Apache License 2.0
from Remper
with Apache License 2.0
from Remper
// --------------------------------------------------------------------------------------------
/** Replaces every run of characters other than ASCII letters in the path's string form with a single dash. */
private String sanitizePathName(Path outputPath) {
    final String rawPath = outputPath.toString();
    return rawPath.replaceAll("[^a-zA-Z]+", "-");
}
19
Source : StanfordTweetsDataSetInputFormat.java
with MIT License
from mushketyk
with MIT License
from mushketyk
clreplaced TweetFileInputSplit implements InputSplit {
private final int splitNumber;
private final Path path;
public TweetFileInputSplit(int splitNumber, Path path) {
this.splitNumber = splitNumber;
this.path = path;
}
@Override
public int getSplitNumber() {
return splitNumber;
}
public Path getPath() {
return path;
}
}
19
Source : NoOpRecoverableWriter.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * No-op implementation: no stream is created for the given path.
 *
 * @param path ignored
 * @return always {@code null}
 */
@Override
public RecoverableFsDataOutputStream open(Path path) throws IOException {
return null;
}
19
Source : RollingPolicyTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// ------------------------------- Utility Methods --------------------------------
/**
 * Builds a {@code Buckets} instance rooted at {@code basePath} that uses the rolling policy under test,
 * a default bucket factory and a row-wise part writer with a simple string encoder.
 * The last constructor argument is fixed to 0 (presumably the subtask index - confirm against the constructor).
 */
private static Buckets<String, String> createBuckets(final Path basePath, final MethodCallCountingPolicyWrapper<String, String> rollingPolicyToTest) throws IOException {
return new Buckets<>(basePath, new TestUtils.StringIdenreplacedyBucketreplacedigner(), new DefaultBucketFactoryImpl<>(), new RowWisePartWriter.Factory<>(new SimpleStringEncoder<>()), rollingPolicyToTest, 0);
}
19
Source : BucketsTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Creates a matcher accepting a bucket that has the given id, lives at {@code testTmpPath/bucketId},
 * has no in-progress part, no pending parts for the current checkpoint and exactly one entry
 * in its pending-parts-per-checkpoint collection.
 *
 * @param testTmpPath parent directory of the expected bucket path
 * @param bucketId expected bucket id, also used as the last path component
 */
private static TypeSafeMatcher<Bucket<String, String>> hreplacedinglePartFileToBeCommittedOnCheckpointAck(final Path testTmpPath, final String bucketId) {
    // computed once and shared by the match and the description
    final Path expectedBucketPath = new Path(testTmpPath, bucketId);
    return new TypeSafeMatcher<Bucket<String, String>>() {

        @Override
        protected boolean matchesSafely(Bucket<String, String> bucket) {
            return bucket.getBucketId().equals(bucketId)
                && bucket.getBucketPath().equals(expectedBucketPath)
                && bucket.getInProgressPart() == null
                && bucket.getPendingPartsForCurrentCheckpoint().isEmpty()
                && bucket.getPendingPartsPerCheckpoint().size() == 1;
        }

        @Override
        public void describeTo(Description description) {
            // the original appended a lone closing quote after the value; dropped as it was unbalanced
            description.appendText("a Bucket with a single pending part file @ ").appendValue(expectedBucketPath);
        }
    };
}
19
Source : BucketsTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Creates a fresh {@code Buckets} instance via {@code createBuckets} and initializes it
 * from the supplied bucket and part-counter state.
 */
private static Buckets<String, String> restoreBuckets(final Path basePath, final RollingPolicy<String, String> rollingPolicy, final int subtaskIdx, final ListState<byte[]> bucketState, final ListState<Long> partCounterState) throws Exception {
    final Buckets<String, String> buckets = createBuckets(basePath, rollingPolicy, subtaskIdx);
    buckets.initializeState(bucketState, partCounterState);
    return buckets;
}
19
Source : BucketsTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// ------------------------------- Utility Methods --------------------------------
/**
 * Builds a {@code Buckets} instance rooted at {@code basePath} with the given rolling policy,
 * a default bucket factory and a row-wise part writer with a simple string encoder;
 * {@code subtaskIdx} is passed through as the last constructor argument.
 */
private static Buckets<String, String> createBuckets(final Path basePath, final RollingPolicy<String, String> rollingPolicy, final int subtaskIdx) throws IOException {
return new Buckets<>(basePath, new TestUtils.StringIdenreplacedyBucketreplacedigner(), new DefaultBucketFactoryImpl<>(), new RowWisePartWriter.Factory<>(new SimpleStringEncoder<>()), rollingPolicy, subtaskIdx);
}
19
Source : ContinuousFileMonitoringFunction.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Monitors the configured path and forwards splits according to {@code watchType}.
 *
 * <p>With {@code PROCESS_CONTINUOUSLY} the directory is scanned under the checkpoint lock, then the
 * thread sleeps for {@code interval}, repeating while {@code isRunning} is set. With
 * {@code PROCESS_ONCE} the directory is scanned at most once and the source stops.
 *
 * @throws FileNotFoundException if the configured path does not exist
 * @throws RuntimeException if {@code watchType} is neither of the two handled values
 */
@Override
public void run(SourceFunction.SourceContext<TimestampedFileInputSplit> context) throws Exception {
Path p = new Path(path);
FileSystem fileSystem = FileSystem.get(p.toUri());
// fail fast before taking any lock if there is nothing to monitor
if (!fileSystem.exists(p)) {
throw new FileNotFoundException("The provided file path " + path + " does not exist.");
}
checkpointLock = context.getCheckpointLock();
switch(watchType) {
case PROCESS_CONTINUOUSLY:
while (isRunning) {
// scanning happens under the checkpoint lock; the sleep deliberately happens outside of it
synchronized (checkpointLock) {
monitorDirAndForwardSplits(fileSystem, context);
}
Thread.sleep(interval);
}
// here we do not need to set the running to false and the
// globalModificationTime to Long.MAX_VALUE because to arrive here,
// either close() or cancel() have already been called, so this
// is already done.
break;
case PROCESS_ONCE:
synchronized (checkpointLock) {
// the following check guarantees that if we restart
// after a failure and we managed to have a successful
// checkpoint, we will not reprocess the directory.
if (globalModificationTime == Long.MIN_VALUE) {
monitorDirAndForwardSplits(fileSystem, context);
globalModificationTime = Long.MAX_VALUE;
}
isRunning = false;
}
break;
default:
// unknown mode: stop the source before reporting the error
isRunning = false;
throw new RuntimeException("Unknown WatchType" + watchType);
}
}
19
Source : StreamingFileSink.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Creates the builder for a {@link StreamingFileSink} with bulk-encoding format.
 * @param basePath the base path where all the buckets are going to be created as sub-directories.
 * @param writerFactory the {@link BulkWriter.Factory} to be used when writing elements in the buckets.
 * @param <IN> the type of incoming elements
 * @return The builder where the remaining of the configuration parameters for the sink can be configured.
 * In order to instantiate the sink, call {@link BulkFormatBuilder#build()} after specifying the desired parameters.
 */
public static <IN> StreamingFileSink.BulkFormatBuilder<IN, String> forBulkFormat(final Path basePath, final BulkWriter.Factory<IN> writerFactory) {
    final StreamingFileSink.BulkFormatBuilder<IN, String> builder =
        new StreamingFileSink.BulkFormatBuilder<>(basePath, writerFactory, new DateTimeBucketreplacedigner<>());
    return builder;
}
19
Source : StreamingFileSink.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// ------------------------------------------------------------------------
// --------------------------- Sink Builders -----------------------------
/**
 * Entry point for configuring a row-encoded {@code StreamingFileSink}.
 * @param basePath the base path under which all buckets are created as sub-directories.
 * @param encoder the {@link Encoder} used to write elements into the buckets.
 * @param <IN> the type of incoming elements
 * @return a builder on which the remaining sink parameters can be configured;
 * call {@link RowFormatBuilder#build()} once configuration is complete to obtain the sink.
 */
public static <IN> StreamingFileSink.RowFormatBuilder<IN, String> forRowFormat(final Path basePath, final Encoder<IN> encoder) {
    final StreamingFileSink.RowFormatBuilder<IN, String> builder =
        new StreamingFileSink.RowFormatBuilder<>(basePath, encoder, new DateTimeBucketreplacedigner<>());
    return builder;
}
19
Source : DefaultBucketFactoryImpl.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/** Delegates straight to {@code Bucket.getNew} with the arguments in the same order. */
@Override
public Bucket<IN, BucketID> getNewBucket(final RecoverableWriter fsWriter, final int subtaskIndex, final BucketID bucketId, final Path bucketPath, final long initialPartCounter, final PartFileWriter.PartFileFactory<IN, BucketID> partFileWriterFactory, final RollingPolicy<IN, BucketID> rollingPolicy) {
    final Bucket<IN, BucketID> created = Bucket.getNew(
        fsWriter,
        subtaskIndex,
        bucketId,
        bucketPath,
        initialPartCounter,
        partFileWriterFactory,
        rollingPolicy);
    return created;
}
19
Source : Bucket.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// --------------------------- Static Factory Methods -----------------------------
/**
 * Static factory producing a new, empty {@code Bucket}.
 * @param fsWriter the filesystem-specific {@link RecoverableWriter}.
 * @param subtaskIndex the index of the subtask creating the bucket.
 * @param bucketId the identifier of the bucket, as returned by the bucket assigner.
 * @param bucketPath the path to where the part files for the bucket will be written to.
 * @param initialPartCounter the initial counter for the part files of the bucket.
 * @param partFileFactory the {@link PartFileWriter.PartFileFactory} creating part file writers.
 * @param rollingPolicy the {@link RollingPolicy} passed on to the bucket constructor.
 * @param <IN> the type of input elements to the sink.
 * @param <BucketID> the type of the identifier of the bucket, as returned by the bucket assigner.
 * @return The new Bucket.
 */
static <IN, BucketID> Bucket<IN, BucketID> getNew(final RecoverableWriter fsWriter, final int subtaskIndex, final BucketID bucketId, final Path bucketPath, final long initialPartCounter, final PartFileWriter.PartFileFactory<IN, BucketID> partFileFactory, final RollingPolicy<IN, BucketID> rollingPolicy) {
    final Bucket<IN, BucketID> bucket = new Bucket<>(
        fsWriter,
        subtaskIndex,
        bucketId,
        bucketPath,
        initialPartCounter,
        partFileFactory,
        rollingPolicy);
    return bucket;
}
19
Source : RocksDBStateDownloaderTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/** Reads the whole file behind {@code path} and compares its bytes against {@code expected}. */
private void replacedertStateContentEqual(byte[] expected, Path path) throws IOException {
    final java.net.URI location = path.toUri();
    replacedertArrayEquals(expected, Files.readAllBytes(Paths.get(location)));
}
19
Source : RocksDBIncrementalRestoreOperation.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Populates the working directory of the recovered RocksDB instance from a local state directory:
 * files ending in {@code SST_FILE_SUFFIX} are hard-linked, everything else is copied
 * (replacing existing targets).
 *
 * @throws IOException if the source directory cannot be listed or a link/copy fails
 */
private void restoreInstanceDirectoryFromPath(Path source, String instanceRocksDBPath) throws IOException {
    final FileStatus[] listing = source.getFileSystem().listStatus(source);
    if (listing == null) {
        throw new IOException("Cannot list file statues. Directory " + source + " does not exist.");
    }
    for (FileStatus entry : listing) {
        final String name = entry.getPath().getName();
        final File from = new File(source.getPath(), name);
        final File to = new File(instanceRocksDBPath, name);
        if (!name.endsWith(SST_FILE_SUFFIX)) {
            // true copy for all other files.
            Files.copy(from.toPath(), to.toPath(), StandardCopyOption.REPLACE_EXISTING);
            continue;
        }
        // hardlink'ing the immutable sst-files.
        Files.createLink(to.toPath(), from.toPath());
    }
}
19
Source : RocksDBIncrementalRestoreOperation.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/** Recursively deletes {@code path} if it exists; an {@link IOException} is logged as a warning instead of propagated. */
private void cleanUpPathQuietly(@Nonnull Path path) {
    try {
        final FileSystem fs = path.getFileSystem();
        if (!fs.exists(path)) {
            return;
        }
        fs.delete(path, true);
    } catch (IOException ex) {
        LOG.warn("Failed to clean up path " + path, ex);
    }
}
19
Source : SnapshotDirectoryTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Verifies that {@code mkdirs()} on a snapshot directory creates the directory and its missing parent,
 * and that nothing is created before it is called.
 */
@Test
public void mkdirs() throws Exception {
    final File root = temporaryFolder.getRoot();
    final File outer = new File(root, UUID.randomUUID().toString());
    final File inner = new File(outer, UUID.randomUUID().toString());
    final Path innerPath = new Path(inner.toURI());

    // nothing exists up front
    replacedert.replacedertFalse(outer.isDirectory());
    replacedert.replacedertFalse(inner.isDirectory());

    // creating the handle alone must not touch the file system
    final SnapshotDirectory directory = SnapshotDirectory.permanent(innerPath);
    replacedert.replacedertFalse(directory.exists());
    replacedert.replacedertFalse(outer.isDirectory());
    replacedert.replacedertFalse(inner.isDirectory());

    // mkdirs creates both levels
    replacedert.replacedertTrue(directory.mkdirs());
    replacedert.replacedertTrue(outer.isDirectory());
    replacedert.replacedertTrue(inner.isDirectory());
    replacedert.replacedertTrue(directory.exists());
}
19
Source : MemoryCheckpointStorageTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/** Creates a memory-backed checkpoint storage for a fresh job id using both directories. */
@Override
protected CheckpointStorage createCheckpointStorageWithSavepointDir(Path checkpointDir, Path savepointDir) throws Exception {
    final JobID jobId = new JobID();
    return new MemoryBackendCheckpointStorage(jobId, checkpointDir, savepointDir, DEFAULT_MAX_STATE_SIZE);
}
19
Source : MemoryCheckpointStorageTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// ------------------------------------------------------------------------
// General Fs-based checkpoint storage tests, inherited
// ------------------------------------------------------------------------
/** Creates a memory-backed checkpoint storage for a fresh job id with no savepoint directory. */
@Override
protected CheckpointStorage createCheckpointStorage(Path checkpointDir) throws Exception {
    final JobID jobId = new JobID();
    return new MemoryBackendCheckpointStorage(jobId, checkpointDir, null, DEFAULT_MAX_STATE_SIZE);
}
19
Source : FsCheckpointStorageTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/** Creates a file-system checkpoint storage for a fresh job id using both directories. */
@Override
protected CheckpointStorage createCheckpointStorageWithSavepointDir(Path checkpointDir, Path savepointDir) throws Exception {
    final JobID jobId = new JobID();
    return new FsCheckpointStorage(checkpointDir, savepointDir, jobId, FILE_SIZE_THRESHOLD);
}
19
Source : FsCheckpointStorageTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// ------------------------------------------------------------------------
// Utilities
// ------------------------------------------------------------------------
/** Checks that {@code child} equals the path formed from {@code parent} and the child's own name. */
private void replacedertParent(Path parent, Path child) {
    replacedertEquals(new Path(parent, child.getName()), child);
}
19
Source : FsCheckpointStorageTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// ------------------------------------------------------------------------
// General Fs-based checkpoint storage tests, inherited
// ------------------------------------------------------------------------
/** Creates a file-system checkpoint storage for a fresh job id with no savepoint directory. */
@Override
protected CheckpointStorage createCheckpointStorage(Path checkpointDir) throws Exception {
    final JobID jobId = new JobID();
    return new FsCheckpointStorage(checkpointDir, null, jobId, FILE_SIZE_THRESHOLD);
}
19
Source : FsCheckpointStateOutputStreamTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// ------------------------------------------------------------------------
// Utilities
// ------------------------------------------------------------------------
/**
 * Asserts that the local file behind {@code path} no longer exists.
 *
 * @throws IllegalArgumentException if the path's URI scheme is not {@code file}
 */
private static void ensureLocalFileDeleted(Path path) {
    final URI location = path.toUri();
    if (!"file".equals(location.getScheme())) {
        throw new IllegalArgumentException("not a local path");
    }
    final File localFile = new File(location.getPath());
    replacedertFalse("file not properly deleted", localFile.exists());
}
19
Source : AbstractFileCheckpointStorageTestBase.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// ------------------------------------------------------------------------
// savepoints
// ------------------------------------------------------------------------
/** A configured savepoint directory plus an explicit target: the explicit target is the expected parent. */
@Test
public void testSavepointPathConfiguredAndTarget() throws Exception {
    final Path configured = randomTempPath();
    final Path explicitTarget = randomTempPath();
    testSavepoint(configured, explicitTarget, explicitTarget);
}
19
Source : AbstractFileCheckpointStorageTestBase.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/** A configured savepoint directory and no explicit target: the configured directory is the expected parent. */
@Test
public void testSavepointPathConfiguredNoTarget() throws Exception {
    final Path configured = randomTempPath();
    testSavepoint(configured, null, configured);
}
19
Source : SnapshotDirectory.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Factory for a permanent snapshot directory at the given path, i.e. one that will not delete the
 * underlying directory in {@link #cleanup()} after {@link #completeSnapshotAndGetHandle()} was called.
 */
public static SnapshotDirectory permanent(@Nonnull Path directory) throws IOException {
    final SnapshotDirectory snapshotDirectory = new PermanentSnapshotDirectory(directory);
    return snapshotDirectory;
}
19
Source : FsCheckpointStreamFactory.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// ------------------------------------------------------------------------
/**
 * Opens a state output stream in the checkpoint directory for {@code EXCLUSIVE} scope and in the
 * shared state directory otherwise. The write buffer is at least {@code DEFAULT_WRITE_BUFFER_SIZE}
 * and at least as large as the file state threshold.
 */
@Override
public FsCheckpointStateOutputStream createCheckpointStateOutputStream(CheckpointedStateScope scope) throws IOException {
    final Path target;
    if (scope == CheckpointedStateScope.EXCLUSIVE) {
        target = checkpointDirectory;
    } else {
        target = sharedStateDirectory;
    }
    final int bufferSize = Math.max(DEFAULT_WRITE_BUFFER_SIZE, fileStateThreshold);
    return new FsCheckpointStateOutputStream(target, filesystem, bufferSize, fileStateThreshold);
}
19
Source : FileStateHandle.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
* {@link StreamStateHandle} for state that was written to a file stream. The written data is
* identified by the file path. The state can be read again by calling {@link #openInputStream()}.
*/
public clreplaced FileStateHandle implements StreamStateHandle {
private static final long serialVersionUID = 350284443258002355L;
/**
* The path to the file in the filesystem, fully describing the file system
*/
private final Path filePath;
/**
* The size of the state in the file
*/
private final long stateSize;
/**
* Creates a new file state for the given file path.
*
* @param filePath The path to the file that stores the state.
*/
public FileStateHandle(Path filePath, long stateSize) {
checkArgument(stateSize >= -1);
this.filePath = checkNotNull(filePath);
this.stateSize = stateSize;
}
/**
* Gets the path where this handle's state is stored.
*
* @return The path where this handle's state is stored.
*/
public Path getFilePath() {
return filePath;
}
@Override
public FSDataInputStream openInputStream() throws IOException {
return getFileSystem().open(filePath);
}
/**
* Discard the state by deleting the file that stores the state. If the parent directory
* of the state is empty after deleting the state file, it is also deleted.
*
* @throws Exception Thrown, if the file deletion (not the directory deletion) fails.
*/
@Override
public void discardState() throws Exception {
FileSystem fs = getFileSystem();
fs.delete(filePath, false);
}
/**
* Returns the file size in bytes.
*
* @return The file size in bytes.
*/
@Override
public long getStateSize() {
return stateSize;
}
/**
* Gets the file system that stores the file state.
*
* @return The file system that stores the file state.
* @throws IOException Thrown if the file system cannot be accessed.
*/
private FileSystem getFileSystem() throws IOException {
return FileSystem.get(filePath.toUri());
}
// ------------------------------------------------------------------------
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (!(o instanceof FileStateHandle)) {
return false;
}
FileStateHandle that = (FileStateHandle) o;
return filePath.equals(that.filePath);
}
@Override
public int hashCode() {
return filePath.hashCode();
}
@Override
public String toString() {
return String.format("File State: %s [%d bytes]", filePath, stateSize);
}
}
19
Source : AbstractFsCheckpointStorage.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Builds the directory path holding the data exclusive to one checkpoint: a child of the base
 * directory named {@code CHECKPOINT_DIR_PREFIX} followed by the checkpoint id.
 *
 * @param baseDirectory The base directory into which the job checkpoints.
 * @param checkpointId The ID (logical timestamp) of the checkpoint.
 */
protected static Path createCheckpointDirectory(Path baseDirectory, long checkpointId) {
    final String directoryName = CHECKPOINT_DIR_PREFIX + checkpointId;
    return new Path(baseDirectory, directoryName);
}
19
Source : AbstractFileStateBackend.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// ------------------------------------------------------------------------
// Utilities
// ------------------------------------------------------------------------
/**
 * Verifies that the given path carries a scheme and a non-root path component.
 *
 * @param path The path to check.
 * @return The same path instance, if it passed all checks.
 *
 * @throws IllegalArgumentException Thrown, if the URI misses scheme or path, or points to the root directory.
 */
private static Path validatePath(Path path) {
    final URI location = path.toUri();

    if (location.getScheme() == null) {
        throw new IllegalArgumentException("The scheme (hdfs://, file://, etc) is null. " + "Please specify the file system scheme explicitly in the URI.");
    }

    final String directory = location.getPath();
    if (directory == null) {
        throw new IllegalArgumentException("The path to store the checkpoint data in is null. " + "Please specify a directory path for the checkpoint data.");
    }

    final boolean isRoot = directory.isEmpty() || "/".equals(directory);
    if (isRoot) {
        throw new IllegalArgumentException("Cannot use the root directory for checkpoints.");
    }

    return path;
}
19
Source : DirectoryStateHandle.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
* This state handle represents a directory. This clreplaced is, for example, used to represent the directory of RocksDB's
* native checkpoint directories for local recovery.
*/
public clreplaced DirectoryStateHandle implements StateObject {
/**
* Serial version.
*/
private static final long serialVersionUID = 1L;
/**
* The path that describes the directory.
*/
@Nonnull
private final Path directory;
public DirectoryStateHandle(@Nonnull Path directory) {
this.directory = directory;
}
@Override
public void discardState() throws IOException {
FileSystem fileSystem = directory.getFileSystem();
fileSystem.delete(directory, true);
}
@Override
public long getStateSize() {
// For now, we will not report any size, but in the future this could (if needed) return the total dir size.
// unknown
return 0L;
}
@Nonnull
public Path getDirectory() {
return directory;
}
@Override
public boolean equals(Object o) {
if (this == o) {
return true;
}
if (o == null || getClreplaced() != o.getClreplaced()) {
return false;
}
DirectoryStateHandle that = (DirectoryStateHandle) o;
return directory.equals(that.directory);
}
@Override
public int hashCode() {
return directory.hashCode();
}
@Override
public String toString() {
return "DirectoryStateHandle{" + "directory=" + directory + '}';
}
}
19
Source : JobSubmitHandler.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Looks up {@code fileName} among the uploaded files.
 *
 * @throws MissingFileException (built from {@code type} and {@code fileName}) if no entry exists for that name
 */
private static Path getPathAndreplacedertUpload(String fileName, String type, Map<String, Path> uploadedFiles) throws MissingFileException {
    final Path uploaded = uploadedFiles.get(fileName);
    if (uploaded != null) {
        return uploaded;
    }
    throw new MissingFileException(type, fileName);
}
19
Source : JobSubmitHandler.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Resolves every requested jar file name against the uploaded files and returns a new
 * {@code Path} per jar, built from the string form of the uploaded path.
 *
 * @throws MissingFileException if any requested jar was not uploaded
 */
private static Collection<Path> getJarFilesToUpload(Collection<String> jarFileNames, Map<String, Path> nameToFileMap) throws MissingFileException {
    final Collection<Path> resolved = new ArrayList<>(jarFileNames.size());
    for (String name : jarFileNames) {
        final Path uploaded = getPathAndreplacedertUpload(name, FILE_TYPE_JAR, nameToFileMap);
        final String uploadedLocation = uploaded.toString();
        resolved.add(new Path(uploadedLocation));
    }
    return resolved;
}
19
Source : JobGraph.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// --------------------------------------------------------------------------------------------
// Handling of attached JAR files
// --------------------------------------------------------------------------------------------
/**
 * Registers the path of a JAR file required to run the job on a task manager.
 * A path that is already registered is not added a second time.
 *
 * @param jar
 *        path of the JAR file required to run the job on a task manager
 * @throws IllegalArgumentException if {@code jar} is null
 */
public void addJar(Path jar) {
    if (jar == null) {
        throw new IllegalArgumentException();
    }
    if (userJars.contains(jar)) {
        return;
    }
    userJars.add(jar);
}
19
Source : JsonResponseHistoryServerArchivist.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
* Implementation which archives an {@link AccessExecutionGraph} such that it stores
* the JSON requests for all possible history server requests.
*/
clreplaced JsonResponseHistoryServerArchivist implements HistoryServerArchivist {
private final JsonArchivist jsonArchivist;
private final Path archivePath;
JsonResponseHistoryServerArchivist(JsonArchivist jsonArchivist, Path archivePath) {
this.jsonArchivist = Preconditions.checkNotNull(jsonArchivist);
this.archivePath = Preconditions.checkNotNull(archivePath);
}
@Override
public CompletableFuture<Acknowledge> archiveExecutionGraph(AccessExecutionGraph executionGraph) {
try {
FsJobArchivist.archiveJob(archivePath, executionGraph.getJobID(), jsonArchivist.archiveJsonWithPath(executionGraph));
return CompletableFuture.completedFuture(Acknowledge.get());
} catch (IOException e) {
return FutureUtils.completedExceptionally(e);
}
}
}
19
Source : PythonPlanBinder.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// =====Setup========================================================================================================
/** Recursively deletes {@code path} on its own file system when it exists; does nothing otherwise. */
private static void deleteIfExists(Path path) throws IOException {
    final FileSystem fileSystem = path.getFileSystem();
    if (!fileSystem.exists(path)) {
        return;
    }
    fileSystem.delete(path, true);
}
19
Source : PythonPlanBinder.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Runs a Python plan: validates the script and additional files, stages them in the temporary
 * plan directory, starts the Python process and executes every plan it sends.
 *
 * <p>Argument layout: {@code args[0]} is the plan file, followed by optional additional files;
 * a lone {@code "-"} separates those from the arguments forwarded to the plan.
 *
 * @param args command line arguments as described above; must contain at least the plan file
 * @throws IllegalArgumentException if no argument is given
 * @throws FileNotFoundException if the plan file or an additional file does not exist
 */
void runPlan(String[] args) throws Exception {
if (args.length < 1) {
throw new IllegalArgumentException("Missing script file argument. Usage: ./bin/pyflink.[sh/bat] <pathToScript>[ <pathToPackage1>[ <pathToPackageX]][ - <parameter1>[ <parameterX>]]");
}
// index of the first "-" separator; 0 doubles as "no separator found"
int split = 0;
for (int x = 0; x < args.length; x++) {
if (args[x].equals("-")) {
split = x;
break;
}
}
try {
String planFile = args[0];
// everything between the plan file and the separator (or the end) is an additional file
String[] filesToCopy = Arrays.copyOfRange(args, 1, split == 0 ? args.length : split);
// everything after the separator is forwarded to the plan; empty when there is no separator
String[] planArgumentsArray = Arrays.copyOfRange(args, split == 0 ? args.length : split + 1, args.length);
StringBuilder planArgumentsBuilder = new StringBuilder();
for (String arg : planArgumentsArray) {
planArgumentsBuilder.append(" ").append(arg);
}
String planArguments = planArgumentsBuilder.toString();
operatorConfig.setString(PLAN_ARGUMENTS_KEY, planArguments);
// verify all inputs exist before anything is copied
Path planPath = new Path(planFile);
if (!FileSystem.getUnguardedFileSystem(planPath.toUri()).exists(planPath)) {
throw new FileNotFoundException("Plan file " + planFile + " does not exist.");
}
for (String file : filesToCopy) {
Path filePath = new Path(file);
if (!FileSystem.getUnguardedFileSystem(filePath.toUri()).exists(filePath)) {
throw new FileNotFoundException("Additional file " + file + " does not exist.");
}
}
// setup temporary local directory for flink python library and user files
Path targetDir = new Path(tmpPlanFilesDir);
deleteIfExists(targetDir);
targetDir.getFileSystem().mkdirs(targetDir);
// extract and unzip flink library to temporary location
unzipPythonLibrary(new Path(tmpPlanFilesDir));
// copy user files to temporary location
Path tmpPlanFilesPath = new Path(tmpPlanFilesDir);
copyFile(planPath, tmpPlanFilesPath, FLINK_PYTHON_PLAN_NAME);
for (String file : filesToCopy) {
Path source = new Path(file);
copyFile(source, tmpPlanFilesPath, source.getName());
}
// start python process
streamer = new PythonPlanStreamer(operatorConfig);
streamer.open(tmpPlanFilesDir, planArguments);
// Python process should terminate itself when all jobs have been run
while (streamer.preparePlanMode()) {
ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
receivePlan(env);
env.registerCachedFile(tmpPlanFilesPath.toUri().toString(), FLINK_PYTHON_DC_ID, true);
JobExecutionResult jer = env.execute();
// report the job's net runtime back to the Python process
long runtime = jer.getNetRuntime();
streamer.sendRecord(runtime);
streamer.finishPlanMode();
sets.reset();
}
} finally {
try {
// clean up created files
FileSystem local = FileSystem.getLocalFileSystem();
local.delete(new Path(tmpPlanFilesDir), true);
} catch (IOException ioe) {
// cleanup is best effort: only the message is logged, the exception is not rethrown
LOG.error("PythonAPI file cleanup failed. {}", ioe.getMessage());
} finally {
// the streamer is closed even if the cleanup above failed
if (streamer != null) {
streamer.close();
}
}
}
}
19
Source : RowCsvInputFormatTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
* Tests for {@link RowCsvInputFormat}.
*/
public clreplaced RowCsvInputFormatTest {
// Path handed to the RowCsvInputFormat constructors by the tests that open a split
// produced by createTempFile; those tests never create a file at this location.
private static final Path PATH = new Path("an/ignored/file/");
// Two single-field records used by testRemovingTrailingCR to check how "\r\n" and "\n"
// line endings are handled.
private static final String FIRST_PART = "That is the first part";
private static final String SECOND_PART = "That is the second part";
/**
 * Reads the same split twice: first in strict mode, where every malformed line must raise a
 * {@link ParseException}, then in lenient mode, where no exception is expected and the header
 * line comes back with null values for the numeric columns.
 */
@Test
public void ignoreInvalidLines() throws Exception {
    final String csv = "#description of the data\n" + "header1|header2|header3|\n" + "this is|1|2.0|\n" + "//a comment\n" + "a test|3|4.0|\n" + "#next|5|6.0|\n";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, "\n", "|");

    // the three well-formed data rows, in file order
    final Object[] thisIsRow = { "this is", 1, 2.0 };
    final Object[] aTestRow = { "a test", 3, 4.0 };
    final Object[] nextRow = { "#next", 5, 6.0 };

    // ---- strict pass ----
    csvFormat.setLenient(false);
    final Configuration secondPassConfig = new Configuration();
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);
    Row row = new Row(3);

    // "#description of the data" has too few fields
    try {
        row = csvFormat.nextRecord(row);
        fail("Parse Exception was not thrown! (Row too short)");
    } catch (ParseException ignored) {
        // expected
    }
    // "header1|header2|header3|" cannot be parsed as (String, int, double)
    try {
        row = csvFormat.nextRecord(row);
        fail("Parse Exception was not thrown! (Invalid int value)");
    } catch (ParseException ignored) {
        // expected
    }

    row = csvFormat.nextRecord(row);
    replacedertNotNull(row);
    for (int pos = 0; pos < thisIsRow.length; pos++) {
        replacedertEquals(thisIsRow[pos], row.getField(pos));
    }

    // "//a comment" has too few fields
    try {
        row = csvFormat.nextRecord(row);
        fail("Parse Exception was not thrown! (Row too short)");
    } catch (ParseException ignored) {
        // expected
    }

    for (Object[] expected : new Object[][] { aTestRow, nextRow }) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    replacedertNull(csvFormat.nextRecord(row));

    // ---- lenient pass over the same split ----
    csvFormat.setLenient(true);
    csvFormat.configure(secondPassConfig);
    csvFormat.open(inputSplit);
    row = csvFormat.nextRecord(new Row(3));
    replacedertNotNull(row);
    replacedertEquals("header1", row.getField(0));
    replacedertNull(row.getField(1));
    replacedertNull(row.getField(2));

    for (Object[] expected : new Object[][] { thisIsRow, aTestRow, nextRow }) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    replacedertNull(csvFormat.nextRecord(row));
}
/**
 * With the comment prefix set to "#", only the two lines that do not start with "#" are
 * expected back as records.
 */
@Test
public void ignoreSingleCharPrefixComments() throws Exception {
    final String csv = "#description of the data\n" + "#successive commented line\n" + "this is|1|2.0|\n" + "a test|3|4.0|\n" + "#next|5|6.0|\n";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, "\n", "|");
    csvFormat.setCommentPrefix("#");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    final Object[][] expectedRows = { { "this is", 1, 2.0 }, { "a test", 3, 4.0 } };
    Row row = new Row(3);
    for (Object[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    // the trailing "#next..." line is a comment as well, so the input is exhausted
    replacedertNull(csvFormat.nextRecord(row));
}
/**
 * Same scenario as the single-character case, but with the two-character comment prefix "//".
 */
@Test
public void ignoreMultiCharPrefixComments() throws Exception {
    final String csv = "//description of the data\n" + "//successive commented line\n" + "this is|1|2.0|\n" + "a test|3|4.0|\n" + "//next|5|6.0|\n";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, "\n", "|");
    csvFormat.setCommentPrefix("//");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    final Object[][] expectedRows = { { "this is", 1, 2.0 }, { "a test", 3, 4.0 } };
    Row row = new Row(3);
    for (Object[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    // the trailing "//next..." line is a comment as well, so the input is exhausted
    replacedertNull(csvFormat.nextRecord(row));
}
/**
 * Reads four lines of three string columns each; empty columns are expected as empty strings,
 * including on the last line, which has no trailing delimiter or line break.
 */
@Test
public void readStringFields() throws Exception {
    final String csv = "abc|def|ghijk\nabc||hhg\n|||\n||";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, "\n", "|");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    final String[][] expectedRows = { { "abc", "def", "ghijk" }, { "abc", "", "hhg" }, { "", "", "" }, { "", "", "" } };
    Row row = new Row(3);
    for (String[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    replacedertNull(csvFormat.nextRecord(row));
    replacedertTrue(csvFormat.reachedEnd());
}
/**
 * Mixes '@'-quoted and unquoted string columns; a field delimiter inside a quoted value is
 * expected to be kept as part of the value.
 */
@Test
public void readMixedQuotedStringFields() throws Exception {
    final String csv = "@a|b|c@|def|@ghijk@\nabc||@|hhg@\n|||\n";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, "\n", "|");
    // quoting is switched on after configure(), exactly as in the original sequence
    csvFormat.configure(new Configuration());
    csvFormat.enableQuotedStringParsing('@');
    csvFormat.open(inputSplit);

    final String[][] expectedRows = { { "a|b|c", "def", "ghijk" }, { "abc", "", "|hhg" }, { "", "", "" } };
    Row row = new Row(3);
    for (String[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    replacedertNull(csvFormat.nextRecord(row));
    replacedertTrue(csvFormat.reachedEnd());
}
/**
 * Uses the two-character field delimiter "|-" (set after construction) and checks three rows
 * of string columns, the last row consisting of delimiters only.
 */
@Test
public void readStringFieldsWithTrailingDelimiters() throws Exception {
    final String csv = "abc|-def|-ghijk\nabc|-|-hhg\n|-|-|-\n";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, "\n", "|");
    csvFormat.setFieldDelimiter("|-");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    final String[][] expectedRows = { { "abc", "def", "ghijk" }, { "abc", "", "hhg" }, { "", "", "" } };
    Row row = new Row(3);
    for (String[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    replacedertNull(csvFormat.nextRecord(row));
    replacedertTrue(csvFormat.reachedEnd());
}
/**
 * Checks rows whose trailing columns are empty: the first five lines must parse (missing
 * trailing values become empty strings), while the last line, which has only two columns,
 * must raise a {@link ParseException}.
 */
@Test
public void testTailingEmptyFields() throws Exception {
    final String csv = "abc|-def|-ghijk\n" + "abc|-def|-\n" + "abc|-|-\n" + "|-|-|-\n" + "|-|-\n" + "abc|-def\n";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, "\n", "|");
    csvFormat.setFieldDelimiter("|-");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    final String[][] expectedRows = { { "abc", "def", "ghijk" }, { "abc", "def", "" }, { "abc", "", "" }, { "", "", "" }, { "", "", "" } };
    Row row = new Row(3);
    for (String[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }

    // "abc|-def" is one column short
    try {
        csvFormat.nextRecord(row);
        fail("Parse Exception was not thrown! (Row too short)");
    } catch (ParseException ignored) {
        // expected
    }
}
/**
 * Reads two rows of five integer columns; the second line carries a trailing delimiter and
 * its "000" value is expected as 0.
 */
@Test
public void testIntegerFields() throws Exception {
    final String csv = "111|222|333|444|555\n666|777|888|999|000|\n";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, "\n", "|");
    csvFormat.setFieldDelimiter("|");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    final Integer[][] expectedRows = { { 111, 222, 333, 444, 555 }, { 666, 777, 888, 999, 0 } };
    Row row = new Row(5);
    for (Integer[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    replacedertNull(csvFormat.nextRecord(row));
    replacedertTrue(csvFormat.reachedEnd());
}
/**
 * Reads eight lines made up only of commas into eight differently typed columns and checks,
 * for the i-th record, that its i-th field is null.
 */
@Test
public void testEmptyFields() throws Exception {
    final String csv = ",,,,,,,,\n" + ",,,,,,,\n" + ",,,,,,,,\n" + ",,,,,,,\n" + ",,,,,,,,\n" + ",,,,,,,,\n" + ",,,,,,,\n" + ",,,,,,,,\n";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.BOOLEAN_TYPE_INFO, BasicTypeInfo.BYTE_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO, BasicTypeInfo.FLOAT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.LONG_TYPE_INFO, BasicTypeInfo.SHORT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    // NOTE(review): the boolean argument presumably makes empty columns come back as null -
    // confirm against the RowCsvInputFormat constructor.
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, true);
    csvFormat.setFieldDelimiter(",");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    final int lineCount = csv.split("\n").length;
    Row row = new Row(8);
    int index = 0;
    while (index < lineCount) {
        row = csvFormat.nextRecord(row);
        replacedertNull(row.getField(index));
        index++;
    }
    // ensure no more rows
    replacedertNull(csvFormat.nextRecord(row));
    replacedertTrue(csvFormat.reachedEnd());
}
/**
 * Reads two rows of five double columns; the second line carries a trailing delimiter and its
 * "00.0" value is expected as 0.0.
 */
@Test
public void testDoubleFields() throws Exception {
    final String csv = "11.1|22.2|33.3|44.4|55.5\n66.6|77.7|88.8|99.9|00.0|\n";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.DOUBLE_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes);
    csvFormat.setFieldDelimiter("|");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    final Double[][] expectedRows = { { 11.1, 22.2, 33.3, 44.4, 55.5 }, { 66.6, 77.7, 88.8, 99.9, 0.0 } };
    Row row = new Row(5);
    for (Double[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    replacedertNull(csvFormat.nextRecord(row));
    replacedertTrue(csvFormat.reachedEnd());
}
/**
 * Declares only two column types for lines that contain five values and expects the first two
 * values of each line.
 */
@Test
public void testReadFirstN() throws Exception {
    final String csv = "111|222|333|444|555|\n666|777|888|999|000|\n";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes);
    csvFormat.setFieldDelimiter("|");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    final Integer[][] expectedRows = { { 111, 222 }, { 666, 777 } };
    Row row = new Row(2);
    for (Integer[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    replacedertNull(csvFormat.nextRecord(row));
    replacedertTrue(csvFormat.reachedEnd());
}
/**
 * Selects input columns 0, 3 and 7 out of ten, with the three-character field delimiter "|x|".
 */
@Test
public void testReadSparseWithNullFieldsForTypes() throws Exception {
    final String csv = "111|x|222|x|333|x|444|x|555|x|666|x|777|x|888|x|999|x|000|x|\n" + "000|x|999|x|888|x|777|x|666|x|555|x|444|x|333|x|222|x|111|x|";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
    final int[] selectedColumns = new int[] { 0, 3, 7 };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, selectedColumns);
    csvFormat.setFieldDelimiter("|x|");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    final Integer[][] expectedRows = { { 111, 444, 888 }, { 0, 777, 333 } };
    Row row = new Row(3);
    for (Integer[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    replacedertNull(csvFormat.nextRecord(row));
    replacedertTrue(csvFormat.reachedEnd());
}
/**
 * Selects input columns 0, 3 and 7 out of ten, with the single-character field delimiter "|".
 */
@Test
public void testReadSparseWithPositionSetter() throws Exception {
    final String csv = "111|222|333|444|555|666|777|888|999|000|\n" + "000|999|888|777|666|555|444|333|222|111|";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
    final int[] selectedColumns = new int[] { 0, 3, 7 };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, selectedColumns);
    csvFormat.setFieldDelimiter("|");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    final Integer[][] expectedRows = { { 111, 444, 888 }, { 0, 777, 333 } };
    Row row = new Row(3);
    for (Integer[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    replacedertNull(csvFormat.nextRecord(row));
    replacedertTrue(csvFormat.reachedEnd());
}
/**
 * Selects input columns 0, 3 and 7 out of ten, with the two-character field delimiter "&&".
 */
@Test
public void testReadSparseWithMask() throws Exception {
    final String csv = "111&&222&&333&&444&&555&&666&&777&&888&&999&&000&&\n" + "000&&999&&888&&777&&666&&555&&444&&333&&222&&111&&";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
    final int[] selectedColumns = new int[] { 0, 3, 7 };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, selectedColumns);
    csvFormat.setFieldDelimiter("&&");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    final Integer[][] expectedRows = { { 111, 444, 888 }, { 0, 777, 333 } };
    Row row = new Row(3);
    for (Integer[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    replacedertNull(csvFormat.nextRecord(row));
    replacedertTrue(csvFormat.reachedEnd());
}
/**
 * Feeds malformed quoted strings to a {@link StringParser} and checks that parsing reports
 * failure (return value -1) together with the expected error state.
 *
 * <p>The parse limit is taken from the encoded byte array rather than from the String's
 * character count, so the limit stays correct even if an input ever contains characters that
 * encode to more than one byte.
 */
@Test
public void testParseStringErrors() throws Exception {
    StringParser stringParser = new StringParser();
    stringParser.enableQuotedStringParsing((byte) '"');
    Map<String, StringParser.ParseErrorState> failures = new HashMap<>();
    // text after the closing quote that is not a delimiter
    failures.put("\"string\" trailing", FieldParser.ParseErrorState.UNQUOTED_CHARS_AFTER_QUOTED_STRING);
    // opening quote without a closing one
    failures.put("\"unterminated ", FieldParser.ParseErrorState.UNTERMINATED_QUOTED_STRING);
    for (Map.Entry<String, StringParser.ParseErrorState> failure : failures.entrySet()) {
        byte[] input = failure.getKey().getBytes(ConfigConstants.DEFAULT_CHARSET);
        int result = stringParser.parseField(input, 0, input.length, new byte[] { (byte) '|' }, null);
        replacedertEquals(-1, result);
        replacedertEquals(failure.getValue(), stringParser.getErrorState());
    }
}
/**
 * Compares the parsed rows of an RFC 4180 style sample (header line skipped) with five
 * expected rows. Disabled via {@code @Ignore}; see the annotation text for the reason.
 */
@Test
@Ignore("Test disabled because we do not support double-quote escaped quotes right now.")
public void testParserCorrectness() throws Exception {
    // RFC 4180 Compliance Test content
    // Taken from http://en.wikipedia.org/wiki/Comma-separated_values#Example
    final String csv = "Year,Make,Model,Description,Price\n" + "1997,Ford,E350,\"ac, abs, moon\",3000.00\n" + "1999,Chevy,\"Venture \"\"Extended Edition\"\"\",\"\",4900.00\n" + "1996,Jeep,Grand Cherokee,\"MUST SELL! air, moon roof, loaded\",4799.00\n" + "1999,Chevy,\"Venture \"\"Extended Edition, Very Large\"\"\",,5000.00\n" + ",,\"Venture \"\"Extended Edition\"\"\",\"\",4900.00";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.DOUBLE_TYPE_INFO };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes);
    csvFormat.setSkipFirstLineAsHeader(true);
    csvFormat.setFieldDelimiter(",");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    // expected values per data line: year, make, model, description, price
    final Object[][] expectedValues = {
        { 1997, "Ford", "E350", "ac, abs, moon", 3000.0 },
        { 1999, "Chevy", "Venture \"Extended Edition\"", "", 4900.0 },
        { 1996, "Jeep", "Grand Cherokee", "MUST SELL! air, moon roof, loaded", 4799.0 },
        { 1999, "Chevy", "Venture \"Extended Edition, Very Large\"", "", 5000.0 },
        { 0, "", "Venture \"Extended Edition\"", "", 4900.0 } };

    Row row = new Row(5);
    for (Object[] values : expectedValues) {
        final Row expected = new Row(5);
        for (int pos = 0; pos < values.length; pos++) {
            expected.setField(pos, values[pos]);
        }
        row = csvFormat.nextRecord(row);
        replacedertEquals(expected, row);
    }
    replacedertNull(csvFormat.nextRecord(row));
    replacedertTrue(csvFormat.reachedEnd());
}
/**
 * Runs testRemovingTrailingCR for three combinations of the line break written to the file
 * and the line delimiter configured on the format.
 */
@Test
public void testWindowsLineEndRemoval() throws Exception {
    // each pair is { line break in file, line break set up on the format }
    final String[][] combinations = {
        // typical use case -- linux file is correct and it is set up to linux (\n)
        { "\n", "\n" },
        // typical windows case -- windows file endings and windows file endings set up
        { "\r\n", "\r\n" },
        // problematic case windows file -- windows file endings (\r\n) but linux line endings (\n) set up
        { "\r\n", "\n" } };
    for (String[] combination : combinations) {
        testRemovingTrailingCR(combination[0], combination[1]);
    }
    // The remaining problematic case -- linux file endings (\n) with windows file endings (\r\n)
    // set up -- is not exercised: a specific setup for windows line endings will expect \r\n
    // because it has to be set up and is not standard.
}
/**
 * Parses one line of double-quoted string fields while selecting only input columns 0 and 2,
 * and checks the two selected values.
 *
 * <p>The temp file is written through try-with-resources so the writer is closed even if the
 * write fails, and with an explicit UTF-8 charset (as createTempFile does) instead of the
 * platform default.
 */
@Test
public void testQuotedStringParsingWithIncludeFields() throws Exception {
    String fileContent = "\"20:41:52-1-3-2015\"|\"Re: Taskmanager memory error in Eclipse\"|" + "\"Blahblah <[email protected]>\"|\"blaaa|\"blubb\"";
    File tempFile = File.createTempFile("CsvReaderQuotedString", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
        writer.write(fileContent);
    }
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    // only columns 0 and 2 of the input line are read into the two-field row
    RowCsvInputFormat inputFormat = new RowCsvInputFormat(new Path(tempFile.toURI().toString()), fieldTypes, new int[] { 0, 2 });
    inputFormat.enableQuotedStringParsing('"');
    inputFormat.setFieldDelimiter("|");
    inputFormat.setDelimiter('\n');
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    Row record = inputFormat.nextRecord(new Row(2));
    replacedertEquals("20:41:52-1-3-2015", record.getField(0));
    replacedertEquals("Blahblah <[email protected]>", record.getField(1));
}
/**
 * Parses two double-quoted string fields that contain backslash-escaped quotes and checks
 * that the escape sequences are returned unchanged inside the field values.
 *
 * <p>The temp file is written through try-with-resources so the writer is closed even if the
 * write fails, and with an explicit UTF-8 charset (as createTempFile does) instead of the
 * platform default.
 */
@Test
public void testQuotedStringParsingWithEscapedQuotes() throws Exception {
    String fileContent = "\"\\\"Hello\\\" World\"|\"We are\\\" young\"";
    File tempFile = File.createTempFile("CsvReaderQuotedString", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
        writer.write(fileContent);
    }
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO, BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat inputFormat = new RowCsvInputFormat(new Path(tempFile.toURI().toString()), fieldTypes);
    inputFormat.enableQuotedStringParsing('"');
    inputFormat.setFieldDelimiter("|");
    inputFormat.setDelimiter('\n');
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    Row record = inputFormat.nextRecord(new Row(2));
    // the backslash stays in front of each escaped quote
    replacedertEquals("\\\"Hello\\\" World", record.getField(0));
    replacedertEquals("We are\\\" young", record.getField(1));
}
/**
 * Reads DATE, TIME and two TIMESTAMP columns, including timestamps written without leading
 * zeros and with or without a fractional second.
 */
@Test
public void testSqlTimeFields() throws Exception {
    final String csv = "1990-10-14|02:42:25|1990-10-14 02:42:25.123|1990-1-4 2:2:5\n" + "1990-10-14|02:42:25|1990-10-14 02:42:25.123|1990-1-4 2:2:5.3\n";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { SqlTimeTypeInfo.DATE, SqlTimeTypeInfo.TIME, SqlTimeTypeInfo.TIMESTAMP, SqlTimeTypeInfo.TIMESTAMP };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes);
    csvFormat.setFieldDelimiter("|");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    // the two lines differ only in the last timestamp column
    final Object[][] expectedRows = {
        { Date.valueOf("1990-10-14"), Time.valueOf("02:42:25"), Timestamp.valueOf("1990-10-14 02:42:25.123"), Timestamp.valueOf("1990-01-04 02:02:05") },
        { Date.valueOf("1990-10-14"), Time.valueOf("02:42:25"), Timestamp.valueOf("1990-10-14 02:42:25.123"), Timestamp.valueOf("1990-01-04 02:02:05.3") } };
    Row row = new Row(4);
    for (Object[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
    replacedertNull(csvFormat.nextRecord(row));
    replacedertTrue(csvFormat.reachedEnd());
}
/**
 * Passes the selected columns in the order 7, 3, 0 and checks that the row fields follow that
 * order rather than the order of the columns in the file.
 */
@Test
public void testScanOrder() throws Exception {
    final String csv = // first row
    "111|222|333|444|555|666|777|888|999|000|\n" + // second row
    "000|999|888|777|666|555|444|333|222|111|";
    final FileInputSplit inputSplit = createTempFile(csv);
    final TypeInformation[] columnTypes = new TypeInformation[] { BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO, BasicTypeInfo.INT_TYPE_INFO };
    final int[] columnOrder = new int[] { 7, 3, 0 };
    final RowCsvInputFormat csvFormat = new RowCsvInputFormat(PATH, columnTypes, columnOrder);
    csvFormat.setFieldDelimiter("|");
    csvFormat.configure(new Configuration());
    csvFormat.open(inputSplit);

    // expected values for the first and the second row
    final Integer[][] expectedRows = { { 888, 444, 111 }, { 333, 777, 0 } };
    Row row = new Row(3);
    for (Integer[] expected : expectedRows) {
        row = csvFormat.nextRecord(row);
        replacedertNotNull(row);
        for (int pos = 0; pos < expected.length; pos++) {
            replacedertEquals(expected[pos], row.getField(pos));
        }
    }
}
/**
 * Writes {@code content} as UTF-8 to a new temporary file (deleted on JVM exit) and returns a
 * split that covers the whole file.
 *
 * <p>The writer is managed by try-with-resources so the file handle is released even when
 * writing fails.
 *
 * @param content text to store in the temporary file
 * @return a split with number 0, start 0 and the file's length, located on "localhost"
 * @throws IOException if the file cannot be created or written
 */
private static FileInputSplit createTempFile(String content) throws IOException {
    File tempFile = File.createTempFile("test_contents", "tmp");
    tempFile.deleteOnExit();
    try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
        wrt.write(content);
    }
    // length() is read after the writer is closed, so it reflects the flushed content
    return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0, tempFile.length(), new String[] { "localhost" });
}
/**
 * Writes FIRST_PART and SECOND_PART to a temp file, each followed by
 * {@code lineBreakerInFile}, reads them back with the line delimiter set to
 * {@code lineBreakerSetup}, and checks that both records come back without any line-break
 * characters.
 *
 * <p>The temp file is written through try-with-resources so the writer is closed even if the
 * write fails, and with an explicit UTF-8 charset (as createTempFile does) instead of the
 * platform default.
 *
 * @param lineBreakerInFile line break written after each record
 * @param lineBreakerSetup line delimiter configured on the input format
 * @throws IOException if the temp file cannot be written or read
 */
private static void testRemovingTrailingCR(String lineBreakerInFile, String lineBreakerSetup) throws IOException {
    String fileContent = FIRST_PART + lineBreakerInFile + SECOND_PART + lineBreakerInFile;
    // create input file
    File tempFile = File.createTempFile("CsvInputFormatTest", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
        wrt.write(fileContent);
    }
    TypeInformation[] fieldTypes = new TypeInformation[] { BasicTypeInfo.STRING_TYPE_INFO };
    RowCsvInputFormat inputFormat = new RowCsvInputFormat(new Path(tempFile.toURI().toString()), fieldTypes);
    // the delimiter is set after configure(), as in the original sequence
    inputFormat.configure(new Configuration());
    inputFormat.setDelimiter(lineBreakerSetup);
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    Row result = inputFormat.nextRecord(new Row(1));
    replacedertNotNull("Expecting to not return null", result);
    replacedertEquals(FIRST_PART, result.getField(0));
    result = inputFormat.nextRecord(result);
    replacedertNotNull("Expecting to not return null", result);
    replacedertEquals(SECOND_PART, result.getField(0));
}
}
19
Source : PrimitiveInputFormatTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
* Tests for {@link PrimitiveInputFormat}.
*/
public clreplaced PrimitiveInputFormatTest {
// Path handed to the PrimitiveInputFormat constructors; every test then opens a split
// produced by createInputSplit and never creates a file at this location.
private static final Path PATH = new Path("an/ignored/file/");
/**
 * Reads strings separated by the two-character delimiter "||" and expects "abc", "def" and an
 * empty string before the input ends.
 *
 * <p>An unexpected exception is rethrown as an {@link AssertionError} with the same message as
 * before and the original exception attached as its cause, so the stack trace appears in the
 * test report instead of being printed to stderr and dropped.
 */
@Test
public void testStringInput() {
    try {
        final String fileContent = "abc||def||||";
        final FileInputSplit split = createInputSplit(fileContent);
        final PrimitiveInputFormat<String> format = new PrimitiveInputFormat<String>(PATH, "||", String.clreplaced);
        final Configuration parameters = new Configuration();
        format.configure(parameters);
        format.open(split);
        String result = null;
        result = format.nextRecord(result);
        replacedertEquals("abc", result);
        result = format.nextRecord(result);
        replacedertEquals("def", result);
        result = format.nextRecord(result);
        replacedertEquals("", result);
        result = format.nextRecord(result);
        replacedertNull(result);
        replacedertTrue(format.reachedEnd());
    } catch (Exception ex) {
        throw new AssertionError("Test failed due to a " + ex.getClreplaced().getName() + ": " + ex.getMessage(), ex);
    }
}
/**
 * Reads two integers separated by "|" and expects 111 and 222 before the input ends.
 *
 * <p>An unexpected exception is rethrown as an {@link AssertionError} with the same message as
 * before and the original exception attached as its cause, so its stack trace is not lost.
 */
@Test
public void testIntegerInput() throws IOException {
    try {
        final String fileContent = "111|222|";
        final FileInputSplit split = createInputSplit(fileContent);
        final PrimitiveInputFormat<Integer> format = new PrimitiveInputFormat<Integer>(PATH, "|", Integer.clreplaced);
        format.configure(new Configuration());
        format.open(split);
        Integer result = null;
        result = format.nextRecord(result);
        replacedertEquals(Integer.valueOf(111), result);
        result = format.nextRecord(result);
        replacedertEquals(Integer.valueOf(222), result);
        result = format.nextRecord(result);
        replacedertNull(result);
        replacedertTrue(format.reachedEnd());
    } catch (Exception ex) {
        throw new AssertionError("Test failed due to a " + ex.getClreplaced().getName() + ": " + ex.getMessage(), ex);
    }
}
/**
 * Reads two doubles, one per line, using the constructor that takes no explicit delimiter,
 * and expects 1.21 and 2.23 before the input ends.
 *
 * <p>An unexpected exception is rethrown as an {@link AssertionError} with the same message as
 * before and the original exception attached as its cause, so its stack trace is not lost.
 */
@Test
public void testDoubleInputLinewise() throws IOException {
    try {
        final String fileContent = "1.21\n2.23\n";
        final FileInputSplit split = createInputSplit(fileContent);
        final PrimitiveInputFormat<Double> format = new PrimitiveInputFormat<Double>(PATH, Double.clreplaced);
        format.configure(new Configuration());
        format.open(split);
        Double result = null;
        result = format.nextRecord(result);
        replacedertEquals(Double.valueOf(1.21), result);
        result = format.nextRecord(result);
        replacedertEquals(Double.valueOf(2.23), result);
        result = format.nextRecord(result);
        replacedertNull(result);
        replacedertTrue(format.reachedEnd());
    } catch (Exception ex) {
        throw new AssertionError("Test failed due to a " + ex.getClreplaced().getName() + ": " + ex.getMessage(), ex);
    }
}
/**
 * Reads two "\r\n"-terminated lines with the constructor that takes no explicit delimiter and
 * expects both records back without the trailing carriage return.
 *
 * <p>An unexpected exception is rethrown as an {@link AssertionError} with the same message as
 * before and the original exception attached as its cause, so its stack trace is not lost.
 */
@Test
public void testRemovingTrailingCR() {
    try {
        String first = "First line";
        String second = "Second line";
        String fileContent = first + "\r\n" + second + "\r\n";
        final FileInputSplit split = createInputSplit(fileContent);
        final PrimitiveInputFormat<String> format = new PrimitiveInputFormat<String>(PATH, String.clreplaced);
        format.configure(new Configuration());
        format.open(split);
        String result = null;
        result = format.nextRecord(result);
        replacedertEquals(first, result);
        result = format.nextRecord(result);
        replacedertEquals(second, result);
    } catch (Exception ex) {
        throw new AssertionError("Test failed due to a " + ex.getClreplaced().getName() + ": " + ex.getMessage(), ex);
    }
}
/**
 * Reads integers from input whose third token ("asdf") is not a number; the first two values
 * must parse and the test as a whole is expected to end with an IOException.
 */
@Test(expected = IOException.clreplaced)
public void testFailingInput() throws IOException {
    final String content = "111|222|asdf|17";
    final FileInputSplit inputSplit = createInputSplit(content);
    final PrimitiveInputFormat<Integer> intFormat = new PrimitiveInputFormat<Integer>(PATH, "|", Integer.clreplaced);
    intFormat.configure(new Configuration());
    intFormat.open(inputSplit);

    Integer value = null;
    for (int expected : new int[] { 111, 222 }) {
        value = intFormat.nextRecord(value);
        replacedertEquals(Integer.valueOf(expected), value);
    }
    // third read hits "asdf"
    value = intFormat.nextRecord(value);
}
/**
 * Writes {@code content} to a fresh temporary file (deleted on JVM exit) and returns a split
 * that covers the whole file.
 *
 * @param content text to store in the temporary file
 * @return a split starting at offset 0 and spanning the file's length
 * @throws IOException if the file cannot be created or written
 */
private FileInputSplit createInputSplit(String content) throws IOException {
    final File scratchFile = File.createTempFile("test_contents", "tmp");
    scratchFile.deleteOnExit();
    try (FileWriter out = new FileWriter(scratchFile)) {
        out.write(content);
    }
    final Path location = new Path(scratchFile.toURI().toString());
    final String[] hosts = { "localhost" };
    return new FileInputSplit(0, location, 0, scratchFile.length(), hosts);
}
}
19
Source : CsvInputFormatTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
* Tests for {@link CsvInputFormat}.
*/
public class CsvInputFormatTest {
// Path passed to format constructors in tests that open an explicit split on a temp file;
// presumably never read itself, as the literal suggests.
private static final Path PATH = new Path("an/ignored/file/");
// Static variables for testing the removal of \r\n to \n
// First record written to the line-ending test file.
private static final String FIRST_PART = "That is the first part";
// Second record written to the line-ending test file.
private static final String SECOND_PART = "That is the second part";
/**
 * Runs the split/checkpoint scenario with a 1 MiB read buffer.
 *
 * @throws Exception if the shared scenario fails
 */
@Test
public void testSplitCsvInputStreamInLargeBuffer() throws Exception {
    final int largeBufferSize = 1024 * 1024;
    testSplitCsvInputStream(largeBufferSize, false);
}
/**
 * Runs the split/checkpoint scenario with a 2-byte read buffer.
 *
 * @throws Exception if the shared scenario fails
 */
@Test
public void testSplitCsvInputStreamInSmallBuffer() throws Exception {
    final int smallBufferSize = 2;
    testSplitCsvInputStream(smallBufferSize, false);
}
/**
 * Splits a four-record CSV file into three input splits and, after every record read,
 * simulates a checkpoint/restore cycle: the current offset is saved, a brand-new format is
 * created, and it is reopened on the same split at that offset.
 *
 * @param bufferSize read buffer size handed to the format
 * @param failAtStart not read by this helper; kept so existing callers keep compiling
 * @throws Exception if the temporary file cannot be written or the format fails to read it
 */
private void testSplitCsvInputStream(int bufferSize, boolean failAtStart) throws Exception {
    final String fileContent = "this is|1|2.0|\n" + "a test|3|4.0|\n" + "#next|5|6.0|\n" + "asdadas|5|30.0|\n";
    // create temporary file with 3 blocks
    final File tempFile = File.createTempFile("input-stream-decoration-test", "tmp");
    tempFile.deleteOnExit();
    try (FileOutputStream fileOutputStream = new FileOutputStream(tempFile)) {
        fileOutputStream.write(fileContent.getBytes(ConfigConstants.DEFAULT_CHARSET));
    }
    // fix the number of blocks and the size of each one.
    final int noOfBlocks = 3;
    final TupleTypeInfo<Tuple3<String, Integer, Double>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, Integer.class, Double.class);
    CsvInputFormat<Tuple3<String, Integer, Double>> format = new TupleCsvInputFormat<>(new Path(tempFile.toURI()), "\n", "|", typeInfo);
    format.setLenient(true);
    format.setBufferSize(bufferSize);
    final Configuration config = new Configuration();
    format.configure(config);

    // Expected field values and stream offsets, indexed by the order the records are read.
    final String[] expectedF0 = { "this is", "a test", "#next", "asdadas" };
    final int[] expectedF1 = { 1, 3, 5, 5 };
    final double[] expectedF2 = { 2.0, 4.0, 6.0, 30.0 };
    final long[] offsetsAfterRecord = { 15, 29, 42, 58 };
    final long[] offsetAtEndOfSplit = { 20, 40, 58 };

    int recordCounter = 0;
    int splitCounter = 0;
    final FileInputSplit[] inputSplits = format.createInputSplits(noOfBlocks);
    Tuple3<String, Integer, Double> result = new Tuple3<>();
    for (FileInputSplit inputSplit : inputSplits) {
        assertEquals(offsetAtEndOfSplit[splitCounter], inputSplit.getStart() + inputSplit.getLength());
        splitCounter++;
        format.open(inputSplit);
        format.reopen(inputSplit, format.getCurrentState());
        while (!format.reachedEnd()) {
            result = format.nextRecord(result);
            if (result == null) {
                // nothing was read in this round; hand a fresh reuse object to the next call
                result = new Tuple3<>();
                continue;
            }
            assertEquals(offsetsAfterRecord[recordCounter], (long) format.getCurrentState());
            assertEquals(expectedF0[recordCounter], result.f0);
            assertEquals(Integer.valueOf(expectedF1[recordCounter]), result.f1);
            assertEquals(Double.valueOf(expectedF2[recordCounter]), result.f2);
            recordCounter++;

            // simulate checkpoint
            final long offsetToRestore = format.getCurrentState();
            // release the stream held by the format that is about to be discarded
            format.close();
            // create a new format
            format = new TupleCsvInputFormat<>(new Path(tempFile.toURI()), "\n", "|", typeInfo);
            format.setLenient(true);
            format.setBufferSize(bufferSize);
            format.configure(config);
            // simulate the restore operation.
            format.reopen(inputSplit, offsetToRestore);
        }
        format.close();
    }
    assertEquals(4, recordCounter);
}
/** Runs the invalid-line scenario with a 1 MiB read buffer. */
@Test
public void ignoreInvalidLinesAndGetOffsetInLargeBuffer() {
    final int largeBufferSize = 1024 * 1024;
    ignoreInvalidLines(largeBufferSize);
}
/** Runs the invalid-line scenario with a 2-byte read buffer. */
@Test
public void ignoreInvalidLinesAndGetOffsetInSmallBuffer() {
    final int smallBufferSize = 2;
    ignoreInvalidLines(smallBufferSize);
}
/**
 * Reads a file that mixes valid records with header, comment and truncated lines in lenient
 * mode, and checks both the records returned and the stream offset reported after each one.
 *
 * @param bufferSize read buffer size handed to the format
 */
private void ignoreInvalidLines(int bufferSize) {
    try {
        final String fileContent = "#description of the data\n" + "header1|header2|header3|\n" + "this is|1|2.0|\n" + "//a comment\n" + "a test|3|4.0|\n" + "#next|5|6.0|\n" + "asdasdas";
        final FileInputSplit split = createTempFile(fileContent);
        final TupleTypeInfo<Tuple3<String, Integer, Double>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, Integer.class, Double.class);
        final CsvInputFormat<Tuple3<String, Integer, Double>> format = new TupleCsvInputFormat<>(PATH, "\n", "|", typeInfo);
        format.setLenient(true);
        format.setBufferSize(bufferSize);
        final Configuration parameters = new Configuration();
        format.configure(parameters);
        format.open(split);

        Tuple3<String, Integer, Double> result = new Tuple3<>();
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("this is", result.f0);
        assertEquals(Integer.valueOf(1), result.f1);
        assertEquals(Double.valueOf(2.0), result.f2);
        assertEquals(65, (long) format.getCurrentState());

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("a test", result.f0);
        assertEquals(Integer.valueOf(3), result.f1);
        assertEquals(Double.valueOf(4.0), result.f2);
        assertEquals(91, (long) format.getCurrentState());

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("#next", result.f0);
        assertEquals(Integer.valueOf(5), result.f1);
        assertEquals(Double.valueOf(6.0), result.f2);
        assertEquals(104, (long) format.getCurrentState());

        // the truncated last line yields no record, but the offset still advances to the end
        result = format.nextRecord(result);
        assertNull(result);
        assertEquals(fileContent.length(), (long) format.getCurrentState());
    } catch (Exception ex) {
        ex.printStackTrace();
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
/**
 * With {@code "#"} set as the comment prefix, lines starting with {@code #} are skipped and
 * only the two remaining records are returned.
 */
@Test
public void ignoreSingleCharPrefixComments() {
    try {
        final String fileContent = "#description of the data\n" + "#successive commented line\n" + "this is|1|2.0|\n" + "a test|3|4.0|\n" + "#next|5|6.0|\n";
        final FileInputSplit split = createTempFile(fileContent);
        final TupleTypeInfo<Tuple3<String, Integer, Double>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, Integer.class, Double.class);
        final CsvInputFormat<Tuple3<String, Integer, Double>> format = new TupleCsvInputFormat<>(PATH, "\n", "|", typeInfo);
        format.setCommentPrefix("#");
        final Configuration parameters = new Configuration();
        format.configure(parameters);
        format.open(split);

        Tuple3<String, Integer, Double> result = new Tuple3<>();
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("this is", result.f0);
        assertEquals(Integer.valueOf(1), result.f1);
        assertEquals(Double.valueOf(2.0), result.f2);

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("a test", result.f0);
        assertEquals(Integer.valueOf(3), result.f1);
        assertEquals(Double.valueOf(4.0), result.f2);

        // the last line starts with the comment prefix, so no third record is expected
        result = format.nextRecord(result);
        assertNull(result);
    } catch (Exception ex) {
        ex.printStackTrace();
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
/**
 * With the two-character prefix {@code "//"} set as the comment prefix, lines starting with
 * it are skipped and only the two remaining records are returned.
 */
@Test
public void ignoreMultiCharPrefixComments() {
    try {
        final String fileContent = "//description of the data\n" + "//successive commented line\n" + "this is|1|2.0|\n" + "a test|3|4.0|\n" + "//next|5|6.0|\n";
        final FileInputSplit split = createTempFile(fileContent);
        final TupleTypeInfo<Tuple3<String, Integer, Double>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, Integer.class, Double.class);
        final CsvInputFormat<Tuple3<String, Integer, Double>> format = new TupleCsvInputFormat<>(PATH, "\n", "|", typeInfo);
        format.setCommentPrefix("//");
        final Configuration parameters = new Configuration();
        format.configure(parameters);
        format.open(split);

        Tuple3<String, Integer, Double> result = new Tuple3<>();
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("this is", result.f0);
        assertEquals(Integer.valueOf(1), result.f1);
        assertEquals(Double.valueOf(2.0), result.f2);

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("a test", result.f0);
        assertEquals(Integer.valueOf(3), result.f1);
        assertEquals(Double.valueOf(4.0), result.f2);

        // the last line starts with the comment prefix, so no third record is expected
        result = format.nextRecord(result);
        assertNull(result);
    } catch (Exception ex) {
        ex.printStackTrace();
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
/**
 * Reads three-column string records, including empty fields and a row that consists only of
 * delimiters, and checks that empty fields come back as empty strings.
 */
@Test
public void readStringFields() {
    try {
        final String fileContent = "abc|def|ghijk\nabc||hhg\n|||";
        final FileInputSplit split = createTempFile(fileContent);
        final TupleTypeInfo<Tuple3<String, String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class);
        final CsvInputFormat<Tuple3<String, String, String>> format = new TupleCsvInputFormat<>(PATH, "\n", "|", typeInfo);
        final Configuration parameters = new Configuration();
        format.configure(parameters);
        format.open(split);

        Tuple3<String, String, String> result = new Tuple3<>();
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("abc", result.f0);
        assertEquals("def", result.f1);
        assertEquals("ghijk", result.f2);

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("abc", result.f0);
        assertEquals("", result.f1);
        assertEquals("hhg", result.f2);

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("", result.f0);
        assertEquals("", result.f1);
        assertEquals("", result.f2);

        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } catch (Exception ex) {
        ex.printStackTrace();
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
/**
 * Reads records that mix quoted and unquoted string fields, with {@code '@'} as the quote
 * character, and checks that delimiters inside quotes are kept as field content.
 */
@Test
public void readMixedQuotedStringFields() {
    try {
        final String fileContent = "@a|b|c@|def|@ghijk@\nabc||@|hhg@\n|||";
        final FileInputSplit split = createTempFile(fileContent);
        final TupleTypeInfo<Tuple3<String, String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class);
        final CsvInputFormat<Tuple3<String, String, String>> format = new TupleCsvInputFormat<>(PATH, "\n", "|", typeInfo);
        final Configuration parameters = new Configuration();
        format.configure(parameters);
        format.enableQuotedStringParsing('@');
        format.open(split);

        Tuple3<String, String, String> result = new Tuple3<>();
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("a|b|c", result.f0);
        assertEquals("def", result.f1);
        assertEquals("ghijk", result.f2);

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("abc", result.f0);
        assertEquals("", result.f1);
        assertEquals("|hhg", result.f2);

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("", result.f0);
        assertEquals("", result.f1);
        assertEquals("", result.f2);

        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } catch (Exception ex) {
        ex.printStackTrace();
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
/**
 * Reads string records separated by the two-character field delimiter {@code "|-"},
 * including a row that ends with trailing delimiters.
 */
@Test
public void readStringFieldsWithTrailingDelimiters() {
    try {
        final String fileContent = "abc|-def|-ghijk\nabc|-|-hhg\n|-|-|-\n";
        final FileInputSplit split = createTempFile(fileContent);
        final TupleTypeInfo<Tuple3<String, String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class);
        final CsvInputFormat<Tuple3<String, String, String>> format = new TupleCsvInputFormat<>(PATH, typeInfo);
        format.setFieldDelimiter("|-");
        format.configure(new Configuration());
        format.open(split);

        Tuple3<String, String, String> result = new Tuple3<>();
        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("abc", result.f0);
        assertEquals("def", result.f1);
        assertEquals("ghijk", result.f2);

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("abc", result.f0);
        assertEquals("", result.f1);
        assertEquals("hhg", result.f2);

        result = format.nextRecord(result);
        assertNotNull(result);
        assertEquals("", result.f0);
        assertEquals("", result.f1);
        assertEquals("", result.f2);

        result = format.nextRecord(result);
        assertNull(result);
        assertTrue(format.reachedEnd());
    } catch (Exception ex) {
        fail("Test failed due to a " + ex.getClass().getName() + ": " + ex.getMessage());
    }
}
/**
 * Reads comma-separated rows whose trailing fields are progressively empty and expects a
 * {@link ParseException} for the final row, which has too few fields.
 *
 * @throws Exception if the temporary file cannot be written or the format fails unexpectedly
 */
@Test
public void testTailingEmptyFields() throws Exception {
    final String fileContent =
        "aa,bb,cc\n" // ok
            + "aa,bb,\n" // the last field is empty
            + "aa,,\n" // the last two fields are empty
            + ",,\n" // all fields are empty
            + "aa,bb"; // row too short
    final FileInputSplit split = createTempFile(fileContent);
    final TupleTypeInfo<Tuple3<String, String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class, String.class);
    final CsvInputFormat<Tuple3<String, String, String>> format = new TupleCsvInputFormat<>(PATH, typeInfo);
    format.setFieldDelimiter(",");
    format.configure(new Configuration());
    format.open(split);

    Tuple3<String, String, String> result = new Tuple3<>();
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("aa", result.f0);
    assertEquals("bb", result.f1);
    assertEquals("cc", result.f2);

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("aa", result.f0);
    assertEquals("bb", result.f1);
    assertEquals("", result.f2);

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("aa", result.f0);
    assertEquals("", result.f1);
    assertEquals("", result.f2);

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals("", result.f0);
    assertEquals("", result.f1);
    assertEquals("", result.f2);

    try {
        format.nextRecord(result);
        fail("Parse Exception was not thrown! (Row too short)");
    } catch (ParseException expected) {
        // the short row must be rejected; nothing else to verify
    }
}
/**
 * Reads two rows of five pipe-separated integers, the second with a trailing delimiter, and
 * checks every parsed field.
 *
 * @throws IOException if the temporary input file cannot be written or read
 */
@Test
public void testIntegerFields() throws IOException {
    // No catch-and-fail wrapper: letting the exception propagate keeps the full stack trace.
    final String fileContent = "111|222|333|444|555\n666|777|888|999|000|\n";
    final FileInputSplit split = createTempFile(fileContent);
    final TupleTypeInfo<Tuple5<Integer, Integer, Integer, Integer, Integer>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class, Integer.class, Integer.class);
    final CsvInputFormat<Tuple5<Integer, Integer, Integer, Integer, Integer>> format = new TupleCsvInputFormat<>(PATH, typeInfo);
    format.setFieldDelimiter("|");
    format.configure(new Configuration());
    format.open(split);

    Tuple5<Integer, Integer, Integer, Integer, Integer> result = new Tuple5<>();
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(Integer.valueOf(111), result.f0);
    assertEquals(Integer.valueOf(222), result.f1);
    assertEquals(Integer.valueOf(333), result.f2);
    assertEquals(Integer.valueOf(444), result.f3);
    assertEquals(Integer.valueOf(555), result.f4);

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(Integer.valueOf(666), result.f0);
    assertEquals(Integer.valueOf(777), result.f1);
    assertEquals(Integer.valueOf(888), result.f2);
    assertEquals(Integer.valueOf(999), result.f3);
    assertEquals(Integer.valueOf(0), result.f4);

    result = format.nextRecord(result);
    assertNull(result);
    assertTrue(format.reachedEnd());
}
/**
 * Feeds six rows, each with an empty (or blank) value in a different numeric column, and
 * expects a {@link ParseException} from every row before the format reports its end.
 *
 * @throws IOException if the temporary input file cannot be written or read
 */
@Test
public void testEmptyFields() throws IOException {
    final String fileContent = "|0|0|0|0|0|\n" + "1||1|1|1|1|\n" + "2|2||2|2|2|\n" + "3|3|3| |3|3|\n" + "4|4|4|4||4|\n" + "5|5|5|5|5||\n";
    final FileInputSplit split = createTempFile(fileContent);
    final TupleTypeInfo<Tuple6<Short, Integer, Long, Float, Double, Byte>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Short.class, Integer.class, Long.class, Float.class, Double.class, Byte.class);
    final CsvInputFormat<Tuple6<Short, Integer, Long, Float, Double, Byte>> format = new TupleCsvInputFormat<>(PATH, typeInfo);
    format.setFieldDelimiter("|");
    format.configure(new Configuration());
    format.open(split);

    Tuple6<Short, Integer, Long, Float, Double, Byte> result = new Tuple6<>();
    // Row i has its invalid value in column i, so the i-th parser is the one expected to fail.
    final String[] failingParsers = { "ShortParser", "IntegerParser", "LongParser", "FloatParser", "DoubleParser", "ByteParser" };
    for (String parserName : failingParsers) {
        try {
            result = format.nextRecord(result);
            fail("Empty String Parse Exception was not thrown! (" + parserName + ")");
        } catch (ParseException expected) {
            // the row must be rejected; continue with the next one
        }
    }

    result = format.nextRecord(result);
    assertNull(result);
    assertTrue(format.reachedEnd());
}
/**
 * Reads two rows of five pipe-separated doubles, the second with a trailing delimiter, and
 * checks every parsed field.
 *
 * @throws IOException if the temporary input file cannot be written or read
 */
@Test
public void testDoubleFields() throws IOException {
    // No catch-and-fail wrapper: letting the exception propagate keeps the full stack trace.
    final String fileContent = "11.1|22.2|33.3|44.4|55.5\n66.6|77.7|88.8|99.9|00.0|\n";
    final FileInputSplit split = createTempFile(fileContent);
    final TupleTypeInfo<Tuple5<Double, Double, Double, Double, Double>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Double.class, Double.class, Double.class, Double.class, Double.class);
    final CsvInputFormat<Tuple5<Double, Double, Double, Double, Double>> format = new TupleCsvInputFormat<>(PATH, typeInfo);
    format.setFieldDelimiter("|");
    format.configure(new Configuration());
    format.open(split);

    Tuple5<Double, Double, Double, Double, Double> result = new Tuple5<>();
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(Double.valueOf(11.1), result.f0);
    assertEquals(Double.valueOf(22.2), result.f1);
    assertEquals(Double.valueOf(33.3), result.f2);
    assertEquals(Double.valueOf(44.4), result.f3);
    assertEquals(Double.valueOf(55.5), result.f4);

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(Double.valueOf(66.6), result.f0);
    assertEquals(Double.valueOf(77.7), result.f1);
    assertEquals(Double.valueOf(88.8), result.f2);
    assertEquals(Double.valueOf(99.9), result.f3);
    assertEquals(Double.valueOf(0.0), result.f4);

    result = format.nextRecord(result);
    assertNull(result);
    assertTrue(format.reachedEnd());
}
/**
 * Declares only two tuple fields for rows that hold five columns and checks that the first
 * two columns of each row are returned.
 *
 * @throws IOException if the temporary input file cannot be written or read
 */
@Test
public void testReadFirstN() throws IOException {
    // No catch-and-fail wrapper: letting the exception propagate keeps the full stack trace.
    final String fileContent = "111|222|333|444|555|\n666|777|888|999|000|\n";
    final FileInputSplit split = createTempFile(fileContent);
    final TupleTypeInfo<Tuple2<Integer, Integer>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class);
    final CsvInputFormat<Tuple2<Integer, Integer>> format = new TupleCsvInputFormat<>(PATH, typeInfo);
    format.setFieldDelimiter("|");
    format.configure(new Configuration());
    format.open(split);

    Tuple2<Integer, Integer> result = new Tuple2<>();
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(Integer.valueOf(111), result.f0);
    assertEquals(Integer.valueOf(222), result.f1);

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(Integer.valueOf(666), result.f0);
    assertEquals(Integer.valueOf(777), result.f1);

    result = format.nextRecord(result);
    assertNull(result);
    assertTrue(format.reachedEnd());
}
/**
 * Reads columns 0, 3 and 7 of ten-column rows, selected through a boolean inclusion mask,
 * with the multi-character field delimiter {@code "|x|"}.
 *
 * @throws IOException if the temporary input file cannot be written or read
 */
@Test
public void testReadSparseWithNullFieldsForTypes() throws IOException {
    // No catch-and-fail wrapper: letting the exception propagate keeps the full stack trace.
    final String fileContent = "111|x|222|x|333|x|444|x|555|x|666|x|777|x|888|x|999|x|000|x|\n" + "000|x|999|x|888|x|777|x|666|x|555|x|444|x|333|x|222|x|111|x|";
    final FileInputSplit split = createTempFile(fileContent);
    final TupleTypeInfo<Tuple3<Integer, Integer, Integer>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class);
    final boolean[] includedColumns = { true, false, false, true, false, false, false, true };
    final CsvInputFormat<Tuple3<Integer, Integer, Integer>> format = new TupleCsvInputFormat<>(PATH, typeInfo, includedColumns);
    format.setFieldDelimiter("|x|");
    format.configure(new Configuration());
    format.open(split);

    Tuple3<Integer, Integer, Integer> result = new Tuple3<>();
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(Integer.valueOf(111), result.f0);
    assertEquals(Integer.valueOf(444), result.f1);
    assertEquals(Integer.valueOf(888), result.f2);

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(Integer.valueOf(0), result.f0);
    assertEquals(Integer.valueOf(777), result.f1);
    assertEquals(Integer.valueOf(333), result.f2);

    result = format.nextRecord(result);
    assertNull(result);
    assertTrue(format.reachedEnd());
}
/**
 * Reads columns 0, 3 and 7 of ten-column rows, selected through an array of column
 * positions.
 *
 * @throws IOException if the temporary input file cannot be written or read
 */
@Test
public void testReadSparseWithPositionSetter() throws IOException {
    // No catch-and-fail wrapper: letting the exception propagate keeps the full stack trace.
    final String fileContent = "111|222|333|444|555|666|777|888|999|000|\n000|999|888|777|666|555|444|333|222|111|";
    final FileInputSplit split = createTempFile(fileContent);
    final TupleTypeInfo<Tuple3<Integer, Integer, Integer>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class);
    final int[] includedPositions = { 0, 3, 7 };
    final CsvInputFormat<Tuple3<Integer, Integer, Integer>> format = new TupleCsvInputFormat<>(PATH, typeInfo, includedPositions);
    format.setFieldDelimiter("|");
    format.configure(new Configuration());
    format.open(split);

    Tuple3<Integer, Integer, Integer> result = new Tuple3<>();
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(Integer.valueOf(111), result.f0);
    assertEquals(Integer.valueOf(444), result.f1);
    assertEquals(Integer.valueOf(888), result.f2);

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(Integer.valueOf(0), result.f0);
    assertEquals(Integer.valueOf(777), result.f1);
    assertEquals(Integer.valueOf(333), result.f2);

    result = format.nextRecord(result);
    assertNull(result);
    assertTrue(format.reachedEnd());
}
/**
 * Reads columns 0, 3 and 7 of ten-column rows, selected through a boolean inclusion mask,
 * with the field delimiter {@code "&&"}.
 *
 * @throws IOException if the temporary input file cannot be written or read
 */
@Test
public void testReadSparseWithMask() throws IOException {
    // No catch-and-fail wrapper: letting the exception propagate keeps the full stack trace.
    final String fileContent = "111&&222&&333&&444&&555&&666&&777&&888&&999&&000&&\n" + "000&&999&&888&&777&&666&&555&&444&&333&&222&&111&&";
    final FileInputSplit split = createTempFile(fileContent);
    final TupleTypeInfo<Tuple3<Integer, Integer, Integer>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, Integer.class, Integer.class);
    final boolean[] includedColumns = { true, false, false, true, false, false, false, true };
    final CsvInputFormat<Tuple3<Integer, Integer, Integer>> format = new TupleCsvInputFormat<>(PATH, typeInfo, includedColumns);
    format.setFieldDelimiter("&&");
    format.configure(new Configuration());
    format.open(split);

    Tuple3<Integer, Integer, Integer> result = new Tuple3<>();
    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(Integer.valueOf(111), result.f0);
    assertEquals(Integer.valueOf(444), result.f1);
    assertEquals(Integer.valueOf(888), result.f2);

    result = format.nextRecord(result);
    assertNotNull(result);
    assertEquals(Integer.valueOf(0), result.f0);
    assertEquals(Integer.valueOf(777), result.f1);
    assertEquals(Integer.valueOf(333), result.f2);

    result = format.nextRecord(result);
    assertNull(result);
    assertTrue(format.reachedEnd());
}
/**
 * Feeds malformed quoted strings to a {@link StringParser} and checks that parsing returns
 * {@code -1} and reports the matching error state.
 *
 * @throws Exception if the parser fails unexpectedly
 */
@Test
public void testParseStringErrors() throws Exception {
    final StringParser stringParser = new StringParser();
    stringParser.enableQuotedStringParsing((byte) '"');
    // each row: { input, expected error state }
    final Object[][] failures = { { "\"string\" trailing", FieldParser.ParseErrorState.UNQUOTED_CHARS_AFTER_QUOTED_STRING }, { "\"unterminated ", FieldParser.ParseErrorState.UNTERMINATED_QUOTED_STRING } };
    for (Object[] failure : failures) {
        final String input = (String) failure[0];
        final byte[] inputBytes = input.getBytes(ConfigConstants.DEFAULT_CHARSET);
        // the limit is a byte offset, so take it from the encoded form rather than the char count
        final int result = stringParser.parseField(inputBytes, 0, inputBytes.length, new byte[] { '|' }, null);
        assertThat(result, is(-1));
        assertThat(stringParser.getErrorState(), is(failure[1]));
    }
}
/**
 * RFC 4180 compliance scenario: reads a CSV sample with quoted fields, embedded commas and
 * doubled quotes, skipping the header row.
 *
 * @throws Exception if the temporary file cannot be written or the format fails to read it
 */
// Test disabled because we do not support double-quote escaped quotes right now.
// @Test
public void testParserCorrectness() throws Exception {
    // RFC 4180 Compliance Test content
    // Taken from http://en.wikipedia.org/wiki/Comma-separated_values#Example
    final String fileContent = "Year,Make,Model,Description,Price\n" + "1997,Ford,E350,\"ac, abs, moon\",3000.00\n" + "1999,Chevy,\"Venture \"\"Extended Edition\"\"\",\"\",4900.00\n" + "1996,Jeep,Grand Cherokee,\"MUST SELL! air, moon roof, loaded\",4799.00\n" + "1999,Chevy,\"Venture \"\"Extended Edition, Very Large\"\"\",,5000.00\n" + ",,\"Venture \"\"Extended Edition\"\"\",\"\",4900.00";
    final FileInputSplit split = createTempFile(fileContent);
    final TupleTypeInfo<Tuple5<Integer, String, String, String, Double>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(Integer.class, String.class, String.class, String.class, Double.class);
    final CsvInputFormat<Tuple5<Integer, String, String, String, Double>> format = new TupleCsvInputFormat<>(PATH, typeInfo);
    format.setSkipFirstLineAsHeader(true);
    format.setFieldDelimiter(",");
    format.configure(new Configuration());
    format.open(split);

    Tuple5<Integer, String, String, String, Double> result = new Tuple5<>();
    @SuppressWarnings("unchecked")
    final Tuple5<Integer, String, String, String, Double>[] expectedLines = new Tuple5[] { new Tuple5<Integer, String, String, String, Double>(1997, "Ford", "E350", "ac, abs, moon", 3000.0), new Tuple5<Integer, String, String, String, Double>(1999, "Chevy", "Venture \"Extended Edition\"", "", 4900.0), new Tuple5<Integer, String, String, String, Double>(1996, "Jeep", "Grand Cherokee", "MUST SELL! air, moon roof, loaded", 4799.00), new Tuple5<Integer, String, String, String, Double>(1999, "Chevy", "Venture \"Extended Edition, Very Large\"", "", 5000.00), new Tuple5<Integer, String, String, String, Double>(0, "", "Venture \"Extended Edition\"", "", 4900.0) };

    // No catch-and-fail wrapper: letting the exception propagate keeps the full stack trace.
    for (Tuple5<Integer, String, String, String, Double> expected : expectedLines) {
        result = format.nextRecord(result);
        assertEquals(expected, result);
    }
    assertNull(format.nextRecord(result));
    assertTrue(format.reachedEnd());
}
/**
 * Writes {@code content} as UTF-8 to a fresh temporary file (deleted on JVM exit) and returns
 * a split that covers the whole file.
 *
 * @param content text to store in the temporary file
 * @return a split starting at offset 0 and spanning the file's length
 * @throws IOException if the file cannot be created or written
 */
private FileInputSplit createTempFile(String content) throws IOException {
    final File tempFile = File.createTempFile("test_contents", "tmp");
    tempFile.deleteOnExit();
    // try-with-resources closes the writer even when write() throws
    try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
        wrt.write(content);
    }
    return new FileInputSplit(0, new Path(tempFile.toURI().toString()), 0, tempFile.length(), new String[] { "localhost" });
}
/**
 * Exercises trailing carriage-return removal for several combinations of the line ending
 * written to the file and the record delimiter configured on the format.
 */
@Test
public void testWindowsLineEndRemoval() {
    // each row: { line break written to the file, line break configured on the format }
    final String[][] lineBreakCombinations = {
        // Check typical use case -- linux file is correct and it is set up to linux (\n)
        { "\n", "\n" },
        // Check typical windows case -- windows file endings and file has windows file endings set up
        { "\r\n", "\r\n" },
        // Check problematic case windows file -- windows file endings (\r\n) but linux line endings (\n) set up
        { "\r\n", "\n" }
    };
    for (String[] combination : lineBreakCombinations) {
        this.testRemovingTrailingCR(combination[0], combination[1]);
    }
    // Check problematic case linux file -- linux file endings (\n) but windows file endings set up (\r\n)
    // Specific setup for windows line endings will expect \r\n because it has to be set up and is not standard.
}
/**
 * Writes two records separated by {@code lineBreakerInFile}, reads them back with
 * {@code lineBreakerSetup} as the configured record delimiter, and checks that both records
 * come back without any trailing line-break characters.
 *
 * @param lineBreakerInFile line break written after each record in the input file
 * @param lineBreakerSetup record delimiter configured on the input format
 */
private void testRemovingTrailingCR(String lineBreakerInFile, String lineBreakerSetup) {
    final String fileContent = FIRST_PART + lineBreakerInFile + SECOND_PART + lineBreakerInFile;
    try {
        // create input file
        final File tempFile = File.createTempFile("CsvInputFormatTest", "tmp");
        tempFile.deleteOnExit();
        tempFile.setWritable(true);
        // explicit charset and try-with-resources: deterministic bytes, no leaked writer
        try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile), StandardCharsets.UTF_8)) {
            wrt.write(fileContent);
        }

        final TupleTypeInfo<Tuple1<String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class);
        final CsvInputFormat<Tuple1<String>> inputFormat = new TupleCsvInputFormat<>(new Path(tempFile.toURI().toString()), typeInfo);
        final Configuration parameters = new Configuration();
        inputFormat.configure(parameters);
        inputFormat.setDelimiter(lineBreakerSetup);
        final FileInputSplit[] splits = inputFormat.createInputSplits(1);
        inputFormat.open(splits[0]);

        Tuple1<String> result = inputFormat.nextRecord(new Tuple1<String>());
        assertNotNull("Expecting to not return null", result);
        assertEquals(FIRST_PART, result.f0);

        result = inputFormat.nextRecord(result);
        assertNotNull("Expecting to not return null", result);
        assertEquals(SECOND_PART, result.f0);
    } catch (Throwable t) {
        // The signature declares no checked exceptions, so everything is reported via fail().
        System.err.println("test failed with exception: " + t.getMessage());
        t.printStackTrace(System.err);
        fail("Test erroneous");
    }
}
/**
 * Reads the next two records from {@code format} into a single reused {@link PojoItem} and checks
 * them against the expected rows {@code (123, "AAA", 3.123, "BBB")} and
 * {@code (456, "BBB", 1.123, "AAA")}.
 *
 * @param format an already opened input format positioned before the first record
 * @throws Exception if reading a record fails
 */
private void validatePojoItem(CsvInputFormat<PojoItem> format) throws Exception {
    PojoItem item = new PojoItem();
    // first record
    format.nextRecord(item);
    assertEquals(123, item.field1);
    assertEquals("AAA", item.field2);
    assertEquals(Double.valueOf(3.123), item.field3);
    assertEquals("BBB", item.field4);
    // second record, read into the same object
    format.nextRecord(item);
    assertEquals(456, item.field1);
    assertEquals("BBB", item.field2);
    assertEquals(Double.valueOf(1.123), item.field3);
    assertEquals("AAA", item.field4);
}
/**
 * Reads a two-line CSV file into {@link PojoItem} instances using the default field order and
 * validates both records.
 */
@Test
public void testPojoType() throws Exception {
    File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    // try-with-resources closes the writer even when write() throws
    try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        wrt.write("123,AAA,3.123,BBB\n");
        wrt.write("456,BBB,1.123,AAA\n");
    }
    @SuppressWarnings("unchecked")
    PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
    CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo);
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    validatePojoItem(inputFormat);
}
/**
 * Reads a two-line CSV file into a {@link PrivatePojoItem}, whose fields are private, and checks the
 * values of both records.
 */
@Test
public void testPojoTypeWithPrivateField() throws Exception {
    File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    // try-with-resources closes the writer even when write() throws
    try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        wrt.write("123,AAA,3.123,BBB\n");
        wrt.write("456,BBB,1.123,AAA\n");
    }
    @SuppressWarnings("unchecked")
    PojoTypeInfo<PrivatePojoItem> typeInfo = (PojoTypeInfo<PrivatePojoItem>) TypeExtractor.createTypeInfo(PrivatePojoItem.class);
    CsvInputFormat<PrivatePojoItem> inputFormat = new PojoCsvInputFormat<PrivatePojoItem>(new Path(tempFile.toURI().toString()), typeInfo);
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    PrivatePojoItem item = new PrivatePojoItem();
    // first record
    inputFormat.nextRecord(item);
    assertEquals(123, item.field1);
    assertEquals("AAA", item.field2);
    assertEquals(Double.valueOf(3.123), item.field3);
    assertEquals("BBB", item.field4);
    // second record, read into the same object
    inputFormat.nextRecord(item);
    assertEquals(456, item.field1);
    assertEquals("BBB", item.field2);
    assertEquals(Double.valueOf(1.123), item.field3);
    assertEquals("AAA", item.field4);
}
/**
 * Checks that empty string fields, including empty fields at the end of a line, are read as empty
 * strings into a {@link PrivatePojoItem}.
 */
@Test
public void testPojoTypeWithTrailingEmptyFields() throws Exception {
    final String fileContent = "123,,3.123,,\n456,BBB,3.23,,";
    final FileInputSplit split = createTempFile(fileContent);
    @SuppressWarnings("unchecked")
    PojoTypeInfo<PrivatePojoItem> typeInfo = (PojoTypeInfo<PrivatePojoItem>) TypeExtractor.createTypeInfo(PrivatePojoItem.class);
    CsvInputFormat<PrivatePojoItem> inputFormat = new PojoCsvInputFormat<PrivatePojoItem>(PATH, typeInfo);
    inputFormat.configure(new Configuration());
    inputFormat.open(split);
    PrivatePojoItem item = new PrivatePojoItem();
    // first record: both string fields are empty
    inputFormat.nextRecord(item);
    assertEquals(123, item.field1);
    assertEquals("", item.field2);
    assertEquals(Double.valueOf(3.123), item.field3);
    assertEquals("", item.field4);
    // second record: only the last string field is empty
    inputFormat.nextRecord(item);
    assertEquals(456, item.field1);
    assertEquals("BBB", item.field2);
    assertEquals(Double.valueOf(3.23), item.field3);
    assertEquals("", item.field4);
}
/**
 * Reads a CSV file whose column order (field1, field3, field2, field4) differs from the POJO's field
 * order, passing that order explicitly to the input format, and validates both records.
 */
@Test
public void testPojoTypeWithMappingInformation() throws Exception {
    File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    // try-with-resources closes the writer even when write() throws
    try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        wrt.write("123,3.123,AAA,BBB\n");
        wrt.write("456,1.123,BBB,AAA\n");
    }
    @SuppressWarnings("unchecked")
    PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
    CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo, new String[] { "field1", "field3", "field2", "field4" });
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    validatePojoItem(inputFormat);
}
/**
 * Reads a six-column CSV file in which two columns hold {@code NODATA}; an include mask selects the
 * remaining four columns, which are validated as {@link PojoItem} records.
 */
@Test
public void testPojoTypeWithPartialFieldInCSV() throws Exception {
    File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    // try-with-resources closes the writer even when write() throws
    try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        wrt.write("123,NODATA,AAA,NODATA,3.123,BBB\n");
        wrt.write("456,NODATA,BBB,NODATA,1.123,AAA\n");
    }
    @SuppressWarnings("unchecked")
    PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
    CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo, new boolean[] { true, false, true, false, true, true });
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    validatePojoItem(inputFormat);
}
/**
 * Combines an explicit field list ({@code field1}, {@code field4}) with an include mask that selects
 * the first and last CSV column, and checks those two fields of the first record.
 */
@Test
public void testPojoTypeWithMappingInfoAndPartialField() throws Exception {
    File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    // try-with-resources closes the writer even when write() throws
    try (OutputStreamWriter wrt = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        wrt.write("123,3.123,AAA,BBB\n");
        wrt.write("456,1.123,BBB,AAA\n");
    }
    @SuppressWarnings("unchecked")
    PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
    CsvInputFormat<PojoItem> inputFormat = new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo, new String[] { "field1", "field4" }, new boolean[] { true, false, false, true });
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    PojoItem item = new PojoItem();
    inputFormat.nextRecord(item);
    assertEquals(123, item.field1);
    assertEquals("BBB", item.field4);
}
/**
 * Checks that constructing a {@code PojoCsvInputFormat} with an invalid field list is rejected:
 * a list of only two names and a list naming {@code field5} must raise
 * {@link IllegalArgumentException}; a list containing {@code null} must raise
 * {@link NullPointerException}.
 */
@Test
public void testPojoTypeWithInvalidFieldMapping() throws Exception {
    File tempFile = File.createTempFile("CsvReaderPojoType", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    @SuppressWarnings("unchecked")
    PojoTypeInfo<PojoItem> typeInfo = (PojoTypeInfo<PojoItem>) TypeExtractor.createTypeInfo(PojoItem.class);
    try {
        new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo, new String[] { "field1", "field2" });
        fail("The number of POJO fields cannot be same as that of selected CSV fields");
    } catch (IllegalArgumentException expected) {
        // success
    }
    try {
        new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo, new String[] { "field1", "field2", null, "field4" });
        fail("Fields mapping cannot contain null.");
    } catch (NullPointerException expected) {
        // success
    }
    try {
        new PojoCsvInputFormat<PojoItem>(new Path(tempFile.toURI().toString()), typeInfo, new String[] { "field1", "field2", "field3", "field5" });
        fail("Invalid field name");
    } catch (IllegalArgumentException expected) {
        // success
    }
}
/**
 * Parses a {@code |}-delimited line of quoted strings with an include mask that selects the first
 * and third field, and checks the two extracted values.
 */
@Test
public void testQuotedStringParsingWithIncludeFields() throws Exception {
    final String fileContent = "\"20:41:52-1-3-2015\"|\"Re: Taskmanager memory error in Eclipse\"|" + "\"Blahblah <[email protected]>\"|\"blaaa|\"blubb\"";
    final File tempFile = File.createTempFile("CsvReaderQuotedString", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    // try-with-resources closes the writer even when write() throws
    try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        writer.write(fileContent);
    }
    TupleTypeInfo<Tuple2<String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class);
    CsvInputFormat<Tuple2<String, String>> inputFormat = new TupleCsvInputFormat<Tuple2<String, String>>(new Path(tempFile.toURI().toString()), typeInfo, new boolean[] { true, false, true });
    inputFormat.enableQuotedStringParsing('"');
    inputFormat.setFieldDelimiter("|");
    inputFormat.setDelimiter('\n');
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    Tuple2<String, String> record = inputFormat.nextRecord(new Tuple2<String, String>());
    assertEquals("20:41:52-1-3-2015", record.f0);
    assertEquals("Blahblah <[email protected]>", record.f1);
}
/**
 * Parses two quoted fields that contain backslash-escaped quote characters and checks that the
 * returned values still contain the backslash and quote characters.
 */
@Test
public void testQuotedStringParsingWithEscapedQuotes() throws Exception {
    final String fileContent = "\"\\\"Hello\\\" World\"|\"We are\\\" young\"";
    final File tempFile = File.createTempFile("CsvReaderQuotedString", "tmp");
    tempFile.deleteOnExit();
    tempFile.setWritable(true);
    // try-with-resources closes the writer even when write() throws
    try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        writer.write(fileContent);
    }
    TupleTypeInfo<Tuple2<String, String>> typeInfo = TupleTypeInfo.getBasicTupleTypeInfo(String.class, String.class);
    CsvInputFormat<Tuple2<String, String>> inputFormat = new TupleCsvInputFormat<>(new Path(tempFile.toURI().toString()), typeInfo);
    inputFormat.enableQuotedStringParsing('"');
    inputFormat.setFieldDelimiter("|");
    inputFormat.setDelimiter('\n');
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    Tuple2<String, String> record = inputFormat.nextRecord(new Tuple2<String, String>());
    assertEquals("\\\"Hello\\\" World", record.f0);
    assertEquals("We are\\\" young", record.f1);
}
/**
 * Tests that the CSV input format can deal with POJOs which are subclasses.
 *
 * <p>Writes two lines to a temporary file, reads them back as {@link TwitterPOJO} records and
 * compares them with instances built directly from the same lines.
 *
 * @throws Exception if creating, writing or reading the temporary file fails
 */
@Test
public void testPojoSubclreplacedType() throws Exception {
    final String fileContent = "t1,foobar,tweet2\nt2,barfoo,tweet2";
    final File tempFile = File.createTempFile("CsvReaderPOJOSubclreplaced", "tmp");
    tempFile.deleteOnExit();
    // try-with-resources closes the writer even when write() throws
    try (OutputStreamWriter writer = new OutputStreamWriter(new FileOutputStream(tempFile))) {
        writer.write(fileContent);
    }
    @SuppressWarnings("unchecked")
    PojoTypeInfo<TwitterPOJO> typeInfo = (PojoTypeInfo<TwitterPOJO>) TypeExtractor.createTypeInfo(TwitterPOJO.class);
    CsvInputFormat<TwitterPOJO> inputFormat = new PojoCsvInputFormat<>(new Path(tempFile.toURI().toString()), typeInfo);
    inputFormat.configure(new Configuration());
    FileInputSplit[] splits = inputFormat.createInputSplits(1);
    inputFormat.open(splits[0]);
    // expected records are derived from the same text that was written to the file
    List<TwitterPOJO> expected = new ArrayList<>();
    for (String line : fileContent.split("\n")) {
        String[] elements = line.split(",");
        expected.add(new TwitterPOJO(elements[0], elements[1], elements[2]));
    }
    // read until the format signals the end of input with null
    List<TwitterPOJO> actual = new ArrayList<>();
    TwitterPOJO pojo;
    while ((pojo = inputFormat.nextRecord(new TwitterPOJO())) != null) {
        actual.add(pojo);
    }
    assertEquals(expected, actual);
}
// --------------------------------------------------------------------------------------------
// Custom types for testing
// --------------------------------------------------------------------------------------------
/**
* Sample test pojo.
*/
public static clreplaced PojoItem {
public int field1;
public String field2;
public Double field3;
public String field4;
}
/**
* Sample test pojo with private fields.
*/
public static clreplaced PrivatePojoItem {
private int field1;
private String field2;
private Double field3;
private String field4;
public int getField1() {
return field1;
}
public void setField1(int field1) {
this.field1 = field1;
}
public String getField2() {
return field2;
}
public void setField2(String field2) {
this.field2 = field2;
}
public Double getField3() {
return field3;
}
public void setField3(Double field3) {
this.field3 = field3;
}
public String getField4() {
return field4;
}
public void setField4(String field4) {
this.field4 = field4;
}
}
/**
* Sample test pojo.
*/
public static clreplaced POJO {
public String table;
public String time;
public POJO() {
this("", "");
}
public POJO(String table, String time) {
this.table = table;
this.time = time;
}
@Override
public boolean equals(Object obj) {
if (obj instanceof POJO) {
POJO other = (POJO) obj;
return table.equals(other.table) && time.equals(other.time);
} else {
return false;
}
}
}
/**
* Sample test pojo representing tweets.
*/
public static clreplaced TwitterPOJO extends POJO {
public String tweet;
public TwitterPOJO() {
this("", "", "");
}
public TwitterPOJO(String table, String time, String tweet) {
super(table, time);
this.tweet = tweet;
}
@Override
public boolean equals(Object obj) {
if (obj instanceof TwitterPOJO) {
TwitterPOJO other = (TwitterPOJO) obj;
return super.equals(other) && tweet.equals(other.tweet);
} else {
return false;
}
}
}
}
19
Source : AvroOutputFormatTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Writes the same output once without a codec set and once with the Snappy codec, and checks that
 * the Snappy file is smaller. Both temporary files are deleted afterwards, even when the test fails.
 */
@Test
public void testCompression() throws Exception {
    // given
    final Path outputPath = new Path(File.createTempFile("avro-output-file", "avro").getAbsolutePath());
    final Path compressedOutputPath = new Path(File.createTempFile("avro-output-file", "compressed.avro").getAbsolutePath());
    try {
        final AvroOutputFormat<User> outputFormat = new AvroOutputFormat<>(outputPath, User.class);
        outputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
        final AvroOutputFormat<User> compressedOutputFormat = new AvroOutputFormat<>(compressedOutputPath, User.class);
        compressedOutputFormat.setWriteMode(FileSystem.WriteMode.OVERWRITE);
        compressedOutputFormat.setCodec(AvroOutputFormat.Codec.SNAPPY);
        // when
        output(outputFormat);
        output(compressedOutputFormat);
        // then
        assertTrue(fileSize(outputPath) > fileSize(compressedOutputPath));
    } finally {
        // cleanup runs in finally so a failed assertion or write does not leave temp files behind
        FileSystem fs = FileSystem.getLocalFileSystem();
        fs.delete(outputPath, false);
        fs.delete(compressedOutputPath, false);
    }
}
19
Source : AvroOutputFormatTest.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Returns the length reported by the file status of {@code path}, looked up through the path's own
 * file system.
 *
 * @param path file to measure
 * @return the length from the file's status
 * @throws IOException if the file system or the file status cannot be obtained
 */
private long fileSize(Path path) throws IOException {
    final FileSystem owningFileSystem = path.getFileSystem();
    return owningFileSystem.getFileStatus(path).getLen();
}
19
Source : PrestoS3FileSystemITCase.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Intentionally a no-op override: no check is performed for {@code path}.
 */
@Override
protected void checkEmptyDirectory(Path path) throws IOException, InterruptedException {
// seems the presto file system does not assume existence of empty directories in S3
// do nothing as before
}
19
Source : HadoopS3RecoverableWriterITCase.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
// -------------------------- Test Utilities --------------------------
/**
 * Opens {@code path} on the file system returned by {@code getFileSystem()} and returns all of its
 * lines concatenated, without any line separators in between.
 *
 * @param path file to read
 * @return the concatenated lines of the file
 * @throws Exception if opening or reading the file fails
 */
private String getContentsOfFile(Path path) throws Exception {
    final StringBuilder contents = new StringBuilder();
    try (FSDataInputStream in = getFileSystem().open(path);
            BufferedReader lineReader = new BufferedReader(new InputStreamReader(in))) {
        // readLine() strips the terminator, so the lines are joined back-to-back
        for (String current = lineReader.readLine(); current != null; current = lineReader.readLine()) {
            contents.append(current);
        }
    }
    return contents.toString();
}
19
Source : S3RecoverableWriter.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Starts a new recoverable multi-part upload for {@code path} and wraps it in a recoverable output
 * stream.
 *
 * @param path target of the upload
 * @return a new stream backed by the upload
 * @throws IOException if the upload or the stream cannot be created
 */
@Override
public RecoverableFsDataOutputStream open(Path path) throws IOException {
    return S3RecoverableFsDataOutputStream.newStream(
            uploadFactory.getNewRecoverableUpload(path),
            tempFileCreator,
            userDefinedMinPartSize);
}
19
Source : S3RecoverableMultipartUploadFactory.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Converts {@code path} to an object name: the path component of the (absolute) Hadoop path with
 * its first character removed.
 *
 * <p>A relative path is first resolved against the file system's working directory. When the URI
 * has a scheme but an empty path component, the empty string is returned.
 */
private String pathToObjectName(final Path path) {
    final org.apache.hadoop.fs.Path converted = HadoopFileSystem.toHadoopPath(path);
    final org.apache.hadoop.fs.Path absolutePath = converted.isAbsolute()
            ? converted
            : new org.apache.hadoop.fs.Path(fs.getWorkingDirectory(), converted);
    final boolean hasScheme = absolutePath.toUri().getScheme() != null;
    if (hasScheme && absolutePath.toUri().getPath().isEmpty()) {
        return "";
    }
    // drop the first character of the path component
    return absolutePath.toUri().getPath().substring(1);
}
19
Source : S3RecoverableMultipartUploadFactory.java
with Apache License 2.0
from ljygz
with Apache License 2.0
from ljygz
/**
 * Creates a new recoverable multi-part upload for the object name derived from {@code path}.
 *
 * @param path target of the upload
 * @return the newly created upload
 * @throws IOException if the upload cannot be created
 */
RecoverableMultiPartUpload getNewRecoverableUpload(Path path) throws IOException {
    return RecoverableMultiPartUploadImpl.newUpload(
            s3AccessHelper,
            limitedExecutor(),
            pathToObjectName(path));
}
See More Examples