Here are examples of the Java API org.apache.hadoop.fs.FileSystem.create(), taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
563 Examples
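Before the project examples, here is a minimal sketch of the basic call pattern. It is not taken from any of the projects below; it assumes a Hadoop configuration on the classpath (without one it falls back to the local file system), and the path and payload are placeholders:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateSketch {
    public static void main(String[] args) throws Exception {
        // Reads core-site.xml/hdfs-site.xml if present on the classpath
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // create() returns an FSDataOutputStream; by default it overwrites an existing file
        try (FSDataOutputStream out = fs.create(new Path("/tmp/example.txt"))) {
            out.writeBytes("hello\n");
        }
    }
}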
Source : HadoopUtil.java
with Apache License 2.0
from ZuInnoTe
/*
* Creates an output stream for the file to be written and, depending on the configuration, takes care of compression. To enable compression, set the following options:
* mapreduce.output.fileoutputformat.compress true/false
* mapreduce.output.fileoutputformat.compress.codec Java class of the compression codec
*
* Note that some formats already apply internal compression, so additional compression may not bring much benefit
*
* @param conf Configuration of the job
* @param file file to be written
*
* @return output stream of the file
*
*/
public static DataOutputStream getDataOutputStream(Configuration conf, Path file, Progressable progress, boolean compressed, Class<? extends CompressionCodec> compressorClass) throws IOException {
if (!compressed) {
// uncompressed
FileSystem fs = file.getFileSystem(conf);
return fs.create(file, progress);
} else {
// compressed (note partially adapted from TextOutputFormat)
Class<? extends CompressionCodec> codecClass = compressorClass;
// create the named codec
CompressionCodec codec = ReflectionUtils.newInstance(codecClass, conf);
// provide proper file extension
Path compressedFile = file.suffix(codec.getDefaultExtension());
// build the filename including the extension
FileSystem fs = compressedFile.getFileSystem(conf);
return new DataOutputStream(codec.createOutputStream(fs.create(compressedFile, progress)));
}
}
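A hypothetical invocation of the helper above, assuming it is available as HadoopUtil on the classpath; the configuration key matches the comment above, while the path, payload, and choice of GzipCodec are illustrative:

import java.io.DataOutputStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.compress.GzipCodec;

public class HadoopUtilSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        conf.setBoolean("mapreduce.output.fileoutputformat.compress", true);
        Path file = new Path("/tmp/data.txt"); // the helper appends the codec suffix, e.g. ".gz"
        // Progressable is a single-method interface, so a no-op lambda works
        try (DataOutputStream out = HadoopUtil.getDataOutputStream(conf, file, () -> { }, true, GzipCodec.class)) {
            out.writeBytes("hello compressed world\n");
        }
    }
}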
Source : FileTest.java
with Apache License 2.0
from zpochen
public static void main(String[] args) throws IOException {
Configuration conf = new Configuration();
conf.set(FileSystem.FS_DEFAULT_NAME_KEY, "sync:///");
System.out.println(FileSystem.getDefaultUri(conf));
FileSystem fs = FileSystem.get(conf);
// FileSystem fs = new LocalSyncableFileSystem(conf);
Path path = new Path("/tmp/testFile");
FSDataOutputStream out = fs.create(path);
byte[] s = "hello world".getBytes();
out.write(s);
out.sync();
// out.close();
FSDataInputStream in = fs.open(path);
byte[] bytes = new byte[s.length];
in.read(bytes);
System.out.println(new String(bytes));
File file = new File("/tmp/testFile");
FileOutputStream fos = new FileOutputStream(file);
FileInputStream fis = new FileInputStream(file);
fos.write(s);
fos.getFD().sync();
fis.read(bytes);
System.out.println(new String(bytes));
out = fs.create(new Path("/tmp/file"));
for (int i = 0; i < 100; i++) {
bytes = new byte[256 * 1024];
Stopwatch watch = Stopwatch.createStarted();
out.write(bytes);
out.sync();
long t = watch.elapsed(TimeUnit.MILLISECONDS);
System.out.printf("Elapsed: %d. Rate %d.\n", t, (long) ((long) bytes.length * 1000L / t));
}
}
Source : TestParquetScan.java
with Apache License 2.0
from zpochen
@Test
public void testSuccessFile() throws Exception {
Path p = new Path("/tmp/nation_test_parquet_scan");
if (fs.exists(p)) {
fs.delete(p, true);
}
fs.mkdirs(p);
byte[] bytes = Resources.toByteArray(Resources.getResource("tpch/nation.parquet"));
FSDataOutputStream os = fs.create(new Path(p, "nation.parquet"));
os.write(bytes);
os.close();
fs.create(new Path(p, "_SUCCESS")).close();
fs.create(new Path(p, "_logs")).close();
testBuilder().sqlQuery("select count(*) c from dfs.tmp.nation_test_parquet_scan where 1 = 1").unOrdered().baselineColumns("c").baselineValues(25L).build().run();
}
Source : DrillTextRecordWriter.java
with Apache License 2.0
from zpochen
@Override
public void startNewSchema(List<String> columnNames) throws IOException {
// wrap up the current file
cleanup();
// open a new file for writing data with new schema
Path fileName = new Path(location, prefix + "_" + index + "." + extension);
try {
// drill text writer does not support partitions, so only one file can be created
// and thus only one location should be deleted in case of abort
// to ensure that our writer was the first to create output file,
// we create empty output file first and fail if file exists
cleanUpLocation = storageStrategy.createFileAndApply(fs, fileName);
// since empty output file will be overwritten (some file systems may restrict append option)
// we need to re-apply file permission
DataOutputStream fos = fs.create(fileName);
storageStrategy.applyToFile(fs, fileName);
stream = new PrintStream(fos);
logger.debug("Created file: {}", fileName);
} catch (IOException ex) {
logger.error("Unable to create file: " + fileName, ex);
throw ex;
}
index++;
stream.println(Joiner.on(fieldDelimiter).join(columnNames));
}
Source : HdfsCreate.java
with Apache License 2.0
from whirlys
public static void main(String[] args) throws IOException {
// Get the file system
FileSystem fileSystem = SysUtil.getFileSystem();
// If writing fails due to permissions, change them first: hadoop dfs -chmod 777 /hadoop
Path path = new Path("/hadoop/create.txt");
// Get the output stream
FSDataOutputStream outputStream = fileSystem.create(path);
// Write some content
outputStream.writeUTF("Hello HDFS!");
outputStream.close();
// ------ after writing, read it back -----------
// Get the input stream for the file
FSDataInputStream inputStream = fileSystem.open(path);
String data = inputStream.readUTF();
System.out.println(data);
// Output: Hello HDFS!
fileSystem.close();
}
Source : FileCopyWithProgress.java
with Apache License 2.0
from whirlys
public static void main(String[] args) throws Exception {
String localSrc = "hello.txt";
String dst = "/hadoop/hello.txt";
InputStream in = new BufferedInputStream(new FileInputStream(localSrc));
FileSystem fs = SysUtil.getFileSystem();
OutputStream out = fs.create(new Path(dst), new Progressable() {
@Override
public void progress() {
System.out.print(".");
}
});
IOUtils.copyBytes(in, out, 4096, true);
}
Source : HDFSApplicationTests.java
with Apache License 2.0
from ukihsoroy
/**
* <p>Upload a local file to the HDFS server, with a progress bar</p>
* @throws Exception
*/
@Test
void copyFromLocalFileWithProgress() throws Exception {
InputStream in = new BufferedInputStream(new FileInputStream(new File("D:/install/VMware-workstation-full-14.1.1-7528167.exe")));
FSDataOutputStream out = fileSystem.create(new Path("/hdfsapi/test/vmware-14.exe"), new Progressable() {
public void progress() {
// progress indicator
System.out.print("-");
}
});
IOUtils.copyBytes(in, out, 4096);
}
Source : HDFSApplicationTests.java
with Apache License 2.0
from ukihsoroy
/**
* <p>Create a file</p>
* <p>Make sure the DataNode port is open</p>
* @throws IOException
*/
@Test
void createFile() throws IOException {
FSDataOutputStream outputStream = fileSystem.create(new Path("/hdfsapi/test/a.txt"));
outputStream.write("Hello Hadoop!".getBytes());
outputStream.flush();
outputStream.close();
}
Source : WriterTextCell.java
with Apache License 2.0
from tugraz-isds
@Override
public final void writeEmptyMatrixToHDFS(String fname, long rlen, long clen, int blen) throws IOException, DMLRuntimeException {
Path path = new Path(fname);
FileSystem fs = IOUtilFunctions.getFileSystem(path);
try (FSDataOutputStream writer = fs.create(path)) {
writer.writeBytes(IOUtilFunctions.EMPTY_TEXT_LINE);
}
IOUtilFunctions.deleteCrcFilesFromLocalFileSystem(fs, path);
}
Source : AbstractTestHiveFileSystemS3.java
with Apache License 2.0
from trinodb
@Test
public void testIgnoreHadoopFolderMarker() throws Exception {
Path basePath = getBasePath();
FileSystem fs = hdfsEnvironment.getFileSystem(TESTING_CONTEXT, basePath);
String markerFileName = "test_table_$folder$";
Path filePath = new Path(basePath, markerFileName);
fs.create(filePath).close();
assertFalse(Arrays.stream(fs.listStatus(basePath)).anyMatch(file -> file.getPath().getName().equalsIgnoreCase(markerFileName)));
}
Source : RcFileFileWriterFactory.java
with Apache License 2.0
from trinodb
@Override
public Optional<FileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, OptionalInt bucketNumber, AcidTransaction transaction, boolean useAcidSchema, WriterKind writerKind) {
if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
return Optional.empty();
}
RcFileEncoding rcFileEncoding;
if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
rcFileEncoding = new BinaryRcFileEncoding(timeZone);
} else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
rcFileEncoding = createTextVectorEncoding(schema);
} else {
return Optional.empty();
}
Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
// existing tables and partitions may have columns in a different order than the writer is providing, so build
// an index to rearrange columns in the proper order
List<String> fileColumnNames = getColumnNames(schema);
List<Type> fileColumnTypes = getColumnTypes(schema).stream().map(hiveType -> hiveType.getType(typeManager, getTimestampPrecision(session))).collect(toList());
int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
OutputStream outputStream = fileSystem.create(path);
Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
if (isRcfileOptimizedWriterValidate(session)) {
validationInputFactory = Optional.of(() -> {
try {
return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen(), stats);
} catch (IOException e) {
throw new TrinoException(HIVE_WRITE_VALIDATION_FAILED, e);
}
});
}
Callable<Void> rollbackAction = () -> {
fileSystem.delete(path, false);
return null;
};
return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes, codecName, fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(PRESTO_VERSION_NAME, nodeVersion.toString()).put(PRESTO_QUERY_ID_NAME, session.getQueryId()).build(), validationInputFactory));
} catch (Exception e) {
throw new TrinoException(HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
}
}
Source : UploadFile.java
with MIT License
from TranswarpCN
// Upload a file through the Java API
public static void main(String[] args) throws IOException {
Configuration conf = new Configuration();
conf.addResource("core-site.xml");
conf.addResource("hdfs-site.xml");
conf.addResource("yarn-site.xml");
// If Kerberos is not enabled, comment out the two lines below
UserGroupInformation.setConfiguration(conf);
UserGroupInformation.loginUserFromKeytab("hdfs@TDH", "E:\\星环\\任务\\2016年11月28日\\hdfs.keytab");
String localFile = "E:\\星环\\yarn-site.xml";
InputStream in = new BufferedInputStream(new FileInputStream(localFile));
Path p = new Path("/tmp/yarn-site.xml");
FileSystem fs = p.getFileSystem(conf);
OutputStream out = fs.create(p);
IOUtils.copyBytes(in, out, conf);
fs.close();
IOUtils.closeStream(in);
}
Source : CreateFile.java
with MIT License
from TranswarpCN
public static void main(String[] args) throws IOException {
// Create a file through the Java API
String rootPath = "hdfs://nameservice1";
Path p = new Path(rootPath + "/tmp/file.txt");
Configuration conf = new Configuration();
conf.addResource("core-site.xml");
conf.addResource("hdfs-site.xml");
conf.addResource("yarn-site.xml");
// If Kerberos is not enabled, comment out the two lines below
UserGroupInformation.setConfiguration(conf);
UserGroupInformation.loginUserFromKeytab("hdfs@TDH", "E:\\星环\\hdfs.keytab");
FileSystem fs = p.getFileSystem(conf);
fs.create(p);
fs.close();
}
Source : HDFSUtils.java
with MIT License
from Tianny
/*
* Create a file in the specified HDFS directory
* @param fs the HDFS file system
* @param dst destination file path
* @param contents file contents
*/
public static void createFile(FileSystem fs, String dst, String contents) {
try {
Path path = new Path(dst);
FSDataOutputStream fsDataOutputStream = fs.create(path);
fsDataOutputStream.write(contents.getBytes());
fsDataOutputStream.close();
} catch (IOException e) {
e.printStackTrace();
}
}
Source : SegmentHelper.java
with Apache License 2.0
from shunfei
public static void notifyUpdate(FileSystem fileSystem, String tableLocation) throws IOException {
// Touch the update file to notify segment change.
Path updateFilePath = new Path(String.format("%s/__UPDATE__", tableLocation));
try (FSDataOutputStream os = fileSystem.create(updateFilePath, true)) {
os.hsync();
}
}
Source : SegmentHelper.java
with Apache License 2.0
from shunfei
public static void uploadSegment(StorageSegment segment, FileSystem fileSystem, Path path) throws IOException {
Path parent = path.getParent();
short replica = fileSystem.getDefaultReplication(parent);
if (replica <= 0) {
logger.warn("Failed to get replication from {}", parent);
replica = fileSystem.getDefaultReplication();
}
short _replica = replica;
ByteBufferWriter.PredictSizeOpener writeOpener = size -> {
long blockSize = getSegmentBlockSize(fileSystem, size);
FSDataOutputStream outputStream = fileSystem.create(path, FsPermission.getFileDefault().applyUMask(FsPermission.getUMask(fileSystem.getConf())), EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), 4096 * 10, _replica, blockSize, null);
ByteBufferWriter writer = ByteBufferWriter.of(outputStream, outputStream::close);
writer.setName(path.toString());
return writer;
};
IntegratedSegment.Fd.create(segment, writeOpener, null);
}
Source : IndexROutputWriter.java
with Apache License 2.0
from shunfei
@Override
public void close() {
DPSegment segment = null;
try {
segment = segmentGen.gen();
rowBuilder = null;
if (segment.rowCount() == 0) {
// Only create an empty file.
// We cannot just ignore this as hive will complain.
IOUtils.closeQuietly(fileSystem.create(segmentOutPath));
} else {
SegmentHelper.uploadSegment(segment, fileSystem, segmentOutPath);
}
} catch (Exception e) {
logger.error("write to {} failed", segmentOutPath, e);
} finally {
IOUtils.closeQuietly(segment);
// Remove temporary dir.
Try.on(() -> FileUtils.deleteDirectory(localSegmentPath.toFile()), logger);
}
}
Source : SegmentHelper.java
with Apache License 2.0
from shunfei
public static void uploadSegment(StorageSegment segment, FileSystem fileSystem, Path path, Path tableLocation) throws IOException {
short replica = fileSystem.getDefaultReplication(tableLocation);
if (replica <= 0) {
logger.warn("Failed to get replication from {}", tableLocation);
replica = fileSystem.getDefaultReplication();
}
short _replica = replica;
ByteBufferWriter.PredictSizeOpener writeOpener = size -> {
long blockSize = getSegmentBlockSize(fileSystem, size);
FSDataOutputStream outputStream = fileSystem.create(path, FsPermission.getFileDefault().applyUMask(FsPermission.getUMask(fileSystem.getConf())), EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), 4096 * 10, _replica, blockSize, null);
ByteBufferWriter writer = ByteBufferWriter.of(outputStream, outputStream::close);
writer.setName(path.toString());
return writer;
};
IntegratedSegment.Fd.create(segment, writeOpener, null);
}
Source : IndexRRecordWriter.java
with Apache License 2.0
from shunfei
@Override
public void close(boolean abort) throws IOException {
DPSegment segment = null;
try {
segment = segmentGen.gen();
rowBuilder = null;
if (!abort) {
if (segment.rowCount() == 0) {
// Only create an empty file.
// We cannot just ignore this as hive will complain.
IOUtils.closeQuietly(fileSystem.create(segmentOutPath));
} else {
SegmentHelper.uploadSegment(segment, fileSystem, segmentOutPath, tableLocation);
}
}
} finally {
IOUtils.closeQuietly(segment);
// Remove temporary dir.
FileUtils.deleteDirectory(localSegmentPath.toFile());
}
}
Source : TestS3PartitionedFileListing.java
with Apache License 2.0
from rdblue
@Test
public void testTaskOutputListingWithHiddenFiles() throws Exception {
S3PartitionedOutputCommitter committer = newTaskCommitter();
// create files in the attempt path that should be found by getTaskOutput
Path attemptPath = committer.getTaskAttemptPath(getTAC());
FileSystem attemptFS = attemptPath.getFileSystem(getTAC().getConfiguration());
attemptFS.delete(attemptPath, true);
List<String> expectedFiles = Lists.newArrayList();
for (String dateint : Arrays.asList("20161115", "20161116")) {
String metadata = "dateint=" + dateint + "/" + "_metadata";
attemptFS.create(new Path(attemptPath, metadata)).close();
for (String hour : Arrays.asList("13", "14")) {
String relative = "dateint=" + dateint + "/hour=" + hour + "/" + UUID.randomUUID().toString() + ".parquet";
expectedFiles.add(relative);
attemptFS.create(new Path(attemptPath, relative)).close();
String partial = "dateint=" + dateint + "/hour=" + hour + "/." + UUID.randomUUID().toString() + ".partial";
attemptFS.create(new Path(attemptPath, partial)).close();
}
}
List<FileStatus> attemptFiles = committer.getTaskOutput(getTAC());
List<String> actualFiles = Lists.newArrayList();
for (FileStatus stat : attemptFiles) {
String relative = getRelativePath(attemptPath, stat.getPath());
actualFiles.add(relative);
}
Assert.assertEquals("File sets should match", expectedFiles, actualFiles);
attemptFS.delete(attemptPath, true);
}
Source : TestS3PartitionedFileListing.java
with Apache License 2.0
from rdblue
@Test
public void testTaskOutputListing() throws Exception {
S3PartitionedOutputCommitter committer = newTaskCommitter();
// create files in the attempt path that should be found by getTaskOutput
Path attemptPath = committer.getTaskAttemptPath(getTAC());
FileSystem attemptFS = attemptPath.getFileSystem(getTAC().getConfiguration());
attemptFS.delete(attemptPath, true);
List<String> expectedFiles = Lists.newArrayList();
for (String dateint : Arrays.asList("20161115", "20161116")) {
for (String hour : Arrays.asList("13", "14")) {
String relative = "dateint=" + dateint + "/hour=" + hour + "/" + UUID.randomUUID().toString() + ".parquet";
expectedFiles.add(relative);
attemptFS.create(new Path(attemptPath, relative)).close();
}
}
List<FileStatus> attemptFiles = committer.getTaskOutput(getTAC());
List<String> actualFiles = Lists.newArrayList();
for (FileStatus stat : attemptFiles) {
String relative = getRelativePath(attemptPath, stat.getPath());
actualFiles.add(relative);
}
Assert.assertEquals("File sets should match", expectedFiles, actualFiles);
attemptFS.delete(attemptPath, true);
}
Source : MockS3FileSystem.java
with Apache License 2.0
from rdblue
@Override
public FSDataOutputStream create(Path f, FsPermission permission, boolean overwrite, int bufferSize, short replication, long blockSize, Progressable progress) throws IOException {
return mock.create(f, permission, overwrite, bufferSize, replication, blockSize, progress);
}
Source : TextMultiOutputFormat.java
with Apache License 2.0
from Qihoo360
public FSDataOutputStream createFile() throws IOException {
Path file;
file = FileOutputFormat.getTaskOutputPath(jobConf, fileName + getFileExtention(fileNum++) + codec.getDefaultExtension());
FileSystem fs = file.getFileSystem(jobConf);
return fs.create(file, null);
}
Source : TeraInputFormat.java
with Apache License 2.0
from pravega
/**
* Use the input splits to take samples of the input and generate sample
* keys. By default reads 100,000 keys from 10 locations in the input, sorts
* them and picks N-1 keys to generate N equally sized partitions.
* @param job the job to sample
* @param partFile where to write the output file to
* @throws Throwable if something goes wrong
*/
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
long t1 = System.currentTimeMillis();
Configuration conf = job.getConfiguration();
// Instead of reading from hdfs, now the input is from Pravega stream
final PravegaInputFormat inFormat = new PravegaInputFormat();
final TextSampler sampler = new TextSampler();
int partitions = job.getNumReduceTasks();
long sampleSize = conf.getLong(TeraSortConfigKeys.SAMPLE_SIZE.key(), TeraSortConfigKeys.DEFAULT_SAMPLE_SIZE);
final List<InputSplit> splits = inFormat.getSplits(job);
long t2 = System.currentTimeMillis();
System.out.println("Computing input splits took " + (t2 - t1) + "ms");
int samples = Math.min(conf.getInt(TeraSortConfigKeys.NUM_PARTITIONS.key(), TeraSortConfigKeys.DEFAULT_NUM_PARTITIONS), splits.size());
System.out.println("Sampling " + samples + " splits of " + splits.size());
final long recordsPerSample = sampleSize / samples;
final int sampleStep = splits.size() / samples;
Thread[] samplerReader = new Thread[samples];
SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
// take N samples from different parts of the input
for (int i = 0; i < samples; ++i) {
final int idx = i;
samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
{
setDaemon(true);
}
public void run() {
long records = 0;
try {
TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx), context);
reader.initialize(splits.get(sampleStep * idx), context);
while (reader.nextKeyValue()) {
sampler.addKey(new Text(reader.getCurrentValue().toString().substring(0, 10)));
records += 1;
if (recordsPerSample <= records) {
break;
}
}
} catch (IOException ie) {
System.err.println("Got an exception while reading splits " + StringUtils.stringifyException(ie));
throw new RuntimeException(ie);
} catch (InterruptedException e) {
}
}
};
samplerReader[i].start();
}
FileSystem outFs = partFile.getFileSystem(conf);
DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10, outFs.getDefaultBlockSize(partFile));
for (int i = 0; i < samples; i++) {
try {
samplerReader[i].join();
if (threadGroup.getThrowable() != null) {
throw threadGroup.getThrowable();
}
} catch (InterruptedException e) {
}
}
for (Text split : sampler.createPartitions(partitions)) {
split.write(writer);
}
writer.close();
long t3 = System.currentTimeMillis();
System.out.println("Computing parireplacedions took " + (t3 - t2) + "ms");
}
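The partitioning idea behind the sampler above (sort the sampled keys, then pick N-1 evenly spaced cut points to bound N roughly equal partitions) can be sketched independently of Hadoop. This toy version is not part of the original source and assumes plain String keys:

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

public class SplitterSketch {
    // Returns N-1 cut points that bound N roughly equal partitions
    static List<String> pickSplitters(List<String> sample, int partitions) {
        Collections.sort(sample);
        List<String> cuts = new ArrayList<>();
        float step = sample.size() / (float) partitions;
        for (int i = 1; i < partitions; i++) {
            // Clamp the index so rounding can never run past the last sample
            int idx = Math.min(sample.size() - 1, Math.round(step * i));
            cuts.add(sample.get(idx));
        }
        return cuts;
    }
}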
Source : TurHDFSConnector.java
with GNU General Public License v3.0
from openturing
/**
* Copies an existing file from the local filesystem to HDFS
* @param source
* @param dest
* @param conf
* @throws IOException
*/
public void addFile(String source, String dest, Configuration conf) throws IOException {
FileSystem fileSystem = FileSystem.get(conf);
// Get the filename out of the file path
String filename = source.substring(source.lastIndexOf('/') + 1, source.length());
// Create the destination path including the filename.
if (dest.charAt(dest.length() - 1) != '/') {
dest = dest + "/" + filename;
} else {
dest = dest + filename;
}
// System.out.println("Adding file to " + destination);
// Check if the file already exists
Path path = new Path(dest);
if (fileSystem.exists(path)) {
System.out.println("File " + dest + " already exists");
return;
}
// Create a new file and write data to it.
FSDataOutputStream out = fileSystem.create(path);
InputStream in = new BufferedInputStream(new FileInputStream(new File(source)));
byte[] b = new byte[1024];
int numBytes = 0;
while ((numBytes = in.read(b)) > 0) {
out.write(b, 0, numBytes);
}
// Close all the file descriptors
in.close();
out.close();
fileSystem.close();
}
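For reference, a hedged invocation of addFile; the local path and destination directory are made up, and the no-arg constructor is an assumption about TurHDFSConnector:

import org.apache.hadoop.conf.Configuration;

public class AddFileSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        TurHDFSConnector connector = new TurHDFSConnector(); // assumed no-arg constructor
        // Copies the local file to /user/turing/report.csv, skipping it if it already exists
        connector.addFile("/data/export/report.csv", "/user/turing/", conf);
    }
}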
Source : RcFileFileWriterFactory.java
with Apache License 2.0
from openlookeng
@Override
public Optional<HiveFileWriter> createFileWriter(Path path, List<String> inputColumnNames, StorageFormat storageFormat, Properties schema, JobConf configuration, ConnectorSession session, Optional<AcidOutputFormat.Options> acidOptions, Optional<HiveACIDWriteType> acidWriteType) {
if (!RCFileOutputFormat.class.getName().equals(storageFormat.getOutputFormat())) {
return Optional.empty();
}
RcFileEncoding rcFileEncoding;
if (LazyBinaryColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
rcFileEncoding = new BinaryRcFileEncoding();
} else if (ColumnarSerDe.class.getName().equals(storageFormat.getSerDe())) {
rcFileEncoding = RcFilePageSourceFactory.createTextVectorEncoding(schema, hiveStorageTimeZone);
} else {
return Optional.empty();
}
Optional<String> codecName = Optional.ofNullable(configuration.get(FileOutputFormat.COMPRESS_CODEC));
// existing tables and partitions may have columns in a different order than the writer is providing, so build
// an index to rearrange columns in the proper order
List<String> fileColumnNames = getColumnNames(schema);
List<Type> fileColumnTypes = getColumnTypes(schema).stream().map(hiveType -> hiveType.getType(typeManager)).collect(toList());
int[] fileInputColumnIndexes = fileColumnNames.stream().mapToInt(inputColumnNames::indexOf).toArray();
try {
FileSystem fileSystem = hdfsEnvironment.getFileSystem(session.getUser(), path, configuration);
OutputStream outputStream = fileSystem.create(path);
Optional<Supplier<RcFileDataSource>> validationInputFactory = Optional.empty();
if (HiveSessionProperties.isRcfileOptimizedWriterValidate(session)) {
validationInputFactory = Optional.of(() -> {
try {
return new HdfsRcFileDataSource(path.toString(), fileSystem.open(path), fileSystem.getFileStatus(path).getLen(), stats);
} catch (IOException e) {
throw new PrestoException(HiveErrorCode.HIVE_WRITE_VALIDATION_FAILED, e);
}
});
}
Callable<Void> rollbackAction = () -> {
fileSystem.delete(path, false);
return null;
};
return Optional.of(new RcFileFileWriter(outputStream, rollbackAction, rcFileEncoding, fileColumnTypes, codecName, fileInputColumnIndexes, ImmutableMap.<String, String>builder().put(HiveMetadata.PRESTO_VERSION_NAME, nodeVersion.toString()).put(HiveMetadata.PRESTO_QUERY_ID_NAME, session.getQueryId()).build(), validationInputFactory));
} catch (Exception e) {
throw new PrestoException(HiveErrorCode.HIVE_WRITER_OPEN_ERROR, "Error creating RCFile file", e);
}
}
Source : JsonSerDeser.java
with Apache License 2.0
from NJUJYB
/**
* Save a cluster description to a hadoop filesystem
* @param fs filesystem
* @param path path
* @param overwrite should any existing file be overwritten
* @throws IOException IO exception
*/
public void save(FileSystem fs, Path path, T instance, boolean overwrite) throws IOException {
FSDataOutputStream dataOutputStream = fs.create(path, overwrite);
writeJsonAsBytes(instance, dataOutputStream);
}
Source : TestSymLink.java
with Apache License 2.0
from NJUJYB
@Test(timeout = 120000)
public void testSymLink() throws Exception {
boolean mayExit = false;
MiniMRCluster mr = null;
MiniDFSCluster dfs = null;
try {
Configuration conf = new Configuration();
dfs = new MiniDFSCluster.Builder(conf).build();
FileSystem fileSys = dfs.getFileSystem();
String namenode = fileSys.getUri().toString();
mr = new MiniMRCluster(1, namenode, 3);
List<String> args = new ArrayList<String>();
for (Map.Entry<String, String> entry : mr.createJobConf()) {
args.add("-jobconf");
args.add(entry.getKey() + "=" + entry.getValue());
}
// During tests, the default Configuration will use a local mapred
// So don't specify -config or -cluster
String[] argv = new String[] { "-input", INPUT_FILE, "-output", OUTPUT_DIR, "-mapper", map, "-reducer", reduce, "-jobconf", "stream.tmpdir=" + System.getProperty("test.build.data", "/tmp"), "-jobconf", JobConf.MAPRED_MAP_TASK_JAVA_OPTS + "=" + "-Dcontrib.name=" + System.getProperty("contrib.name") + " " + "-Dbuild.test=" + System.getProperty("build.test") + " " + conf.get(JobConf.MAPRED_MAP_TASK_JAVA_OPTS, conf.get(JobConf.MAPRED_TASK_JAVA_OPTS, "")), "-jobconf", JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS + "=" + "-Dcontrib.name=" + System.getProperty("contrib.name") + " " + "-Dbuild.test=" + System.getProperty("build.test") + " " + conf.get(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, conf.get(JobConf.MAPRED_TASK_JAVA_OPTS, "")), "-cacheFile", fileSys.getUri() + CACHE_FILE + "#testlink", "-jobconf", "mapred.jar=" + TestStreaming.STREAMING_JAR };
for (String arg : argv) {
args.add(arg);
}
argv = args.toArray(new String[args.size()]);
fileSys.delete(new Path(OUTPUT_DIR), true);
DataOutputStream file = fileSys.create(new Path(INPUT_FILE));
file.writeBytes(mapString);
file.close();
file = fileSys.create(new Path(CACHE_FILE));
file.writeBytes(cacheString);
file.close();
job = new StreamJob(argv, mayExit);
job.go();
fileSys = dfs.getFileSystem();
String line = null;
Path[] fileList = FileUtil.stat2Paths(fileSys.listStatus(new Path(OUTPUT_DIR), new Utils.OutputFileUtils.OutputFilesFilter()));
for (int i = 0; i < fileList.length; i++) {
System.out.println(fileList[i].toString());
BufferedReader bread = new BufferedReader(new InputStreamReader(fileSys.open(fileList[i])));
line = bread.readLine();
System.out.println(line);
}
assertEquals(cacheString + "\t", line);
} finally {
if (dfs != null) {
dfs.shutdown();
}
if (mr != null) {
mr.shutdown();
}
}
}
Source : TestMultipleCachefiles.java
with Apache License 2.0
from NJUJYB
@Test
public void testMultipleCachefiles() throws Exception {
boolean mayExit = false;
MiniMRCluster mr = null;
MiniDFSCluster dfs = null;
try {
Configuration conf = new Configuration();
dfs = new MiniDFSCluster.Builder(conf).build();
FileSystem fileSys = dfs.getFileSystem();
String namenode = fileSys.getUri().toString();
mr = new MiniMRCluster(1, namenode, 3);
List<String> args = new ArrayList<String>();
for (Map.Entry<String, String> entry : mr.createJobConf()) {
args.add("-jobconf");
args.add(entry.getKey() + "=" + entry.getValue());
}
String[] argv = new String[] { "-input", INPUT_FILE, "-output", OUTPUT_DIR, "-mapper", map, "-reducer", reduce, "-jobconf", "stream.tmpdir=" + System.getProperty("test.build.data", "/tmp"), "-jobconf", JobConf.MAPRED_MAP_TASK_JAVA_OPTS + "=" + "-Dcontrib.name=" + System.getProperty("contrib.name") + " " + "-Dbuild.test=" + System.getProperty("build.test") + " " + conf.get(JobConf.MAPRED_MAP_TASK_JAVA_OPTS, conf.get(JobConf.MAPRED_TASK_JAVA_OPTS, "")), "-jobconf", JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS + "=" + "-Dcontrib.name=" + System.getProperty("contrib.name") + " " + "-Dbuild.test=" + System.getProperty("build.test") + " " + conf.get(JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS, conf.get(JobConf.MAPRED_TASK_JAVA_OPTS, "")), "-cacheFile", fileSys.getUri() + CACHE_FILE + "#" + mapString, "-cacheFile", fileSys.getUri() + CACHE_FILE_2 + "#" + mapString2, "-jobconf", "mapred.jar=" + TestStreaming.STREAMING_JAR };
for (String arg : argv) {
args.add(arg);
}
argv = args.toArray(new String[args.size()]);
fileSys.delete(new Path(OUTPUT_DIR), true);
DataOutputStream file = fileSys.create(new Path(INPUT_FILE));
file.writeBytes(mapString + "\n");
file.writeBytes(mapString2 + "\n");
file.close();
file = fileSys.create(new Path(CACHE_FILE));
file.writeBytes(cacheString + "\n");
file.close();
file = fileSys.create(new Path(CACHE_FILE_2));
file.writeBytes(cacheString2 + "\n");
file.close();
job = new StreamJob(argv, mayExit);
job.go();
fileSys = dfs.getFileSystem();
String line = null;
String line2 = null;
Path[] fileList = FileUtil.stat2Paths(fileSys.listStatus(new Path(OUTPUT_DIR), new Utils.OutputFileUtils.OutputFilesFilter()));
for (int i = 0; i < fileList.length; i++) {
System.out.println(fileList[i].toString());
BufferedReader bread = new BufferedReader(new InputStreamReader(fileSys.open(fileList[i])));
line = bread.readLine();
System.out.println(line);
line2 = bread.readLine();
System.out.println(line2);
}
assertEquals(cacheString + "\t", line);
assertEquals(cacheString2 + "\t", line2);
} finally {
if (dfs != null) {
dfs.shutdown();
}
if (mr != null) {
mr.shutdown();
}
}
}
Source : TestMultipleArchiveFiles.java
with Apache License 2.0
from NJUJYB
protected void createInput() throws IOException {
fileSys.delete(new Path(INPUT_DIR), true);
DataOutputStream dos = fileSys.create(new Path(INPUT_FILE));
String inputFileString = "symlink1" + File.separator + "cacheArchive1\nsymlink2" + File.separator + "cacheArchive2";
dos.write(inputFileString.getBytes("UTF-8"));
dos.close();
DataOutputStream out = fileSys.create(new Path(CACHE_ARCHIVE_1.toString()));
ZipOutputStream zos = new ZipOutputStream(out);
ZipEntry ze = new ZipEntry(CACHE_FILE_1.toString());
zos.putNextEntry(ze);
zos.write(input.getBytes("UTF-8"));
zos.closeEntry();
zos.close();
out = fileSys.create(new Path(CACHE_ARCHIVE_2.toString()));
zos = new ZipOutputStream(out);
ze = new ZipEntry(CACHE_FILE_2.toString());
zos.putNextEntry(ze);
zos.write(input.getBytes("UTF-8"));
zos.closeEntry();
zos.close();
}
Source : TestFileArgs.java
with Apache License 2.0
from NJUJYB
@Before
@Override
public void setUp() throws IOException {
// Set up side file
FileSystem localFs = FileSystem.getLocal(conf);
DataOutputStream dos = localFs.create(new Path("target/sidefile"));
dos.write("hello world\n".getBytes("UTF-8"));
dos.close();
// Since ls doesn't read stdin, we don't want to write anything
// to it, or else we risk Broken Pipe exceptions.
input = "";
}
Source : TestPseudoLocalFs.java
with Apache License 2.0
from NJUJYB
/**
* Validate if file creation succeeds for correctly formed file paths on
* PseudoLocalFs and also verify if appropriate exception is thrown for
* invalid file paths.
* @param pfs Pseudo Local File System
* @param path file path for which create() is to be called
* @param shouldSucceed <code>true</code> if create() should succeed
* @throws IOException
*/
private void validateCreate(FileSystem pfs, Path path, boolean shouldSucceed) throws IOException {
boolean expectedExceptionSeen = false;
try {
pfs.create(path);
} catch (IOException e) {
expectedExceptionSeen = true;
}
if (shouldSucceed) {
replacedertFalse("create() has thrown Exception for valid file name " + path, expectedExceptionSeen);
} else {
replacedertTrue("create() did not throw Exception for invalid file name " + path, expectedExceptionSeen);
}
}
Source : TestFileBasedCopyListing.java
with Apache License 2.0
from NJUJYB
private void addEntries(Path listFile, String... entries) throws IOException {
OutputStream out = fs.create(listFile);
try {
for (String entry : entries) {
out.write(entry.getBytes());
out.write("\n".getBytes());
}
} finally {
out.close();
}
}
Source : TestFileBasedCopyListing.java
with Apache License 2.0
from NJUJYB
private void createFiles(String... entries) throws IOException {
for (String entry : entries) {
OutputStream out = fs.create(new Path(entry));
try {
out.write(entry.getBytes());
out.write("\n".getBytes());
} finally {
out.close();
}
}
}
Source : TestExternalCall.java
with Apache License 2.0
from NJUJYB
private Path createFile(String fname) throws IOException {
Path result = new Path(root + "/" + fname);
OutputStream out = fs.create(result);
try {
out.write((root + "/" + fname).getBytes());
out.write("\n".getBytes());
} finally {
out.close();
}
return result;
}
Source : TestDistCpWithXAttrs.java
with Apache License 2.0
from NJUJYB
@BeforeClass
public static void init() throws Exception {
initCluster(true, true);
fs.mkdirs(subDir1);
fs.create(file1).close();
fs.mkdirs(dir2);
fs.create(file2).close();
fs.create(file3).close();
fs.create(file4).close();
// dir1
fs.setXAttr(dir1, name1, value1);
fs.setXAttr(dir1, name2, value2);
// subDir1
fs.setXAttr(subDir1, name1, value1);
fs.setXAttr(subDir1, name3, value3);
// file1
fs.setXAttr(file1, name1, value1);
fs.setXAttr(file1, name2, value2);
fs.setXAttr(file1, name3, value3);
// dir2
fs.setXAttr(dir2, name2, value2);
// file2
fs.setXAttr(file2, name1, value1);
fs.setXAttr(file2, name4, value4);
// file3
fs.setXAttr(file3, name3, value3);
fs.setXAttr(file3, name4, value4);
}
Source : TestDistCpWithRawXAttrs.java
with Apache License 2.0
from NJUJYB
private static void makeFilesAndDirs(FileSystem fs) throws Exception {
fs.delete(new Path("/src"), true);
fs.delete(new Path("/dest"), true);
fs.mkdirs(subDir1);
fs.create(file1).close();
}
Source : TestDistCpViewFs.java
with Apache License 2.0
from NJUJYB
private void createFiles(String... entries) throws IOException {
String e;
for (String entry : entries) {
if ((new Path(entry)).isAbsolute()) {
e = entry;
} else {
e = root + "/" + entry;
}
OutputStream out = fs.create(new Path(e));
try {
out.write((e).getBytes());
out.write("\n".getBytes());
} finally {
out.close();
}
}
}
Source : TestDistCpSystem.java
with Apache License 2.0
from NJUJYB
private void createFiles(FileSystem fs, String topdir, FileEntry[] entries) throws IOException {
for (FileEntry entry : entries) {
Path newpath = new Path(topdir + "/" + entry.getPath());
if (entry.isDirectory()) {
fs.mkdirs(newpath);
} else {
OutputStream out = fs.create(newpath);
try {
out.write((topdir + "/" + entry).getBytes());
out.write("\n".getBytes());
} finally {
out.close();
}
}
}
}
Source : TeraInputFormat.java
with Apache License 2.0
from NJUJYB
/**
* Use the input splits to take samples of the input and generate sample
* keys. By default reads 100,000 keys from 10 locations in the input, sorts
* them and picks N-1 keys to generate N equally sized partitions.
* @param job the job to sample
* @param partFile where to write the output file to
* @throws Throwable if something goes wrong
*/
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
long t1 = System.currentTimeMillis();
Configuration conf = job.getConfiguration();
final TeraInputFormat inFormat = new TeraInputFormat();
final TextSampler sampler = new TextSampler();
int partitions = job.getNumReduceTasks();
long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
final List<InputSplit> splits = inFormat.getSplits(job);
long t2 = System.currentTimeMillis();
System.out.println("Computing input splits took " + (t2 - t1) + "ms");
int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
System.out.println("Sampling " + samples + " splits of " + splits.size());
final long recordsPerSample = sampleSize / samples;
final int sampleStep = splits.size() / samples;
Thread[] samplerReader = new Thread[samples];
SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
// take N samples from different parts of the input
for (int i = 0; i < samples; ++i) {
final int idx = i;
samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
{
setDaemon(true);
}
public void run() {
long records = 0;
try {
TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());
RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx), context);
reader.initialize(splits.get(sampleStep * idx), context);
while (reader.nextKeyValue()) {
sampler.addKey(new Text(reader.getCurrentKey()));
records += 1;
if (recordsPerSample <= records) {
break;
}
}
} catch (IOException ie) {
System.err.println("Got an exception while reading splits " + StringUtils.stringifyException(ie));
throw new RuntimeException(ie);
} catch (InterruptedException e) {
}
}
};
samplerReader[i].start();
}
FileSystem outFs = partFile.getFileSystem(conf);
DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10, outFs.getDefaultBlockSize(partFile));
for (int i = 0; i < samples; i++) {
try {
samplerReader[i].join();
if (threadGroup.getThrowable() != null) {
throw threadGroup.getThrowable();
}
} catch (InterruptedException e) {
}
}
for (Text split : sampler.createPartitions(partitions)) {
split.write(writer);
}
writer.close();
long t3 = System.currentTimeMillis();
System.out.println("Computing parireplacedions took " + (t3 - t2) + "ms");
}
Source : TestMRAsyncDiskService.java
with Apache License 2.0
from NJUJYB
/**
* Test that volumes specified as relative paths are handled properly
* by MRAsyncDiskService (MAPREDUCE-1887).
*/
@Test
public void testVolumeNormalization() throws Throwable {
LOG.info("TEST_ROOT_DIR is " + TEST_ROOT_DIR);
String relativeTestRoot = relativeToWorking(TEST_ROOT_DIR);
FileSystem localFileSystem = FileSystem.getLocal(new Configuration());
String[] vols = new String[] { relativeTestRoot + "/0", relativeTestRoot + "/1" };
// Put a file in one of the volumes to be cleared on startup.
Path delDir = new Path(vols[0], MRAsyncDiskService.TOBEDELETED);
localFileSystem.mkdirs(delDir);
localFileSystem.create(new Path(delDir, "foo")).close();
MRAsyncDiskService service = new MRAsyncDiskService(localFileSystem, vols);
makeSureCleanedUp(vols, service);
}
Source : TestMapReduceLazyOutput.java
with Apache License 2.0
from NJUJYB
public void createInput(FileSystem fs, int numMappers) throws Exception {
for (int i = 0; i < numMappers; i++) {
OutputStream os = fs.create(new Path(INPUT, "text" + i + ".txt"));
Writer wr = new OutputStreamWriter(os);
for (String inp : input) {
wr.write(inp + "\n");
}
wr.close();
}
}
Source : TestMapperReducerCleanup.java
with Apache License 2.0
from NJUJYB
/**
* Create a single input file in the input directory.
* @param dirPath the directory in which the file resides
* @param id the file id number
* @param numRecords how many records to write to each file.
*/
private void createInputFile(Path dirPath, int id, int numRecords) throws IOException {
final String MESSAGE = "This is a line in a file: ";
Path filePath = new Path(dirPath, "" + id);
Configuration conf = new Configuration();
FileSystem fs = FileSystem.getLocal(conf);
OutputStream os = fs.create(filePath);
BufferedWriter w = new BufferedWriter(new OutputStreamWriter(os));
for (int i = 0; i < numRecords; i++) {
w.write(MESSAGE + id + " " + i + "\n");
}
w.close();
}
Source : TestDelegatingInputFormat.java
with Apache License 2.0
from NJUJYB
static Path getPath(final String location, final FileSystem fs) throws IOException {
Path path = new Path(location);
// create a multi-block file on hdfs
DataOutputStream out = fs.create(path, true, 4096, (short) 2, 512, null);
for (int i = 0; i < 1000; ++i) {
out.writeChars("Hello\n");
}
out.close();
return path;
}
Source : TestMapReduceChain.java
with Apache License 2.0
from NJUJYB
private static void writeFlag(Configuration conf, String flag) throws IOException {
FileSystem fs = FileSystem.get(conf);
if (getFlag(conf, flag)) {
fail("Flag " + flag + " already exists");
}
DataOutputStream file = fs.create(new Path(flagDir, flag));
file.close();
}
Source : TestUserDefinedCounters.java
with Apache License 2.0
from NJUJYB
private void cleanAndCreateInput(FileSystem fs) throws IOException {
fs.delete(INPUT_DIR, true);
fs.delete(OUTPUT_DIR, true);
OutputStream os = fs.create(INPUT_FILE);
Writer wr = new OutputStreamWriter(os);
wr.write("hello1\n");
wr.write("hello2\n");
wr.write("hello3\n");
wr.write("hello4\n");
wr.close();
}
Source : TestSpecialCharactersInOutputPath.java
with Apache License 2.0
from NJUJYB
public static boolean launchJob(URI fileSys, JobConf conf, int numMaps, int numReduces) throws IOException {
final Path inDir = new Path("/testing/input");
final Path outDir = new Path("/testing/output");
FileSystem fs = FileSystem.get(fileSys, conf);
fs.delete(outDir, true);
if (!fs.mkdirs(inDir)) {
LOG.warn("Can't create " + inDir);
return false;
}
// generate an input file
DataOutputStream file = fs.create(new Path(inDir, "part-0"));
file.writeBytes("foo foo2 foo3");
file.close();
// use WordCount example
FileSystem.setDefaultUri(conf, fileSys);
conf.setJobName("foo");
conf.setInputFormat(TextInputFormat.class);
conf.setOutputFormat(SpecialTextOutputFormat.class);
conf.setOutputKeyClass(LongWritable.class);
conf.setOutputValueClass(Text.class);
conf.setMapperClass(IdentityMapper.class);
conf.setReducerClass(IdentityReducer.class);
FileInputFormat.setInputPaths(conf, inDir);
FileOutputFormat.setOutputPath(conf, outDir);
conf.setNumMapTasks(numMaps);
conf.setNumReduceTasks(numReduces);
// run job and wait for completion
RunningJob runningJob = JobClient.runJob(conf);
try {
assertTrue(runningJob.isComplete());
assertTrue(runningJob.isSuccessful());
assertTrue("Output folder not found!", fs.exists(new Path("/testing/output/" + OUTPUT_FILENAME)));
} catch (NullPointerException npe) {
// This NPE should no longer happen
fail("A NPE should not have happened.");
}
// return job result
LOG.info("job is complete: " + runningJob.isSuccessful());
return (runningJob.isSuccessful());
}
Source : TestReporter.java
with Apache License 2.0
from NJUJYB
@Test
public void testStatusLimit() throws IOException, InterruptedException, ClassNotFoundException {
Path test = new Path(testRootTempDir, "testStatusLimit");
Configuration conf = new Configuration();
Path inDir = new Path(test, "in");
Path outDir = new Path(test, "out");
FileSystem fs = FileSystem.get(conf);
if (fs.exists(inDir)) {
fs.delete(inDir, true);
}
fs.mkdirs(inDir);
DataOutputStream file = fs.create(new Path(inDir, "part-" + 0));
file.writeBytes("testStatusLimit");
file.close();
if (fs.exists(outDir)) {
fs.delete(outDir, true);
}
Job job = Job.getInstance(conf, "testStatusLimit");
job.setMapperClass(StatusLimitMapper.class);
job.setNumReduceTasks(0);
FileInputFormat.addInputPath(job, inDir);
FileOutputFormat.setOutputPath(job, outDir);
job.waitForCompletion(true);
replacedertTrue("Job failed", job.isSuccessful());
}
Source : TestMultiFileInputFormat.java
with Apache License 2.0
from NJUJYB
private Path initFiles(FileSystem fs, int numFiles, int numBytes) throws IOException {
Path dir = new Path(System.getProperty("test.build.data", ".") + "/mapred");
Path multiFileDir = new Path(dir, "test.multifile");
fs.delete(multiFileDir, true);
fs.mkdirs(multiFileDir);
LOG.info("Creating " + numFiles + " file(s) in " + multiFileDir);
for (int i = 0; i < numFiles; i++) {
Path path = new Path(multiFileDir, "file_" + i);
FSDataOutputStream out = fs.create(path);
if (numBytes == -1) {
numBytes = rand.nextInt(MAX_BYTES);
}
for (int j = 0; j < numBytes; j++) {
out.write(rand.nextInt());
}
out.close();
if (LOG.isDebugEnabled()) {
LOG.debug("Created file " + path + " with length " + numBytes);
}
lengths.put(path.getName(), new Long(numBytes));
}
FileInputFormat.setInputPaths(job, multiFileDir);
return multiFileDir;
}
Source : TestMRIntermediateDataEncryption.java
with Apache License 2.0
from NJUJYB
private void createInput(FileSystem fs, int numMappers, int numLines) throws Exception {
fs.delete(INPUT_DIR, true);
for (int i = 0; i < numMappers; i++) {
OutputStream os = fs.create(new Path(INPUT_DIR, "input_" + i + ".txt"));
Writer writer = new OutputStreamWriter(os);
for (int j = 0; j < numLines; j++) {
// Create sorted key, value pairs.
int k = j + 1;
String formattedNumber = String.format("%09d", k);
writer.write(formattedNumber + " " + formattedNumber + "\n");
}
writer.close();
}
}