Here are examples of the Java API org.apache.hadoop.mapreduce.Job.getInstance(), taken from open source projects. The number above each example is its vote count; voting up indicates which examples are most useful and appropriate.
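For orientation before the examples: Job.getInstance() has three commonly used overloads, the no-argument form, Job.getInstance(Configuration), and Job.getInstance(Configuration, String jobName). Below is a minimal, self-contained driver sketch (not taken from any project listed here) that uses Hadoop's identity Mapper and Reducer:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MinimalDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(conf, name) copies conf; later changes to conf
        // do not affect the job. Use job.getConfiguration() instead.
        Job job = Job.getInstance(conf, "minimal-driver");
        job.setJarByClass(MinimalDriver.class);
        job.setMapperClass(Mapper.class);     // identity map
        job.setReducerClass(Reducer.class);   // identity reduce
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}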
314 Examples
19
Source : TransformBaseRunner.java
with GNU General Public License v3.0
from wlhbdp
protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Run locally
    // TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, true);
    // Run on a cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    //     this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    //     job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}
19
Source : AnalyserLogDataRunner.java
with GNU General Public License v3.0
from wlhbdp
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "analyser_logdata");
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);
    // Configure the reducer
    // 1. Run on the cluster as a packaged jar (requires addDependencyJars to be true, the default)
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, true);
    // 2. Run locally as a packaged jar (addDependencyJars is passed as false here)
    // TableMapReduceUtil
    //     .initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null,
    //         null, false);
    job.setNumReduceTasks(0);
    // Set the input paths
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}
19
Source : ReduceJobsConfiguration.java
with Apache License 2.0
from WinterChenS
/**
 * Build the configuration for the word-count job and run it.
 *
 * @param jobName
 * @param inputPath
 * @param outputPath
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public void getWordCountJobsConf(String jobName, String inputPath, String outputPath) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = getConfiguration();
    Job job = Job.getInstance(conf, jobName);
    job.setMapperClass(WordMapper.class);
    job.setCombinerClass(WordReduce.class);
    job.setJarByClass(HadoopDemoApplication.class);
    job.setReducerClass(WordReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Small-file merging
    // job.setInputFormatClass(CombineTextInputFormat.class);
    // Maximum split size
    // CombineTextInputFormat.setMaxInputSplitSize(job, 4 * 1024 * 1024);
    // Minimum split size
    // CombineTextInputFormat.setMinInputSplitSize(job, 2 * 1024 * 1024);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.waitForCompletion(true);
}
19
Source : StoreDiagJob.java
with Apache License 2.0
from steveloughran
public static boolean execute(Configuration conf, List<String> targets) throws IOException, ClassNotFoundException, InterruptedException {
    JobConf jobConf = new JobConf(conf);
    StoreDiagJob diagJob = new StoreDiagJob(jobConf);
    Job job = Job.getInstance(jobConf, "Store Diag");
    job.setJarByClass(StoreDiagJob.class);
    job.setMapperClass(DiagMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    String filename = UUID.randomUUID().toString();
    FileContext clusterFC = FileContext.getFileContext(jobConf);
    Path home = clusterFC.getHomeDirectory();
    Path jobdir = new Path(home, filename);
    Path srcFile = new Path(jobdir, "input.txt");
    Path destDir = new Path(jobdir, "output");
    // one entry per line
    try (FSDataOutputStream stream = clusterFC.create(srcFile, EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), Options.CreateOpts.createParent())) {
        for (String target : targets) {
            stream.writeChars(target);
            stream.writeChar('\n');
        }
    }
    jobConf.set(DELIMITER, "\n");
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, srcFile);
    FileOutputFormat.setOutputPath(job, destDir);
    return job.waitForCompletion(true);
}
19
Source : WordMean.java
with Apache License 2.0
from pravega
@Override
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        System.err.println("Usage: wordmean <dummy_hdfs> <uri> <scope> <stream> <out>");
        return 0;
    }
    Configuration conf = getConf();
    conf.setStrings("input.pravega.uri", args[1]);
    conf.setStrings("input.pravega.scope", args[2]);
    conf.setStrings("input.pravega.stream", args[3]);
    conf.setStrings("input.pravega.deserializer", TextSerializer.class.getName());
    Job job = Job.getInstance(conf, "word mean");
    job.setJarByClass(WordMean.class);
    job.setMapperClass(WordMeanMapper.class);
    job.setCombinerClass(WordMeanReducer.class);
    job.setReducerClass(WordMeanReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(PravegaInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[4]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    mean = readAndCalcMean(outputpath, conf);
    return (result ? 0 : 1);
}
19
Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB
/**
 * Creates a simple kill job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple kill job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createKillJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("Kill-Job");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(KillMapper.class);
    theJob.setReducerClass(Reducer.class);
    theJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    return theJob;
}
19
Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB
/**
 * Creates a simple copy job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    conf.setInt(MRJobConfig.NUM_MAPS, 3);
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("DataMoveJob");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(DataCopyMapper.class);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    theJob.setReducerClass(DataCopyReducer.class);
    theJob.setNumReduceTasks(1);
    return theJob;
}
19
Source : TestMRSequenceFileAsBinaryOutputFormat.java
with Apache License 2.0
from NJUJYB
public void testSequenceOutputClassDefaultsToMapRedOutputClass() throws IOException {
    Job job = Job.getInstance();
    // Setting random classes to test getSequenceFileOutput{Key,Value}Class
    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputValueClass(BooleanWritable.class);
    assertEquals("SequenceFileOutputKeyClass should default to outputKeyClass", FloatWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(job));
    assertEquals("SequenceFileOutputValueClass should default to " + "outputValueClass", BooleanWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(job));
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
    assertEquals("SequenceFileOutputKeyClass not updated", IntWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(job));
    assertEquals("SequenceFileOutputValueClass not updated", DoubleWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(job));
}
19
Source : TestMRKeyValueTextInputFormat.java
with Apache License 2.0
from NJUJYB
/**
 * Test using the gzip codec for reading
 */
@Test
public void testGzip() throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    CompressionCodec gzip = new GzipCodec();
    ReflectionUtils.setConf(gzip, conf);
    localFs.delete(workDir, true);
    writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip, "line-1\tthe quick\nline-2\tbrown\nline-3\t" + "fox jumped\nline-4\tover\nline-5\t the lazy\nline-6\t dog\n");
    writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip, "line-1\tthis is a test\nline-1\tof gzip\n");
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, workDir);
    KeyValueTextInputFormat format = new KeyValueTextInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    assertEquals("compressed splits == 2", 2, splits.size());
    FileSplit tmp = (FileSplit) splits.get(0);
    if (tmp.getPath().getName().equals("part2.txt.gz")) {
        splits.set(0, splits.get(1));
        splits.set(1, tmp);
    }
    List<Text> results = readSplit(format, splits.get(0), job);
    assertEquals("splits[0] length", 6, results.size());
    assertEquals("splits[0][0]", "the quick", results.get(0).toString());
    assertEquals("splits[0][1]", "brown", results.get(1).toString());
    assertEquals("splits[0][2]", "fox jumped", results.get(2).toString());
    assertEquals("splits[0][3]", "over", results.get(3).toString());
    assertEquals("splits[0][4]", " the lazy", results.get(4).toString());
    assertEquals("splits[0][5]", " dog", results.get(5).toString());
    results = readSplit(format, splits.get(1), job);
    assertEquals("splits[1] length", 2, results.size());
    assertEquals("splits[1][0]", "this is a test", results.get(0).toString());
    assertEquals("splits[1][1]", "of gzip", results.get(1).toString());
}
19
Source : TestLineRecordReaderJobs.java
with Apache License 2.0
from NJUJYB
/**
 * Creates and runs an MR job
 *
 * @param conf
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void createAndRunJob(Configuration conf) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf);
    job.setJarByClass(TestLineRecordReaderJobs.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    FileInputFormat.addInputPath(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.waitForCompletion(true);
}
19
Source : TestCombineTextInputFormat.java
with Apache License 2.0
from NJUJYB
/**
 * Test using the gzip codec for reading
 */
@Test(timeout = 10000)
public void testGzip() throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    CompressionCodec gzip = new GzipCodec();
    ReflectionUtils.setConf(gzip, conf);
    localFs.delete(workDir, true);
    writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip, "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
    writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip, "this is a test\nof gzip\n");
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, workDir);
    CombineTextInputFormat format = new CombineTextInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    assertEquals("compressed splits == 1", 1, splits.size());
    List<Text> results = readSplit(format, splits.get(0), job);
    assertEquals("splits[0] length", 8, results.size());
    final String[] firstList = { "the quick", "brown", "fox jumped", "over", " the lazy", " dog" };
    final String[] secondList = { "this is a test", "of gzip" };
    String first = results.get(0).toString();
    if (first.equals(firstList[0])) {
        testResults(results, firstList, secondList);
    } else if (first.equals(secondList[0])) {
        testResults(results, secondList, firstList);
    } else {
        fail("unexpected first token!");
    }
}
19
Source : TestFileInputFormat.java
with Apache License 2.0
from NJUJYB
@Test
public void testSplitLocationInfo() throws Exception {
    Configuration conf = getConfiguration();
    conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, "test:///a1/a2");
    Job job = Job.getInstance(conf);
    TextInputFormat fileInputFormat = new TextInputFormat();
    List<InputSplit> splits = fileInputFormat.getSplits(job);
    String[] locations = splits.get(0).getLocations();
    Assert.assertEquals(2, locations.length);
    SplitLocationInfo[] locationInfo = splits.get(0).getLocationInfo();
    Assert.assertEquals(2, locationInfo.length);
    SplitLocationInfo localhostInfo = locations[0].equals("localhost") ? locationInfo[0] : locationInfo[1];
    SplitLocationInfo otherhostInfo = locations[0].equals("otherhost") ? locationInfo[0] : locationInfo[1];
    Assert.assertTrue(localhostInfo.isOnDisk());
    Assert.assertTrue(localhostInfo.isInMemory());
    Assert.assertTrue(otherhostInfo.isOnDisk());
    Assert.assertFalse(otherhostInfo.isInMemory());
}
19
Source : TestCompressionEmulationUtils.java
with Apache License 2.0
from naver
/**
 * Runs a GridMix data-generation job.
 */
private static void runDataGenJob(Configuration conf, Path tempDir) throws IOException, ClassNotFoundException, InterruptedException {
    JobClient client = new JobClient(conf);
    // get the local job runner
    conf.setInt(MRJobConfig.NUM_MAPS, 1);
    Job job = Job.getInstance(conf);
    CompressionEmulationUtil.configure(job);
    job.setInputFormatClass(CustomInputFormat.class);
    // set the output path
    FileOutputFormat.setOutputPath(job, tempDir);
    // submit and wait for completion
    job.submit();
    int ret = job.waitForCompletion(true) ? 0 : 1;
    assertEquals("Job Failed", 0, ret);
}
19
Source : UserNamePermission.java
with Apache License 2.0
from naver
public static void main(String[] args) throws Exception {
    Path outDir = new Path("output");
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "user name check");
    job.setJarByClass(UserNamePermission.class);
    job.setMapperClass(UserNamePermission.UserNameMapper.class);
    job.setCombinerClass(UserNamePermission.UserNameReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(UserNamePermission.UserNameReducer.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, outDir);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
19
Source : TestMiniMRClientCluster.java
with Apache License 2.0
from naver
public static Job createJob() throws IOException {
    final Job baseJob = Job.getInstance(mrCluster.getConfig());
    baseJob.setOutputKeyClass(Text.class);
    baseJob.setOutputValueClass(IntWritable.class);
    baseJob.setMapperClass(MyMapper.class);
    baseJob.setReducerClass(MyReducer.class);
    baseJob.setNumReduceTasks(1);
    return baseJob;
}
19
Source : InputSampler.java
with Apache License 2.0
from naver
/**
 * Driver for InputSampler from the command line.
 * Configures a JobConf instance and calls {@link #writePartitionFile}.
 */
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    ArrayList<String> otherArgs = new ArrayList<String>();
    Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                job.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-inFormat".equals(args[i])) {
                job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
            } else if ("-keyClass".equals(args[i])) {
                job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
            } else if ("-splitSample".equals(args[i])) {
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new SplitSampler<K, V>(numSamples, maxSplits);
            } else if ("-splitRandom".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else if ("-splitInterval".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    if (job.getNumReduceTasks() <= 1) {
        System.err.println("Sampler requires more than one reducer");
        return printUsage();
    }
    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    if (null == sampler) {
        sampler = new RandomSampler<K, V>(0.1, 10000, 10);
    }
    Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
    TotalOrderPartitioner.setPartitionFile(getConf(), outf);
    for (String s : otherArgs) {
        FileInputFormat.addInputPath(job, new Path(s));
    }
    InputSampler.<K, V>writePartitionFile(job, sampler);
    return 0;
}
19
Source : CombineFileInputFormat.java
with Apache License 2.0
from naver
/**
 * List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected FileStatus[] listStatus(JobConf job) throws IOException {
    List<FileStatus> result = super.listStatus(Job.getInstance(job));
    return result.toArray(new FileStatus[result.size()]);
}
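The javadoc above invites overriding listStatus to filter inputs. A hypothetical sketch of such a subclass (the class name and regular expression are assumptions, and it stays abstract because the old-API CombineFileInputFormat leaves getRecordReader to concrete subclasses):

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.CombineFileInputFormat;

// Keep only input files whose names match a regular expression.
public abstract class FilteredCombineFileInputFormat<K, V> extends CombineFileInputFormat<K, V> {
    private static final Pattern NAME_PATTERN = Pattern.compile(".*\\.log$");

    @Override
    protected FileStatus[] listStatus(JobConf job) throws IOException {
        List<FileStatus> kept = new ArrayList<>();
        for (FileStatus status : super.listStatus(job)) {
            if (NAME_PATTERN.matcher(status.getPath().getName()).matches()) {
                kept.add(status);
            }
        }
        return kept.toArray(new FileStatus[0]);
    }
}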
19
Source : CombineFileInputFormat.java
with Apache License 2.0
from naver
public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    List<org.apache.hadoop.mapreduce.InputSplit> newStyleSplits = super.getSplits(Job.getInstance(job));
    InputSplit[] ret = new InputSplit[newStyleSplits.size()];
    for (int pos = 0; pos < newStyleSplits.size(); ++pos) {
        org.apache.hadoop.mapreduce.lib.input.CombineFileSplit newStyleSplit = (org.apache.hadoop.mapreduce.lib.input.CombineFileSplit) newStyleSplits.get(pos);
        ret[pos] = new CombineFileSplit(job, newStyleSplit.getPaths(), newStyleSplit.getStartOffsets(), newStyleSplit.getLengths(), newStyleSplit.getLocations());
    }
    return ret;
}
19
Source : MR_WLA.java
with GNU General Public License v3.0
from monsonlee
public int run(String[] args) throws Exception {
    String jobName = "wla_baidu";
    String inputPath = args[0];
    String outputPath = args[1];
    Path path = new Path(outputPath);
    // Delete the output directory
    path.getFileSystem(getConf()).delete(path, true);
    // 1. Organize all the code into a Topology-like class
    Job job = Job.getInstance(getConf(), jobName);
    // 2. When running as a packaged jar, the following line is required
    job.setJarByClass(MR_WLA.class);
    // 3. Specify the HDFS input
    FileInputFormat.setInputPaths(job, inputPath);
    // 4. Specify the map class
    job.setMapperClass(WLA_Mapper.class);
    // 5. Specify the <key, value> types of the map output
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // 6. Specify the reduce class
    job.setReducerClass(WLA_Reducer.class);
    // 7. Specify the <key, value> types of the reduce output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // 8. Specify the HDFS output
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job.waitForCompletion(true) ? 0 : 1;
}
19
Source : HadoopInputs.java
with Apache License 2.0
from ljygz
/**
 * Creates a Flink {@link InputFormat} that wraps the given Hadoop {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}.
 *
 * @return A Flink InputFormat that wraps the Hadoop FileInputFormat.
 */
public static <K, V> org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormat<K, V> readHadoopFile(org.apache.hadoop.mapreduce.lib.input.FileInputFormat<K, V> mapreduceInputFormat, Class<K> key, Class<V> value, String inputPath) throws IOException {
    return readHadoopFile(mapreduceInputFormat, key, value, inputPath, Job.getInstance());
}
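A usage sketch of this wrapper in a Flink batch job, assuming Hadoop's TextInputFormat and a hypothetical input path; the wrapped format yields Tuple2<key, value> records:

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.hadoopcompatibility.HadoopInputs;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class ReadHadoopFileExample {
    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // Wrap Hadoop's TextInputFormat: keys are byte offsets, values are lines.
        DataSet<Tuple2<LongWritable, Text>> lines = env.createInput(
                HadoopInputs.readHadoopFile(new TextInputFormat(),
                        LongWritable.class, Text.class, "hdfs:///tmp/input")); // hypothetical path
        lines.print();
    }
}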
19
Source : CarbondataInputFormat.java
with Apache License 2.0
from DTStack
@Override
public InputSplit[] createInputSplitsInternal(int num) throws IOException {
    org.apache.hadoop.conf.Configuration conf = initConfig();
    Job job = Job.getInstance(conf);
    CarbonTableInputFormat format = new CarbonTableInputFormat();
    List<org.apache.hadoop.mapreduce.InputSplit> splitList = format.getSplits(job);
    int splitNum = (splitList.size() < num ? splitList.size() : num);
    int groupSize = (int) Math.ceil(splitList.size() / (double) splitNum);
    InputSplit[] ret = new InputSplit[splitNum];
    for (int i = 0; i < splitNum; ++i) {
        List<CarbonInputSplit> carbonInputSplits = new ArrayList<>();
        for (int j = 0; j < groupSize && i * groupSize + j < splitList.size(); ++j) {
            carbonInputSplits.add((CarbonInputSplit) splitList.get(i * groupSize + j));
        }
        ret[i] = new CarbonFlinkInputSplit(carbonInputSplits, i);
    }
    return ret;
}
19
Source : HdfsUtil.java
with Apache License 2.0
from didi
public static Job getHdfsJob(Configuration conf, TaskConfig taskConfig, IndexInfo indexInfo) throws Exception {
    Job job = Job.getInstance(conf, MAIN_CLASS);
    job.setJobName("DidiFastIndex_" + taskConfig.getEsTemplate());
    job.setJarByClass(FastIndex.class);
    job.setMapperClass(FastIndexMapper.class);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    HCatInputFormat.setInput(job, taskConfig.getHiveDB(), taskConfig.getHiveTable(), taskConfig.getFilterStr());
    job.setReducerClass(FastIndexReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(indexInfo.getReducerNum());
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(taskConfig.getHdfsMROutputPath()));
    return job;
}
19
Source : NewInstallUserRunner.java
with Apache License 2.0
from bjmashibing
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Process the arguments
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "new_install_user");
    job.setJarByClass(NewInstallUserRunner.class);
    // Run locally
    TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);
    // Run on a cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job);
    job.setReducerClass(NewInstallUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);
    // job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TransformerOutputFormat.class);
    if (job.waitForCompletion(true)) {
        // On success, compute the total user count
        this.calculateTotalUsers(conf);
        return 0;
    } else {
        return -1;
    }
}
19
Source : ActiveUserRunner.java
with Apache License 2.0
from bjmashibing
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Initialize the arguments
    this.processArgs(conf, args);
    // Create the job
    Job job = Job.getInstance(conf, "active_user");
    // Set the job configuration parameters
    job.setJarByClass(ActiveUserRunner.class);
    // HBase input mapper parameters
    // 1. Run locally
    TableMapReduceUtil.initTableMapperJob(this.initScans(job), ActiveUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);
    // 2. Run on a cluster
    // TableMapReduceUtil.initTableMapperJob(null, ActiveUserMapper.class,
    //     StatsUserDimension.class, TimeOutputValue.class, job);
    // Set the reducer parameters
    job.setReducerClass(ActiveUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);
    // Set the output parameters
    job.setOutputFormatClass(TransformerOutputFormat.class);
    // Start time in milliseconds
    long startTime = System.currentTimeMillis();
    try {
        return job.waitForCompletion(true) ? 0 : -1;
    } finally {
        // End time in milliseconds
        long endTime = System.currentTimeMillis();
        logger.info("Job<" + job.getJobName() + "> succeeded: " + job.isSuccessful() + "; start: " + startTime + "; end: " + endTime + "; elapsed: " + (endTime - startTime) + "ms");
    }
}
19
Source : AnalyserLogDataRunner.java
with Apache License 2.0
from bjmashibing
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "analyser_logdata");
    // Code needed to submit the job locally but run it on the cluster
    // File jarFile = EJob.createTempJar("target/classes");
    // ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
    // End of the local-submission/cluster-run code
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);
    // Configure the reducer
    // 1. Run on the cluster as a packaged jar (requires addDependencyJars to be true, which is the default)
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS,
    //     null, job);
    // 2. Run locally (requires addDependencyJars to be false)
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, false);
    job.setNumReduceTasks(0);
    // Set the input paths
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}
19
Source : TransformerBaseRunner.java
with Apache License 2.0
from bjmashibing
/**
 * Create the job.
 *
 * @param conf
 * @return
 * @throws IOException
 */
protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Run locally
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    // Run on a cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    //     this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    //     job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}
19
Source : AnalyserLogDataRunner.java
with Apache License 2.0
from bjmashibing
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "analyser_logdata");
    // Code needed to submit the job locally but run it on the cluster
    // File jarFile = EJob.createTempJar("target/classes");
    // ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
    // End of the local-submission/cluster-run code
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);
    // Configure the reducer
    // 1. Run on the cluster as a packaged jar (requires addDependencyJars to be true, which is the default)
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
    // 2. Run locally (requires addDependencyJars to be false)
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, false);
    job.setNumReduceTasks(0);
    // Set the input paths
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}
19
Source : AnalyserLogDataRunner.java
with Apache License 2.0
from bjmashibing
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "analyser_logdata");
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, false);
    job.setNumReduceTasks(0);
    // Set the input paths
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}
19
Source : HoodieParquetInputFormat.java
with Apache License 2.0
from apache
/**
 * Achieves listStatus functionality for an incrementally queried table. Instead of listing all
 * partitions and then filtering based on the commits of interest, this logic first extracts the
 * partitions touched by the desired commits and then lists only those partitions.
 */
private List<FileStatus> listStatusForIncrementalMode(JobConf job, HoodieTableMetaClient tableMetaClient, List<Path> inputPaths) throws IOException {
    String tableName = tableMetaClient.getTableConfig().getTableName();
    Job jobContext = Job.getInstance(job);
    Option<HoodieTimeline> timeline = HoodieInputFormatUtils.getFilteredCommitsTimeline(jobContext, tableMetaClient);
    if (!timeline.isPresent()) {
        return null;
    }
    Option<List<HoodieInstant>> commitsToCheck = HoodieInputFormatUtils.getCommitsForIncrementalQuery(jobContext, tableName, timeline.get());
    if (!commitsToCheck.isPresent()) {
        return null;
    }
    Option<String> incrementalInputPaths = HoodieInputFormatUtils.getAffectedPartitions(commitsToCheck.get(), tableMetaClient, timeline.get(), inputPaths);
    // Mutate the JobConf to set the input paths to only partitions touched by incremental pull.
    if (!incrementalInputPaths.isPresent()) {
        return null;
    }
    setInputPaths(job, incrementalInputPaths.get());
    FileStatus[] fileStatuses = super.listStatus(job);
    return HoodieInputFormatUtils.filterIncrementalFileStatus(jobContext, tableMetaClient, timeline.get(), fileStatuses, commitsToCheck.get());
}
19
Source : HoodieParquetInputFormat.java
with Apache License 2.0
from apache
@Override
public FileStatus[] listStatus(JobConf job) throws IOException {
    // Segregate inputPaths[] to incremental, snapshot and non hoodie paths
    List<String> incrementalTables = HoodieHiveUtils.getIncrementalTableNames(Job.getInstance(job));
    InputPathHandler inputPathHandler = new InputPathHandler(conf, getInputPaths(job), incrementalTables);
    List<FileStatus> returns = new ArrayList<>();
    Map<String, HoodieTableMetaClient> tableMetaClientMap = inputPathHandler.getTableMetaClientMap();
    // process incremental pulls first
    for (String table : incrementalTables) {
        HoodieTableMetaClient metaClient = tableMetaClientMap.get(table);
        if (metaClient == null) {
            /* This can happen when the INCREMENTAL mode is set for a table but there were no InputPaths
             * in the jobConf
             */
            continue;
        }
        List<Path> inputPaths = inputPathHandler.getGroupedIncrementalPaths().get(metaClient);
        List<FileStatus> result = listStatusForIncrementalMode(job, metaClient, inputPaths);
        if (result != null) {
            returns.addAll(result);
        }
    }
    // process non hoodie Paths next.
    List<Path> nonHoodiePaths = inputPathHandler.getNonHoodieInputPaths();
    if (nonHoodiePaths.size() > 0) {
        setInputPaths(job, nonHoodiePaths.toArray(new Path[nonHoodiePaths.size()]));
        FileStatus[] fileStatuses = super.listStatus(job);
        returns.addAll(Arrays.asList(fileStatuses));
    }
    // process snapshot queries next.
    List<Path> snapshotPaths = inputPathHandler.getSnapshotPaths();
    if (snapshotPaths.size() > 0) {
        returns.addAll(HoodieInputFormatUtils.filterFileStatusForSnapshotMode(job, tableMetaClientMap, snapshotPaths));
    }
    return returns.toArray(new FileStatus[0]);
}
18
Source : ProvincePVAndUVJob.java
with Apache License 2.0
from xpleaf
public static void main(String[] args) throws Exception {
    if (args == null || args.length < 2) {
        System.err.println("Parameter Errors! Usage <inputPath...> <outputPath>");
        System.exit(-1);
    }
    Path outputPath = new Path(args[args.length - 1]);
    Configuration conf = new Configuration();
    String jobName = ProvincePVAndUVJob.class.getSimpleName();
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(ProvincePVAndUVJob.class);
    // Set the MR input parameters
    for (int i = 0; i < args.length - 1; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(ProvincePVAndUVMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // Set the MR output parameters
    // Avoid the "output directory already exists" exception when the job runs
    outputPath.getFileSystem(conf).delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setReducerClass(ProvincePVAndUVReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    job.waitForCompletion(true);
}
18
Source : AccessLogCleanJob.java
with Apache License 2.0
from xpleaf
public static void main(String[] args) throws Exception {
    if (args == null || args.length < 2) {
        System.err.println("Parameter Errors! Usage <inputPath...> <outputPath>");
        System.exit(-1);
    }
    Path outputPath = new Path(args[args.length - 1]);
    Configuration conf = new Configuration();
    String jobName = AccessLogCleanJob.class.getSimpleName();
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(AccessLogCleanJob.class);
    // Set the MR input parameters
    for (int i = 0; i < args.length - 1; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(AccessLogCleanMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    // Set the MR output parameters
    // Avoid the "output directory already exists" exception when the job runs
    outputPath.getFileSystem(conf).delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    // Map-only job, no reducer
    job.setNumReduceTasks(0);
    job.waitForCompletion(true);
}
18
Source : CommonFriendStep2.java
with Apache License 2.0
from whirlys
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(CommonFriendStep2.class);
    // Set the job's mapper and reducer classes
    job.setMapperClass(CommonFansStep2Mapper.class);
    job.setReducerClass(CommonFansStep2Reducer.class);
    // Set the key/value types of the map-phase output
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // Set the key/value types of the reduce-phase output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // Check whether the output directory already exists and delete it if so,
    // to avoid deleting it by hand over and over while testing
    FileSystem fs = FileSystem.get(conf);
    Path out = new Path(args[1]);
    if (fs.exists(out)) {
        fs.delete(out, true);
    }
    // Set the data input and output directories
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, out);
    // Submit the job to YARN or the local runner
    job.waitForCompletion(true);
}
18
Source : CommonFriendStep1.java
with Apache License 2.0
from whirlys
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration configuration = new Configuration();
    Job job = Job.getInstance(configuration);
    job.setJarByClass(CommonFriendStep1.class);
    // Set the job's mapper class
    job.setMapperClass(CommonFansStep1Mapper.class);
    // Set the job's reducer class
    job.setReducerClass(CommonFansStep1Reducer.class);
    // Set the key/value types of the map-phase output
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // Set the key/value types of the reduce-phase output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // Check whether the output path already exists and delete it if so,
    // to avoid deleting it by hand over and over while testing
    FileSystem fs = FileSystem.get(configuration);
    Path out = new Path(args[1]);
    if (fs.exists(out)) {
        fs.delete(out, true);
    }
    // Set the data input and output paths
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, out);
    // Submit the job to YARN or the local runner
    job.waitForCompletion(true);
}
18
Source : WordCount2Application.java
with Apache License 2.0
from ukihsoroy
/**
 * Define the Driver: it encapsulates all the information of the MapReduce job
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. Create the Configuration
    Configuration configuration = new Configuration();
    // 1.1 Clean up the output directory if it already exists
    Path outputPath = new Path(args[1]);
    FileSystem fs = FileSystem.get(configuration);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
        _LOGGER.info("rm -rf output path success.");
    }
    // 2. Create a Job
    Job job = Job.getInstance(configuration, "wordcount");
    // 3. Set the job's main class
    job.setJarByClass(WordCount2Application.class);
    // 4. Set the job's input path
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. Set the map parameters
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. Set the reduce parameters
    job.setReducerClass(WordCountReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. Set the job's output path
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. Submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
18
Source : CombinerApplication.java
with Apache License 2.0
from ukihsoroy
/**
 * Define the Driver: it encapsulates all the information of the MapReduce job
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. Create the Configuration
    Configuration configuration = new Configuration();
    // 1.1 Clean up the output directory if it already exists
    Path outputPath = new Path(args[1]);
    FileSystem fs = FileSystem.get(configuration);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
        _LOGGER.info("rm -rf output path success.");
    }
    // 2. Create a Job
    Job job = Job.getInstance(configuration, "wordcount");
    // 3. Set the job's main class
    job.setJarByClass(CombinerApplication.class);
    // 4. Set the job's input path
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. Set the map parameters
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. Set the reduce parameters
    job.setReducerClass(WordCountReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. Set the combiner class on the job; logically it is exactly the same as our Reduce
    job.setCombinerClass(WordCountReduce.class);
    // 8. Set the job's output path
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 9. Submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
18
Source : LogAnalysisApplication.java
with Apache License 2.0
from ukihsoroy
/**
 * Define the Driver: it encapsulates all the information of the MapReduce job
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. Create the Configuration
    Configuration configuration = new Configuration();
    // 1.1 Clean up the output directory if it already exists
    Path outputPath = new Path(args[1]);
    FileSystem fs = FileSystem.get(configuration);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
        _LOGGER.info("rm -rf output path success.");
    }
    // 2. Create a Job
    Job job = Job.getInstance(configuration, "loganalysis");
    // 3. Set the job's main class
    job.setJarByClass(LogAnalysisApplication.class);
    // 4. Set the job's input path
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. Set the map parameters
    job.setMapperClass(LogAnalysisApplication.LogAnalysisMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. Set the reduce parameters
    job.setReducerClass(LogAnalysisApplication.LogAnalysisReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. Set the job's output path
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. Submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
18
Source : TeraStreamValidate.java
with Apache License 2.0
from pravega
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        usage();
        return 2;
    }
    LOG.info("starting");
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    getConf().setStrings(INPUT_URI_STRING, args[2]);
    getConf().setStrings(INPUT_SCOPE_NAME, args[3]);
    getConf().setStrings(INPUT_STREAM_NAME, args[4]);
    getConf().setStrings(INPUT_DESERIALIZER, TextSerializer.class.getName());
    getConf().setInt(MRJobConfig.NUM_MAPS, 1);
    Job job = Job.getInstance(getConf());
    TeraInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraStreamValidate");
    job.setJarByClass(TeraStreamValidate.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(TeraSortMapper.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(PravegaInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}
18
Source : TeraGen.java
with Apache License 2.0
from pravega
/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length != 5 && args.length != 6) {
        usage();
        return 2;
    }
    Path outputDir = new Path(args[1]);
    getConf().setStrings(OUTPUT_URI_STRING, args[2]);
    getConf().setStrings(OUTPUT_SCOPE_NAME, args[3]);
    getConf().setStrings(OUTPUT_STREAM_NAME, args[4]);
    getConf().setStrings(OUTPUT_STREAM_SEGMENTS, args[5]);
    getConf().setStrings(OUTPUT_DESERIALIZER, TextSerializer.class.getName());
    Job job = Job.getInstance(getConf());
    setNumberOfRows(job, parseHumanLong(args[0]));
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(PravegaFixedSegmentsOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
18
Source : TeraGen.java
with Apache License 2.0
from NJUJYB
/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    if (outputDir.getFileSystem(getConf()).exists(outputDir)) {
        throw new IOException("Output directory " + outputDir + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
18
Source : TestSpeculativeExecution.java
with Apache License 2.0
from NJUJYB
private Job runSpecTest(boolean mapspec, boolean redspec) throws IOException, ClassNotFoundException, InterruptedException {
    Path first = createTempFile("specexec_map_input1", "a\nz");
    Path secnd = createTempFile("specexec_map_input2", "a\nz");
    Configuration conf = mrCluster.getConfig();
    conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, mapspec);
    conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, redspec);
    conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR, TestSpecEstimator.class, TaskRuntimeEstimator.class);
    Job job = Job.getInstance(conf);
    job.setJarByClass(TestSpeculativeExecution.class);
    job.setMapperClass(SpeculativeMapper.class);
    job.setReducerClass(SpeculativeReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);
    FileInputFormat.setInputPaths(job, first);
    FileInputFormat.addInputPath(job, secnd);
    FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);
    // Delete output directory if it exists.
    try {
        localFs.delete(TEST_OUT_DIR, true);
    } catch (IOException e) {
        // ignore
    }
    // Creates the Job Configuration
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.setMaxMapAttempts(2);
    job.submit();
    return job;
}
18
Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
public static Job createJob(Configuration conf, Path inDir, Path outDir, int numInputFiles, int numReds, String input) throws IOException {
    Job job = Job.getInstance(conf);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    if (fs.exists(inDir)) {
        fs.delete(inDir, true);
    }
    fs.mkdirs(inDir);
    for (int i = 0; i < numInputFiles; ++i) {
        DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
        file.writeBytes(input);
        file.close();
    }
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);
    job.setNumReduceTasks(numReds);
    return job;
}
18
Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB
/**
 * Creates a simple fail job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple fail job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createFailJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    FileSystem fs = outdir.getFileSystem(conf);
    if (fs.exists(outdir)) {
        fs.delete(outdir, true);
    }
    conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("Fail-Job");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(FailMapper.class);
    theJob.setReducerClass(Reducer.class);
    theJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    return theJob;
}
18
Source : TestMRSequenceFileAsBinaryOutputFormat.java
with Apache License 2.0
from NJUJYB
public void testcheckOutputSpecsForbidRecordCompression() throws IOException {
    Job job = Job.getInstance();
    FileSystem fs = FileSystem.getLocal(job.getConfiguration());
    Path outputdir = new Path(System.getProperty("test.build.data", "/tmp") + "/output");
    fs.delete(outputdir, true);
    // Without outputpath, FileOutputFormat.checkOutputSpecs will throw
    // InvalidJobConfException
    FileOutputFormat.setOutputPath(job, outputdir);
    // SequenceFileAsBinaryOutputFormat doesn't support record compression
    // It should throw an exception when checked by checkOutputSpecs
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    try {
        new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
    } catch (Exception e) {
        fail("Block compression should be allowed for " + "SequenceFileAsBinaryOutputFormat: Caught " + e.getClass().getName());
    }
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.RECORD);
    try {
        new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
        fail("Record compression should not be allowed for " + "SequenceFileAsBinaryOutputFormat");
    } catch (InvalidJobConfException ie) {
        // expected
    } catch (Exception e) {
        fail("Expected " + InvalidJobConfException.class.getName() + " but caught " + e.getClass().getName());
    }
}
18
Source : TestMultipleInputs.java
with Apache License 2.0
from NJUJYB
@SuppressWarnings("unchecked")
public void testAddInputPathWithMapper() throws IOException {
    final Job conf = Job.getInstance();
    MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class, MapClass.class);
    MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.class, KeyValueMapClass.class);
    final Map<Path, InputFormat> inputs = MultipleInputs.getInputFormatMap(conf);
    final Map<Path, Class<? extends Mapper>> maps = MultipleInputs.getMapperTypeMap(conf);
    assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
    assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar")).getClass());
    assertEquals(MapClass.class, maps.get(new Path("/foo")));
    assertEquals(KeyValueMapClass.class, maps.get(new Path("/bar")));
}
18
Source : TestMultipleInputs.java
with Apache License 2.0
from NJUJYB
@Test
public void testDoMultipleInputs() throws IOException {
    Path in1Dir = getDir(IN1_DIR);
    Path in2Dir = getDir(IN2_DIR);
    Path outDir = getDir(OUT_DIR);
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(outDir, true);
    DataOutputStream file1 = fs.create(new Path(in1Dir, "part-0"));
    file1.writeBytes("a\nb\nc\nd\ne");
    file1.close();
    // write tab delimited to second file because we're doing
    // KeyValueInputFormat
    DataOutputStream file2 = fs.create(new Path(in2Dir, "part-0"));
    file2.writeBytes("a\tblah\nb\tblah\nc\tblah\nd\tblah\ne\tblah");
    file2.close();
    Job job = Job.getInstance(conf);
    job.setJobName("mi");
    MultipleInputs.addInputPath(job, in1Dir, TextInputFormat.class, MapClass.class);
    MultipleInputs.addInputPath(job, in2Dir, KeyValueTextInputFormat.class, KeyValueMapClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(ReducerClass.class);
    FileOutputFormat.setOutputPath(job, outDir);
    boolean success = false;
    try {
        success = job.waitForCompletion(true);
    } catch (InterruptedException ie) {
        throw new RuntimeException(ie);
    } catch (ClassNotFoundException instante) {
        throw new RuntimeException(instante);
    }
    if (!success)
        throw new RuntimeException("Job failed!");
    // copy bytes a bunch of times for the ease of readLine() - whatever
    BufferedReader output = new BufferedReader(new InputStreamReader(fs.open(new Path(outDir, "part-r-00000"))));
    // reducer should have counted one key from each file
    assertTrue(output.readLine().equals("a 2"));
    assertTrue(output.readLine().equals("b 2"));
    assertTrue(output.readLine().equals("c 2"));
    assertTrue(output.readLine().equals("d 2"));
    assertTrue(output.readLine().equals("e 2"));
}
18
Source : TestMultipleInputs.java
with Apache License 2.0
from NJUJYB
@SuppressWarnings("unchecked")
public void testAddInputPathWithFormat() throws IOException {
    final Job conf = Job.getInstance();
    MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.class);
    MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.class);
    final Map<Path, InputFormat> inputs = MultipleInputs.getInputFormatMap(conf);
    assertEquals(TextInputFormat.class, inputs.get(new Path("/foo")).getClass());
    assertEquals(KeyValueTextInputFormat.class, inputs.get(new Path("/bar")).getClass());
}
18
Source : TestMRCJCFileInputFormat.java
with Apache License 2.0
from NJUJYB
/**
 * Test when the input file's length is 0.
 */
@Test
public void testForEmptyFile() throws Exception {
    Configuration conf = new Configuration();
    FileSystem fileSys = FileSystem.get(conf);
    Path file = new Path("test" + "/file");
    FSDataOutputStream out = fileSys.create(file, true, conf.getInt("io.file.buffer.size", 4096), (short) 1, (long) 1024);
    out.write(new byte[0]);
    out.close();
    // split it using a File input format
    DummyInputFormat inFormat = new DummyInputFormat();
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, "test");
    List<InputSplit> splits = inFormat.getSplits(job);
    assertEquals(1, splits.size());
    FileSplit fileSplit = (FileSplit) splits.get(0);
    assertEquals(0, fileSplit.getLocations().length);
    assertEquals(file.getName(), fileSplit.getPath().getName());
    assertEquals(0, fileSplit.getStart());
    assertEquals(0, fileSplit.getLength());
    fileSys.delete(file.getParent(), true);
}
18
Source : TestMRCJCFileInputFormat.java
with Apache License 2.0
from NJUJYB
@Test
public void testAddInputPath() throws IOException {
    final Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "file:///abc/");
    final Job j = Job.getInstance(conf);
    // setup default fs
    final FileSystem defaultfs = FileSystem.get(conf);
    System.out.println("defaultfs.getUri() = " + defaultfs.getUri());
    {
        // test addInputPath
        final Path original = new Path("file:/foo");
        System.out.println("original = " + original);
        FileInputFormat.addInputPath(j, original);
        final Path[] results = FileInputFormat.getInputPaths(j);
        System.out.println("results = " + Arrays.asList(results));
        assertEquals(1, results.length);
        assertEquals(original, results[0]);
    }
    {
        // test setInputPaths
        final Path original = new Path("file:/bar");
        System.out.println("original = " + original);
        FileInputFormat.setInputPaths(j, original);
        final Path[] results = FileInputFormat.getInputPaths(j);
        System.out.println("results = " + Arrays.asList(results));
        assertEquals(1, results.length);
        assertEquals(original, results[0]);
    }
}
18
Source : TestFixedLengthInputFormat.java
with Apache License 2.0
from NJUJYB
private void runPartialRecordTest(CompressionCodec codec) throws Exception {
    localFs.delete(workDir, true);
    Job job = Job.getInstance(defaultConf);
    // Create a file with fixed length records with 5 byte long
    // records with a partial record at the end.
    StringBuilder fileName = new StringBuilder("testFormat.txt");
    if (codec != null) {
        fileName.append(".gz");
        ReflectionUtils.setConf(codec, job.getConfiguration());
    }
    writeFile(localFs, new Path(workDir, fileName.toString()), codec, "one two threefour five six seveneightnine ten");
    FixedLengthInputFormat format = new FixedLengthInputFormat();
    format.setRecordLength(job.getConfiguration(), 5);
    FileInputFormat.setInputPaths(job, workDir);
    List<InputSplit> splits = format.getSplits(job);
    if (codec != null) {
        assertEquals("compressed splits == 1", 1, splits.size());
    }
    boolean exceptionThrown = false;
    for (InputSplit split : splits) {
        try {
            List<String> results = readSplit(format, split, job);
        } catch (IOException ioe) {
            exceptionThrown = true;
            LOG.info("Exception message:" + ioe.getMessage());
        }
    }
    assertTrue("Exception for partial record:", exceptionThrown);
}