Here are examples of the Java API org.apache.hadoop.mapreduce.Job.setJarByClass() taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
242 Examples
17
Source : AggregateWordHistogram.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * The main driver for the word histogram map/reduce program. Invoke this
 * method to submit the map/reduce job and exit with its status.
 *
 * @param args command-line arguments, forwarded unchanged to
 *             {@code ValueAggregatorJob.createValueAggregatorJob}
 * @throws IOException
 *           When there is communication problems with the job tracker.
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = ValueAggregatorJob.createValueAggregatorJob(args, new Class[] { AggregateWordHistogramPlugin.class });
    // Locate the job jar through this driver class rather than a sibling example.
    job.setJarByClass(AggregateWordHistogram.class);
    // Exit status: 0 when the job succeeded, 1 otherwise.
    int ret = job.waitForCompletion(true) ? 0 : 1;
    System.exit(ret);
}
17
Source : AggregateWordCount.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * The main driver for word count map/reduce program. Invoke this method to
 * submit the map/reduce job and exit with its status.
 *
 * @param args command-line arguments, forwarded unchanged to
 *             {@code ValueAggregatorJob.createValueAggregatorJob}
 * @throws IOException
 *           When there is communication problems with the job tracker.
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = ValueAggregatorJob.createValueAggregatorJob(args, new Class[] { WordCountPlugInClass.class });
    job.setJarByClass(AggregateWordCount.class);
    // Exit status: 0 when the job succeeded, 1 otherwise.
    int ret = job.waitForCompletion(true) ? 0 : 1;
    System.exit(ret);
}
13
Source : DistCp.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * Create Job object for submitting it, with all the configuration.
 *
 * <p>The job is map-only (zero reducers), uses {@code CopyMapper} with
 * {@code Text} map output keys and values, and disables map speculation.
 *
 * @return Reference to job object.
 * @throws IOException - Exception if any
 */
private Job createJob() throws IOException {
    String jobName = "distcp";
    // Append the user-supplied job name, if one is configured.
    String userChosenName = getConf().get(JobContext.JOB_NAME);
    if (userChosenName != null) {
        jobName += ": " + userChosenName;
    }
    Job job = Job.getInstance(getConf());
    job.setJobName(jobName);
    job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
    job.setJarByClass(CopyMapper.class);
    configureOutputFormat(job);
    job.setMapperClass(CopyMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(CopyOutputFormat.class);
    job.getConfiguration().set(JobContext.MAP_SPECULATIVE, "false");
    job.getConfiguration().set(JobContext.NUM_MAPS, String.valueOf(inputOptions.getMaxMaps()));
    if (inputOptions.getSslConfigurationFile() != null) {
        setupSSLConfig(job);
    }
    // Persist the parsed options into the job configuration.
    inputOptions.appendToConf(job.getConfiguration());
    return job;
}
13
Source : MultiFileWordCount.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * Configures and runs the MultiFileWordCount job.
 *
 * @param args {@code args[0]} is passed to {@code FileInputFormat.addInputPaths},
 *             {@code args[1]} is the output path
 * @return 2 when fewer than two arguments are given, 0 when the job
 *         succeeds, 1 when it fails
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 2;
    }
    // Use the factory method; the Job constructors are deprecated.
    Job job = Job.getInstance(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);
    // set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);
    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);
    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
13
Source : TestLineRecordReaderJobs.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * Creates and runs an MR job that uses the base {@code Mapper} and
 * {@code Reducer} classes, reading from {@code inputDir} and writing to
 * {@code outputDir}. The job's success flag is not inspected here.
 *
 * @param conf configuration used to create the job
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 */
public void createAndRunJob(Configuration conf) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf);
    job.setJarByClass(TestLineRecordReaderJobs.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    FileInputFormat.addInputPath(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.waitForCompletion(true);
}
13
Source : Main.java
with Apache License 2.0
from lfz757077613
with Apache License 2.0
from lfz757077613
/**
 * Driver for the "wordCount" job: reads a fixed HDFS input file, counts with
 * {@code MyMapper}/{@code MyReducer}, and writes to a fixed HDFS output
 * directory. Exits with 0 on success and -1 on failure.
 *
 * @param args unused
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 * @throws InterruptedException if waiting for the job is interrupted
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(new Configuration(), "wordCount");
    // Set the main class used to locate the job jar
    job.setJarByClass(Main.class);
    // Configure the mapper
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // Configure the reducer
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // Run a reduce locally first (combiner) to cut down the amount of data sent
    job.setCombinerClass(MyReducer.class);
    // Configure the partitioner
    // Not used for now
    // job.setPartitionerClass(MyPartitioner.class);
    // job.setNumReduceTasks(2);
    // Set the job input/output paths. Note that the output must not exist beforehand;
    // the output is a directory that holds the results and the completion status.
    FileInputFormat.setInputPaths(job, new Path("hdfs://localhost:8020/access.log"));
    FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:8020/output"));
    System.exit(job.waitForCompletion(true) ? 0 : -1);
}
13
Source : SampleUploader.java
with Apache License 2.0
from fengchen8086
with Apache License 2.0
from fengchen8086
/**
 * Job configuration.
 *
 * <p>Builds a map-only job that reads a sequence file and writes straight to
 * a table through {@code Uploader}.
 *
 * @param conf configuration used to create the job
 * @param args {@code args[0]} is the input path, {@code args[1]} the table name
 * @return the configured, not yet submitted, job
 * @throws IOException if the job cannot be created or configured
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];
    // Use the factory method; the Job constructors are deprecated.
    Job job = Job.getInstance(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(Uploader.class);
    // No reducers. Just write straight to table. Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}
12
Source : TransformBaseRunner.java
with GNU General Public License v3.0
from wlhbdp
with GNU General Public License v3.0
from wlhbdp
/**
 * Creates the job and wires in the runner, mapper, reducer and output
 * classes held in this instance's fields.
 *
 * @param conf configuration used to create the job
 * @return the configured, not yet submitted, job
 * @throws IOException if the job cannot be created or configured
 */
protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Local run
    // TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, true);
    // Cluster run: submitted locally or submitted as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    // this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    // job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}
12
Source : WordCount1Application.java
with Apache License 2.0
from ukihsoroy
with Apache License 2.0
from ukihsoroy
/**
 * Defines the driver: it encapsulates all the information of the MapReduce job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 * @throws InterruptedException if waiting for the job is interrupted
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. Create the Configuration
    Configuration configuration = new Configuration();
    // 2. Create a Job
    Job job = Job.getInstance(configuration, "wordcount");
    // 3. Set the class used to locate the job jar
    job.setJarByClass(WordCount1Application.class);
    // 4. Set the input path the job processes
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. Set the map-side parameters
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. Set the reduce-side parameters
    job.setReducerClass(WordCountReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. Set the output path of the job
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. Submit and exit with 0 on success, 1 on failure
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
12
Source : AveragePageCount.java
with MIT License
from PacktPublishing
with MIT License
from PacktPublishing
/**
 * Driver for the "Average Page Count" job, using fixed local input and
 * output paths. Exits with 0 when the job succeeds and 1 when it fails.
 *
 * @param args unused
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration con = new Configuration();
    Job bookJob = Job.getInstance(con, "Average Page Count");
    bookJob.setJarByClass(AveragePageCount.class);
    bookJob.setMapperClass(TextMapper.class);
    bookJob.setReducerClass(AverageReduce.class);
    bookJob.setOutputKeyClass(Text.class);
    bookJob.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(bookJob, new Path("C:/Hadoop/books.txt"));
    FileOutputFormat.setOutputPath(bookJob, new Path("C:/Hadoop/BookOutput"));
    // Report failure through the exit status instead of silently falling
    // off the end of main when the job does not succeed.
    System.exit(bookJob.waitForCompletion(true) ? 0 : 1);
}
12
Source : WordCount.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * Driver for the "word count" job. All remaining arguments except the last
 * are input paths; the last one is the output path.
 *
 * @param args command-line arguments; generic Hadoop options are consumed by
 *             {@code GenericOptionsParser} first
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    // Use the factory method; the Job constructors are deprecated.
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Every argument but the last is an input path.
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
12
Source : TeraChecksum.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * Configures and runs the "TeraSum" checksum job with a single reducer.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 2 when the argument count is not exactly two, 0 when the job
 *         succeeds, 1 when it fails
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    // Validate arguments before creating a Job that would go unused.
    if (args.length != 2) {
        usage();
        return 2;
    }
    Job job = Job.getInstance(getConf());
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSum");
    job.setJarByClass(TeraChecksum.class);
    job.setMapperClass(ChecksumMapper.class);
    job.setReducerClass(ChecksumReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Unsigned16.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
12
Source : TransformerBaseRunner.java
with Apache License 2.0
from bjmashibing
with Apache License 2.0
from bjmashibing
/**
 * Creates the job and wires in the runner, mapper, reducer and output
 * classes held in this instance's fields.
 *
 * @param conf configuration used to create the job
 * @return the configured, not yet submitted, job
 * @throws IOException if the job cannot be created or configured
 */
protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Local run
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    // Cluster run: submitted locally or submitted as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    // this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    // job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}
11
Source : MinTemperature.java
with Apache License 2.0
from whirlys
with Apache License 2.0
from whirlys
/**
 * Driver for the minimum-temperature job.
 *
 * @param args exactly two values: the input path and the output path
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 * @throws InterruptedException if waiting for the job is interrupted
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length != 2) {
        System.err.println("Usage: MinTemperature <input path> <output path>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(MinTemperature.class);
    job.setJobName("MapReduce实验-气象数据集-求气温最小值");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TemperatureMapper.class);
    // Set a Combiner to reduce the amount of data transferred and improve efficiency
    // job.setCombinerClass(MinTemperatureReducer.class);
    job.setReducerClass(MinTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Exit status: 0 when the job succeeded, 1 otherwise.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
11
Source : MaxTemperature.java
with Apache License 2.0
from whirlys
with Apache License 2.0
from whirlys
/**
 * Driver for the maximum-temperature job.
 *
 * @param args exactly two values: the input path and the output path
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 * @throws InterruptedException if waiting for the job is interrupted
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(MaxTemperature.class);
    job.setJobName("MapReduce实验-气象数据集-求气温最大值");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TemperatureMapper.class);
    // Set a Combiner to reduce the amount of data transferred and improve efficiency
    // job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Exit status: 0 when the job succeeded, 1 otherwise.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
11
Source : AvgTemperature.java
with Apache License 2.0
from whirlys
with Apache License 2.0
from whirlys
/**
 * Driver for the average-temperature job.
 *
 * @param args exactly two values: the input path and the output path
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 * @throws InterruptedException if waiting for the job is interrupted
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length != 2) {
        System.err.println("Usage: AvgTemperature <input path> <output path>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(AvgTemperature.class);
    job.setJobName("MapReduce实验-气象数据集-求气温平均值");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TemperatureMapper.class);
    job.setReducerClass(AvgTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Exit status: 0 when the job succeeded, 1 otherwise.
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
11
Source : Hdfs2Tg.java
with Apache License 2.0
from tigergraph
with Apache License 2.0
from tigergraph
/**
 * Driver for the map-only "HDFS to TG" job.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "HDFS to TG");
    job.setJarByClass(Hdfs2Tg.class);
    job.setMapperClass(LineMapper.class);
    // Map-only job: no reduce phase.
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
11
Source : MaxTemperature.java
with MIT License
from Tianny
with MIT License
from Tianny
/**
 * Driver for the "Max Temperature" job.
 *
 * @param args exactly two values: the input path and the output path
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Max Temperature");
    // Passing a class to setJarByClass is enough; Hadoop uses it to locate the JAR file that contains it
    job.setJarByClass(MaxTemperature.class);
    // Specify the input and output data paths
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);
    // Controls the output types of the reduce function; they must match what the Reduce class produces
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
11
Source : CompressionEmulationUtil.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * Configure the {@link Job} for enabling compression emulation.
 *
 * <p>Sets up a map-only job using {@code RandomTextDataMapper} with
 * {@code Text} map output keys and values, and turns on output compression.
 *
 * @param job the job to configure
 * @throws IOException declared by the signature
 * @throws InterruptedException declared by the signature
 * @throws ClassNotFoundException declared by the signature
 */
static void configure(final Job job) throws IOException, InterruptedException, ClassNotFoundException {
    // set the random text mapper
    job.setMapperClass(RandomTextDataMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(GenDataFormat.class);
    job.setJarByClass(GenerateData.class);
    // set the output compression true
    FileOutputFormat.setCompressOutput(job, true);
    try {
        // A placeholder input path is registered; a failure here is logged
        // and deliberately not propagated.
        FileInputFormat.addInputPath(job, new Path("ignored"));
    } catch (IOException e) {
        LOG.error("Error while adding input path ", e);
    }
}
11
Source : TestMiniMRChildTask.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * Launch tests
 * @param conf Configuration of the mapreduce job.
 * @param inDir input path (not used by this method)
 * @param outDir output path; deleted recursively after the job succeeds
 * @param input Input text (not used by this method)
 * @throws IOException if job setup, execution or cleanup fails with an I/O error
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 */
public void launchTest(JobConf conf, Path inDir, Path outDir, String input) throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem outFs = outDir.getFileSystem(conf);
    // Launch job with default option for temp dir.
    // i.e. temp dir is ./tmp
    Job job = Job.getInstance(conf);
    job.addFileToClassPath(APP_JAR);
    job.setJarByClass(TestMiniMRChildTask.class);
    // speed up failures
    job.setMaxMapAttempts(1);
    // Wait once and keep the result; a second wait on the same job is redundant.
    boolean succeeded = job.waitForCompletion(true);
    assertTrue(succeeded);
    outFs.delete(outDir, true);
}
11
Source : WordCount.java
with Apache License 2.0
from naver
with Apache License 2.0
from naver
/**
 * Driver for the "word count" job. All remaining arguments except the last
 * are input paths; the last one is the output path.
 *
 * @param args command-line arguments; generic Hadoop options are consumed by
 *             {@code GenericOptionsParser} first
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Every argument but the last is an input path.
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
11
Source : HCatalogTestUtils.java
with Apache License 2.0
from dkhadoop
with Apache License 2.0
from dkhadoop
/**
 * Run a map reduce job to read records from HCatalog table.
 *
 * <p>The read counter and the {@code recsRead} list are reset before the job
 * starts. The job's success flag is not inspected.
 *
 * @param dbName database name passed to {@code HCatInputFormat.setInput}
 * @param tableName table name passed to {@code HCatInputFormat.setInput}
 * @param filter filter applied to the HCatalog input
 * @return the {@code recsRead} list (presumably filled by the mapper; confirm
 *         against {@code HCatReaderMapper})
 * @throws Exception if job setup or execution fails
 */
public List<HCatRecord> readHCatRecords(String dbName, String tableName, String filter) throws Exception {
    HCatReaderMapper.setReadRecordCount(0);
    recsRead.clear();
    // Configuration conf = new Configuration();
    Job job = new Job(conf, "HCatalog reader job");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatReaderMapper.class);
    job.getConfiguration().setInt(ConfigurationConstants.PROP_MAPRED_MAP_TASKS, 1);
    // input/output settings
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    HCatInputFormat.setInput(job, dbName, tableName).setFilter(filter);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    // Remove any previous output directory before the job writes to it.
    Path path = new Path(fs.getWorkingDirectory(), "mapreduce/HCatTableIndexOutput");
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
    FileOutputFormat.setOutputPath(job, path);
    job.waitForCompletion(true);
    LOG.info("Read " + HCatReaderMapper.readRecordCount + " records");
    return recsRead;
}
10
Source : AnalyserLogDataRunner.java
with GNU General Public License v3.0
from wlhbdp
with GNU General Public License v3.0
from wlhbdp
/**
 * Configures and runs the map-only log analysis job whose mapper emits
 * {@code Put} values for the event-log table.
 *
 * @param args command-line arguments, handled by {@code processArgs}
 * @return 0 when the job succeeds, -1 when it fails
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "analyser_logdata");
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);
    // Configure the reducer
    // 1. Running on the cluster, packaged as a jar (requires addDependencyJars to be true, which is the default)
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, true);
    // 2. Running locally (the commented-out call below passes addDependencyJars=false)
    // TableMapReduceUtil
    // .initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null,
    // null, false);
    // Map-only job, then set the input paths
    job.setNumReduceTasks(0);
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}
10
Source : ReduceJobsConfiguration.java
with Apache License 2.0
from WinterChenS
with Apache License 2.0
from WinterChenS
/**
 * Configures the word-count job and runs it to completion. The job's
 * success flag is not inspected.
 *
 * @param jobName name given to the job
 * @param inputPath input path of the job
 * @param outputPath output path of the job
 * @throws IOException if job setup or execution fails with an I/O error
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 * @throws InterruptedException if waiting for the job is interrupted
 */
public void getWordCountJobsConf(String jobName, String inputPath, String outputPath) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = getConfiguration();
    Job job = Job.getInstance(conf, jobName);
    job.setMapperClass(WordMapper.class);
    job.setCombinerClass(WordReduce.class);
    job.setJarByClass(HadoopDemoApplication.class);
    job.setReducerClass(WordReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Small-file merging settings
    // job.setInputFormatClass(CombineTextInputFormat.class);
    // Maximum split size
    // CombineTextInputFormat.setMaxInputSplitSize(job, 4 * 1024 * 1024);
    // Minimum split size
    // CombineTextInputFormat.setMinInputSplitSize(job, 2 * 1024 * 1024);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.waitForCompletion(true);
}
10
Source : WordMedian.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * Runs the "word median" job and, when it succeeds, computes the median
 * from the job output and stores it in {@code median}.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 on a usage error or when the job succeeds, 1 when the job fails
 * @throws Exception if job setup, execution or reading the output fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }
    // NOTE(review): this replaces any configuration previously set on the tool.
    setConf(new Configuration());
    Configuration conf = getConf();
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);
    if (!result) {
        // Counters and output of a failed job cannot be used to compute a median.
        return 1;
    }
    // Wait for JOB 1 -- get middle value to check for Median
    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()).findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    // For an odd count both indexes differ by one; for an even count they are equal.
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));
    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
    return 0;
}
10
Source : UserNamePermission.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * Driver for the "user name check" job, reading from the relative path
 * {@code input} and writing to {@code output}. Exits with 0 on success and
 * 1 on failure.
 *
 * @param args unused
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Path outDir = new Path("output");
    Configuration conf = new Configuration();
    // Use the factory method; the Job constructors are deprecated.
    Job job = Job.getInstance(conf, "user name check");
    job.setJarByClass(UserNamePermission.class);
    job.setMapperClass(UserNamePermission.UserNameMapper.class);
    job.setCombinerClass(UserNamePermission.UserNameReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(UserNamePermission.UserNameReducer.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, outDir);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
10
Source : MultiFileWordCount.java
with Apache License 2.0
from naver
with Apache License 2.0
from naver
/**
 * Configures and runs the MultiFileWordCount job.
 *
 * @param args {@code args[0]} is passed to {@code FileInputFormat.addInputPaths},
 *             {@code args[1]} is the output path
 * @return 2 when fewer than two arguments are given, 0 when the job
 *         succeeds, 1 when it fails
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 2;
    }
    Job job = Job.getInstance(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);
    // set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);
    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);
    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
10
Source : TestMiniMRChildTask.java
with Apache License 2.0
from naver
with Apache License 2.0
from naver
/**
 * Launch tests
 * @param conf Configuration of the mapreduce job.
 * @param inDir input path (not used by this method)
 * @param outDir output path; deleted recursively after the job succeeds
 * @param input Input text (not used by this method)
 * @throws IOException if job setup, execution or cleanup fails with an I/O error
 * @throws InterruptedException if waiting for the job is interrupted
 * @throws ClassNotFoundException propagated from {@code Job.waitForCompletion}
 */
public void launchTest(JobConf conf, Path inDir, Path outDir, String input) throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem outFs = outDir.getFileSystem(conf);
    // Launch job with default option for temp dir.
    // i.e. temp dir is ./tmp
    Job job = Job.getInstance(conf);
    job.addFileToClassPath(APP_JAR);
    job.setJarByClass(TestMiniMRChildTask.class);
    // speed up failures
    job.setMaxMapAttempts(1);
    // Wait once and keep the result; a second wait on the same job is redundant.
    boolean succeeded = job.waitForCompletion(true);
    assertTrue(succeeded);
    outFs.delete(outDir, true);
}
10
Source : MR_WLA.java
with GNU General Public License v3.0
from monsonlee
with GNU General Public License v3.0
from monsonlee
/**
 * Configures and runs the "wla_baidu" job. Any existing output directory is
 * deleted recursively before the job is created.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return 0 when the job succeeds, 1 when it fails
 * @throws Exception if cleanup, job setup or execution fails
 */
public int run(String[] args) throws Exception {
    String jobName = "wla_baidu";
    String inputPath = args[0];
    String outputPath = args[1];
    Path path = new Path(outputPath);
    // Delete the output directory
    path.getFileSystem(getConf()).delete(path, true);
    // 1. Organize all the code into a Topology-like class
    Job job = Job.getInstance(getConf(), jobName);
    // 2. The job must run from a packaged jar, so the following line is required
    job.setJarByClass(MR_WLA.class);
    // 3. Specify the HDFS input
    FileInputFormat.setInputPaths(job, inputPath);
    // 4. Specify the map class
    job.setMapperClass(WLA_Mapper.class);
    // 5. Specify the <key,value> types emitted by the map
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // 6. Specify the reduce class
    job.setReducerClass(WLA_Reducer.class);
    // 7. Specify the <key,value> types emitted by the reduce
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // 8. Specify the HDFS output (reuses the Path built for the cleanup above)
    FileOutputFormat.setOutputPath(job, path);
    return job.waitForCompletion(true) ? 0 : 1;
}
10
Source : Export.java
with Apache License 2.0
from fengchen8086
with Apache License 2.0
from fengchen8086
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters: {@code args[0]} is the table
 *             name, {@code args[1]} the output directory.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    // Create the job with its name in one step; the Job constructors are
    // deprecated and a separate setJobName with the same value is redundant.
    Job job = Job.getInstance(conf, NAME + "_" + tableName);
    job.setJarByClass(Export.class);
    // Set optional scan parameters
    Scan s = getConfiguredScanForJob(conf, args);
    IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
    // No reducers. Just write straight to output files.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Result.class);
    // job conf doesn't contain the conf so doesn't have a default fs.
    FileOutputFormat.setOutputPath(job, outputDir);
    return job;
}
10
Source : IntegrationTestLoadAndVerify.java
with Apache License 2.0
from fengchen8086
with Apache License 2.0
from fengchen8086
/**
 * Configures and runs the map-only load job for the given table and asserts
 * that it succeeded.
 *
 * @param conf configuration for the job; the map-task count and the table
 *             name are written into it before the job is created
 * @param htd descriptor of the table to load
 * @return the completed job
 * @throws Exception if job setup or execution fails
 */
protected Job doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "load-output");
    LOG.info("Load output dir: " + outputDir);
    NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
    conf.set(TABLE_NAME_KEY, htd.getTableName().getNameAsString());
    Job job = Job.getInstance(conf);
    job.setJobName(TEST_NAME + " Load for " + htd.getTableName());
    job.setJarByClass(this.getClass());
    setMapperClass(job);
    job.setInputFormatClass(NMapInputFormat.class);
    job.setNumReduceTasks(0);
    setJobScannerConf(job);
    FileOutputFormat.setOutputPath(job, outputDir);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);
    assertTrue(job.waitForCompletion(true));
    return job;
}
10
Source : NewInstallUserRunner.java
with Apache License 2.0
from bjmashibing
with Apache License 2.0
from bjmashibing
/**
 * Configures and runs the "new_install_user" job; on success it then
 * calculates the total users.
 *
 * @param args command-line arguments, handled by {@code processArgs}
 * @return 0 when the job succeeds, -1 when it fails
 * @throws Exception if job setup, execution or the follow-up calculation fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Process the arguments
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "new_install_user");
    job.setJarByClass(NewInstallUserRunner.class);
    // Local run
    TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);
    // Cluster run: submitted locally or submitted as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job);
    job.setReducerClass(NewInstallUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);
    // job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TransformerOutputFormat.class);
    if (job.waitForCompletion(true)) {
        // The job succeeded; the total users now need to be calculated
        this.calculateTotalUsers(conf);
        return 0;
    } else {
        return -1;
    }
}
10
Source : ActiveUserRunner.java
with Apache License 2.0
from bjmashibing
with Apache License 2.0
from bjmashibing
/**
 * Configures and runs the "active_user" job and logs its outcome together
 * with start time, end time and elapsed milliseconds.
 *
 * @param args command-line arguments, handled by {@code processArgs}
 * @return 0 when the job succeeds, -1 when it fails
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Initialize the arguments
    this.processArgs(conf, args);
    // Create the job
    Job job = Job.getInstance(conf, "active_user");
    // Set the job-related configuration
    job.setJarByClass(ActiveUserRunner.class);
    // HBase input mapper parameters
    // 1. Local run
    TableMapReduceUtil.initTableMapperJob(this.initScans(job), ActiveUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);
    // 2. Cluster run
    // TableMapReduceUtil.initTableMapperJob(null, ActiveUserMapper.class,
    // StatsUserDimension.class, TimeOutputValue.class, job);
    // Set the reducer-related parameters
    job.setReducerClass(ActiveUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);
    // Set the output-related parameters
    job.setOutputFormatClass(TransformerOutputFormat.class);
    // Start time in milliseconds
    long startTime = System.currentTimeMillis();
    // Keep the outcome in a local: querying job.isSuccessful() in the finally
    // block can itself throw when the job never started running, which would
    // hide the original exception.
    boolean succeeded = false;
    try {
        succeeded = job.waitForCompletion(true);
        return succeeded ? 0 : -1;
    } finally {
        // End time in milliseconds
        long endTime = System.currentTimeMillis();
        logger.info("Job<" + job.getJobName() + ">是否执行成功:" + succeeded + "; 开始时间:" + startTime + "; 结束时间:" + endTime + "; 用时:" + (endTime - startTime) + "ms");
    }
}
10
Source : AnalyserLogDataRunner.java
with Apache License 2.0
from bjmashibing
with Apache License 2.0
from bjmashibing
/**
 * Configures and runs the map-only "analyser_logdata" job that writes {@code Put}s to the
 * HBase event-log table.
 *
 * @param args command-line arguments, forwarded to {@code processArgs}
 * @return 0 if the job completed successfully, -1 otherwise
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);

    Job job = Job.getInstance(conf, "analyser_logdata");

    // To submit from a local machine and run on the cluster, the original notes build a
    // temporary jar and set it on the job instead:
    //   File jarFile = EJob.createTempJar("target/classes");
    //   ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);

    // Reducer/output table configuration.
    // 1. Running on the cluster from a packaged jar needs addDependencyJars=true (the default):
    //      TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
    // 2. Running locally needs addDependencyJars=false, which is the last argument below.
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, false);
    job.setNumReduceTasks(0);

    // Register the input paths.
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}
10
Source : AnalyserLogDataRunner.java
with Apache License 2.0
from bjmashibing
with Apache License 2.0
from bjmashibing
/**
 * Builds the "analyser_logdata" job (mapper only, output to the HBase event-log table) and
 * waits for it to finish.
 *
 * @param args command-line arguments, forwarded to {@code processArgs}
 * @return 0 when the job succeeds, -1 when it fails
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);

    Job job = Job.getInstance(conf, "analyser_logdata");

    // Alternative for local submission with cluster execution (from the original notes):
    //   File jarFile = EJob.createTempJar("target/classes");
    //   ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);

    // Output table configuration.
    // 1. Cluster run from a packaged jar (addDependencyJars must be true, which is the default):
    //      TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
    // 2. Local run: addDependencyJars must be false (final argument below).
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, false);
    job.setNumReduceTasks(0);

    // Register the input paths.
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}
10
Source : AnalyserLogDataRunner.java
with Apache License 2.0
from bjmashibing
with Apache License 2.0
from bjmashibing
/**
 * Sets up and runs the map-only "analyser_logdata" job targeting the HBase event-log table.
 *
 * @param args command-line arguments, forwarded to {@code processArgs}
 * @return 0 when the job succeeds, -1 when it fails
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);

    Job job = Job.getInstance(conf, "analyser_logdata");
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);

    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, false);
    // No reduce phase.
    job.setNumReduceTasks(0);

    // Register the input paths.
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}
9
Source : TeraGen.java
with Apache License 2.0
from pravega
with Apache License 2.0
from pravega
/**
 * Configures and runs the TeraGen job that writes generated rows through
 * {@code PravegaFixedSegmentsOutputFormat}.
 *
 * <p>Five or six arguments are accepted. Index 0 is the row count, 1 the output directory,
 * 2-4 the output URI, scope name and stream name, and the optional index 5 the stream
 * segment setting.
 *
 * @param args the cli arguments
 * @return 2 on a usage error, 0 if the job succeeded, 1 if it failed
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length != 5 && args.length != 6) {
        usage();
        return 2;
    }
    Path outputDir = new Path(args[1]);
    getConf().setStrings(OUTPUT_URI_STRING, args[2]);
    getConf().setStrings(OUTPUT_SCOPE_NAME, args[3]);
    getConf().setStrings(OUTPUT_STREAM_NAME, args[4]);
    // The sixth argument is optional per the length check above; reading args[5]
    // unconditionally threw ArrayIndexOutOfBoundsException for five-argument invocations.
    if (args.length == 6) {
        getConf().setStrings(OUTPUT_STREAM_SEGMENTS, args[5]);
    }
    getConf().setStrings(OUTPUT_DESERIALIZER, TextSerializer.class.getName());

    Job job = Job.getInstance(getConf());
    setNumberOfRows(job, parseHumanLong(args[0]));
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(PravegaFixedSegmentsOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
9
Source : WordStandardDeviation.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * Runs the "word stddev" job over the input path and then computes the standard deviation
 * from the job output, storing it in {@code stddev}.
 *
 * @param args exactly two elements: the input path and the output path
 * @return 0 if the arguments are wrong (after printing usage) or the job succeeded, 1 if the job failed
 * @throws Exception if the job or the post-processing of its output fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordstddev <in> <out>");
        return 0;
    }

    Configuration conf = getConf();

    // Job.getInstance replaces the deprecated Job constructor.
    Job job = Job.getInstance(conf, "word stddev");
    job.setJarByClass(WordStandardDeviation.class);
    job.setMapperClass(WordStandardDeviationMapper.class);
    job.setCombinerClass(WordStandardDeviationReducer.class);
    job.setReducerClass(WordStandardDeviationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);

    // Read the job output and calculate the standard deviation.
    stddev = readAndCalcStdDev(outputpath, conf);

    return (result ? 0 : 1);
}
9
Source : WordMean.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * Runs the "word mean" job over the input path and then computes the mean from the job
 * output, storing it in {@code mean}.
 *
 * @param args exactly two elements: the input path and the output path
 * @return 0 if the arguments are wrong (after printing usage) or the job succeeded, 1 if the job failed
 * @throws Exception if the job or the post-processing of its output fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmean <in> <out>");
        return 0;
    }

    Configuration conf = getConf();

    // Job.getInstance replaces the deprecated Job constructor.
    Job job = Job.getInstance(conf, "word mean");
    job.setJarByClass(WordMean.class);
    job.setMapperClass(WordMeanMapper.class);
    job.setCombinerClass(WordMeanReducer.class);
    job.setReducerClass(WordMeanReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);

    // Read the job output and calculate the mean.
    mean = readAndCalcMean(outputpath, conf);

    return (result ? 0 : 1);
}
9
Source : TeraGen.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * Configures and runs the map-only TeraGen job.
 *
 * @param args the cli arguments: the number of rows followed by the output directory
 * @return 2 on a usage error, 0 if the job succeeded, 1 if it failed
 * @throws IOException if the output directory already exists or job setup fails
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // Validate the arguments before creating the job so a usage error does no setup work.
    if (args.length != 2) {
        usage();
        return 2;
    }

    Job job = Job.getInstance(getConf());
    setNumberOfRows(job, parseHumanLong(args[0]));

    Path outputDir = new Path(args[1]);
    if (outputDir.getFileSystem(getConf()).exists(outputDir)) {
        throw new IOException("Output directory " + outputDir + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
9
Source : TestMiniMRChildTask.java
with Apache License 2.0
from NJUJYB
with Apache License 2.0
from NJUJYB
/**
 * Runs the environment-checker job with task environment variables and Java options set
 * through either the per-task-type keys or the legacy shared keys.
 *
 * @param conf job configuration to populate and submit
 * @param inDir job input directory
 * @param outDir job output directory
 * @param oldConfigs when true, the legacy keys shared by map and reduce tasks are used
 */
void runTestTaskEnv(JobConf conf, Path inDir, Path outDir, boolean oldConfigs) throws IOException, InterruptedException, ClassNotFoundException {
    String input = "The input";
    configure(conf, inDir, outDir, input, EnvCheckMapper.class, EnvCheckReducer.class);
    // test
    // - new SET of new var (MY_PATH)
    // - set of old var (LANG)
    // - append to an old var from modified env (LD_LIBRARY_PATH)
    // - append to an old var from tt's env (PATH)
    // - append to a new var (NEW_PATH)
    String mapTaskEnvKey = JobConf.MAPRED_MAP_TASK_ENV;
    // Previously initialised from MAPRED_MAP_TASK_ENV (copy-paste slip), so the reduce-side
    // environment was never set when the new-style keys were in use.
    String reduceTaskEnvKey = JobConf.MAPRED_REDUCE_TASK_ENV;
    String mapTaskJavaOptsKey = JobConf.MAPRED_MAP_TASK_JAVA_OPTS;
    String reduceTaskJavaOptsKey = JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS;
    String mapTaskJavaOpts = MAP_OPTS_VAL;
    String reduceTaskJavaOpts = REDUCE_OPTS_VAL;
    conf.setBoolean(OLD_CONFIGS, oldConfigs);
    if (oldConfigs) {
        mapTaskEnvKey = reduceTaskEnvKey = JobConf.MAPRED_TASK_ENV;
        mapTaskJavaOptsKey = reduceTaskJavaOptsKey = JobConf.MAPRED_TASK_JAVA_OPTS;
        mapTaskJavaOpts = reduceTaskJavaOpts = TASK_OPTS_VAL;
    }

    // Map and reduce tasks receive the same environment specification.
    String taskEnv = Shell.WINDOWS
        ? "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp," + "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp"
        : "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp," + "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp";
    conf.set(mapTaskEnvKey, taskEnv);
    conf.set(reduceTaskEnvKey, taskEnv);
    conf.set("path", System.getenv("PATH"));
    conf.set(mapTaskJavaOptsKey, mapTaskJavaOpts);
    conf.set(reduceTaskJavaOptsKey, reduceTaskJavaOpts);

    // Job.getInstance replaces the deprecated Job constructor.
    Job job = Job.getInstance(conf);
    job.addFileToClassPath(APP_JAR);
    job.setJarByClass(TestMiniMRChildTask.class);
    // speed up failures
    job.setMaxMapAttempts(1);
    // Wait once; the original waited twice and discarded the first result.
    boolean succeeded = job.waitForCompletion(true);
    assertTrue("The environment checker job failed.", succeeded);
}
9
Source : WordMedian.java
with Apache License 2.0
from naver
with Apache License 2.0
from naver
/**
 * Runs the "word median" job and then derives the median from its output, storing it in
 * {@code median}.
 *
 * @param args exactly two elements: the input path and the output path
 * @return 0 if the arguments are wrong (after printing usage) or the job succeeded, 1 if the job failed
 * @throws Exception if the job or the post-processing of its output fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }

    // Only fall back to a fresh Configuration when none was supplied. Unconditionally
    // replacing it discarded whatever the caller (e.g. ToolRunner) had already set.
    if (getConf() == null) {
        setConf(new Configuration());
    }
    Configuration conf = getConf();

    Job job = Job.getInstance(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);

    // Wait for JOB 1 -- get middle value to check for Median
    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()).findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));

    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);

    return (result ? 0 : 1);
}
9
Source : TeraGen.java
with Apache License 2.0
from naver
with Apache License 2.0
from naver
/**
 * Configures and runs the map-only TeraGen job.
 *
 * @param args the cli arguments: the number of rows followed by the output directory
 * @return 2 on a usage error, 0 if the job succeeded, 1 if it failed
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    // Validate the arguments before creating the job so a usage error does no setup work.
    if (args.length != 2) {
        usage();
        return 2;
    }

    Job job = Job.getInstance(getConf());
    setNumberOfRows(job, parseHumanLong(args[0]));

    Path outputDir = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputDir);

    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
9
Source : UserNamePermission.java
with Apache License 2.0
from naver
with Apache License 2.0
from naver
/**
 * Runs the "user name check" job, reading from the relative path {@code input} and writing
 * to {@code output}, then exits with 0 on success or 1 on failure.
 *
 * @param args unused
 * @throws Exception if configuring or running the job fails
 */
public static void main(String[] args) throws Exception {
    Path outDir = new Path("output");
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "user name check");

    job.setJarByClass(UserNamePermission.class);
    job.setMapperClass(UserNamePermission.UserNameMapper.class);
    job.setCombinerClass(UserNamePermission.UserNameReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(UserNamePermission.UserNameReducer.class);
    job.setNumReduceTasks(1);

    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, outDir);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
9
Source : IntegrationTestLoadAndVerify.java
with Apache License 2.0
from fengchen8086
with Apache License 2.0
from fengchen8086
/**
 * Runs the verification job over the given table and asserts that it succeeds and writes
 * zero output rows.
 *
 * @param conf configuration used to create the job and to read the verify settings
 * @param htd descriptor of the table to scan
 * @throws Exception if the job cannot be configured or run
 */
protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "verify-output");
    LOG.info("Verify output dir: " + outputDir);

    Job job = Job.getInstance(conf);
    job.setJarByClass(this.getClass());
    job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
    setJobScannerConf(job);

    Scan scan = new Scan();

    TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan, VerifyMapper.class, BytesWritable.class, BytesWritable.class, job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
    int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
    TableMapReduceUtil.setScannerCaching(job, scannerCaching);

    job.setReducerClass(VerifyReducer.class);
    job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
    FileOutputFormat.setOutputPath(job, outputDir);
    assertTrue(job.waitForCompletion(true));

    // The ROWS_WRITTEN counter must be zero for verification to pass.
    long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
    assertEquals(0, numOutputRecords);
}
9
Source : IntegrationTestBulkLoad.java
with Apache License 2.0
from fengchen8086
with Apache License 2.0
from fengchen8086
/**
 * Runs one round of the linked-list creation job, bulk loads the resulting HFiles into the
 * test table, and deletes the HFile directory afterwards.
 *
 * @param iteration round number, stored in the job configuration under {@code ROUND_NUM_KEY}
 * @throws Exception if the job, the bulk load or the cleanup fails
 */
private void runLinkedListMRJob(int iteration) throws Exception {
    String jobName = IntegrationTestBulkLoad.class.getSimpleName() + " - " + EnvironmentEdgeManager.currentTime();
    Configuration conf = new Configuration(util.getConfiguration());

    // Use the configured bulk output directory if there is one, otherwise a per-iteration test dir.
    String bulkOutput = conf.get(ImportTsv.BULK_OUTPUT_CONF_KEY);
    Path p = (bulkOutput == null)
        ? util.getDataTestDirOnTestFS(getTablename() + "-" + iteration)
        : new Path(bulkOutput);

    conf.setBoolean("mapreduce.map.speculative", false);
    conf.setBoolean("mapreduce.reduce.speculative", false);
    conf.setInt(ROUND_NUM_KEY, iteration);

    // Job.getInstance replaces the deprecated Job constructor.
    Job job = Job.getInstance(conf);
    job.setJobName(jobName);

    // set the input format so that we can create map tasks with no data input.
    job.setInputFormatClass(ITBulkLoadInputFormat.class);

    // Set the mapper classes.
    job.setMapperClass(LinkedListCreationMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);

    // Use the identity reducer
    // So nothing to do here.

    // Set this jar.
    job.setJarByClass(getClass());

    // Set where to place the hfiles.
    FileOutputFormat.setOutputPath(job, p);
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin();
        Table table = conn.getTable(getTablename());
        RegionLocator regionLocator = conn.getRegionLocator(getTablename())) {
        // Configure the partitioner and other things needed for HFileOutputFormat.
        HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);

        // Run the job making sure it works.
        assertEquals(true, job.waitForCompletion(true));

        // Create a new loader.
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);

        // Load the HFiles in.
        loader.doBulkLoad(p, admin, table, regionLocator);
    }

    // Delete the files.
    util.getTestFileSystem().delete(p, true);
}
9
Source : HCatalogTestUtils.java
with Apache License 2.0
from dkhadoop
with Apache License 2.0
from dkhadoop
/**
 * Loads the given records into an HCatalog table by running a map-only job whose output
 * format is {@code HCatOutputFormat}.
 *
 * @param dbName database passed to {@code OutputJobInfo.create}
 * @param tableName table passed to {@code OutputJobInfo.create}
 * @param partKeyMap partition key values passed to {@code OutputJobInfo.create}
 * @param tblSchema schema set on the output format
 * @param records records to load; they are copied into {@code recsToLoad}
 * @return {@code recsToLoad} after the job has completed
 * @throws IOException if the load job reports failure
 * @throws Exception if job setup or execution fails for another reason
 */
public List<HCatRecord> loadHCatTable(String dbName, String tableName, Map<String, String> partKeyMap, HCatSchema tblSchema, List<HCatRecord> records) throws Exception {
    Job job = new Job(conf, "HCat load job");

    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatWriterMapper.class);

    // Just write one line per record to the input file to drive the mapper.
    Path path = new Path(fs.getWorkingDirectory(), "mapreduce/HCatTableIndexInput");

    job.getConfiguration().setInt(ConfigurationConstants.PROP_MAPRED_MAP_TASKS, 1);
    int writeCount = records.size();
    recsToLoad.clear();
    recsToLoad.addAll(records);
    createInputFile(path, writeCount);

    // input/output settings
    HCatWriterMapper.setWrittenRecordCount(0);

    FileInputFormat.setInputPaths(job, path);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HCatOutputFormat.class);
    OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partKeyMap);

    HCatOutputFormat.setOutput(job, outputJobInfo);
    HCatOutputFormat.setSchema(job, tblSchema);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);

    job.setNumReduceTasks(0);
    SqoopHCatUtilities.addJars(job, new SqoopOptions());
    boolean success = job.waitForCompletion(true);

    if (!success) {
        throw new IOException("Loading HCatalog table with test records failed");
    }
    utils.invokeOutputCommitterForLocalMode(job);
    LOG.info("Loaded " + HCatWriterMapper.writtenRecordCount + " records");
    return recsToLoad;
}
9
Source : SortTool.java
with Apache License 2.0
from apache
with Apache License 2.0
from apache
/**
 * Runs a job that reads {@code seqFile} as a sequence file, partitions keys with
 * {@code KeyRangePartitioner} using {@code splitFile}, and writes Accumulo files to
 * {@code outputDir}.
 *
 * @param args unused by this method
 * @return 1 if no job jar could be determined or the job failed, 0 if the job succeeded
 * @throws Exception if job setup or execution fails
 */
@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf(), this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());

    // Without a jar the job cannot be shipped; bail out early.
    if (job.getJar() == null) {
        log.error("M/R requires a jar file! Run mvn package.");
        return 1;
    }

    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, seqFile);

    job.setPartitionerClass(org.apache.accumulo.core.client.mapreduce.lib.partition.KeyRangePartitioner.class);
    org.apache.accumulo.core.client.mapreduce.lib.partition.KeyRangePartitioner.setSplitFile(job, splitFile);

    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);

    // One reducer per split point, plus one.
    job.setNumReduceTasks(splits.size() + 1);

    job.setOutputFormatClass(org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat.class);
    org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat.setOutputPath(job, new Path(outputDir));

    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}
9
Source : CharacterHistogram.java
with Apache License 2.0
from apache
with Apache License 2.0
from apache
/**
 * Parses the command-line options, runs the map-only character histogram job, and exits
 * with 0 on success or 1 on failure.
 *
 * @param args command-line arguments parsed into {@code Opts}
 * @throws Exception if option parsing, job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    Opts opts = new Opts();
    opts.parseArgs(CharacterHistogram.class.getName(), args);

    Job job = Job.getInstance(opts.getHadoopConfig());
    job.setJobName(CharacterHistogram.class.getSimpleName());
    job.setJarByClass(CharacterHistogram.class);
    job.setInputFormatClass(ChunkInputFormat.class);
    job.getConfiguration().set(VIS, opts.visibilities);
    job.setMapperClass(HistMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);

    job.setNumReduceTasks(0);

    job.setOutputFormatClass(AccumuloOutputFormat.class);
    // The fluent builder only applies its settings to the job when store(job) is called;
    // the original chain omitted it, leaving the output format unconfigured.
    AccumuloOutputFormat.configure().clientProperties(opts.getClientProperties()).defaultTable(opts.tableName).createTables(true).store(job);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
8
Source : HadoopTrainWorker.java
with MIT License
from yuantiku
with MIT License
from yuantiku
/**
 * Runs the training job on Hadoop, writing to a temporary output directory that is deleted
 * both before the job starts and after it finishes.
 *
 * <p>Exceptions are logged rather than propagated.
 *
 * @return true only if the job completed successfully and the final cleanup did not throw
 */
public boolean hadoopTrain() {
    boolean success;
    String outputPath = null;
    try {
        conf.set("customParamsMap", encodeMap(customParamsMap));
        Job job = Job.getInstance(conf, user + " " + modelName + " training on hadoop");
        String trainDataPath = getTrainDataPath();

        job.setJarByClass(HadoopTrainWorker.class);
        job.setMapperClass(TrainMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setReducerClass(TrainReducer.class);
        FileInputFormat.addInputPath(job, new Path(trainDataPath));

        outputPath = trainDataPath + "_temp_will_be_deleted";
        // Remove leftovers from a previous run. outputPath was assigned just above, so the
        // original null check here was always true.
        IFileSystem fs = FileSystemFactory.createFileSystem(new URI(getURI()));
        fs.delete(outputPath);
        FileOutputFormat.setOutputPath(job, new Path(outputPath));

        job.setNumReduceTasks(slaveNum);
        success = job.waitForCompletion(true);
    } catch (Exception e) {
        success = false;
        LOG.error("hadoop train exception!", e);
    } finally {
        // Always try to remove the temporary output; outputPath is still null when the
        // failure happened before it was assigned.
        try {
            IFileSystem fs = FileSystemFactory.createFileSystem(new URI(getURI()));
            if (outputPath != null) {
                fs.delete(outputPath);
            }
        } catch (Exception e) {
            success = false;
            LOG.error("hadoop train exception!", e);
        }
    }
    return success;
}
See More Examples