Here are examples of the Java API org.apache.hadoop.mapreduce.Job.setOutputKeyClass(), taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
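For orientation, a minimal sketch of the call in context; the job name and the Text/IntWritable types here are illustrative, not taken from any one listed project. setOutputKeyClass declares the key type the job's reducer emits, and unless setMapOutputKeyClass is called separately, it also serves as the map output key type.
Job job = Job.getInstance(new Configuration(), "example-job");
// Declares the key/value types the reducer emits; unless
// setMapOutputKeyClass/setMapOutputValueClass are also called,
// the mapper is expected to emit the same types.
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);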
167 Examples
19
Source : AveragePageCount.java
with MIT License
from PacktPublishing
public static void main(String[] args) throws Exception {
    Configuration con = new Configuration();
    Job bookJob = Job.getInstance(con, "Average Page Count");
    bookJob.setJarByClass(AveragePageCount.class);
    bookJob.setMapperClass(TextMapper.class);
    bookJob.setReducerClass(AverageReduce.class);
    bookJob.setOutputKeyClass(Text.class);
    bookJob.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(bookJob, new Path("C:/Hadoop/books.txt"));
    FileOutputFormat.setOutputPath(bookJob, new Path("C:/Hadoop/BookOutput"));
    if (bookJob.waitForCompletion(true)) {
        System.exit(0);
    }
}
19
Source : ChainReducer.java
with Apache License 2.0
from NJUJYB
/**
 * Sets the {@link Reducer} class to the chain job.
 *
 * <p>
 * The key and values are passed from one element of the chain to the next, by
 * value. For the added Reducer the configuration given for it,
 * <code>reducerConf</code>, has precedence over the job's Configuration.
 * This precedence is in effect when the task is running.
 * </p>
 * <p>
 * IMPORTANT: There is no need to specify the output key/value classes for the
 * ChainReducer; this is done by the setReducer or the addMapper for the last
 * element in the chain.
 * </p>
 *
 * @param job
 *          the job
 * @param klass
 *          the Reducer class to add.
 * @param inputKeyClass
 *          reducer input key class.
 * @param inputValueClass
 *          reducer input value class.
 * @param outputKeyClass
 *          reducer output key class.
 * @param outputValueClass
 *          reducer output value class.
 * @param reducerConf
 *          a configuration for the Reducer class. It is recommended to use a
 *          Configuration without default values using the
 *          <code>Configuration(boolean loadDefaults)</code> constructor with
 *          FALSE.
 */
public static void setReducer(Job job, Class<? extends Reducer> klass, Class<?> inputKeyClass, Class<?> inputValueClass, Class<?> outputKeyClass, Class<?> outputValueClass, Configuration reducerConf) {
    job.setReducerClass(ChainReducer.class);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    Chain.setReducer(job, klass, inputKeyClass, inputValueClass, outputKeyClass, outputValueClass, reducerConf);
}
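For context, a minimal usage sketch of the method above when assembling a chain job; XReduce and the key/value types are hypothetical placeholders, not names from the listed project:
Configuration reducerConf = new Configuration(false); // no defaults, as the Javadoc recommends
ChainReducer.setReducer(job, XReduce.class,
    LongWritable.class, Text.class, // reducer input key/value classes
    Text.class, Text.class,         // reducer output key/value classes
    reducerConf);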
19
Source : ChainReducer.java
with Apache License 2.0
from NJUJYB
/**
 * Adds a {@link Mapper} class to the chain reducer.
 *
 * <p>
 * The key and values are passed from one element of the chain to the next, by
 * value. For the added Mapper the configuration given for it,
 * <code>mapperConf</code>, has precedence over the job's Configuration. This
 * precedence is in effect when the task is running.
 * </p>
 * <p>
 * IMPORTANT: There is no need to specify the output key/value classes for the
 * ChainMapper; this is done by the addMapper for the last mapper in the
 * chain.
 * </p>
 *
 * @param job
 *          The job.
 * @param klass
 *          the Mapper class to add.
 * @param inputKeyClass
 *          mapper input key class.
 * @param inputValueClass
 *          mapper input value class.
 * @param outputKeyClass
 *          mapper output key class.
 * @param outputValueClass
 *          mapper output value class.
 * @param mapperConf
 *          a configuration for the Mapper class. It is recommended to use a
 *          Configuration without default values using the
 *          <code>Configuration(boolean loadDefaults)</code> constructor with
 *          FALSE.
 */
public static void addMapper(Job job, Class<? extends Mapper> klass, Class<?> inputKeyClass, Class<?> inputValueClass, Class<?> outputKeyClass, Class<?> outputValueClass, Configuration mapperConf) throws IOException {
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    Chain.addMapper(false, job, klass, inputKeyClass, inputValueClass, outputKeyClass, outputValueClass, mapperConf);
}
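And a matching hedged sketch of appending a mapper after the reducer in the same chain; PostProcessMapper is likewise a hypothetical placeholder:
Configuration mapperConf = new Configuration(false);
// Runs after the reducer; its input types must match the reducer's output types.
ChainReducer.addMapper(job, PostProcessMapper.class,
    Text.class, Text.class,         // mapper input key/value classes
    LongWritable.class, Text.class, // mapper output key/value classes
    mapperConf);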
18
Source : WordCount1Application.java
with Apache License 2.0
from ukihsoroy
/**
 * Defines the Driver: encapsulates all the information of the MapReduce job
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. Create the Configuration
    Configuration configuration = new Configuration();
    // 2. Create a Job
    Job job = Job.getInstance(configuration, "wordcount");
    // 3. Set the job's driver class
    job.setJarByClass(WordCount1Application.class);
    // 4. Set the input path for the job
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. Set the map-related parameters
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. Set the reduce-related parameters
    job.setReducerClass(WordCountReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. Set the output path for the job
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. Submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
18
Source : Hdfs2Tg.java
with Apache License 2.0
from tigergraph
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "HDFS to TG");
    job.setJarByClass(Hdfs2Tg.class);
    job.setMapperClass(LineMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
18
Source : MaxTemperature.java
with MIT License
from Tianny
public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Max Temperature");
    // Passing any class to setJarByClass is enough; Hadoop uses it to locate the JAR file containing that class
    job.setJarByClass(MaxTemperature.class);
    // Specify the input and output paths
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);
    // Controls the output types of the reduce function; they must match what the Reducer class produces
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
18
Source : WordCount.java
with Apache License 2.0
from NJUJYB
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
18
Source : MultiFileWordCount.java
with Apache License 2.0
from NJUJYB
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 2;
    }
    Job job = new Job(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);
    // set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);
    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);
    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
18
Source : TestMRSequenceFileAsBinaryOutputFormat.java
with Apache License 2.0
from NJUJYB
public void testSequenceOutputClassDefaultsToMapRedOutputClass() throws IOException {
    Job job = Job.getInstance();
    // Setting Random class to test getSequenceFileOutput{Key,Value}Class
    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputValueClass(BooleanWritable.class);
    assertEquals("SequenceFileOutputKeyClass should default to outputKeyClass", FloatWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(job));
    assertEquals("SequenceFileOutputValueClass should default to " + "outputValueClass", BooleanWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(job));
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
    assertEquals("SequenceFileOutputKeyClass not updated", IntWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(job));
    assertEquals("SequenceFileOutputValueClass not updated", DoubleWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(job));
}
18
Source : WordCount.java
with Apache License 2.0
from naver
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
18
Source : Main.java
with Apache License 2.0
from lfz757077613
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(new Configuration(), "wordCount");
    // Set the main class of the jar
    job.setJarByClass(Main.class);
    // Set the mapper
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // Set the reducer
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // Run a local reduce first (combiner) to cut down the amount of data sent
    job.setCombinerClass(MyReducer.class);
    // Set the partitioner
    // (not used for now)
    // job.setPartitionerClass(MyPartitioner.class);
    // job.setNumReduceTasks(2);
    // Set the job input and output paths; note the output must not already exist. The output is a directory holding the results and the job-completion status.
    FileInputFormat.setInputPaths(job, new Path("hdfs://localhost:8020/access.log"));
    FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:8020/output"));
    System.exit(job.waitForCompletion(true) ? 0 : -1);
}
18
Source : SparkUtil.java
with Apache License 2.0
from Kyligence
public static void setHadoopConfForCuboid(Job job, CubeSegment segment, String metaUrl) throws Exception {
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
}
18
Source : NetezzaExternalTableImportJob.java
with Apache License 2.0
from dkhadoop
/**
 * Set the mapper class implementation to use in the job, as well as any
 * related configuration (e.g., map output types).
 */
protected void configureMapper(Job job, String tableName, String tableClassName) throws ClassNotFoundException, IOException {
    super.configureMapper(job, tableName, tableClassName);
    job.setMapperClass(getMapperClass());
    if (isHCatJob) {
        LOG.info("Configuring mapper for HCatalog import job");
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(SqoopHCatUtilities.getImportValueClass());
        return;
    }
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(NullWritable.class);
}
18
Source : HBaseImportJob.java
with Apache License 2.0
from dkhadoop
@Override
protected void configureMapper(Job job, String tableName, String tableClassName) throws IOException {
    job.setOutputKeyClass(SqoopRecord.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapperClass(getMapperClass());
}
18
Source : OdpsImportJob.java
with Apache License 2.0
from aliyun
@Override
protected void configureMapper(Job job, String tableName, String tableClassName) {
    job.setOutputKeyClass(SqoopRecord.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapperClass(getMapperClass());
}
17
Source : TransformBaseRunner.java
with GNU General Public License v3.0
from wlhbdp
protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Run locally
    // TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, true);
    // Run on a cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    // this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    // job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}
17
Source : MinTemperature.java
with Apache License 2.0
from whirlys
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length != 2) {
        System.err.println("Usage: MinTemperature <input path> <output path>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(MinTemperature.class);
    job.setJobName("MapReduce experiment - weather dataset - minimum temperature");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TemperatureMapper.class);
    // Set a Combiner to reduce the amount of data transferred and improve efficiency
    // job.setCombinerClass(MinTemperatureReducer.class);
    job.setReducerClass(MinTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
17
Source : MaxTemperature.java
with Apache License 2.0
from whirlys
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(MaxTemperature.class);
    job.setJobName("MapReduce experiment - weather dataset - maximum temperature");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TemperatureMapper.class);
    // Set a Combiner to reduce the amount of data transferred and improve efficiency
    // job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
17
Source : AvgTemperature.java
with Apache License 2.0
from whirlys
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length != 2) {
        System.err.println("Usage: AvgTemperature <input path> <output path>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(AvgTemperature.class);
    job.setJobName("MapReduce experiment - weather dataset - average temperature");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TemperatureMapper.class);
    job.setReducerClass(AvgTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
17
Source : TeraChecksum.java
with Apache License 2.0
from NJUJYB
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSum");
    job.setJarByClass(TeraChecksum.class);
    job.setMapperClass(ChecksumMapper.class);
    job.setReducerClass(ChecksumReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Unsigned16.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}
17
Source : TestMiniMRClientCluster.java
with Apache License 2.0
from NJUJYB
public static Job createJob() throws IOException {
    final Job baseJob = new Job(mrCluster.getConfig());
    baseJob.setOutputKeyClass(Text.class);
    baseJob.setOutputValueClass(IntWritable.class);
    baseJob.setMapperClass(MyMapper.class);
    baseJob.setReducerClass(MyReducer.class);
    baseJob.setNumReduceTasks(1);
    return baseJob;
}
17
Source : Export.java
with Apache License 2.0
from fengchen8086
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJobName(NAME + "_" + tableName);
    job.setJarByClass(Export.class);
    // Set optional scan parameters
    Scan s = getConfiguredScanForJob(conf, args);
    IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
    // No reducers. Just write straight to output files.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Result.class);
    // job conf doesn't contain the conf so doesn't have a default fs.
    FileOutputFormat.setOutputPath(job, outputDir);
    return job;
}
17
Source : CellCounter.java
with Apache License 2.0
from fengchen8086
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    String reportSeparatorString = (args.length > 2) ? args[2] : ":";
    conf.set("ReportSeparator", reportSeparatorString);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(CellCounter.class);
    Scan scan = getConfiguredScanForJob(conf, args);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(1);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setReducerClass(IntSumReducer.class);
    return job;
}
17
Source : MySQLDumpImportJob.java
with Apache License 2.0
from dkhadoop
/**
 * Set the mapper class implementation to use in the job,
 * as well as any related configuration (e.g., map output types).
 */
protected void configureMapper(Job job, String tableName, String tableClassName) throws ClassNotFoundException, IOException {
    job.setMapperClass(getMapperClass());
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(NullWritable.class);
}
17
Source : ActiveUserRunner.java
with Apache License 2.0
from bjmashibing
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Initialize the arguments
    this.processArgs(conf, args);
    // Create the job
    Job job = Job.getInstance(conf, "active_user");
    // Set the job-related configuration parameters
    job.setJarByClass(ActiveUserRunner.class);
    // HBase input mapper parameters
    // 1. Run locally
    TableMapReduceUtil.initTableMapperJob(this.initScans(job), ActiveUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);
    // 2. Run on a cluster
    // TableMapReduceUtil.initTableMapperJob(null, ActiveUserMapper.class,
    // StatsUserDimension.class, TimeOutputValue.class, job);
    // Set the reducer-related parameters
    job.setReducerClass(ActiveUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);
    // Set the output-related parameters
    job.setOutputFormatClass(TransformerOutputFormat.class);
    // Start time in milliseconds
    long startTime = System.currentTimeMillis();
    try {
        return job.waitForCompletion(true) ? 0 : -1;
    } finally {
        // End time in milliseconds
        long endTime = System.currentTimeMillis();
        logger.info("Job<" + job.getJobName() + "> succeeded: " + job.isSuccessful() + "; start: " + startTime + "; end: " + endTime + "; elapsed: " + (endTime - startTime) + "ms");
    }
}
17
Source : TransformerBaseRunner.java
with Apache License 2.0
from bjmashibing
/**
 * Creates the job
 *
 * @param conf
 * @return
 * @throws IOException
 */
protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Run locally
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    // Run on a cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    // this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    // job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}
16
Source : DBCountPageView.java
with Apache License 2.0
from NJUJYB
@Override
// Usage DBCountPageView [driverClass dburl]
public int run(String[] args) throws Exception {
    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;
    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }
    initialize(driverClassName, url);
    Configuration conf = getConf();
    DBConfiguration.configureDB(conf, driverClassName, url);
    Job job = new Job(conf);
    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);
    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);
    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}
16
Source : TestMiniMRClientCluster.java
with Apache License 2.0
from naver
public static Job createJob() throws IOException {
    final Job baseJob = Job.getInstance(mrCluster.getConfig());
    baseJob.setOutputKeyClass(Text.class);
    baseJob.setOutputValueClass(IntWritable.class);
    baseJob.setMapperClass(MyMapper.class);
    baseJob.setReducerClass(MyReducer.class);
    baseJob.setNumReduceTasks(1);
    return baseJob;
}
16
Source : DataDrivenImportJob.java
with Apache License 2.0
from dkhadoop
@Override
protected void configureMapper(Job job, String tableName, String tableClassName) throws IOException {
    if (isHCatJob) {
        LOG.info("Configuring mapper for HCatalog import job");
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(SqoopHCatUtilities.getImportValueClass());
        job.setMapperClass(SqoopHCatUtilities.getImportMapperClass());
        return;
    }
    if (options.getTargetDir() != null && options.getTargetDir().contains("@")) {
        job.setOutputFormatClass(EsOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
    } else if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
        // For text files, specify these as the output types; for
        // other types, we just use the defaults.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
    } else if (options.getFileLayout() == SqoopOptions.FileLayout.AvroDataFile) {
        Schema schema = generateAvroSchema(tableName);
        try {
            writeAvroSchema(schema);
        } catch (final IOException e) {
            LOG.error("Error while writing Avro schema.", e);
        }
        AvroJob.setMapOutputSchema(job.getConfiguration(), schema);
    } else if (options.getFileLayout() == SqoopOptions.FileLayout.ParquetFile) {
        Configuration conf = job.getConfiguration();
        // An Avro schema is required for creating a dataset that manages
        // Parquet data records. The import will fail if the schema is invalid.
        Schema schema = generateAvroSchema(tableName);
        String uri = getKiteUri(conf, tableName);
        ParquetJob.configureImportJob(conf, schema, uri, options.isAppendMode(), options.doHiveImport() && options.doOverwriteHiveTable());
    }
    job.setMapperClass(getMapperClass());
}
16
Source : NewInstallUserRunner.java
with Apache License 2.0
from bjmashibing
@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Process the arguments
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "new_install_user");
    job.setJarByClass(NewInstallUserRunner.class);
    // Run locally
    TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);
    // Run on a cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job);
    job.setReducerClass(NewInstallUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);
    // job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TransformerOutputFormat.class);
    if (job.waitForCompletion(true)) {
        // Succeeded; now compute the total number of users
        this.calculateTotalUsers(conf);
        return 0;
    } else {
        return -1;
    }
}
16
Source : DataDrivenImportJob.java
with Apache License 2.0
from aliyun
@Override
protected void configureMapper(Job job, String tableName, String tableClassName) throws IOException {
    if (isHCatJob) {
        LOG.info("Configuring mapper for HCatalog import job");
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(SqoopHCatUtilities.getImportValueClass());
        job.setMapperClass(SqoopHCatUtilities.getImportMapperClass());
        return;
    }
    if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
        // For text files, specify these as the output types; for
        // other types, we just use the defaults.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
    } else if (options.getFileLayout() == SqoopOptions.FileLayout.AvroDataFile) {
        final String schemaNameOverride = null;
        Schema schema = generateAvroSchema(tableName, schemaNameOverride);
        try {
            writeAvroSchema(schema);
        } catch (final IOException e) {
            LOG.error("Error while writing Avro schema.", e);
        }
        AvroJob.setMapOutputSchema(job.getConfiguration(), schema);
    } else if (options.getFileLayout() == SqoopOptions.FileLayout.ParquetFile) {
        JobConf conf = (JobConf) job.getConfiguration();
        // Kite SDK requires an Avro schema to represent the data structure of
        // the target dataset. If the schema name equals the generated java class
        // name, the import will fail. So we use the table name as the schema name
        // and add a prefix "codegen_" to the generated java class to avoid the conflict.
        final String schemaNameOverride = tableName;
        Schema schema = generateAvroSchema(tableName, schemaNameOverride);
        String uri = getKiteUri(conf, tableName);
        ParquetJob.WriteMode writeMode;
        if (options.doHiveImport()) {
            if (options.doOverwriteHiveTable()) {
                writeMode = ParquetJob.WriteMode.OVERWRITE;
            } else {
                writeMode = ParquetJob.WriteMode.APPEND;
                if (Datasets.exists(uri)) {
                    LOG.warn("Target Hive table '" + tableName + "' exists! Sqoop will " + "append data into the existing Hive table. Consider using " + "--hive-overwrite, if you do NOT intend to do appending.");
                }
            }
        } else {
            // Note that there is no such import argument for overwriting an HDFS
            // dataset, so overwrite mode is not supported yet.
            // Sqoop's append mode means to merge two independent datasets. We
            // choose DEFAULT as write mode.
            writeMode = ParquetJob.WriteMode.DEFAULT;
        }
        ParquetJob.configureImportJob(conf, schema, uri, writeMode);
    }
    job.setMapperClass(getMapperClass());
}
15
Source : WordMean.java
with Apache License 2.0
from pravega
@Override
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        System.err.println("Usage: wordmean <dummy_hdfs> <uri> <scope> <stream> <out>");
        return 0;
    }
    Configuration conf = getConf();
    conf.setStrings("input.pravega.uri", args[1]);
    conf.setStrings("input.pravega.scope", args[2]);
    conf.setStrings("input.pravega.stream", args[3]);
    conf.setStrings("input.pravega.deserializer", TextSerializer.class.getName());
    Job job = Job.getInstance(conf, "word mean");
    job.setJarByClass(WordMean.class);
    job.setMapperClass(WordMeanMapper.class);
    job.setCombinerClass(WordMeanReducer.class);
    job.setReducerClass(WordMeanReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(PravegaInputFormat.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[4]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    mean = readAndCalcMean(outputpath, conf);
    return (result ? 0 : 1);
}
15
Source : TestMRJobs.java
with Apache License 2.0
from NJUJYB
protected Job runFailingMapperJob() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration myConf = new Configuration(mrCluster.getConfig());
    myConf.setInt(MRJobConfig.NUM_MAPS, 1);
    // reduce the number of attempts
    myConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
    Job job = new Job(myConf);
    job.setJarByClass(FailingMapper.class);
    job.setJobName("failmapper");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(FailingMapper.class);
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_ROOT_DIR, "failmapper-output"));
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertFalse(succeeded);
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
    return job;
}
15
Source : WikiDriver.java
with Apache License 2.0
from mumuhadoop
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    if (args.length != 3) {
        System.out.println("usage: <input dir> <temp dir> <output dir>");
        System.exit(1);
    }
    // OperatingFiles(args[0], args[1]);
    String input = args[0];
    String tempOutput = args[1];
    String output = args[2];
    Configuration job1Conf = new Configuration();
    Job job1 = new Job(job1Conf, "job1");
    job1.setJarByClass(WikiDriver.class);
    job1.setMapperClass(WikipediaToItemPrefsMapper.class);
    job1.setReducerClass(WikipediaToUserVectorReducer.class);
    job1.setMapOutputKeyClass(VarLongWritable.class);
    job1.setMapOutputValueClass(VarLongWritable.class);
    // Set job1's output file format to SequenceFileOutputFormat
    job1.setInputFormatClass(TextInputFormat.class);
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    job1.setOutputKeyClass(VarLongWritable.class);
    job1.setOutputValueClass(VectorWritable.class);
    FileInputFormat.addInputPath(job1, new Path(input));
    FileOutputFormat.setOutputPath(job1, new Path(tempOutput));
    job1.waitForCompletion(true);
    display(tempOutput, "SequenceFile");
    Configuration job2Conf = new Configuration();
    Job job2 = new Job(job2Conf, "job2");
    job2.setJarByClass(WikiDriver.class);
    job2.setMapperClass(UserVectorToCooccurrenceMapper.class);
    job2.setReducerClass(UserVectorToCooccurrenceReducer.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(IntWritable.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(VectorWritable.class);
    // Set job2's input file format to SequenceFileInputFormat
    job2.setInputFormatClass(SequenceFileInputFormat.class);
    job2.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job2, new Path(tempOutput));
    FileOutputFormat.setOutputPath(job2, new Path(output));
    job2.waitForCompletion(true);
    display(output, null);
}
14
Source : WordStandardDeviation.java
with Apache License 2.0
from NJUJYB
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordstddev <in> <out>");
        return 0;
    }
    Configuration conf = getConf();
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word stddev");
    job.setJarByClass(WordStandardDeviation.class);
    job.setMapperClass(WordStandardDeviationMapper.class);
    job.setCombinerClass(WordStandardDeviationReducer.class);
    job.setReducerClass(WordStandardDeviationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    // read output and calculate standard deviation
    stddev = readAndCalcStdDev(outputpath, conf);
    return (result ? 0 : 1);
}
14
Source : WordMedian.java
with Apache License 2.0
from NJUJYB
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }
    setConf(new Configuration());
    Configuration conf = getConf();
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);
    // Wait for JOB 1 -- get middle value to check for Median
    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()).findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));
    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
    return (result ? 0 : 1);
}
14
Source : WordMean.java
with Apache License 2.0
from NJUJYB
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmean <in> <out>");
        return 0;
    }
    Configuration conf = getConf();
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word mean");
    job.setJarByClass(WordMean.class);
    job.setMapperClass(WordMeanMapper.class);
    job.setCombinerClass(WordMeanReducer.class);
    job.setReducerClass(WordMeanReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    mean = readAndCalcMean(outputpath, conf);
    return (result ? 0 : 1);
}
14
Source : BaileyBorweinPlouffe.java
with Apache License 2.0
from NJUJYB
/**
 * Create and set up a job
 */
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = new Job(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);
    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);
    // setup input
    job.setInputFormatClass(BbpInputFormat.class);
    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);
    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
14
Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB
/**
 * Creates a simple kill job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple kill job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createKillJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("Kill-Job");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(KillMapper.class);
    theJob.setReducerClass(Reducer.class);
    theJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    return theJob;
}
14
Source : TestMRJobs.java
with Apache License 2.0
from naver
protected Job runFailingMapperJob() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration myConf = new Configuration(mrCluster.getConfig());
    myConf.setInt(MRJobConfig.NUM_MAPS, 1);
    // reduce the number of attempts
    myConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
    Job job = Job.getInstance(myConf);
    job.setJarByClass(FailingMapper.class);
    job.setJobName("failmapper");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(FailingMapper.class);
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_ROOT_DIR, "failmapper-output"));
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertFalse(succeeded);
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
    return job;
}
14
Source : ImputationJob.java
with GNU Affero General Public License v3.0
from genepi
@Override
public void setupJob(Job job) {
    NLineInputFormat.setNumLinesPerSplit(job, 1);
    job.setMapperClass(ImputationMapper.class);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setNumReduceTasks(0);
}
13
Source : ReduceJobsConfiguration.java
with Apache License 2.0
from WinterChenS
/**
 * Gets the configuration for the word-count job
 *
 * @param jobName
 * @param inputPath
 * @param outputPath
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public void getWordCountJobsConf(String jobName, String inputPath, String outputPath) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = getConfiguration();
    Job job = Job.getInstance(conf, jobName);
    job.setMapperClass(WordMapper.class);
    job.setCombinerClass(WordReduce.class);
    job.setJarByClass(HadoopDemoApplication.class);
    job.setReducerClass(WordReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Settings for combining small input files
    // job.setInputFormatClass(CombineTextInputFormat.class);
    // Maximum split size
    // CombineTextInputFormat.setMaxInputSplitSize(job, 4 * 1024 * 1024);
    // Minimum split size
    // CombineTextInputFormat.setMinInputSplitSize(job, 2 * 1024 * 1024);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.waitForCompletion(true);
}
13
Source : IndexTool.java
with Apache License 2.0
from rayokota
/**
 * Uses the HBase front-door API to write to the index table. Submits the job and either returns or
 * waits for the job completion based on the runForeground parameter.
 *
 * @param job job
 * @param outputPath output path
 * @param runForeground - if true, waits for job completion, else submits and returns
 * immediately.
 * @throws Exception
 */
private void configureSubmittableJobUsingDirectApi(Job job, Path outputPath, TableName outputTableName, boolean skipDependencyJars, boolean runForeground) throws Exception {
    job.setMapperClass(getDirectMapperClass());
    job.setReducerClass(getDirectReducerClass());
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableOutputFormat.OUTPUT_TABLE, outputTableName.getNameAsString());
    // Set the output classes
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    if (!skipDependencyJars) {
        TableMapReduceUtil.addDependencyJars(job);
    }
    job.setNumReduceTasks(1);
    if (!runForeground) {
        LOG.info("Running Index Build in Background - Submit async and exit");
        job.submit();
        return;
    }
    LOG.info("Running Index Build in Foreground. Waits for the build to complete. This may take a long time!");
    boolean result = job.waitForCompletion(true);
    if (!result) {
        LOG.error("IndexTool job failed!");
        throw new Exception("IndexTool job failed: " + job.toString());
    }
    FileSystem.get(conf).delete(outputPath, true);
}
13
Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB
/**
 * Creates a simple copy job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    conf.setInt(MRJobConfig.NUM_MAPS, 3);
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("DataMoveJob");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(DataCopyMapper.class);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    theJob.setReducerClass(DataCopyReducer.class);
    theJob.setNumReduceTasks(1);
    return theJob;
}
13
Source : TestMultipleInputs.java
with Apache License 2.0
from NJUJYB
@Test
public void testDoMultipleInputs() throws IOException {
    Path in1Dir = getDir(IN1_DIR);
    Path in2Dir = getDir(IN2_DIR);
    Path outDir = getDir(OUT_DIR);
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(outDir, true);
    DataOutputStream file1 = fs.create(new Path(in1Dir, "part-0"));
    file1.writeBytes("a\nb\nc\nd\ne");
    file1.close();
    // write tab delimited to second file because we're doing
    // KeyValueInputFormat
    DataOutputStream file2 = fs.create(new Path(in2Dir, "part-0"));
    file2.writeBytes("a\tblah\nb\tblah\nc\tblah\nd\tblah\ne\tblah");
    file2.close();
    Job job = Job.getInstance(conf);
    job.setJobName("mi");
    MultipleInputs.addInputPath(job, in1Dir, TextInputFormat.class, MapClass.class);
    MultipleInputs.addInputPath(job, in2Dir, KeyValueTextInputFormat.class, KeyValueMapClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(ReducerClass.class);
    FileOutputFormat.setOutputPath(job, outDir);
    boolean success = false;
    try {
        success = job.waitForCompletion(true);
    } catch (InterruptedException ie) {
        throw new RuntimeException(ie);
    } catch (ClassNotFoundException instante) {
        throw new RuntimeException(instante);
    }
    if (!success)
        throw new RuntimeException("Job failed!");
    // copy bytes a bunch of times for the ease of readLine() - whatever
    BufferedReader output = new BufferedReader(new InputStreamReader(fs.open(new Path(outDir, "part-r-00000"))));
    // reducer should have counted one key from each file
    assertTrue(output.readLine().equals("a 2"));
    assertTrue(output.readLine().equals("b 2"));
    assertTrue(output.readLine().equals("c 2"));
    assertTrue(output.readLine().equals("d 2"));
    assertTrue(output.readLine().equals("e 2"));
}
13
Source : WordStandardDeviation.java
with Apache License 2.0
from naver
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordstddev <in> <out>");
        return 0;
    }
    Configuration conf = getConf();
    Job job = Job.getInstance(conf, "word stddev");
    job.setJarByClass(WordStandardDeviation.class);
    job.setMapperClass(WordStandardDeviationMapper.class);
    job.setCombinerClass(WordStandardDeviationReducer.class);
    job.setReducerClass(WordStandardDeviationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    // read output and calculate standard deviation
    stddev = readAndCalcStdDev(outputpath, conf);
    return (result ? 0 : 1);
}
13
Source : WordMedian.java
with Apache License 2.0
from naver
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }
    setConf(new Configuration());
    Configuration conf = getConf();
    Job job = Job.getInstance(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);
    // Wait for JOB 1 -- get middle value to check for Median
    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()).findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));
    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
    return (result ? 0 : 1);
}
13
Source : WordMean.java
with Apache License 2.0
from naver
@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmean <in> <out>");
        return 0;
    }
    Configuration conf = getConf();
    Job job = Job.getInstance(conf, "word mean");
    job.setJarByClass(WordMean.class);
    job.setMapperClass(WordMeanMapper.class);
    job.setCombinerClass(WordMeanReducer.class);
    job.setReducerClass(WordMeanReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    mean = readAndCalcMean(outputpath, conf);
    return (result ? 0 : 1);
}
13
Source : MultiFileWordCount.java
with Apache License 2.0
from naver
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 2;
    }
    Job job = Job.getInstance(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);
    // set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);
    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);
    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}
13
Source : BaileyBorweinPlouffe.java
with Apache License 2.0
from naver
/**
 * Create and set up a job
 */
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = Job.getInstance(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);
    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);
    // setup input
    job.setInputFormatClass(BbpInputFormat.class);
    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);
    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}