org.apache.hadoop.mapreduce.Job.setOutputKeyClass()

Here are examples of the Java API org.apache.hadoop.mapreduce.Job.setOutputKeyClass(), taken from open source projects.

167 Examples
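
Before the examples, a minimal driver sketch for orientation (WordMapper and WordReducer are hypothetical classes; only the Job, FileInputFormat, and FileOutputFormat calls are real Hadoop API): setOutputKeyClass declares the key type of the job's final output and, unless setMapOutputKeyClass is also called, of the map output as well.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Driver {

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "example");
        job.setJarByClass(Driver.class);
        job.setMapperClass(WordMapper.class);    // hypothetical Mapper emitting (Text, IntWritable)
        job.setReducerClass(WordReducer.class);  // hypothetical Reducer<Text, IntWritable, Text, IntWritable>
        // Declare the types of the final (Text, IntWritable) output pairs; these also
        // apply to the map output because no setMapOutput*Class method is called.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}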

19 Source : AveragePageCount.java
with MIT License
from PacktPublishing

public static void main(String[] args) throws Exception {
    Configuration con = new Configuration();
    Job bookJob = Job.getInstance(con, "Average Page Count");
    bookJob.setJarByClass(AveragePageCount.class);
    bookJob.setMapperClass(TextMapper.class);
    bookJob.setReducerClass(AverageReduce.class);
    bookJob.setOutputKeyClass(Text.class);
    bookJob.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(bookJob, new Path("C:/Hadoop/books.txt"));
    FileOutputFormat.setOutputPath(bookJob, new Path("C:/Hadoop/BookOutput"));
    if (bookJob.waitForCompletion(true)) {
        System.exit(0);
    }
}

19 Source : ChainReducer.java
with Apache License 2.0
from NJUJYB

/**
 * Sets the {@link Reducer} class to the chain job.
 *
 * <p>
 * The key and values are passed from one element of the chain to the next, by
 * value. For the added Reducer, the configuration given for it,
 * <code>reducerConf</code>, has precedence over the job's Configuration.
 * This precedence is in effect when the task is running.
 * </p>
 * <p>
 * IMPORTANT: There is no need to specify the output key/value classes for the
 * ChainReducer; this is done by the setReducer or the addMapper for the last
 * element in the chain.
 * </p>
 *
 * @param job
 *          the job
 * @param klass
 *          the Reducer class to add.
 * @param inputKeyClass
 *          reducer input key class.
 * @param inputValueClass
 *          reducer input value class.
 * @param outputKeyClass
 *          reducer output key class.
 * @param outputValueClass
 *          reducer output value class.
 * @param reducerConf
 *          a configuration for the Reducer class. It is recommended to use a
 *          Configuration without default values, using the
 *          <code>Configuration(boolean loadDefaults)</code> constructor with
 *          FALSE.
 */
public static void setReducer(Job job, Class<? extends Reducer> klass, Class<?> inputKeyClass, Class<?> inputValueClass, Class<?> outputKeyClass, Class<?> outputValueClass, Configuration reducerConf) {
    job.setReducerClass(ChainReducer.class);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    Chain.setReducer(job, klass, inputKeyClass, inputValueClass, outputKeyClass, outputValueClass, reducerConf);
}
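
A hedged usage sketch of the method above, assuming a hypothetical WordCountReducer that reads and emits (Text, IntWritable) pairs and a Job already created; the Configuration(false) instance carries reducer-specific settings that take precedence over the job's Configuration at task time.

// Sketch only; WordCountReducer is a hypothetical class.
Configuration reducerConf = new Configuration(false); // no default values, as recommended
ChainReducer.setReducer(job, WordCountReducer.class,
    Text.class, IntWritable.class,   // reducer input key/value classes
    Text.class, IntWritable.class,   // reducer output key/value classes
    reducerConf);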

19 Source : ChainReducer.java
with Apache License 2.0
from NJUJYB

/**
 * Adds a {@link Mapper} class to the chain reducer.
 *
 * <p>
 * The key and values are passed from one element of the chain to the next, by
 * value. For the added Mapper, the configuration given for it,
 * <code>mapperConf</code>, has precedence over the job's Configuration. This
 * precedence is in effect when the task is running.
 * </p>
 * <p>
 * IMPORTANT: There is no need to specify the output key/value classes for the
 * ChainMapper; this is done by the addMapper for the last mapper in the
 * chain.
 * </p>
 *
 * @param job
 *          The job.
 * @param klass
 *          the Mapper class to add.
 * @param inputKeyClass
 *          mapper input key class.
 * @param inputValueClass
 *          mapper input value class.
 * @param outputKeyClass
 *          mapper output key class.
 * @param outputValueClass
 *          mapper output value class.
 * @param mapperConf
 *          a configuration for the Mapper class. It is recommended to use a
 *          Configuration without default values, using the
 *          <code>Configuration(boolean loadDefaults)</code> constructor with
 *          FALSE.
 */
public static void addMapper(Job job, Class<? extends Mapper> klass, Class<?> inputKeyClass, Class<?> inputValueClass, Class<?> outputKeyClass, Class<?> outputValueClass, Configuration mapperConf) throws IOException {
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    Chain.addMapper(false, job, klass, inputKeyClass, inputValueClass, outputKeyClass, outputValueClass, mapperConf);
}
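
A hedged usage sketch continuing the chain configured by setReducer above, with a hypothetical UppercaseMapper; as the javadoc notes, the output key/value classes passed for the last element added become the job's output classes.

// Sketch only; UppercaseMapper is a hypothetical Mapper run after the reducer.
ChainReducer.addMapper(job, UppercaseMapper.class,
    Text.class, IntWritable.class,   // input: what the previous chain element emits
    Text.class, IntWritable.class,   // output of this mapper (and of the job)
    new Configuration(false));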

18 Source : WordCount1Application.java
with Apache License 2.0
from ukihsoroy

/**
 * Define the Driver: encapsulates all the information of the MapReduce job
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. Create the Configuration
    Configuration configuration = new Configuration();
    // 2. Create a Job
    Job job = Job.getInstance(configuration, "wordcount");
    // 3. Set the job's driver class
    job.setJarByClass(WordCount1Application.class);
    // 4. Set the job's input path
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. Set the map-side parameters
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. Set the reduce-side parameters
    job.setReducerClass(WordCountReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. Set the job's output path
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. Submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

18 Source : Hdfs2Tg.java
with Apache License 2.0
from tigergraph

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "HDFS to TG");
    job.setJarByClass(Hdfs2Tg.class);
    job.setMapperClass(LineMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

18 Source : MaxTemperature.java
with MIT License
from Tianny

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Max Temperature");
    // Just pass a class to the setJarByClass method; Hadoop uses it to locate the JAR file that contains it
    job.setJarByClass(MaxTemperature.class);
    // Specify the input and output data paths
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);
    // Controls the output types of the reduce function; these must match what the Reducer class produces
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

18 Source : WordCount.java
with Apache License 2.0
from NJUJYB

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

18 Source : MultiFileWordCount.java
with Apache License 2.0
from NJUJYB

public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 2;
    }
    Job job = new Job(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);
    // set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);
    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);
    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}

18 Source : TestMRSequenceFileAsBinaryOutputFormat.java
with Apache License 2.0
from NJUJYB

public void testSequenceOutputClassDefaultsToMapRedOutputClass() throws IOException {
    Job job = Job.getInstance();
    // Setting Random class to test getSequenceFileOutput{Key,Value}Class
    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputValueClass(BooleanWritable.class);
    assertEquals("SequenceFileOutputKeyClass should default to outputKeyClass", FloatWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(job));
    assertEquals("SequenceFileOutputValueClass should default to " + "outputValueClass", BooleanWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(job));
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
    assertEquals("SequenceFileOutputKeyClass not updated", IntWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(job));
    assertEquals("SequenceFileOutputValueClass not updated", DoubleWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(job));
}

18 Source : WordCount.java
with Apache License 2.0
from naver

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

18 Source : Main.java
with Apache License 2.0
from lfz757077613

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(new Configuration(), "wordCount");
    // Set the main class of the jar
    job.setJarByClass(Main.class);
    // Set the mapper
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // Set the reducer
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // Run a local reduce first to cut down the amount of data sent over the network
    job.setCombinerClass(MyReducer.class);
    // Set the partitioner
    // not used for now
    // job.setPartitionerClass(MyPartitioner.class);
    // job.setNumReduceTasks(2);
    // Set the job's input and output paths; note the output directory must not already exist.
    // The output is a directory containing the results and a completion status file.
    FileInputFormat.setInputPaths(job, new Path("hdfs://localhost:8020/access.log"));
    FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:8020/output"));
    System.exit(job.waitForCompletion(true) ? 0 : -1);
}

18 Source : SparkUtil.java
with Apache License 2.0
from Kyligence

public static void setHadoopConfForCuboid(Job job, CubeSegment segment, String metaUrl) throws Exception {
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
}

18 Source : NetezzaExternalTableImportJob.java
with Apache License 2.0
from dkhadoop

/**
 * Set the mapper class implementation to use in the job, as well as any
 * related configuration (e.g., map output types).
 */
protected void configureMapper(Job job, String tableName, String tableClassName) throws ClassNotFoundException, IOException {
    super.configureMapper(job, tableName, tableClassName);
    job.setMapperClass(getMapperClass());
    if (isHCatJob) {
        LOG.info("Configuring mapper for HCatalog import job");
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(SqoopHCatUtilities.getImportValueClass());
        return;
    }
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(NullWritable.class);
}

18 Source : HBaseImportJob.java
with Apache License 2.0
from dkhadoop

@Override
protected void configureMapper(Job job, String tableName, String tableClassName) throws IOException {
    job.setOutputKeyClass(SqoopRecord.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapperClass(getMapperClass());
}

18 Source : OdpsImportJob.java
with Apache License 2.0
from aliyun

@Override
protected void configureMapper(Job job, String tableName, String tableClassName) {
    job.setOutputKeyClass(SqoopRecord.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapperClass(getMapperClass());
}

17 Source : TransformBaseRunner.java
with GNU General Public License v3.0
from wlhbdp

protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Run locally
    // TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, true);
    // Run on the cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    // this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    // job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}

17 Source : MinTemperature.java
with Apache License 2.0
from whirlys

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length != 2) {
        System.err.println("Usage: MinTemperature <input path> <output path>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(MinTemperature.class);
    job.setJobName("MapReduce experiment - weather dataset - minimum temperature");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TemperatureMapper.class);
    // Set a Combiner to reduce the amount of data transferred and improve efficiency
    // job.setCombinerClass(MinTemperatureReducer.class);
    job.setReducerClass(MinTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

17 Source : MaxTemperature.java
with Apache License 2.0
from whirlys

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(MaxTemperature.class);
    job.setJobName("MapReduce experiment - weather dataset - maximum temperature");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TemperatureMapper.class);
    // Set a Combiner to reduce the amount of data transferred and improve efficiency
    // job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

17 Source : AvgTemperature.java
with Apache License 2.0
from whirlys

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length != 2) {
        System.err.println("Usage: AvgTemperature <input path> <output path>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(AvgTemperature.class);
    job.setJobName("MapReduce experiment - weather dataset - average temperature");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TemperatureMapper.class);
    job.setReducerClass(AvgTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

17 Source : TeraChecksum.java
with Apache License 2.0
from NJUJYB

public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSum");
    job.setJarByClass(TeraChecksum.class);
    job.setMapperClass(ChecksumMapper.class);
    job.setReducerClass(ChecksumReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Unsigned16.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}

17 Source : TestMiniMRClientCluster.java
with Apache License 2.0
from NJUJYB

public static Job createJob() throws IOException {
    final Job baseJob = new Job(mrCluster.getConfig());
    baseJob.setOutputKeyClass(Text.class);
    baseJob.setOutputValueClass(IntWritable.class);
    baseJob.setMapperClass(MyMapper.class);
    baseJob.setReducerClass(MyReducer.class);
    baseJob.setNumReduceTasks(1);
    return baseJob;
}

17 Source : Export.java
with Apache License 2.0
from fengchen8086

/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJobName(NAME + "_" + tableName);
    job.setJarByClass(Export.class);
    // Set optional scan parameters
    Scan s = getConfiguredScanForJob(conf, args);
    IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
    // No reducers.  Just write straight to output files.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Result.class);
    // job conf doesn't contain the conf so doesn't have a default fs.
    FileOutputFormat.setOutputPath(job, outputDir);
    return job;
}

17 Source : CellCounter.java
with Apache License 2.0
from fengchen8086

/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    String reportSeparatorString = (args.length > 2) ? args[2] : ":";
    conf.set("ReportSeparator", reportSeparatorString);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(CellCounter.class);
    Scan scan = getConfiguredScanForJob(conf, args);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, CellCounterMapper.class, ImmutableBytesWritable.class, Result.class, job);
    job.setNumReduceTasks(1);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setReducerClass(IntSumReducer.class);
    return job;
}

17 Source : MySQLDumpImportJob.java
with Apache License 2.0
from dkhadoop

/**
 * Set the mapper class implementation to use in the job,
 * as well as any related configuration (e.g., map output types).
 */
protected void configureMapper(Job job, String tableName, String tableClassName) throws ClassNotFoundException, IOException {
    job.setMapperClass(getMapperClass());
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(NullWritable.class);
}

17 Source : ActiveUserRunner.java
with Apache License 2.0
from bjmashibing

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Initialize the arguments
    this.processArgs(conf, args);
    // Create the job
    Job job = Job.getInstance(conf, "active_user");
    // Set the job's configuration parameters
    job.setJarByClass(ActiveUserRunner.class);
    // hbase input mapper parameters
    // 1. Run locally
    TableMapReduceUtil.initTableMapperJob(this.initScans(job), ActiveUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);
    // 2. Run on the cluster
    // TableMapReduceUtil.initTableMapperJob(null, ActiveUserMapper.class,
    // StatsUserDimension.class, TimeOutputValue.class, job);
    // Set the reducer parameters
    job.setReducerClass(ActiveUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);
    // Set the output parameters
    job.setOutputFormatClass(TransformerOutputFormat.class);
    // Start time in milliseconds
    long startTime = System.currentTimeMillis();
    try {
        return job.waitForCompletion(true) ? 0 : -1;
    } finally {
        // End time in milliseconds
        long endTime = System.currentTimeMillis();
        logger.info("Job<" + job.getJobName() + "> succeeded: " + job.isSuccessful() + "; start time: " + startTime + "; end time: " + endTime + "; elapsed: " + (endTime - startTime) + "ms");
    }
}

17 Source : TransformerBaseRunner.java
with Apache License 2.0
from bjmashibing

/**
 * Create the job
 *
 * @param conf
 * @return
 * @throws IOException
 */
protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Run locally
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    // Run on the cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    // this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    // job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}

16 Source : DBCountPageView.java
with Apache License 2.0
from NJUJYB

@Override
public // Usage DBCountPageView [driverClass dburl]
int run(String[] args) throws Exception {
    String driverClassName = DRIVER_CLASS;
    String url = DB_URL;
    if (args.length > 1) {
        driverClassName = args[0];
        url = args[1];
    }
    initialize(driverClassName, url);
    Configuration conf = getConf();
    DBConfiguration.configureDB(conf, driverClassName, url);
    Job job = new Job(conf);
    job.setJobName("Count Pageviews of URLs");
    job.setJarByClass(DBCountPageView.class);
    job.setMapperClass(PageviewMapper.class);
    job.setCombinerClass(LongSumReducer.class);
    job.setReducerClass(PageviewReducer.class);
    DBInputFormat.setInput(job, AccessRecord.class, "Access", null, "url", AccessFieldNames);
    DBOutputFormat.setOutput(job, "Pageview", PageviewFieldNames);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(PageviewRecord.class);
    job.setOutputValueClass(NullWritable.class);
    int ret;
    try {
        ret = job.waitForCompletion(true) ? 0 : 1;
        boolean correct = verify();
        if (!correct) {
            throw new RuntimeException("Evaluation was not correct!");
        }
    } finally {
        shutdown();
    }
    return ret;
}

16 Source : TestMiniMRClientCluster.java
with Apache License 2.0
from naver

public static Job createJob() throws IOException {
    final Job baseJob = Job.getInstance(mrCluster.getConfig());
    baseJob.setOutputKeyClass(Text.class);
    baseJob.setOutputValueClass(IntWritable.class);
    baseJob.setMapperClass(MyMapper.class);
    baseJob.setReducerClass(MyReducer.class);
    baseJob.setNumReduceTasks(1);
    return baseJob;
}

16 Source : DataDrivenImportJob.java
with Apache License 2.0
from dkhadoop

@Override
protected void configureMapper(Job job, String tableName, String tableClassName) throws IOException {
    if (isHCatJob) {
        LOG.info("Configuring mapper for HCatalog import job");
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(SqoopHCatUtilities.getImportValueClass());
        job.setMapperClass(SqoopHCatUtilities.getImportMapperClass());
        return;
    }
    if (options.getTargetDir() != null && options.getTargetDir().contains("@")) {
        job.setOutputFormatClass(EsOutputFormat.class);
        job.setOutputKeyClass(NullWritable.class);
        job.setOutputValueClass(Text.class);
    } else if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
        // For text files, specify these as the output types; for
        // other types, we just use the defaults.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
    } else if (options.getFileLayout() == SqoopOptions.FileLayout.AvroDataFile) {
        Schema schema = generateAvroSchema(tableName);
        try {
            writeAvroSchema(schema);
        } catch (final IOException e) {
            LOG.error("Error while writing Avro schema.", e);
        }
        AvroJob.setMapOutputSchema(job.getConfiguration(), schema);
    } else if (options.getFileLayout() == SqoopOptions.FileLayout.ParquetFile) {
        Configuration conf = job.getConfiguration();
        // An Avro schema is required for creating a dataset that manages
        // Parquet data records. The import will fail, if schema is invalid.
        Schema schema = generateAvroSchema(tableName);
        String uri = getKiteUri(conf, tableName);
        ParquetJob.configureImportJob(conf, schema, uri, options.isAppendMode(), options.doHiveImport() && options.doOverwriteHiveTable());
    }
    job.setMapperClass(getMapperClass());
}

16 Source : NewInstallUserRunner.java
with Apache License 2.0
from bjmashibing

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Process the arguments
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "new_install_user");
    job.setJarByClass(NewInstallUserRunner.class);
    // Run locally
    TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);
    // Run on the cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job);
    job.setReducerClass(NewInstallUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);
    // job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TransformerOutputFormat.class);
    if (job.waitForCompletion(true)) {
        // Succeeded; now compute the total number of users
        this.calculateTotalUsers(conf);
        return 0;
    } else {
        return -1;
    }
}

16 Source : DataDrivenImportJob.java
with Apache License 2.0
from aliyun

@Override
protected void configureMapper(Job job, String tableName, String tableClassName) throws IOException {
    if (isHCatJob) {
        LOG.info("Configuring mapper for HCatalog import job");
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(SqoopHCatUtilities.getImportValueClass());
        job.setMapperClass(SqoopHCatUtilities.getImportMapperClass());
        return;
    }
    if (options.getFileLayout() == SqoopOptions.FileLayout.TextFile) {
        // For text files, specify these as the output types; for
        // other types, we just use the defaults.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NullWritable.class);
    } else if (options.getFileLayout() == SqoopOptions.FileLayout.AvroDataFile) {
        final String schemaNameOverride = null;
        Schema schema = generateAvroSchema(tableName, schemaNameOverride);
        try {
            writeAvroSchema(schema);
        } catch (final IOException e) {
            LOG.error("Error while writing Avro schema.", e);
        }
        AvroJob.setMapOutputSchema(job.getConfiguration(), schema);
    } else if (options.getFileLayout() == SqoopOptions.FileLayout.ParquetFile) {
        JobConf conf = (JobConf) job.getConfiguration();
        // Kite SDK requires an Avro schema to represent the data structure of
        // the target dataset. If the schema name equals the generated Java class name,
        // the import will fail. So we use the table name as the schema name and add a
        // prefix "codegen_" to the generated Java class to avoid the conflict.
        final String schemaNameOverride = tableName;
        Schema schema = generateAvroSchema(tableName, schemaNameOverride);
        String uri = getKiteUri(conf, tableName);
        ParquetJob.WriteMode writeMode;
        if (options.doHiveImport()) {
            if (options.doOverwriteHiveTable()) {
                writeMode = ParquetJob.WriteMode.OVERWRITE;
            } else {
                writeMode = ParquetJob.WriteMode.APPEND;
                if (Datasets.exists(uri)) {
                    LOG.warn("Target Hive table '" + tableName + "' exists! Sqoop will " + "append data into the existing Hive table. Consider using " + "--hive-overwrite, if you do NOT intend to do appending.");
                }
            }
        } else {
            // Note that there is no such an import argument for overwriting HDFS
            // dataset, so overwrite mode is not supported yet.
            // Sqoop's append mode means to merge two independent datasets. We
            // choose DEFAULT as write mode.
            writeMode = ParquetJob.WriteMode.DEFAULT;
        }
        ParquetJob.configureImportJob(conf, schema, uri, writeMode);
    }
    job.setMapperClass(getMapperClass());
}

15 Source : WordMean.java
with Apache License 2.0
from pravega

@Override
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        System.err.println("Usage: wordmean <dummy_hdfs> <uri> <scope> <stream> <out>");
        return 0;
    }
    Configuration conf = getConf();
    conf.setStrings("input.pravega.uri", args[1]);
    conf.setStrings("input.pravega.scope", args[2]);
    conf.setStrings("input.pravega.stream", args[3]);
    conf.setStrings("input.pravega.deserializer", TextSerializer.clreplaced.getName());
    Job job = Job.getInstance(conf, "word mean");
    job.setJarByClreplaced(WordMean.clreplaced);
    job.setMapperClreplaced(WordMeanMapper.clreplaced);
    job.setCombinerClreplaced(WordMeanReducer.clreplaced);
    job.setReducerClreplaced(WordMeanReducer.clreplaced);
    job.setOutputKeyClreplaced(Text.clreplaced);
    job.setOutputValueClreplaced(LongWritable.clreplaced);
    job.setInputFormatClreplaced(PravegaInputFormat.clreplaced);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[4]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    mean = readAndCalcMean(outputpath, conf);
    return (result ? 0 : 1);
}

15 Source : TestMRJobs.java
with Apache License 2.0
from NJUJYB

protected Job runFailingMapperJob() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration myConf = new Configuration(mrCluster.getConfig());
    myConf.setInt(MRJobConfig.NUM_MAPS, 1);
    // reduce the number of attempts
    myConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
    Job job = new Job(myConf);
    job.setJarByClass(FailingMapper.class);
    job.setJobName("failmapper");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(FailingMapper.class);
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_ROOT_DIR, "failmapper-output"));
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertFalse(succeeded);
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
    return job;
}

15 Source : WikiDriver.java
with Apache License 2.0
from mumuhadoop

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    if (args.length != 3) {
        System.out.println("useage: <input dir> <temp dir> <output dir>");
        System.exit(1);
    }
    // OperatingFiles(args[0], args[1]);
    String input = args[0];
    String tempOutput = args[1];
    String output = args[2];
    Configuration job1Conf = new Configuration();
    Job job1 = new Job(job1Conf, "job1");
    job1.setJarByClass(WikiDriver.class);
    job1.setMapperClass(WikipediaToItemPrefsMapper.class);
    job1.setReducerClass(WikipediaToUserVectorReducer.class);
    job1.setMapOutputKeyClass(VarLongWritable.class);
    job1.setMapOutputValueClass(VarLongWritable.class);
    // Set job1's output file format to SequenceFileOutputFormat
    job1.setInputFormatClass(TextInputFormat.class);
    job1.setOutputFormatClass(SequenceFileOutputFormat.class);
    job1.setOutputKeyClass(VarLongWritable.class);
    job1.setOutputValueClass(VectorWritable.class);
    FileInputFormat.addInputPath(job1, new Path(input));
    FileOutputFormat.setOutputPath(job1, new Path(tempOutput));
    job1.waitForCompletion(true);
    display(tempOutput, "SequenceFile");
    Configuration job2Conf = new Configuration();
    Job job2 = new Job(job2Conf, "job2");
    job2.setJarByClass(WikiDriver.class);
    job2.setMapperClass(UserVectorToCooccurrenceMapper.class);
    job2.setReducerClass(UserVectorToCooccurrenceReducer.class);
    job2.setMapOutputKeyClass(IntWritable.class);
    job2.setMapOutputValueClass(IntWritable.class);
    job2.setOutputKeyClass(IntWritable.class);
    job2.setOutputValueClass(VectorWritable.class);
    // Set job2's input file format to SequenceFileInputFormat
    job2.setInputFormatClass(SequenceFileInputFormat.class);
    job2.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job2, new Path(tempOutput));
    FileOutputFormat.setOutputPath(job2, new Path(output));
    job2.waitForCompletion(true);
    display(output, null);
}

14 Source : WordStandardDeviation.java
with Apache License 2.0
from NJUJYB

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordstddev <in> <out>");
        return 0;
    }
    Configuration conf = getConf();
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word stddev");
    job.setJarByClass(WordStandardDeviation.class);
    job.setMapperClass(WordStandardDeviationMapper.class);
    job.setCombinerClass(WordStandardDeviationReducer.class);
    job.setReducerClass(WordStandardDeviationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    // read output and calculate standard deviation
    stddev = readAndCalcStdDev(outputpath, conf);
    return (result ? 0 : 1);
}

14 Source : WordMedian.java
with Apache License 2.0
from NJUJYB

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }
    setConf(new Configuration());
    Configuration conf = getConf();
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);
    // Wait for JOB 1 -- get middle value to check for Median
    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()).findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));
    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
    return (result ? 0 : 1);
}

14 Source : WordMean.java
with Apache License 2.0
from NJUJYB

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmean <in> <out>");
        return 0;
    }
    Configuration conf = getConf();
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word mean");
    job.setJarByClass(WordMean.class);
    job.setMapperClass(WordMeanMapper.class);
    job.setCombinerClass(WordMeanReducer.class);
    job.setReducerClass(WordMeanReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    mean = readAndCalcMean(outputpath, conf);
    return (result ? 0 : 1);
}

14 Source : BaileyBorweinPlouffe.java
with Apache License 2.0
from NJUJYB

/**
 * Create and setup a job
 */
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = new Job(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);
    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);
    // setup input
    job.setInputFormatClass(BbpInputFormat.class);
    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);
    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}

14 Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB

/**
 * Creates a simple fail job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple kill job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createKillJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("Kill-Job");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(KillMapper.class);
    theJob.setReducerClass(Reducer.class);
    theJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    return theJob;
}

14 Source : TestMRJobs.java
with Apache License 2.0
from naver

protected Job runFailingMapperJob() throws IOException, InterruptedException, ClassNotFoundException {
    Configuration myConf = new Configuration(mrCluster.getConfig());
    myConf.setInt(MRJobConfig.NUM_MAPS, 1);
    // reduce the number of attempts
    myConf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
    Job job = Job.getInstance(myConf);
    job.setJarByClass(FailingMapper.class);
    job.setJobName("failmapper");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RandomInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(FailingMapper.class);
    job.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_ROOT_DIR, "failmapper-output"));
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    boolean succeeded = job.waitForCompletion(true);
    Assert.assertFalse(succeeded);
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
    return job;
}

14 Source : ImputationJob.java
with GNU Affero General Public License v3.0
from genepi

@Override
public void setupJob(Job job) {
    NLineInputFormat.setNumLinesPerSplit(job, 1);
    job.setMapperClass(ImputationMapper.class);
    job.setInputFormatClass(NLineInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setNumReduceTasks(0);
}

13 Source : ReduceJobsConfiguration.java
with Apache License 2.0
from WinterChenS

/**
 * Get the configuration for the word-count job
 *
 * @param jobName
 * @param inputPath
 * @param outputPath
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public void getWordCountJobsConf(String jobName, String inputPath, String outputPath) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = getConfiguration();
    Job job = Job.getInstance(conf, jobName);
    job.setMapperClass(WordMapper.class);
    job.setCombinerClass(WordReduce.class);
    job.setJarByClass(HadoopDemoApplication.class);
    job.setReducerClass(WordReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Settings for merging small files
    // job.setInputFormatClass(CombineTextInputFormat.class);
    // maximum split size
    // CombineTextInputFormat.setMaxInputSplitSize(job, 4 * 1024 * 1024);
    // minimum split size
    // CombineTextInputFormat.setMinInputSplitSize(job, 2 * 1024 * 1024);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.waitForCompletion(true);
}

13 Source : IndexTool.java
with Apache License 2.0
from rayokota

/**
 * Uses the HBase Front Door Api to write to index table. Submits the job and either returns or
 * waits for the job completion based on runForeground parameter.
 *
 * @param job job
 * @param outputPath output path
 * @param runForeground - if true, waits for job completion, else submits and returns
 *            immediately.
 * @throws Exception
 */
private void configureSubmittableJobUsingDirectApi(Job job, Path outputPath, TableName outputTableName, boolean skipDependencyJars, boolean runForeground) throws Exception {
    job.setMapperClass(getDirectMapperClass());
    job.setReducerClass(getDirectReducerClass());
    Configuration conf = job.getConfiguration();
    HBaseConfiguration.merge(conf, HBaseConfiguration.create(conf));
    conf.set(TableOutputFormat.OUTPUT_TABLE, outputTableName.getNameAsString());
    // Set the Output classes
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    if (!skipDependencyJars) {
        TableMapReduceUtil.addDependencyJars(job);
    }
    job.setNumReduceTasks(1);
    if (!runForeground) {
        LOG.info("Running Index Build in Background - Submit async and exit");
        job.submit();
        return;
    }
    LOG.info("Running Index Build in Foreground. Waits for the build to complete. This may take a long time!.");
    boolean result = job.waitForCompletion(true);
    if (!result) {
        LOG.error("IndexTool job failed!");
        throw new Exception("IndexTool job failed: " + job.toString());
    }
    FileSystem.get(conf).delete(outputPath, true);
}

13 Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB

/**
 * Creates a simple copy job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    conf.setInt(MRJobConfig.NUM_MAPS, 3);
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("DataMoveJob");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(DataCopyMapper.class);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    theJob.setReducerClass(DataCopyReducer.class);
    theJob.setNumReduceTasks(1);
    return theJob;
}

13 Source : TestMultipleInputs.java
with Apache License 2.0
from NJUJYB

@Test
public void testDoMultipleInputs() throws IOException {
    Path in1Dir = getDir(IN1_DIR);
    Path in2Dir = getDir(IN2_DIR);
    Path outDir = getDir(OUT_DIR);
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(outDir, true);
    DataOutputStream file1 = fs.create(new Path(in1Dir, "part-0"));
    file1.writeBytes("a\nb\nc\nd\ne");
    file1.close();
    // write tab delimited to second file because we're doing
    // KeyValueInputFormat
    DataOutputStream file2 = fs.create(new Path(in2Dir, "part-0"));
    file2.writeBytes("a\tblah\nb\tblah\nc\tblah\nd\tblah\ne\tblah");
    file2.close();
    Job job = Job.getInstance(conf);
    job.setJobName("mi");
    MultipleInputs.addInputPath(job, in1Dir, TextInputFormat.class, MapClass.class);
    MultipleInputs.addInputPath(job, in2Dir, KeyValueTextInputFormat.class, KeyValueMapClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(ReducerClass.class);
    FileOutputFormat.setOutputPath(job, outDir);
    boolean success = false;
    try {
        success = job.waitForCompletion(true);
    } catch (InterruptedException ie) {
        throw new RuntimeException(ie);
    } catch (ClassNotFoundException instante) {
        throw new RuntimeException(instante);
    }
    if (!success)
        throw new RuntimeException("Job failed!");
    // copy bytes a bunch of times for the ease of readLine() - whatever
    BufferedReader output = new BufferedReader(new InputStreamReader(fs.open(new Path(outDir, "part-r-00000"))));
    // reducer should have counted one key from each file
    assertTrue(output.readLine().equals("a 2"));
    assertTrue(output.readLine().equals("b 2"));
    assertTrue(output.readLine().equals("c 2"));
    assertTrue(output.readLine().equals("d 2"));
    assertTrue(output.readLine().equals("e 2"));
}

13 Source : WordStandardDeviation.java
with Apache License 2.0
from naver

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordstddev <in> <out>");
        return 0;
    }
    Configuration conf = getConf();
    Job job = Job.getInstance(conf, "word stddev");
    job.setJarByClass(WordStandardDeviation.class);
    job.setMapperClass(WordStandardDeviationMapper.class);
    job.setCombinerClass(WordStandardDeviationReducer.class);
    job.setReducerClass(WordStandardDeviationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    // read output and calculate standard deviation
    stddev = readAndCalcStdDev(outputpath, conf);
    return (result ? 0 : 1);
}

13 Source : WordMedian.java
with Apache License 2.0
from naver

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }
    setConf(new Configuration());
    Configuration conf = getConf();
    Job job = Job.getInstance(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);
    // Wait for JOB 1 -- get middle value to check for Median
    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()).findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));
    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
    return (result ? 0 : 1);
}

13 Source : WordMean.java
with Apache License 2.0
from naver

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmean <in> <out>");
        return 0;
    }
    Configuration conf = getConf();
    Job job = Job.getInstance(conf, "word mean");
    job.setJarByClass(WordMean.class);
    job.setMapperClass(WordMeanMapper.class);
    job.setCombinerClass(WordMeanReducer.class);
    job.setReducerClass(WordMeanReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    mean = readAndCalcMean(outputpath, conf);
    return (result ? 0 : 1);
}

13 Source : MultiFileWordCount.java
with Apache License 2.0
from naver

public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 2;
    }
    Job job = Job.getInstance(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);
    // set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);
    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);
    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}

13 Source : BaileyBorweinPlouffe.java
with Apache License 2.0
from naver

/**
 * Create and setup a job
 */
private static Job createJob(String name, Configuration conf) throws IOException {
    final Job job = Job.getInstance(conf, NAME + "_" + name);
    final Configuration jobconf = job.getConfiguration();
    job.setJarByClass(BaileyBorweinPlouffe.class);
    // setup mapper
    job.setMapperClass(BbpMapper.class);
    job.setMapOutputKeyClass(LongWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);
    // setup reducer
    job.setReducerClass(BbpReducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(BytesWritable.class);
    job.setNumReduceTasks(1);
    // setup input
    job.setInputFormatClass(BbpInputFormat.class);
    // disable task timeout
    jobconf.setLong(MRJobConfig.TASK_TIMEOUT, 0);
    // do not use speculative execution
    jobconf.setBoolean(MRJobConfig.MAP_SPECULATIVE, false);
    jobconf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, false);
    return job;
}
