org.apache.hadoop.mapreduce.Job.setJarByClass()

Here are examples of the Java API org.apache.hadoop.mapreduce.Job.setJarByClass(), taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
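
For orientation, here is a minimal driver sketch showing where setJarByClass() fits in a typical job submission. MyDriver, MyMapper, and MyReducer are hypothetical classes used only for illustration: Hadoop inspects the class you pass to setJarByClass() to find the JAR that contains it, and ships that JAR to the cluster so the map and reduce tasks can load your code.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class MyDriver {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "my job");
        // Hadoop locates the JAR containing MyDriver and distributes it
        // to the cluster along with the job.
        job.setJarByClass(MyDriver.class);
        // MyMapper and MyReducer are hypothetical user-supplied classes.
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}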

242 Examples

17 Source : AggregateWordHistogram.java
with Apache License 2.0
from NJUJYB

/**
 * The main driver for word count map/reduce program. Invoke this method to
 * submit the map/reduce job.
 *
 * @throws IOException
 *           When there are communication problems with the job tracker.
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = ValueAggregatorJob.createValueAggregatorJob(args, new Class[] { AggregateWordHistogramPlugin.class });
    job.setJarByClass(AggregateWordCount.class);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    System.exit(ret);
}

17 Source : AggregateWordCount.java
with Apache License 2.0
from NJUJYB

/**
 * The main driver for word count map/reduce program. Invoke this method to
 * submit the map/reduce job.
 *
 * @throws IOException
 *           When there are communication problems with the job tracker.
 */
@SuppressWarnings("unchecked")
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = ValueAggregatorJob.createValueAggregatorJob(args, new Class[] { WordCountPlugInClass.class });
    job.setJarByClass(AggregateWordCount.class);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    System.exit(ret);
}

13 Source : DistCp.java
with Apache License 2.0
from NJUJYB

/**
 * Create Job object for submitting it, with all the configuration
 *
 * @return Reference to job object.
 * @throws IOException - Exception if any
 */
private Job createJob() throws IOException {
    String jobName = "distcp";
    String userChosenName = getConf().get(JobContext.JOB_NAME);
    if (userChosenName != null)
        jobName += ": " + userChosenName;
    Job job = Job.getInstance(getConf());
    job.setJobName(jobName);
    job.setInputFormatClass(DistCpUtils.getStrategy(getConf(), inputOptions));
    job.setJarByClass(CopyMapper.class);
    configureOutputFormat(job);
    job.setMapperClass(CopyMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormatClass(CopyOutputFormat.class);
    job.getConfiguration().set(JobContext.MAP_SPECULATIVE, "false");
    job.getConfiguration().set(JobContext.NUM_MAPS, String.valueOf(inputOptions.getMaxMaps()));
    if (inputOptions.getSslConfigurationFile() != null) {
        setupSSLConfig(job);
    }
    inputOptions.appendToConf(job.getConfiguration());
    return job;
}

13 Source : MultiFileWordCount.java
with Apache License 2.0
from NJUJYB

public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 2;
    }
    Job job = new Job(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);
    // set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);
    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);
    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}

13 Source : TestLineRecordReaderJobs.java
with Apache License 2.0
from NJUJYB

/**
 * Creates and runs an MR job
 *
 * @param conf
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void createAndRunJob(Configuration conf) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf);
    job.setJarByClass(TestLineRecordReaderJobs.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    FileInputFormat.addInputPath(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.waitForCompletion(true);
}

13 Source : Main.java
with Apache License 2.0
from lfz757077613

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(new Configuration(), "wordCount");
    // Set the main class of the jar
    job.setJarByClass(Main.class);
    // Set the mapper
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // Set the reducer
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // Run a local reduce first (a combiner) to cut down the amount of data sent to the reducers
    job.setCombinerClass(MyReducer.class);
    // Set the partitioner
    // Not used for now
    // job.setPartitionerClass(MyPartitioner.class);
    // job.setNumReduceTasks(2);
    // Set the job's input and output paths. Note that the output path must not already exist;
    // the output is a directory holding the results and the job's completion status.
    FileInputFormat.setInputPaths(job, new Path("hdfs://localhost:8020/access.log"));
    FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:8020/output"));
    System.exit(job.waitForCompletion(true) ? 0 : -1);
}

13 Source : SampleUploader.java
with Apache License 2.0
from fengchen8086

/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(Uploader.class);
    // No reducers.  Just write straight to table.  Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}

12 Source : TransformBaseRunner.java
with GNU General Public License v3.0
from wlhbdp

protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Run locally
    // TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, true);
    // Run on the cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    // this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    // job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}

12 Source : WordCount1Application.java
with Apache License 2.0
from ukihsoroy

/**
 * Define the Driver: it encapsulates all the information of the MapReduce job
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. Create the Configuration
    Configuration configuration = new Configuration();
    // 2. Create a Job
    Job job = Job.getInstance(configuration, "wordcount");
    // 3. Set the job's driver class
    job.setJarByClass(WordCount1Application.class);
    // 4. Set the job's input path
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. Set the map-side parameters
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. Set the reduce-side parameters
    job.setReducerClass(WordCountReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. Set the job's output path
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. Submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

12 Source : AveragePageCount.java
with MIT License
from PacktPublishing

public static void main(String[] args) throws Exception {
    Configuration con = new Configuration();
    Job bookJob = Job.getInstance(con, "Average Page Count");
    bookJob.setJarByClass(AveragePageCount.class);
    bookJob.setMapperClass(TextMapper.class);
    bookJob.setReducerClass(AverageReduce.class);
    bookJob.setOutputKeyClass(Text.class);
    bookJob.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(bookJob, new Path("C:/Hadoop/books.txt"));
    FileOutputFormat.setOutputPath(bookJob, new Path("C:/Hadoop/BookOutput"));
    if (bookJob.waitForCompletion(true)) {
        System.exit(0);
    }
}

12 Source : WordCount.java
with Apache License 2.0
from NJUJYB

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

12 Source : TeraChecksum.java
with Apache License 2.0
from NJUJYB

public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    TeraInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setJobName("TeraSum");
    job.setJarByClass(TeraChecksum.class);
    job.setMapperClass(ChecksumMapper.class);
    job.setReducerClass(ChecksumReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Unsigned16.class);
    // force a single reducer
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TeraInputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}

12 Source : TransformerBaseRunner.java
with Apache License 2.0
from bjmashibing

/**
 * Create the job
 *
 * @param conf
 * @return
 * @throws IOException
 */
protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Run locally
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    // Run on the cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    // this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    // job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}

11 Source : MinTemperature.java
with Apache License 2.0
from whirlys

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length != 2) {
        System.err.println("Usage: MinTemperature <input path> <output path>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(MinTemperature.class);
    job.setJobName("MapReduce experiment - weather dataset - minimum temperature");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TemperatureMapper.class);
    // Set a Combiner to reduce the amount of data transferred and improve efficiency
    // job.setCombinerClass(MinTemperatureReducer.class);
    job.setReducerClass(MinTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

11 Source : MaxTemperature.java
with Apache License 2.0
from whirlys

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(MaxTemperature.class);
    job.setJobName("MapReduce experiment - weather dataset - maximum temperature");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TemperatureMapper.class);
    // Set a Combiner to reduce the amount of data transferred and improve efficiency
    // job.setCombinerClass(MaxTemperatureReducer.class);
    job.setReducerClass(MaxTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

11 Source : AvgTemperature.java
with Apache License 2.0
from whirlys

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    if (args.length != 2) {
        System.err.println("Usage: AvgTemperature <input path> <output path>");
        System.exit(-1);
    }
    Job job = Job.getInstance();
    job.setJarByClass(AvgTemperature.class);
    job.setJobName("MapReduce experiment - weather dataset - average temperature");
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(TemperatureMapper.class);
    job.setReducerClass(AvgTemperatureReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

11 Source : Hdfs2Tg.java
with Apache License 2.0
from tigergraph

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "HDFS to TG");
    job.setJarByClass(Hdfs2Tg.class);
    job.setMapperClass(LineMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

11 Source : MaxTemperature.java
with MIT License
from Tianny

public static void main(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: MaxTemperature <input path> <output path>");
        System.exit(-1);
    }
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "Max Temperature");
    // Pass any class to setJarByClass; Hadoop uses it to locate the JAR file that contains it
    job.setJarByClass(MaxTemperature.class);
    // Specify the input and output paths
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapperClass(MaxTemperatureMapper.class);
    job.setReducerClass(MaxTemperatureReducer.class);
    // Control the output types of the reduce function; they must match what the Reducer class produces
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

11 Source : CompressionEmulationUtil.java
with Apache License 2.0
from NJUJYB

/**
 * Configure the {@link Job} for enabling compression emulation.
 */
static void configure(final Job job) throws IOException, InterruptedException, ClassNotFoundException {
    // set the random text mapper
    job.setMapperClass(RandomTextDataMapper.class);
    job.setNumReduceTasks(0);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setInputFormatClass(GenDataFormat.class);
    job.setJarByClass(GenerateData.class);
    // set the output compression true
    FileOutputFormat.setCompressOutput(job, true);
    try {
        FileInputFormat.addInputPath(job, new Path("ignored"));
    } catch (IOException e) {
        LOG.error("Error while adding input path ", e);
    }
}

11 Source : TestMiniMRChildTask.java
with Apache License 2.0
from NJUJYB

/**
 * Launch tests
 * @param conf Configuration of the mapreduce job.
 * @param inDir input path
 * @param outDir output path
 * @param input Input text
 * @throws IOException
 */
public void launchTest(JobConf conf, Path inDir, Path outDir, String input) throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem outFs = outDir.getFileSystem(conf);
    // Launch job with default option for temp dir.
    // i.e. temp dir is ./tmp
    Job job = new Job(conf);
    job.addFileToClassPath(APP_JAR);
    job.setJarByClass(TestMiniMRChildTask.class);
    // speed up failures
    job.setMaxMapAttempts(1);
    boolean succeeded = job.waitForCompletion(true);
    assertTrue(succeeded);
    outFs.delete(outDir, true);
}

11 Source : WordCount.java
with Apache License 2.0
from naver

public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("Usage: wordcount <in> [<in>...] <out>");
        System.exit(2);
    }
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(WordCount.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

11 Source : HCatalogTestUtils.java
with Apache License 2.0
from dkhadoop

/**
 * Run a local map reduce job to read records from HCatalog table.
 * @param dbName
 * @param tableName
 * @param filter
 * @return
 * @throws Exception
 */
public List<HCatRecord> readHCatRecords(String dbName, String tableName, String filter) throws Exception {
    HCatReaderMapper.setReadRecordCount(0);
    recsRead.clear();
    // Configuration conf = new Configuration();
    Job job = new Job(conf, "HCatalog reader job");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatReaderMapper.class);
    job.getConfiguration().setInt(ConfigurationConstants.PROP_MAPRED_MAP_TASKS, 1);
    // input/output settings
    job.setInputFormatClass(HCatInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    HCatInputFormat.setInput(job, dbName, tableName).setFilter(filter);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(Text.class);
    job.setNumReduceTasks(0);
    Path path = new Path(fs.getWorkingDirectory(), "mapreduce/HCatTableIndexOutput");
    if (fs.exists(path)) {
        fs.delete(path, true);
    }
    FileOutputFormat.setOutputPath(job, path);
    job.waitForCompletion(true);
    LOG.info("Read " + HCatReaderMapper.readRecordCount + " records");
    return recsRead;
}

10 Source : AnalyserLogDataRunner.java
with GNU General Public License v3.0
from wlhbdp

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "analyser_logdata");
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);
    // Configure the reducer
    // 1. Run on the cluster as a packaged jar (requires addDependencyJars to be true, the default)
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, true);
    // 2. Run locally (requires addDependencyJars to be false)
    // TableMapReduceUtil
    // .initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null,
    // null, false);
    // Set the input paths
    job.setNumReduceTasks(0);
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}

10 Source : ReduceJobsConfiguration.java
with Apache License 2.0
from WinterChenS

/**
 * Get the configuration for the word-count job
 *
 * @param jobName
 * @param inputPath
 * @param outputPath
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public void getWordCountJobsConf(String jobName, String inputPath, String outputPath) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = getConfiguration();
    Job job = Job.getInstance(conf, jobName);
    job.setMapperClass(WordMapper.class);
    job.setCombinerClass(WordReduce.class);
    job.setJarByClass(HadoopDemoApplication.class);
    job.setReducerClass(WordReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Settings for merging small files
    // job.setInputFormatClass(CombineTextInputFormat.class);
    // Maximum split size
    // CombineTextInputFormat.setMaxInputSplitSize(job, 4 * 1024 * 1024);
    // Minimum split size
    // CombineTextInputFormat.setMinInputSplitSize(job, 2 * 1024 * 1024);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.waitForCompletion(true);
}

10 Source : WordMedian.java
with Apache License 2.0
from NJUJYB

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }
    setConf(new Configuration());
    Configuration conf = getConf();
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);
    // Wait for JOB 1 -- get middle value to check for Median
    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()).findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));
    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
    return (result ? 0 : 1);
}

10 Source : UserNamePermission.java
with Apache License 2.0
from NJUJYB

public static void main(String[] args) throws Exception {
    Path outDir = new Path("output");
    Configuration conf = new Configuration();
    Job job = new Job(conf, "user name check");
    job.setJarByClass(UserNamePermission.class);
    job.setMapperClass(UserNamePermission.UserNameMapper.class);
    job.setCombinerClass(UserNamePermission.UserNameReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(UserNamePermission.UserNameReducer.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, outDir);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

10 Source : MultiFileWordCount.java
with Apache License 2.0
from naver

public int run(String[] args) throws Exception {
    if (args.length < 2) {
        printUsage();
        return 2;
    }
    Job job = Job.getInstance(getConf());
    job.setJobName("MultiFileWordCount");
    job.setJarByClass(MultiFileWordCount.class);
    // set the InputFormat of the job to our InputFormat
    job.setInputFormatClass(MyInputFormat.class);
    // the keys are words (strings)
    job.setOutputKeyClass(Text.class);
    // the values are counts (ints)
    job.setOutputValueClass(IntWritable.class);
    // use the defined mapper
    job.setMapperClass(MapClass.class);
    // use the WordCount Reducer
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    FileInputFormat.addInputPaths(job, args[0]);
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    return job.waitForCompletion(true) ? 0 : 1;
}

10 Source : TestMiniMRChildTask.java
with Apache License 2.0
from naver

/**
 * Launch tests
 * @param conf Configuration of the mapreduce job.
 * @param inDir input path
 * @param outDir output path
 * @param input Input text
 * @throws IOException
 */
public void launchTest(JobConf conf, Path inDir, Path outDir, String input) throws IOException, InterruptedException, ClassNotFoundException {
    FileSystem outFs = outDir.getFileSystem(conf);
    // Launch job with default option for temp dir.
    // i.e. temp dir is ./tmp
    Job job = Job.getInstance(conf);
    job.addFileToClassPath(APP_JAR);
    job.setJarByClass(TestMiniMRChildTask.class);
    // speed up failures
    job.setMaxMapAttempts(1);
    boolean succeeded = job.waitForCompletion(true);
    assertTrue(succeeded);
    outFs.delete(outDir, true);
}

10 Source : MR_WLA.java
with GNU General Public License v3.0
from monsonlee

public int run(String[] args) throws Exception {
    String jobName = "wla_baidu";
    String inputPath = args[0];
    String outputPath = args[1];
    Path path = new Path(outputPath);
    // Delete the output directory
    path.getFileSystem(getConf()).delete(path, true);
    // 1. Organize all the code into a Topology-like class
    Job job = Job.getInstance(getConf(), jobName);
    // 2. To run as a packaged jar, the following line is required
    job.setJarByClass(MR_WLA.class);
    // 3. Specify the HDFS input
    FileInputFormat.setInputPaths(job, inputPath);
    // 4. Specify the map class
    job.setMapperClass(WLA_Mapper.class);
    // 5. Specify the types of the map output <key, value>
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // 6. Specify the reduce class
    job.setReducerClass(WLA_Reducer.class);
    // 7. Specify the types of the reduce output <key, value>
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // 8. Specify the HDFS output
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job.waitForCompletion(true) ? 0 : 1;
}

10 Source : Export.java
with Apache License 2.0
from fengchen8086

/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    String tableName = args[0];
    Path outputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJobName(NAME + "_" + tableName);
    job.setJarByClass(Export.class);
    // Set optional scan parameters
    Scan s = getConfiguredScanForJob(conf, args);
    IdentityTableMapper.initJob(tableName, s, IdentityTableMapper.class, job);
    // No reducers.  Just write straight to output files.
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Result.class);
    // job conf doesn't contain the conf so doesn't have a default fs.
    FileOutputFormat.setOutputPath(job, outputDir);
    return job;
}

10 Source : IntegrationTestLoadAndVerify.java
with Apache License 2.0
from fengchen8086

protected Job doLoad(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "load-output");
    LOG.info("Load output dir: " + outputDir);
    NMapInputFormat.setNumMapTasks(conf, conf.getInt(NUM_MAP_TASKS_KEY, NUM_MAP_TASKS_DEFAULT));
    conf.set(TABLE_NAME_KEY, htd.getTableName().getNameAsString());
    Job job = Job.getInstance(conf);
    job.setJobName(TEST_NAME + " Load for " + htd.getTableName());
    job.setJarByClass(this.getClass());
    setMapperClass(job);
    job.setInputFormatClass(NMapInputFormat.class);
    job.setNumReduceTasks(0);
    setJobScannerConf(job);
    FileOutputFormat.setOutputPath(job, outputDir);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
    TableMapReduceUtil.initCredentials(job);
    assertTrue(job.waitForCompletion(true));
    return job;
}

10 Source : NewInstallUserRunner.java
with Apache License 2.0
from bjmashibing

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Process the arguments
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "new_install_user");
    job.setJarByClass(NewInstallUserRunner.class);
    // Run locally
    TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);
    // Run on the cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job);
    job.setReducerClass(NewInstallUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);
    // job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TransformerOutputFormat.class);
    if (job.waitForCompletion(true)) {
        // On success, compute the total number of users
        this.calculateTotalUsers(conf);
        return 0;
    } else {
        return -1;
    }
}

10 Source : ActiveUserRunner.java
with Apache License 2.0
from bjmashibing

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Initialize the arguments
    this.processArgs(conf, args);
    // Create the job
    Job job = Job.getInstance(conf, "active_user");
    // Set the job's configuration parameters
    job.setJarByClass(ActiveUserRunner.class);
    // HBase input mapper parameters
    // 1. Run locally
    TableMapReduceUtil.initTableMapperJob(this.initScans(job), ActiveUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);
    // 2. Run on the cluster
    // TableMapReduceUtil.initTableMapperJob(null, ActiveUserMapper.class,
    // StatsUserDimension.class, TimeOutputValue.class, job);
    // Set the reducer parameters
    job.setReducerClass(ActiveUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);
    // Set the output parameters
    job.setOutputFormatClass(TransformerOutputFormat.class);
    // Start time in milliseconds
    long startTime = System.currentTimeMillis();
    try {
        return job.waitForCompletion(true) ? 0 : -1;
    } finally {
        // End time in milliseconds
        long endTime = System.currentTimeMillis();
        logger.info("Job<" + job.getJobName() + "> succeeded: " + job.isSuccessful() + "; start: " + startTime + "; end: " + endTime + "; elapsed: " + (endTime - startTime) + "ms");
    }
}

10 Source : AnalyserLogDataRunner.java
with Apache License 2.0
from bjmashibing

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "analyser_logdata");
    // Code needed to submit the job locally but run it on the cluster
    // File jarFile = EJob.createTempJar("target/classes");
    // ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
    // End of the local-submit, cluster-run code
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);
    // Configure the reducer
    // 1. Run on the cluster as a packaged jar (requires addDependencyJars to be true, the default)
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS,
    // null, job);
    // 2. Run locally (requires addDependencyJars to be false)
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, false);
    job.setNumReduceTasks(0);
    // Set the input paths
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}

10 Source : AnalyserLogDataRunner.java
with Apache License 2.0
from bjmashibing

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "analyser_logdata");
    job.setJarByClass(AnalyserLogDataRunner.class);
    job.setMapperClass(AnalyserLogDataMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Put.class);
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, false);
    job.setNumReduceTasks(0);
    // Set the input paths
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}

9 Source : TeraGen.java
with Apache License 2.0
from pravega

/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length != 5 && args.length != 6) {
        usage();
        return 2;
    }
    Path outputDir = new Path(args[1]);
    getConf().setStrings(OUTPUT_URI_STRING, args[2]);
    getConf().setStrings(OUTPUT_SCOPE_NAME, args[3]);
    getConf().setStrings(OUTPUT_STREAM_NAME, args[4]);
    getConf().setStrings(OUTPUT_STREAM_SEGMENTS, args[5]);
    getConf().setStrings(OUTPUT_DESERIALIZER, TextSerializer.class.getName());
    Job job = Job.getInstance(getConf());
    setNumberOfRows(job, parseHumanLong(args[0]));
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(PravegaFixedSegmentsOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}

9 Source : WordStandardDeviation.java
with Apache License 2.0
from NJUJYB

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordstddev <in> <out>");
        return 0;
    }
    Configuration conf = getConf();
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word stddev");
    job.setJarByClass(WordStandardDeviation.class);
    job.setMapperClass(WordStandardDeviationMapper.class);
    job.setCombinerClass(WordStandardDeviationReducer.class);
    job.setReducerClass(WordStandardDeviationReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    // read output and calculate standard deviation
    stddev = readAndCalcStdDev(outputpath, conf);
    return (result ? 0 : 1);
}

9 Source : WordMean.java
with Apache License 2.0
from NJUJYB

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmean <in> <out>");
        return 0;
    }
    Configuration conf = getConf();
    @SuppressWarnings("deprecation")
    Job job = new Job(conf, "word mean");
    job.setJarByClass(WordMean.class);
    job.setMapperClass(WordMeanMapper.class);
    job.setCombinerClass(WordMeanReducer.class);
    job.setReducerClass(WordMeanReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    mean = readAndCalcMean(outputpath, conf);
    return (result ? 0 : 1);
}

9 Source : TeraGen.java
with Apache License 2.0
from NJUJYB

/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    if (outputDir.getFileSystem(getConf()).exists(outputDir)) {
        throw new IOException("Output directory " + outputDir + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}

9 Source : TestMiniMRChildTask.java
with Apache License 2.0
from NJUJYB

void runTestTaskEnv(JobConf conf, Path inDir, Path outDir, boolean oldConfigs) throws IOException, InterruptedException, ClassNotFoundException {
    String input = "The input";
    configure(conf, inDir, outDir, input, EnvCheckMapper.class, EnvCheckReducer.class);
    // test
    // - new SET of new var (MY_PATH)
    // - set of old var (LANG)
    // - append to an old var from modified env (LD_LIBRARY_PATH)
    // - append to an old var from tt's env (PATH)
    // - append to a new var (NEW_PATH)
    String mapTaskEnvKey = JobConf.MAPRED_MAP_TASK_ENV;
    String reduceTaskEnvKey = JobConf.MAPRED_REDUCE_TASK_ENV;
    String mapTaskJavaOptsKey = JobConf.MAPRED_MAP_TASK_JAVA_OPTS;
    String reduceTaskJavaOptsKey = JobConf.MAPRED_REDUCE_TASK_JAVA_OPTS;
    String mapTaskJavaOpts = MAP_OPTS_VAL;
    String reduceTaskJavaOpts = REDUCE_OPTS_VAL;
    conf.setBoolean(OLD_CONFIGS, oldConfigs);
    if (oldConfigs) {
        mapTaskEnvKey = reduceTaskEnvKey = JobConf.MAPRED_TASK_ENV;
        mapTaskJavaOptsKey = reduceTaskJavaOptsKey = JobConf.MAPRED_TASK_JAVA_OPTS;
        mapTaskJavaOpts = reduceTaskJavaOpts = TASK_OPTS_VAL;
    }
    conf.set(mapTaskEnvKey, Shell.WINDOWS ? "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp," + "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp" : "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp," + "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp");
    conf.set(reduceTaskEnvKey, Shell.WINDOWS ? "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=%LD_LIBRARY_PATH%;/tmp," + "PATH=%PATH%;/tmp,NEW_PATH=%NEW_PATH%;/tmp" : "MY_PATH=/tmp,LANG=en_us_8859_1,LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/tmp," + "PATH=$PATH:/tmp,NEW_PATH=$NEW_PATH:/tmp");
    conf.set("path", System.getenv("PATH"));
    conf.set(mapTaskJavaOptsKey, mapTaskJavaOpts);
    conf.set(reduceTaskJavaOptsKey, reduceTaskJavaOpts);
    Job job = new Job(conf);
    job.addFileToClassPath(APP_JAR);
    job.setJarByClass(TestMiniMRChildTask.class);
    // speed up failures
    job.setMaxMapAttempts(1);
    boolean succeeded = job.waitForCompletion(true);
    assertTrue("The environment checker job failed.", succeeded);
}

9 Source : WordMedian.java
with Apache License 2.0
from naver

@Override
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println("Usage: wordmedian <in> <out>");
        return 0;
    }
    setConf(new Configuration());
    Configuration conf = getConf();
    Job job = Job.getInstance(conf, "word median");
    job.setJarByClass(WordMedian.class);
    job.setMapperClass(WordMedianMapper.class);
    job.setCombinerClass(WordMedianReducer.class);
    job.setReducerClass(WordMedianReducer.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    boolean result = job.waitForCompletion(true);
    // Wait for JOB 1 -- get middle value to check for Median
    long totalWords = job.getCounters().getGroup(TaskCounter.class.getCanonicalName()).findCounter("MAP_OUTPUT_RECORDS", "Map output records").getValue();
    int medianIndex1 = (int) Math.ceil((totalWords / 2.0));
    int medianIndex2 = (int) Math.floor((totalWords / 2.0));
    median = readAndFindMedian(args[1], medianIndex1, medianIndex2, conf);
    return (result ? 0 : 1);
}

9 Source : TeraGen.java
with Apache License 2.0
from naver

/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClreplacedNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}

9 Source : UserNamePermission.java
with Apache License 2.0
from naver

public static void main(String[] args) throws Exception {
    Path outDir = new Path("output");
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "user name check");
    job.setJarByClass(UserNamePermission.class);
    job.setMapperClass(UserNamePermission.UserNameMapper.class);
    job.setCombinerClass(UserNamePermission.UserNameReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(UserNamePermission.UserNameReducer.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, outDir);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

9 Source : IntegrationTestLoadAndVerify.java
with Apache License 2.0
from fengchen8086

protected void doVerify(Configuration conf, HTableDescriptor htd) throws Exception {
    Path outputDir = getTestDir(TEST_NAME, "verify-output");
    LOG.info("Verify output dir: " + outputDir);
    Job job = Job.getInstance(conf);
    job.setJarByClass(this.getClass());
    job.setJobName(TEST_NAME + " Verification for " + htd.getTableName());
    setJobScannerConf(job);
    Scan scan = new Scan();
    TableMapReduceUtil.initTableMapperJob(htd.getTableName().getNameAsString(), scan, VerifyMapper.class, BytesWritable.class, BytesWritable.class, job);
    TableMapReduceUtil.addDependencyJars(job.getConfiguration(), AbstractHBaseTool.class);
    int scannerCaching = conf.getInt("verify.scannercaching", SCANNER_CACHING);
    TableMapReduceUtil.setScannerCaching(job, scannerCaching);
    job.setReducerClass(VerifyReducer.class);
    job.setNumReduceTasks(conf.getInt(NUM_REDUCE_TASKS_KEY, NUM_REDUCE_TASKS_DEFAULT));
    FileOutputFormat.setOutputPath(job, outputDir);
    assertTrue(job.waitForCompletion(true));
    long numOutputRecords = job.getCounters().findCounter(Counters.ROWS_WRITTEN).getValue();
    assertEquals(0, numOutputRecords);
}

9 Source : IntegrationTestBulkLoad.java
with Apache License 2.0
from fengchen8086

private void runLinkedListMRJob(int iteration) throws Exception {
    String jobName = IntegrationTestBulkLoad.class.getSimpleName() + " - " + EnvironmentEdgeManager.currentTime();
    Configuration conf = new Configuration(util.getConfiguration());
    Path p = null;
    if (conf.get(ImportTsv.BULK_OUTPUT_CONF_KEY) == null) {
        p = util.getDataTestDirOnTestFS(getTablename() + "-" + iteration);
    } else {
        p = new Path(conf.get(ImportTsv.BULK_OUTPUT_CONF_KEY));
    }
    conf.setBoolean("mapreduce.map.speculative", false);
    conf.setBoolean("mapreduce.reduce.speculative", false);
    conf.setInt(ROUND_NUM_KEY, iteration);
    Job job = new Job(conf);
    job.setJobName(jobName);
    // set the input format so that we can create map tasks with no data input.
    job.setInputFormatClass(ITBulkLoadInputFormat.class);
    // Set the mapper classes.
    job.setMapperClass(LinkedListCreationMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    // Use the identity reducer
    // So nothing to do here.
    // Set this jar.
    job.setJarByClass(getClass());
    // Set where to place the hfiles.
    FileOutputFormat.setOutputPath(job, p);
    try (Connection conn = ConnectionFactory.createConnection(conf);
        Admin admin = conn.getAdmin();
        Table table = conn.getTable(getTablename());
        RegionLocator regionLocator = conn.getRegionLocator(getTablename())) {
        // Configure the partitioner and other things needed for HFileOutputFormat.
        HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
        // Run the job making sure it works.
        assertEquals(true, job.waitForCompletion(true));
        // Create a new loader.
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        // Load the HFiles in.
        loader.doBulkLoad(p, admin, table, regionLocator);
    }
    // Delete the files.
    util.getTestFileSystem().delete(p, true);
}

9 Source : HCatalogTestUtils.java
with Apache License 2.0
from dkhadoop

public List<HCatRecord> loadHCatTable(String dbName, String tableName, Map<String, String> partKeyMap, HCatSchema tblSchema, List<HCatRecord> records) throws Exception {
    Job job = new Job(conf, "HCat load job");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatWriterMapper.class);
    // Just write 10 lines to the file to drive the mapper
    Path path = new Path(fs.getWorkingDirectory(), "mapreduce/HCatTableIndexInput");
    job.getConfiguration().setInt(ConfigurationConstants.PROP_MAPRED_MAP_TASKS, 1);
    int writeCount = records.size();
    recsToLoad.clear();
    recsToLoad.addAll(records);
    createInputFile(path, writeCount);
    // input/output settings
    HCatWriterMapper.setWrittenRecordCount(0);
    FileInputFormat.setInputPaths(job, path);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HCatOutputFormat.class);
    OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partKeyMap);
    HCatOutputFormat.setOutput(job, outputJobInfo);
    HCatOutputFormat.setSchema(job, tblSchema);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    job.setNumReduceTasks(0);
    SqoopHCatUtilities.addJars(job, new SqoopOptions());
    boolean success = job.waitForCompletion(true);
    if (!success) {
        throw new IOException("Loading HCatalog table with test records failed");
    }
    utils.invokeOutputCommitterForLocalMode(job);
    LOG.info("Loaded " + HCatWriterMapper.writtenRecordCount + " records");
    return recsToLoad;
}

9 Source : SortTool.java
with Apache License 2.0
from apache

@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf(), this.getClass().getSimpleName());
    job.setJarByClass(this.getClass());
    if (job.getJar() == null) {
        log.error("M/R requires a jar file!  Run mvn package.");
        return 1;
    }
    job.setInputFormatClass(SequenceFileInputFormat.class);
    SequenceFileInputFormat.setInputPaths(job, seqFile);
    job.setPartitionerClass(org.apache.accumulo.core.client.mapreduce.lib.partition.KeyRangePartitioner.class);
    org.apache.accumulo.core.client.mapreduce.lib.partition.KeyRangePartitioner.setSplitFile(job, splitFile);
    job.setMapOutputKeyClass(Key.class);
    job.setMapOutputValueClass(Value.class);
    job.setNumReduceTasks(splits.size() + 1);
    job.setOutputFormatClass(org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat.class);
    org.apache.accumulo.core.client.mapreduce.AccumuloFileOutputFormat.setOutputPath(job, new Path(outputDir));
    job.waitForCompletion(true);
    return job.isSuccessful() ? 0 : 1;
}

9 Source : CharacterHistogram.java
with Apache License 2.0
from apache

public static void main(String[] args) throws Exception {
    Opts opts = new Opts();
    opts.parseArgs(CharacterHistogram.class.getName(), args);
    Job job = Job.getInstance(opts.getHadoopConfig());
    job.setJobName(CharacterHistogram.class.getSimpleName());
    job.setJarByClass(CharacterHistogram.class);
    job.setInputFormatClass(ChunkInputFormat.class);
    job.getConfiguration().set(VIS, opts.visibilities);
    job.setMapperClass(HistMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Mutation.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(AccumuloOutputFormat.class);
    AccumuloOutputFormat.configure().clientProperties(opts.getClientProperties()).defaultTable(opts.tableName).createTables(true);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

8 Source : HadoopTrainWorker.java
with MIT License
from yuantiku

public boolean hadoopTrain() {
    // LOG.info("getclass" + GenericsUtil.getClass(customParamsMap));
    // DefaultStringifier<Map<String, Object>> mapStringifier =
    // new DefaultStringifier<>(conf, GenericsUtil.getClass(customParamsMap));
    // String customParamsMapStr = mapStringifier.toString(customParamsMap);
    // conf.set("customParamsMap", customParamsMapStr);
    boolean success;
    String outputPath = null;
    try {
        conf.set("customParamsMap", encodeMap(customParamsMap));
        Job job = Job.getInstance(conf, user + " " + modelName + " training on hadoop");
        String trainDataPath = getTrainDataPath();
        job.setJarByClass(HadoopTrainWorker.class);
        job.setMapperClass(TrainMapper.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setReducerClass(TrainReducer.class);
        FileInputFormat.addInputPath(job, new Path(trainDataPath));
        outputPath = trainDataPath + "_temp_will_be_deleted";
        IFileSystem fs = FileSystemFactory.createFileSystem(new URI(getURI()));
        if (outputPath != null) {
            fs.delete(outputPath);
        }
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        job.setNumReduceTasks(slaveNum);
        success = job.waitForCompletion(true);
    } catch (Exception e) {
        success = false;
        LOG.error("hadoop train exception!", e);
    } finally {
        try {
            IFileSystem fs = FileSystemFactory.createFileSystem(new URI(getURI()));
            if (outputPath != null) {
                fs.delete(outputPath);
            }
        } catch (Exception e) {
            success = false;
            LOG.error("hadoop train exception!", e);
        }
    }
    return success;
}
