org.apache.hadoop.mapreduce.Job.getInstance()

Here are examples of the Java API org.apache.hadoop.mapreduce.Job.getInstance(), taken from open source projects; they are ranked by reader votes for usefulness and appropriateness.

314 Examples
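
Before the per-project examples, here is a minimal, self-contained driver sketch built around Job.getInstance(Configuration, String). The class name, job name, paths, and the identity Mapper and Reducer are illustrative placeholders rather than code from any of the projects below; Job.getInstance() with no arguments and Job.getInstance(Configuration) are the other commonly used overloads.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class PassThroughJobDriver {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Job.getInstance(Configuration, String) copies the configuration and sets the job name
        Job job = Job.getInstance(conf, "pass-through");
        job.setJarByClass(PassThroughJobDriver.class);
        // Identity mapper and reducer keep the sketch self-contained;
        // the default TextInputFormat emits <LongWritable, Text> records
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}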

19 Source : TransformBaseRunner.java
with GNU General Public License v3.0
from wlhbdp

protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Run locally
    // TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, true);
    // Run on the cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    // this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    // job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}

19 Source : AnalyserLogDataRunner.java
with GNU General Public License v3.0
from wlhbdp

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "replacedyser_logdata");
    job.setJarByClreplaced(replacedyserLogDataRunner.clreplaced);
    job.setMapperClreplaced(replacedyserLogDataMapper.clreplaced);
    job.setMapOutputKeyClreplaced(NullWritable.clreplaced);
    job.setMapOutputValueClreplaced(Put.clreplaced);
    // 设置reducer配置
    // 1集群上运行 打成jar运行  (要求addDependencyJars为true(默认true)
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, true);
    // 2本地运行 打成jar运行  (要求addDependencyJars为true(默认true)
    // TableMapReduceUtil
    // .initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null,
    // null, false);
    // 设置输入路径
    job.setNumReduceTasks(0);
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}

19 Source : ReduceJobsConfiguration.java
with Apache License 2.0
from WinterChenS

/**
 * Get the configuration for the word-count job
 *
 * @param jobName
 * @param inputPath
 * @param outputPath
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
public void getWordCountJobsConf(String jobName, String inputPath, String outputPath) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = getConfiguration();
    Job job = Job.getInstance(conf, jobName);
    job.setMapperClass(WordMapper.class);
    job.setCombinerClass(WordReduce.class);
    job.setJarByClass(HadoopDemoApplication.class);
    job.setReducerClass(WordReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    // Settings for combining small files
    // job.setInputFormatClass(CombineTextInputFormat.class);
    // Maximum split size
    // CombineTextInputFormat.setMaxInputSplitSize(job, 4 * 1024 * 1024);
    // Minimum split size
    // CombineTextInputFormat.setMinInputSplitSize(job, 2 * 1024 * 1024);
    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    job.waitForCompletion(true);
}

19 Source : StoreDiagJob.java
with Apache License 2.0
from steveloughran

public static boolean execute(Configuration conf, List<String> targets) throws IOException, ClassNotFoundException, InterruptedException {
    JobConf jobConf = new JobConf(conf);
    StoreDiagJob diagJob = new StoreDiagJob(jobConf);
    Job job = Job.getInstance(jobConf, "Store Diag");
    job.setJarByClass(StoreDiagJob.class);
    job.setMapperClass(DiagMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    String filename = UUID.randomUUID().toString();
    FileContext clusterFC = FileContext.getFileContext(jobConf);
    Path home = clusterFC.getHomeDirectory();
    Path jobdir = new Path(home, filename);
    Path srcFile = new Path(jobdir, "input.txt");
    Path destDir = new Path(jobdir, "output");
    // one entry per line
    try (FSDataOutputStream stream = clusterFC.create(srcFile, EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE), Options.CreateOpts.createParent())) {
        for (String target : targets) {
            stream.writeChars(target);
            stream.writeChar('\n');
        }
    }
    jobConf.set(DELIMITER, "\n");
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    FileInputFormat.addInputPath(job, srcFile);
    FileOutputFormat.setOutputPath(job, destDir);
    return job.waitForCompletion(true);
}

19 Source : WordMean.java
with Apache License 2.0
from pravega

@Override
public int run(String[] args) throws Exception {
    if (args.length != 5) {
        System.err.println("Usage: wordmean <dummy_hdfs> <uri> <scope> <stream> <out>");
        return 0;
    }
    Configuration conf = getConf();
    conf.setStrings("input.pravega.uri", args[1]);
    conf.setStrings("input.pravega.scope", args[2]);
    conf.setStrings("input.pravega.stream", args[3]);
    conf.setStrings("input.pravega.deserializer", TextSerializer.clreplaced.getName());
    Job job = Job.getInstance(conf, "word mean");
    job.setJarByClreplaced(WordMean.clreplaced);
    job.setMapperClreplaced(WordMeanMapper.clreplaced);
    job.setCombinerClreplaced(WordMeanReducer.clreplaced);
    job.setReducerClreplaced(WordMeanReducer.clreplaced);
    job.setOutputKeyClreplaced(Text.clreplaced);
    job.setOutputValueClreplaced(LongWritable.clreplaced);
    job.setInputFormatClreplaced(PravegaInputFormat.clreplaced);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    Path outputpath = new Path(args[4]);
    FileOutputFormat.setOutputPath(job, outputpath);
    boolean result = job.waitForCompletion(true);
    mean = readAndCalcMean(outputpath, conf);
    return (result ? 0 : 1);
}

19 Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB

/**
 * Creates a simple kill job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple kill job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createKillJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("Kill-Job");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(KillMapper.class);
    theJob.setReducerClass(Reducer.class);
    theJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    return theJob;
}

19 Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB

/**
 * Creates a simple copy job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    conf.setInt(MRJobConfig.NUM_MAPS, 3);
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("DataMoveJob");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(DataCopyMapper.class);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    theJob.setReducerClass(DataCopyReducer.class);
    theJob.setNumReduceTasks(1);
    return theJob;
}

19 Source : TestMRSequenceFileAsBinaryOutputFormat.java
with Apache License 2.0
from NJUJYB

public void testSequenceOutputClassDefaultsToMapRedOutputClass() throws IOException {
    Job job = Job.getInstance();
    // Setting Random class to test getSequenceFileOutput{Key,Value}Class
    job.setOutputKeyClass(FloatWritable.class);
    job.setOutputValueClass(BooleanWritable.class);
    assertEquals("SequenceFileOutputKeyClass should default to outputKeyClass", FloatWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(job));
    assertEquals("SequenceFileOutputValueClass should default to " + "outputValueClass", BooleanWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(job));
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputKeyClass(job, IntWritable.class);
    SequenceFileAsBinaryOutputFormat.setSequenceFileOutputValueClass(job, DoubleWritable.class);
    assertEquals("SequenceFileOutputKeyClass not updated", IntWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputKeyClass(job));
    assertEquals("SequenceFileOutputValueClass not updated", DoubleWritable.class, SequenceFileAsBinaryOutputFormat.getSequenceFileOutputValueClass(job));
}

19 Source : TestMRKeyValueTextInputFormat.java
with Apache License 2.0
from NJUJYB

/**
 * Test using the gzip codec for reading
 */
@Test
public void testGzip() throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    CompressionCodec gzip = new GzipCodec();
    ReflectionUtils.setConf(gzip, conf);
    localFs.delete(workDir, true);
    writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip, "line-1\tthe quick\nline-2\tbrown\nline-3\t" + "fox jumped\nline-4\tover\nline-5\t the lazy\nline-6\t dog\n");
    writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip, "line-1\tthis is a test\nline-1\tof gzip\n");
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, workDir);
    KeyValueTextInputFormat format = new KeyValueTextInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    replacedertEquals("compressed splits == 2", 2, splits.size());
    FileSplit tmp = (FileSplit) splits.get(0);
    if (tmp.getPath().getName().equals("part2.txt.gz")) {
        splits.set(0, splits.get(1));
        splits.set(1, tmp);
    }
    List<Text> results = readSplit(format, splits.get(0), job);
    replacedertEquals("splits[0] length", 6, results.size());
    replacedertEquals("splits[0][0]", "the quick", results.get(0).toString());
    replacedertEquals("splits[0][1]", "brown", results.get(1).toString());
    replacedertEquals("splits[0][2]", "fox jumped", results.get(2).toString());
    replacedertEquals("splits[0][3]", "over", results.get(3).toString());
    replacedertEquals("splits[0][4]", " the lazy", results.get(4).toString());
    replacedertEquals("splits[0][5]", " dog", results.get(5).toString());
    results = readSplit(format, splits.get(1), job);
    replacedertEquals("splits[1] length", 2, results.size());
    replacedertEquals("splits[1][0]", "this is a test", results.get(0).toString());
    replacedertEquals("splits[1][1]", "of gzip", results.get(1).toString());
}

19 Source : TestLineRecordReaderJobs.java
with Apache License 2.0
from NJUJYB

/**
 * Creates and runs an MR job
 *
 * @param conf
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public void createAndRunJob(Configuration conf) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf);
    job.setJarByClass(TestLineRecordReaderJobs.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    FileInputFormat.addInputPath(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.waitForCompletion(true);
}

19 Source : TestCombineTextInputFormat.java
with Apache License 2.0
from NJUJYB

/**
 * Test using the gzip codec for reading
 */
@Test(timeout = 10000)
public void testGzip() throws IOException, InterruptedException {
    Configuration conf = new Configuration(defaultConf);
    CompressionCodec gzip = new GzipCodec();
    ReflectionUtils.setConf(gzip, conf);
    localFs.delete(workDir, true);
    writeFile(localFs, new Path(workDir, "part1.txt.gz"), gzip, "the quick\nbrown\nfox jumped\nover\n the lazy\n dog\n");
    writeFile(localFs, new Path(workDir, "part2.txt.gz"), gzip, "this is a test\nof gzip\n");
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, workDir);
    CombineTextInputFormat format = new CombineTextInputFormat();
    List<InputSplit> splits = format.getSplits(job);
    replacedertEquals("compressed splits == 1", 1, splits.size());
    List<Text> results = readSplit(format, splits.get(0), job);
    replacedertEquals("splits[0] length", 8, results.size());
    final String[] firstList = { "the quick", "brown", "fox jumped", "over", " the lazy", " dog" };
    final String[] secondList = { "this is a test", "of gzip" };
    String first = results.get(0).toString();
    if (first.equals(firstList[0])) {
        testResults(results, firstList, secondList);
    } else if (first.equals(secondList[0])) {
        testResults(results, secondList, firstList);
    } else {
        fail("unexpected first token!");
    }
}

19 Source : TestFileInputFormat.java
with Apache License 2.0
from NJUJYB

@Test
public void testSplitLocationInfo() throws Exception {
    Configuration conf = getConfiguration();
    conf.set(org.apache.hadoop.mapreduce.lib.input.FileInputFormat.INPUT_DIR, "test:///a1/a2");
    Job job = Job.getInstance(conf);
    TextInputFormat fileInputFormat = new TextInputFormat();
    List<InputSplit> splits = fileInputFormat.getSplits(job);
    String[] locations = splits.get(0).getLocations();
    Assert.assertEquals(2, locations.length);
    SplitLocationInfo[] locationInfo = splits.get(0).getLocationInfo();
    Assert.assertEquals(2, locationInfo.length);
    SplitLocationInfo localhostInfo = locations[0].equals("localhost") ? locationInfo[0] : locationInfo[1];
    SplitLocationInfo otherhostInfo = locations[0].equals("otherhost") ? locationInfo[0] : locationInfo[1];
    Assert.assertTrue(localhostInfo.isOnDisk());
    Assert.assertTrue(localhostInfo.isInMemory());
    Assert.assertTrue(otherhostInfo.isOnDisk());
    Assert.assertFalse(otherhostInfo.isInMemory());
}

19 Source : TestCompressionEmulationUtils.java
with Apache License 2.0
from naver

/**
 * Runs a GridMix data-generation job.
 */
private static void runDataGenJob(Configuration conf, Path tempDir) throws IOException, ClassNotFoundException, InterruptedException {
    JobClient client = new JobClient(conf);
    // get the local job runner
    conf.setInt(MRJobConfig.NUM_MAPS, 1);
    Job job = Job.getInstance(conf);
    CompressionEmulationUtil.configure(job);
    job.setInputFormatClass(CustomInputFormat.class);
    // set the output path
    FileOutputFormat.setOutputPath(job, tempDir);
    // submit and wait for completion
    job.submit();
    int ret = job.waitForCompletion(true) ? 0 : 1;
    replacedertEquals("Job Failed", 0, ret);
}

19 Source : UserNamePermission.java
with Apache License 2.0
from naver

public static void main(String[] args) throws Exception {
    Path outDir = new Path("output");
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "user name check");
    job.setJarByClass(UserNamePermission.class);
    job.setMapperClass(UserNamePermission.UserNameMapper.class);
    job.setCombinerClass(UserNamePermission.UserNameReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setReducerClass(UserNamePermission.UserNameReducer.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(TextInputFormat.class);
    TextInputFormat.addInputPath(job, new Path("input"));
    FileOutputFormat.setOutputPath(job, outDir);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

19 Source : TestMiniMRClientCluster.java
with Apache License 2.0
from naver

public static Job createJob() throws IOException {
    final Job baseJob = Job.getInstance(mrCluster.getConfig());
    baseJob.setOutputKeyClass(Text.class);
    baseJob.setOutputValueClass(IntWritable.class);
    baseJob.setMapperClass(MyMapper.class);
    baseJob.setReducerClass(MyReducer.class);
    baseJob.setNumReduceTasks(1);
    return baseJob;
}

19 Source : InputSampler.java
with Apache License 2.0
from naver

/**
 * Driver for InputSampler from the command line.
 * Configures a JobConf instance and calls {@link #writePartitionFile}.
 */
public int run(String[] args) throws Exception {
    Job job = Job.getInstance(getConf());
    ArrayList<String> otherArgs = new ArrayList<String>();
    Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                job.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-inFormat".equals(args[i])) {
                job.setInputFormatClass(Class.forName(args[++i]).asSubclass(InputFormat.class));
            } else if ("-keyClass".equals(args[i])) {
                job.setMapOutputKeyClass(Class.forName(args[++i]).asSubclass(WritableComparable.class));
            } else if ("-splitSample".equals(args[i])) {
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new SplitSampler<K, V>(numSamples, maxSplits);
            } else if ("-splitRandom".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else if ("-splitInterval".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new IntervalSampler<K, V>(pcnt, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    if (job.getNumReduceTasks() <= 1) {
        System.err.println("Sampler requires more than one reducer");
        return printUsage();
    }
    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    if (null == sampler) {
        sampler = new RandomSampler<K, V>(0.1, 10000, 10);
    }
    Path outf = new Path(otherArgs.remove(otherArgs.size() - 1));
    TotalOrderPartitioner.setPartitionFile(getConf(), outf);
    for (String s : otherArgs) {
        FileInputFormat.addInputPath(job, new Path(s));
    }
    InputSampler.<K, V>writePartitionFile(job, sampler);
    return 0;
}

19 Source : CombineFileInputFormat.java
with Apache License 2.0
from naver

/**
 * List input directories.
 * Subclasses may override to, e.g., select only files matching a regular
 * expression.
 *
 * @param job the job to list input paths for
 * @return array of FileStatus objects
 * @throws IOException if zero items.
 */
protected FileStatus[] listStatus(JobConf job) throws IOException {
    List<FileStatus> result = super.listStatus(Job.getInstance(job));
    return result.toArray(new FileStatus[result.size()]);
}

19 Source : CombineFileInputFormat.java
with Apache License 2.0
from naver

public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException {
    List<org.apache.hadoop.mapreduce.InputSplit> newStyleSplits = super.getSplits(Job.getInstance(job));
    InputSplit[] ret = new InputSplit[newStyleSplits.size()];
    for (int pos = 0; pos < newStyleSplits.size(); ++pos) {
        org.apache.hadoop.mapreduce.lib.input.CombineFileSplit newStyleSplit = (org.apache.hadoop.mapreduce.lib.input.CombineFileSplit) newStyleSplits.get(pos);
        ret[pos] = new CombineFileSplit(job, newStyleSplit.getPaths(), newStyleSplit.getStartOffsets(), newStyleSplit.getLengths(), newStyleSplit.getLocations());
    }
    return ret;
}

19 Source : MR_WLA.java
with GNU General Public License v3.0
from monsonlee

public int run(String[] args) throws Exception {
    String jobName = "wla_baidu";
    String inputPath = args[0];
    String outputPath = args[1];
    Path path = new Path(outputPath);
    // Delete the output directory
    path.getFileSystem(getConf()).delete(path, true);
    // 1. Organize all the code into a Topology-like class
    Job job = Job.getInstance(getConf(), jobName);
    // 2. When running as a packaged jar, the following line is required
    job.setJarByClass(MR_WLA.class);
    // 3. Specify the HDFS input path
    FileInputFormat.setInputPaths(job, inputPath);
    // 4. Specify the mapper class
    job.setMapperClass(WLA_Mapper.class);
    // 5. Specify the map output <key, value> types
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // 6. Specify the reducer class
    job.setReducerClass(WLA_Reducer.class);
    // 7. Specify the reduce output <key, value> types
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // 8. Specify the HDFS output path
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job.waitForCompletion(true) ? 0 : 1;
}

19 Source : HadoopInputs.java
with Apache License 2.0
from ljygz

/**
 * Creates a Flink {@link InputFormat} that wraps the given Hadoop {@link org.apache.hadoop.mapreduce.lib.input.FileInputFormat}.
 *
 * @return A Flink InputFormat that wraps the Hadoop FileInputFormat.
 */
public static <K, V> org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormat<K, V> readHadoopFile(org.apache.hadoop.mapreduce.lib.input.FileInputFormat<K, V> mapreduceInputFormat, Class<K> key, Class<V> value, String inputPath) throws IOException {
    return readHadoopFile(mapreduceInputFormat, key, value, inputPath, Job.getInstance());
}
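
As a rough usage sketch for the wrapper above (the execution environment, input path, and key/value types here are illustrative assumptions, as is the flink-hadoop-compatibility packaging of HadoopInputs; none of it is taken from the project), the returned HadoopInputFormat can be passed straight to Flink's ExecutionEnvironment, with each Hadoop record exposed as a Tuple2<key, value>:

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.hadoop.mapreduce.HadoopInputFormat;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.hadoopcompatibility.HadoopInputs;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;

public class ReadHadoopFileSketch {

    public static void main(String[] args) throws Exception {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        // readHadoopFile wraps the Hadoop FileInputFormat and calls Job.getInstance() internally
        HadoopInputFormat<LongWritable, Text> input = HadoopInputs.readHadoopFile(
                new TextInputFormat(), LongWritable.class, Text.class, "hdfs:///tmp/input");
        DataSet<Tuple2<LongWritable, Text>> lines = env.createInput(input);
        lines.print();
    }
}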

19 Source : CarbondataInputFormat.java
with Apache License 2.0
from DTStack

@Override
public InputSplit[] createInputSplitsInternal(int num) throws IOException {
    org.apache.hadoop.conf.Configuration conf = initConfig();
    Job job = Job.getInstance(conf);
    CarbonTableInputFormat format = new CarbonTableInputFormat();
    List<org.apache.hadoop.mapreduce.InputSplit> splitList = format.getSplits(job);
    int splitNum = (splitList.size() < num ? splitList.size() : num);
    int groupSize = (int) Math.ceil(splitList.size() / (double) splitNum);
    InputSplit[] ret = new InputSplit[splitNum];
    for (int i = 0; i < splitNum; ++i) {
        List<CarbonInputSplit> carbonInputSplits = new ArrayList<>();
        for (int j = 0; j < groupSize && i * groupSize + j < splitList.size(); ++j) {
            carbonInputSplits.add((CarbonInputSplit) splitList.get(i * groupSize + j));
        }
        ret[i] = new CarbonFlinkInputSplit(carbonInputSplits, i);
    }
    return ret;
}

19 Source : HdfsUtil.java
with Apache License 2.0
from didi

public static Job getHdfsJob(Configuration conf, TaskConfig taskConfig, IndexInfo indexInfo) throws Exception {
    Job job = Job.getInstance(conf, MAIN_CLASS);
    job.setJobName("DidiFastIndex_" + taskConfig.getEsTemplate());
    job.setJarByClass(FastIndex.class);
    job.setMapperClass(FastIndexMapper.class);
    job.setInputFormatClass(HCatInputFormat.class);
    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    HCatInputFormat.setInput(job, taskConfig.getHiveDB(), taskConfig.getHiveTable(), taskConfig.getFilterStr());
    job.setReducerClass(FastIndexReducer.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(indexInfo.getReducerNum());
    job.setOutputFormatClass(TextOutputFormat.class);
    FileOutputFormat.setOutputPath(job, new Path(taskConfig.getHdfsMROutputPath()));
    return job;
}

19 Source : NewInstallUserRunner.java
with Apache License 2.0
from bjmashibing

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Process the arguments
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "new_install_user");
    job.setJarByClass(NewInstallUserRunner.class);
    // Run locally
    TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);
    // Run on the cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job), NewInstallUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job);
    job.setReducerClass(NewInstallUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);
    // job.setInputFormatClass(KeyValueTextInputFormat.class);
    job.setOutputFormatClass(TransformerOutputFormat.class);
    if (job.waitForCompletion(true)) {
        // Job succeeded; now compute the total number of users
        this.calculateTotalUsers(conf);
        return 0;
    } else {
        return -1;
    }
}

19 Source : ActiveUserRunner.java
with Apache License 2.0
from bjmashibing

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    // Initialize the arguments
    this.processArgs(conf, args);
    // Create the job
    Job job = Job.getInstance(conf, "active_user");
    // Set the job configuration parameters
    job.setJarByClass(ActiveUserRunner.class);
    // HBase input mapper parameters
    // 1. Run locally
    TableMapReduceUtil.initTableMapperJob(this.initScans(job), ActiveUserMapper.class, StatsUserDimension.class, TimeOutputValue.class, job, false);
    // 2. Run on the cluster
    // TableMapReduceUtil.initTableMapperJob(null, ActiveUserMapper.class,
    // StatsUserDimension.class, TimeOutputValue.class, job);
    // Set the reducer parameters
    job.setReducerClass(ActiveUserReducer.class);
    job.setOutputKeyClass(StatsUserDimension.class);
    job.setOutputValueClass(MapWritableValue.class);
    // Set the output parameters
    job.setOutputFormatClass(TransformerOutputFormat.class);
    // Start time in milliseconds
    long startTime = System.currentTimeMillis();
    try {
        return job.waitForCompletion(true) ? 0 : -1;
    } finally {
        // End time in milliseconds
        long endTime = System.currentTimeMillis();
        logger.info("Job<" + job.getJobName() + "> succeeded: " + job.isSuccessful() + "; start time: " + startTime + "; end time: " + endTime + "; elapsed: " + (endTime - startTime) + "ms");
    }
}

19 Source : AnalyserLogDataRunner.java
with Apache License 2.0
from bjmashibing

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "replacedyser_logdata");
    // 设置本地提交job,集群运行,需要代码
    // File jarFile = EJob.createTempJar("target/clreplacedes");
    // ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
    // 设置本地提交job,集群运行,需要代码结束
    job.setJarByClreplaced(replacedyserLogDataRunner.clreplaced);
    job.setMapperClreplaced(replacedyserLogDataMapper.clreplaced);
    job.setMapOutputKeyClreplaced(NullWritable.clreplaced);
    job.setMapOutputValueClreplaced(Put.clreplaced);
    // 设置reducer配置
    // 1. 集群上运行,打成jar运行(要求addDependencyJars参数为true,默认就是true)
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS,
    // null, job);
    // 2. 本地运行,要求参数addDependencyJars为false
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, false);
    job.setNumReduceTasks(0);
    // 设置输入路径
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}

19 Source : TransformerBaseRunner.java
with Apache License 2.0
from bjmashibing

/**
 * Create the job
 *
 * @param conf
 * @return
 * @throws IOException
 */
protected Job initJob(Configuration conf) throws IOException {
    Job job = Job.getInstance(conf, this.jobName);
    job.setJarByClass(this.runnerClass);
    // Run locally
    TableMapReduceUtil.initTableMapperJob(initScans(job), this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass, job, false);
    // Run on the cluster: submit locally or as a packaged jar
    // TableMapReduceUtil.initTableMapperJob(initScans(job),
    // this.mapperClass, this.mapOutputKeyClass, this.mapOutputValueClass,
    // job);
    job.setReducerClass(this.reducerClass);
    job.setOutputKeyClass(this.outputKeyClass);
    job.setOutputValueClass(this.outputValueClass);
    job.setOutputFormatClass(this.outputFormatClass);
    return job;
}

19 Source : AnalyserLogDataRunner.java
with Apache License 2.0
from bjmashibing

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "replacedyser_logdata");
    // 设置本地提交job,集群运行,需要代码
    // File jarFile = EJob.createTempJar("target/clreplacedes");
    // ((JobConf) job.getConfiguration()).setJar(jarFile.toString());
    // 设置本地提交job,集群运行,需要代码结束
    job.setJarByClreplaced(replacedyserLogDataRunner.clreplaced);
    job.setMapperClreplaced(replacedyserLogDataMapper.clreplaced);
    job.setMapOutputKeyClreplaced(NullWritable.clreplaced);
    job.setMapOutputValueClreplaced(Put.clreplaced);
    // 设置reducer配置
    // 1. 集群上运行,打成jar运行(要求addDependencyJars参数为true,默认就是true)
    // TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job);
    // 2. 本地运行,要求参数addDependencyJars为false
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, false);
    job.setNumReduceTasks(0);
    // 设置输入路径
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}

19 Source : AnalyserLogDataRunner.java
with Apache License 2.0
from bjmashibing

@Override
public int run(String[] args) throws Exception {
    Configuration conf = this.getConf();
    this.processArgs(conf, args);
    Job job = Job.getInstance(conf, "replacedyser_logdata");
    job.setJarByClreplaced(replacedyserLogDataRunner.clreplaced);
    job.setMapperClreplaced(replacedyserLogDataMapper.clreplaced);
    job.setMapOutputKeyClreplaced(NullWritable.clreplaced);
    job.setMapOutputValueClreplaced(Put.clreplaced);
    TableMapReduceUtil.initTableReducerJob(EventLogConstants.HBASE_NAME_EVENT_LOGS, null, job, null, null, null, null, false);
    job.setNumReduceTasks(0);
    // 设置输入路径
    this.setJobInputPaths(job);
    return job.waitForCompletion(true) ? 0 : -1;
}

19 Source : HoodieParquetInputFormat.java
with Apache License 2.0
from apache

/**
 * Achieves listStatus functionality for an incrementally queried table. Instead of listing all
 * partitions and then filtering based on the commits of interest, this logic first extracts the
 * partitions touched by the desired commits and then lists only those partitions.
 */
private List<FileStatus> listStatusForIncrementalMode(JobConf job, HoodieTableMetaClient tableMetaClient, List<Path> inputPaths) throws IOException {
    String tableName = tableMetaClient.getTableConfig().getTableName();
    Job jobContext = Job.getInstance(job);
    Option<HoodieTimeline> timeline = HoodieInputFormatUtils.getFilteredCommitsTimeline(jobContext, tableMetaClient);
    if (!timeline.isPresent()) {
        return null;
    }
    Option<List<HoodieInstant>> commitsToCheck = HoodieInputFormatUtils.getCommitsForIncrementalQuery(jobContext, tableName, timeline.get());
    if (!commitsToCheck.isPresent()) {
        return null;
    }
    Option<String> incrementalInputPaths = HoodieInputFormatUtils.getAffectedPartitions(commitsToCheck.get(), tableMetaClient, timeline.get(), inputPaths);
    // Mutate the JobConf to set the input paths to only partitions touched by incremental pull.
    if (!incrementalInputPaths.isPresent()) {
        return null;
    }
    setInputPaths(job, incrementalInputPaths.get());
    FileStatus[] fileStatuses = super.listStatus(job);
    return HoodieInputFormatUtils.filterIncrementalFileStatus(jobContext, tableMetaClient, timeline.get(), fileStatuses, commitsToCheck.get());
}

19 Source : HoodieParquetInputFormat.java
with Apache License 2.0
from apache

@Override
public FileStatus[] listStatus(JobConf job) throws IOException {
    // Segregate inputPaths[] to incremental, snapshot and non hoodie paths
    List<String> incrementalTables = HoodieHiveUtils.getIncrementalTableNames(Job.getInstance(job));
    InputPathHandler inputPathHandler = new InputPathHandler(conf, getInputPaths(job), incrementalTables);
    List<FileStatus> returns = new ArrayList<>();
    Map<String, HoodieTableMetaClient> tableMetaClientMap = inputPathHandler.getTableMetaClientMap();
    // process incremental pulls first
    for (String table : incrementalTables) {
        HoodieTableMetaClient metaClient = tableMetaClientMap.get(table);
        if (metaClient == null) {
            /* This can happen when the INCREMENTAL mode is set for a table but there were no InputPaths
         * in the jobConf
         */
            continue;
        }
        List<Path> inputPaths = inputPathHandler.getGroupedIncrementalPaths().get(metaClient);
        List<FileStatus> result = listStatusForIncrementalMode(job, metaClient, inputPaths);
        if (result != null) {
            returns.addAll(result);
        }
    }
    // process non hoodie Paths next.
    List<Path> nonHoodiePaths = inputPathHandler.getNonHoodieInputPaths();
    if (nonHoodiePaths.size() > 0) {
        setInputPaths(job, nonHoodiePaths.toArray(new Path[nonHoodiePaths.size()]));
        FileStatus[] fileStatuses = super.listStatus(job);
        returns.addAll(Arrays.asList(fileStatuses));
    }
    // process snapshot queries next.
    List<Path> snapshotPaths = inputPathHandler.getSnapshotPaths();
    if (snapshotPaths.size() > 0) {
        returns.addAll(HoodieInputFormatUtils.filterFileStatusForSnapshotMode(job, tableMetaClientMap, snapshotPaths));
    }
    return returns.toArray(new FileStatus[0]);
}

18 Source : ProvincePVAndUVJob.java
with Apache License 2.0
from xpleaf

public static void main(String[] args) throws Exception {
    if (args == null || args.length < 2) {
        System.err.println("Parameter Errors! Usage <inputPath...> <outputPath>");
        System.exit(-1);
    }
    Path outputPath = new Path(args[args.length - 1]);
    Configuration conf = new Configuration();
    String jobName = ProvincePVAndUVJob.class.getSimpleName();
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(ProvincePVAndUVJob.class);
    // Set the MR input parameters
    for (int i = 0; i < args.length - 1; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(ProvincePVAndUVMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // Set the MR output parameters
    // Avoid the "output directory already exists" exception when the job runs
    outputPath.getFileSystem(conf).delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setReducerClass(ProvincePVAndUVReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    job.waitForCompletion(true);
}

18 Source : AccessLogCleanJob.java
with Apache License 2.0
from xpleaf

public static void main(String[] args) throws Exception {
    if (args == null || args.length < 2) {
        System.err.println("Parameter Errors! Usage <inputPath...> <outputPath>");
        System.exit(-1);
    }
    Path outputPath = new Path(args[args.length - 1]);
    Configuration conf = new Configuration();
    String jobName = AccessLogCleanJob.class.getSimpleName();
    Job job = Job.getInstance(conf, jobName);
    job.setJarByClass(AccessLogCleanJob.class);
    // Set the MR input parameters
    for (int i = 0; i < args.length - 1; i++) {
        FileInputFormat.addInputPath(job, new Path(args[i]));
    }
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(AccessLogCleanMapper.class);
    job.setMapOutputKeyClass(NullWritable.class);
    job.setMapOutputValueClass(Text.class);
    // Set the MR output parameters
    // Avoid the "output directory already exists" exception when the job runs
    outputPath.getFileSystem(conf).delete(outputPath, true);
    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    // Map-only job, no reducer
    job.setNumReduceTasks(0);
    job.waitForCompletion(true);
}

18 Source : CommonFriendStep2.java
with Apache License 2.0
from whirlys

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(CommonFriendStep2.class);
    // Set the job's mapper and reducer classes
    job.setMapperClass(CommonFansStep2Mapper.class);
    job.setReducerClass(CommonFansStep2Reducer.class);
    // Set the key:value types output by the map phase
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // Set the key:value types output by the reduce phase
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // Check whether the output directory already exists and delete it if so, to avoid deleting it manually over and over during testing
    FileSystem fs = FileSystem.get(conf);
    Path out = new Path(args[1]);
    if (fs.exists(out)) {
        fs.delete(out, true);
    }
    // Set the data input and output directories
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, out);
    // Submit the job to YARN or the local runner
    job.waitForCompletion(true);
}

18 Source : CommonFriendStep1.java
with Apache License 2.0
from whirlys

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration configuration = new Configuration();
    Job job = Job.getInstance(configuration);
    job.setJarByClass(CommonFriendStep1.class);
    // Set the job's mapper class
    job.setMapperClass(CommonFansStep1Mapper.class);
    // Set the job's reducer class
    job.setReducerClass(CommonFansStep1Reducer.class);
    // Set the key:value types output by the map phase
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // Set the key:value types output by the reduce phase
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // Check whether the output path already exists and delete it if so, to avoid deleting it manually over and over during testing
    FileSystem fs = FileSystem.get(configuration);
    Path out = new Path(args[1]);
    if (fs.exists(out)) {
        fs.delete(out, true);
    }
    // Set the data input and output paths
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, out);
    // Submit the job to YARN or the local runner
    job.waitForCompletion(true);
}

18 Source : WordCount2Application.java
with Apache License 2.0
from ukihsoroy

/**
 * Defines the Driver: encapsulates all the information of the MapReduce job
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. Create the Configuration
    Configuration configuration = new Configuration();
    // 1.1 Clean up the output directory if it already exists
    Path outputPath = new Path(args[1]);
    FileSystem fs = FileSystem.get(configuration);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
        _LOGGER.info("rm -rf output path success.");
    }
    // 2. Create a Job
    Job job = Job.getInstance(configuration, "wordcount");
    // 3. Set the job's main class
    job.setJarByClass(WordCount2Application.class);
    // 4. Set the input path of the job
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. Set the Map parameters
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. Set the Reduce parameters
    job.setReducerClass(WordCountReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. Set the output path of the job
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. Submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

18 Source : CombinerApplication.java
with Apache License 2.0
from ukihsoroy

/**
 * Defines the Driver: encapsulates all the information of the MapReduce job
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. Create the Configuration
    Configuration configuration = new Configuration();
    // 1.1 Clean up the output directory if it already exists
    Path outputPath = new Path(args[1]);
    FileSystem fs = FileSystem.get(configuration);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
        _LOGGER.info("rm -rf output path success.");
    }
    // 2. Create a Job
    Job job = Job.getInstance(configuration, "wordcount");
    // 3. Set the job's main class
    job.setJarByClass(CombinerApplication.class);
    // 4. Set the input path of the job
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. Set the Map parameters
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. Set the Reduce parameters
    job.setReducerClass(WordCountReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. Set the combiner class via the Job; its logic is identical to our Reduce
    job.setCombinerClass(WordCountReduce.class);
    // 7. Set the output path of the job
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. Submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

18 Source : LogAnalysisApplication.java
with Apache License 2.0
from ukihsoroy

/**
 * Defines the Driver: encapsulates all the information of the MapReduce job
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. Create the Configuration
    Configuration configuration = new Configuration();
    // 1.1 Clean up the output directory if it already exists
    Path outputPath = new Path(args[1]);
    FileSystem fs = FileSystem.get(configuration);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
        _LOGGER.info("rm -rf output path success.");
    }
    // 2. Create a Job
    Job job = Job.getInstance(configuration, "loganalysis");
    // 3. Set the job's main class
    job.setJarByClass(LogAnalysisApplication.class);
    // 4. Set the input path of the job
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. Set the Map parameters
    job.setMapperClass(LogAnalysisApplication.LogAnalysisMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. Set the Reduce parameters
    job.setReducerClass(LogAnalysisApplication.LogAnalysisReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. Set the output path of the job
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. Submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

18 Source : TeraStreamValidate.java
with Apache License 2.0
from pravega

public int run(String[] args) throws Exception {
    if (args.length != 5) {
        usage();
        return 2;
    }
    LOG.info("starting");
    Path inputDir = new Path(args[0]);
    Path outputDir = new Path(args[1]);
    getConf().setStrings(INPUT_URI_STRING, args[2]);
    getConf().setStrings(INPUT_SCOPE_NAME, args[3]);
    getConf().setStrings(INPUT_STREAM_NAME, args[4]);
    getConf().setStrings(INPUT_DESERIALIZER, TextSerializer.class.getName());
    getConf().setInt(MRJobConfig.NUM_MAPS, 1);
    Job job = Job.getInstance(getConf());
    TeraInputFormat.setInputPaths(job, inputDir);
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraStreamValidate");
    job.setJarByClass(TeraStreamValidate.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapperClass(TeraSortMapper.class);
    job.setNumReduceTasks(1);
    job.setInputFormatClass(PravegaInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    LOG.info("done");
    return ret;
}

18 Source : TeraGen.java
with Apache License 2.0
from pravega

/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    if (args.length != 5 && args.length != 6) {
        usage();
        return 2;
    }
    Path outputDir = new Path(args[1]);
    getConf().setStrings(OUTPUT_URI_STRING, args[2]);
    getConf().setStrings(OUTPUT_SCOPE_NAME, args[3]);
    getConf().setStrings(OUTPUT_STREAM_NAME, args[4]);
    getConf().setStrings(OUTPUT_STREAM_SEGMENTS, args[5]);
    getConf().setStrings(OUTPUT_DESERIALIZER, TextSerializer.class.getName());
    Job job = Job.getInstance(getConf());
    setNumberOfRows(job, parseHumanLong(args[0]));
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(String.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(PravegaFixedSegmentsOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}

18 Source : TeraGen.java
with Apache License 2.0
from NJUJYB

/**
 * @param args the cli arguments
 */
public int run(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(getConf());
    if (args.length != 2) {
        usage();
        return 2;
    }
    setNumberOfRows(job, parseHumanLong(args[0]));
    Path outputDir = new Path(args[1]);
    if (outputDir.getFileSystem(getConf()).exists(outputDir)) {
        throw new IOException("Output directory " + outputDir + " already exists.");
    }
    FileOutputFormat.setOutputPath(job, outputDir);
    job.setJobName("TeraGen");
    job.setJarByClass(TeraGen.class);
    job.setMapperClass(SortGenMapper.class);
    job.setNumReduceTasks(0);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(RangeInputFormat.class);
    job.setOutputFormatClass(TeraOutputFormat.class);
    return job.waitForCompletion(true) ? 0 : 1;
}

18 Source : TestSpeculativeExecution.java
with Apache License 2.0
from NJUJYB

private Job runSpecTest(boolean mapspec, boolean redspec) throws IOException, ClassNotFoundException, InterruptedException {
    Path first = createTempFile("specexec_map_input1", "a\nz");
    Path secnd = createTempFile("specexec_map_input2", "a\nz");
    Configuration conf = mrCluster.getConfig();
    conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, mapspec);
    conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, redspec);
    conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR, TestSpecEstimator.class, TaskRuntimeEstimator.class);
    Job job = Job.getInstance(conf);
    job.setJarByClass(TestSpeculativeExecution.class);
    job.setMapperClass(SpeculativeMapper.class);
    job.setReducerClass(SpeculativeReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);
    FileInputFormat.setInputPaths(job, first);
    FileInputFormat.addInputPath(job, secnd);
    FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);
    // Delete output directory if it exists.
    try {
        localFs.delete(TEST_OUT_DIR, true);
    } catch (IOException e) {
    // ignore
    }
    // Creates the Job Configuration
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.setMaxMapAttempts(2);
    job.submit();
    return job;
}

18 Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB

public static Job createJob(Configuration conf, Path inDir, Path outDir, int numInputFiles, int numReds, String input) throws IOException {
    Job job = Job.getInstance(conf);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    if (fs.exists(inDir)) {
        fs.delete(inDir, true);
    }
    fs.mkdirs(inDir);
    for (int i = 0; i < numInputFiles; ++i) {
        DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
        file.writeBytes(input);
        file.close();
    }
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);
    job.setNumReduceTasks(numReds);
    return job;
}

18 Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB

/**
 * Creates a simple fail job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple fail job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createFailJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    FileSystem fs = outdir.getFileSystem(conf);
    if (fs.exists(outdir)) {
        fs.delete(outdir, true);
    }
    conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("Fail-Job");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(FailMapper.class);
    theJob.setReducerClass(Reducer.class);
    theJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    return theJob;
}

18 Source : TestMRSequenceFileAsBinaryOutputFormat.java
with Apache License 2.0
from NJUJYB

public void testcheckOutputSpecsForbidRecordCompression() throws IOException {
    Job job = Job.getInstance();
    FileSystem fs = FileSystem.getLocal(job.getConfiguration());
    Path outputdir = new Path(System.getProperty("test.build.data", "/tmp") + "/output");
    fs.delete(outputdir, true);
    // Without outputpath, FileOutputFormat.checkoutputspecs will throw
    // InvalidJobConfException
    FileOutputFormat.setOutputPath(job, outputdir);
    // SequenceFileAsBinaryOutputFormat doesn't support record compression
    // It should throw an exception when checked by checkOutputSpecs
    SequenceFileAsBinaryOutputFormat.setCompressOutput(job, true);
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.BLOCK);
    try {
        new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
    } catch (Exception e) {
        fail("Block compression should be allowed for " + "SequenceFileAsBinaryOutputFormat:Caught " + e.getClreplaced().getName());
    }
    SequenceFileAsBinaryOutputFormat.setOutputCompressionType(job, CompressionType.RECORD);
    try {
        new SequenceFileAsBinaryOutputFormat().checkOutputSpecs(job);
        fail("Record compression should not be allowed for " + "SequenceFileAsBinaryOutputFormat");
    } catch (InvalidJobConfException ie) {
    // expected
    } catch (Exception e) {
        fail("Expected " + InvalidJobConfException.clreplaced.getName() + "but caught " + e.getClreplaced().getName());
    }
}

18 Source : TestMultipleInputs.java
with Apache License 2.0
from NJUJYB

@SuppressWarnings("unchecked")
public void testAddInputPathWithMapper() throws IOException {
    final Job conf = Job.getInstance();
    MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.clreplaced, MapClreplaced.clreplaced);
    MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.clreplaced, KeyValueMapClreplaced.clreplaced);
    final Map<Path, InputFormat> inputs = MultipleInputs.getInputFormatMap(conf);
    final Map<Path, Clreplaced<? extends Mapper>> maps = MultipleInputs.getMapperTypeMap(conf);
    replacedertEquals(TextInputFormat.clreplaced, inputs.get(new Path("/foo")).getClreplaced());
    replacedertEquals(KeyValueTextInputFormat.clreplaced, inputs.get(new Path("/bar")).getClreplaced());
    replacedertEquals(MapClreplaced.clreplaced, maps.get(new Path("/foo")));
    replacedertEquals(KeyValueMapClreplaced.clreplaced, maps.get(new Path("/bar")));
}

18 Source : TestMultipleInputs.java
with Apache License 2.0
from NJUJYB

@Test
public void testDoMultipleInputs() throws IOException {
    Path in1Dir = getDir(IN1_DIR);
    Path in2Dir = getDir(IN2_DIR);
    Path outDir = getDir(OUT_DIR);
    Configuration conf = createJobConf();
    FileSystem fs = FileSystem.get(conf);
    fs.delete(outDir, true);
    DataOutputStream file1 = fs.create(new Path(in1Dir, "part-0"));
    file1.writeBytes("a\nb\nc\nd\ne");
    file1.close();
    // write tab delimited to second file because we're doing
    // KeyValueInputFormat
    DataOutputStream file2 = fs.create(new Path(in2Dir, "part-0"));
    file2.writeBytes("a\tblah\nb\tblah\nc\tblah\nd\tblah\ne\tblah");
    file2.close();
    Job job = Job.getInstance(conf);
    job.setJobName("mi");
    MultipleInputs.addInputPath(job, in1Dir, TextInputFormat.class, MapClass.class);
    MultipleInputs.addInputPath(job, in2Dir, KeyValueTextInputFormat.class, KeyValueMapClass.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Text.class);
    job.setReducerClass(ReducerClass.class);
    FileOutputFormat.setOutputPath(job, outDir);
    boolean success = false;
    try {
        success = job.waitForCompletion(true);
    } catch (InterruptedException ie) {
        throw new RuntimeException(ie);
    } catch (ClassNotFoundException instante) {
        throw new RuntimeException(instante);
    }
    if (!success)
        throw new RuntimeException("Job failed!");
    // copy bytes a bunch of times for the ease of readLine() - whatever
    BufferedReader output = new BufferedReader(new InputStreamReader(fs.open(new Path(outDir, "part-r-00000"))));
    // reducer should have counted one key from each file
    replacedertTrue(output.readLine().equals("a 2"));
    replacedertTrue(output.readLine().equals("b 2"));
    replacedertTrue(output.readLine().equals("c 2"));
    replacedertTrue(output.readLine().equals("d 2"));
    replacedertTrue(output.readLine().equals("e 2"));
}

18 Source : TestMultipleInputs.java
with Apache License 2.0
from NJUJYB

@SuppressWarnings("unchecked")
public void testAddInputPathWithFormat() throws IOException {
    final Job conf = Job.getInstance();
    MultipleInputs.addInputPath(conf, new Path("/foo"), TextInputFormat.clreplaced);
    MultipleInputs.addInputPath(conf, new Path("/bar"), KeyValueTextInputFormat.clreplaced);
    final Map<Path, InputFormat> inputs = MultipleInputs.getInputFormatMap(conf);
    replacedertEquals(TextInputFormat.clreplaced, inputs.get(new Path("/foo")).getClreplaced());
    replacedertEquals(KeyValueTextInputFormat.clreplaced, inputs.get(new Path("/bar")).getClreplaced());
}

18 Source : TestMRCJCFileInputFormat.java
with Apache License 2.0
from NJUJYB

/**
 * Test when the input file's length is 0.
 */
@Test
public void testForEmptyFile() throws Exception {
    Configuration conf = new Configuration();
    FileSystem fileSys = FileSystem.get(conf);
    Path file = new Path("test" + "/file");
    FSDataOutputStream out = fileSys.create(file, true, conf.getInt("io.file.buffer.size", 4096), (short) 1, (long) 1024);
    out.write(new byte[0]);
    out.close();
    // split it using a File input format
    DummyInputFormat inFormat = new DummyInputFormat();
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, "test");
    List<InputSplit> splits = inFormat.getSplits(job);
    assertEquals(1, splits.size());
    FileSplit fileSplit = (FileSplit) splits.get(0);
    assertEquals(0, fileSplit.getLocations().length);
    assertEquals(file.getName(), fileSplit.getPath().getName());
    assertEquals(0, fileSplit.getStart());
    assertEquals(0, fileSplit.getLength());
    fileSys.delete(file.getParent(), true);
}

18 Source : TestMRCJCFileInputFormat.java
with Apache License 2.0
from NJUJYB

@Test
public void testAddInputPath() throws IOException {
    final Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "file:///abc/");
    final Job j = Job.getInstance(conf);
    // setup default fs
    final FileSystem defaultfs = FileSystem.get(conf);
    System.out.println("defaultfs.getUri() = " + defaultfs.getUri());
    {
        // test addInputPath
        final Path original = new Path("file:/foo");
        System.out.println("original = " + original);
        FileInputFormat.addInputPath(j, original);
        final Path[] results = FileInputFormat.getInputPaths(j);
        System.out.println("results = " + Arrays.asList(results));
        assertEquals(1, results.length);
        assertEquals(original, results[0]);
    }
    {
        // test setInputPaths
        final Path original = new Path("file:/bar");
        System.out.println("original = " + original);
        FileInputFormat.setInputPaths(j, original);
        final Path[] results = FileInputFormat.getInputPaths(j);
        System.out.println("results = " + Arrays.asList(results));
        assertEquals(1, results.length);
        assertEquals(original, results[0]);
    }
}

18 Source : TestFixedLengthInputFormat.java
with Apache License 2.0
from NJUJYB

private void runPartialRecordTest(CompressionCodec codec) throws Exception {
    localFs.delete(workDir, true);
    Job job = Job.getInstance(defaultConf);
    // Create a file with fixed length records with 5 byte long
    // records with a partial record at the end.
    StringBuilder fileName = new StringBuilder("testFormat.txt");
    if (codec != null) {
        fileName.append(".gz");
        ReflectionUtils.setConf(codec, job.getConfiguration());
    }
    writeFile(localFs, new Path(workDir, fileName.toString()), codec, "one  two  threefour five six  seveneightnine ten");
    FixedLengthInputFormat format = new FixedLengthInputFormat();
    format.setRecordLength(job.getConfiguration(), 5);
    FileInputFormat.setInputPaths(job, workDir);
    List<InputSplit> splits = format.getSplits(job);
    if (codec != null) {
        replacedertEquals("compressed splits == 1", 1, splits.size());
    }
    boolean exceptionThrown = false;
    for (InputSplit split : splits) {
        try {
            List<String> results = readSplit(format, split, job);
        } catch (IOException ioe) {
            exceptionThrown = true;
            LOG.info("Exception message:" + ioe.getMessage());
        }
    }
    replacedertTrue("Exception for partial record:", exceptionThrown);
}
