org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths()

Here are examples of the Java API org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(), taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.

52 Examples
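Before the project examples, here is a minimal, self-contained sketch of the common overloads (the class name SetInputPathsSketch and the argument layout are placeholders for illustration, not code from the projects below): setInputPaths(Job, Path...) replaces the job's input list with the given paths, setInputPaths(Job, String) does the same with a comma-separated string, and addInputPath(Job, Path) appends to whatever is already set. With no mapper or reducer configured, Hadoop's identity defaults are used, so the sketch only illustrates the input/output wiring.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetInputPathsSketch {

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "setInputPaths-sketch");
        job.setJarByClass(SetInputPathsSketch.class);
        // Replace the input list with explicit Path objects (varargs overload).
        FileInputFormat.setInputPaths(job, new Path(args[0]), new Path(args[1]));
        // Equivalent overload taking a comma-separated list of paths:
        // FileInputFormat.setInputPaths(job, args[0] + "," + args[1]);
        // addInputPath appends to the paths already set instead of replacing them.
        FileInputFormat.addInputPath(job, new Path(args[2]));
        // The output directory must not exist yet.
        FileOutputFormat.setOutputPath(job, new Path(args[3]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}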

19 Source : WordCount1Application.java
with Apache License 2.0
from ukihsoroy

/**
 * Defines the Driver: encapsulates all the information of the MapReduce job.
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. Create the Configuration
    Configuration configuration = new Configuration();
    // 2. Create a Job
    Job job = Job.getInstance(configuration, "wordcount");
    // 3. Set the job's main class
    job.setJarByClass(WordCount1Application.class);
    // 4. Set the job's input path
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. Set the Map-related parameters
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. Set the Reduce-related parameters
    job.setReducerClass(WordCountReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. Set the job's output path
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. Submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

19 Source : Main.java
with Apache License 2.0
from lfz757077613

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(new Configuration(), "wordCount");
    // Set the jar's main class
    job.setJarByClass(Main.class);
    // Set the mapper
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // Set the reducer
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // Run a local reduce (combiner) first to cut down the amount of data shuffled
    job.setCombinerClass(MyReducer.class);
    // Set the partitioner
    // Not used for now
    // job.setPartitionerClass(MyPartitioner.class);
    // job.setNumReduceTasks(2);
    // Set the job's input and output paths. Note that the output path must not already exist;
    // the output is a directory containing the results and the job's completion status.
    FileInputFormat.setInputPaths(job, new Path("hdfs://localhost:8020/access.log"));
    FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:8020/output"));
    System.exit(job.waitForCompletion(true) ? 0 : -1);
}

19 Source : CalculateStatsFromBaseCuboidJob.java
with Apache License 2.0
from Kyligence

private void setupMapper(Path input) throws IOException {
    FileInputFormat.setInputPaths(job, input);
    job.setMapperClass(CalculateStatsFromBaseCuboidMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
}

19 Source : SampleUploader.java
with Apache License 2.0
from fengchen8086

/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(Uploader.class);
    // No reducers.  Just write straight to table.  Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}

18 Source : TestMRJobs.java
with Apache License 2.0
from NJUJYB

public void _testDistributedCache(String jobJarPath) throws Exception {
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    // Create a temporary file of length 1.
    Path first = createTempFile("distributed.first", "x");
    // Create two jars with a single file inside them.
    Path second = makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
    Path third = makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
    Path fourth = makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);
    Job job = Job.getInstance(mrCluster.getConfig());
    // Set the job jar to a new "dummy" jar so we can check that its extracted
    // properly
    job.setJar(jobJarPath);
    // Because the job jar is a "dummy" jar, we need to include the jar with
    // DistributedCacheChecker or it won't be able to find it
    Path distributedCacheCheckerJar = new Path(JarFinder.getJar(DistributedCacheChecker.class));
    job.addFileToClassPath(distributedCacheCheckerJar.makeQualified(localFs.getUri(), distributedCacheCheckerJar.getParent()));
    job.setMapperClass(DistributedCacheChecker.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    FileInputFormat.setInputPaths(job, first);
    // Creates the Job Configuration
    job.addCacheFile(new URI(first.toUri().toString() + "#distributed.first.symlink"));
    job.addFileToClassPath(second);
    // The AppMaster jar itself
    job.addFileToClassPath(APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent()));
    job.addArchiveToClassPath(third);
    job.addCacheArchive(fourth.toUri());
    // speed up failures
    job.setMaxMapAttempts(1);
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    Assert.assertTrue(job.waitForCompletion(false));
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
}

18 Source : TestMapReduceLazyOutput.java
with Apache License 2.0
from NJUJYB

private static void runTestLazyOutput(Configuration conf, Path output, int numReducers, boolean createLazily) throws Exception {
    Job job = Job.getInstance(conf, "Test-Lazy-Output");
    FileInputFormat.setInputPaths(job, INPUT);
    FileOutputFormat.setOutputPath(job, output);
    job.setJarByClass(TestMapReduceLazyOutput.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(numReducers);
    job.setMapperClass(TestMapper.class);
    job.setReducerClass(TestReducer.class);
    if (createLazily) {
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    } else {
        job.setOutputFormatClass(TextOutputFormat.class);
    }
    assertTrue(job.waitForCompletion(true));
}

17 Source : Grep.java
with Apache License 2.0
from NJUJYB

public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4)
        conf.set(RegexMapper.GROUP, args[3]);
    Job grepJob = new Job(conf);
    try {
        grepJob.setJobName("grep-search");
        FileInputFormat.setInputPaths(grepJob, args[0]);
        grepJob.setMapperClass(RegexMapper.class);
        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);
        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);
        grepJob.waitForCompletion(true);
        Job sortJob = new Job(conf);
        sortJob.setJobName("grep-sort");
        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);
        sortJob.setMapperClass(InverseMapper.class);
        // write a single file
        sortJob.setNumReduceTasks(1);
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        // sort by decreasing freq
        sortJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
        sortJob.waitForCompletion(true);
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
    return 0;
}

17 Source : TestMRWithDistributedCache.java
with Apache License 2.0
from NJUJYB

private void testWithConf(Configuration conf) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    // Create a temporary file of length 1.
    Path first = createTempFile("distributed.first", "x");
    // Create two jars with a single file inside them.
    Path second = makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
    Path third = makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
    Path fourth = makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);
    Job job = Job.getInstance(conf);
    job.setMapperClass(DistributedCacheCheckerMapper.class);
    job.setReducerClass(DistributedCacheCheckerReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    FileInputFormat.setInputPaths(job, first);
    // Creates the Job Configuration
    job.addCacheFile(new URI(first.toUri().toString() + "#distributed.first.symlink"));
    job.addFileToClassPath(second);
    job.addArchiveToClassPath(third);
    job.addCacheArchive(fourth.toUri());
    // speed up failures
    job.setMaxMapAttempts(1);
    job.submit();
    assertTrue(job.waitForCompletion(false));
}

17 Source : Grep.java
with Apache License 2.0
from naver

public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4)
        conf.set(RegexMapper.GROUP, args[3]);
    Job grepJob = Job.getInstance(conf);
    try {
        grepJob.setJobName("grep-search");
        grepJob.setJarByClass(Grep.class);
        FileInputFormat.setInputPaths(grepJob, args[0]);
        grepJob.setMapperClass(RegexMapper.class);
        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);
        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);
        grepJob.waitForCompletion(true);
        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("grep-sort");
        sortJob.setJarByClass(Grep.class);
        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);
        sortJob.setMapperClass(InverseMapper.class);
        // write a single file
        sortJob.setNumReduceTasks(1);
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        // sort by decreasing freq
        sortJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
        sortJob.waitForCompletion(true);
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
    return 0;
}

17 Source : CuboidJob.java
with Apache License 2.0
from Kyligence

private void configureMapperInputFormat(CubeSegment cubeSeg) throws Exception {
    String input = getOptionValue(OPTION_INPUT_PATH);
    if ("FLAT_TABLE".equals(input)) {
        // base cuboid case
        IMRTableInputFormat flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat();
        flatTableInputFormat.configureJob(job);
    } else {
        // n-dimension cuboid case
        IMROutput2.IMROutputFormat outputFormat = MRUtil.getBatchCubingOutputSide2(cubeSeg).getOutputFormat();
        outputFormat.configureJobInput(job, input);
        FileInputFormat.setInputPaths(job, new Path(input));
    }
}

17 Source : HCatalogTestUtils.java
with Apache License 2.0
from dkhadoop

public List<HCatRecord> loadHCatTable(String dbName, String tableName, Map<String, String> partKeyMap, HCatSchema tblSchema, List<HCatRecord> records) throws Exception {
    Job job = new Job(conf, "HCat load job");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatWriterMapper.class);
    // Just write 10 lines to the file to drive the mapper
    Path path = new Path(fs.getWorkingDirectory(), "mapreduce/HCatTableIndexInput");
    job.getConfiguration().setInt(ConfigurationConstants.PROP_MAPRED_MAP_TASKS, 1);
    int writeCount = records.size();
    recsToLoad.clear();
    recsToLoad.addAll(records);
    createInputFile(path, writeCount);
    // input/output settings
    HCatWriterMapper.setWrittenRecordCount(0);
    FileInputFormat.setInputPaths(job, path);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HCatOutputFormat.class);
    OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partKeyMap);
    HCatOutputFormat.setOutput(job, outputJobInfo);
    HCatOutputFormat.setSchema(job, tblSchema);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    job.setNumReduceTasks(0);
    SqoopHCatUtilities.addJars(job, new SqoopOptions());
    boolean success = job.waitForCompletion(true);
    if (!success) {
        throw new IOException("Loading HCatalog table with test records failed");
    }
    utils.invokeOutputCommitterForLocalMode(job);
    LOG.info("Loaded " + HCatWriterMapper.writtenRecordCount + " records");
    return recsToLoad;
}

16 Source : Sort.java
with Apache License 2.0
from NJUJYB

/**
 * The main driver for sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = conf.get(REDUCES_PER_HOST);
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            // exits
            return printUsage();
        }
    }
    // Set user-supplied (possibly default) job configs
    job = new Job(conf);
    job.setJobName("sorter");
    job.setJarByClass(Sort.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(num_reduces);
    job.setInputFormatClass(inputFormatClass);
    job.setOutputFormatClass(outputFormatClass);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(job, otherArgs.get(0));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));
    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        job.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(job)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
        InputSampler.<K, V>writePartitionFile(job, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, conf);
    }
    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from " + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}

16 Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB

/**
 * Creates a simple fail job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple kill job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createKillJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("Kill-Job");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(KillMapper.class);
    theJob.setReducerClass(Reducer.class);
    theJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    return theJob;
}

16 Source : Sort.java
with Apache License 2.0
from naver

/**
 * The main driver for sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = conf.get(REDUCES_PER_HOST);
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            // exits
            return printUsage();
        }
    }
    // Set user-supplied (possibly default) job configs
    job = Job.getInstance(conf);
    job.setJobName("sorter");
    job.setJarByClass(Sort.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(num_reduces);
    job.setInputFormatClass(inputFormatClass);
    job.setOutputFormatClass(outputFormatClass);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(job, otherArgs.get(0));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));
    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        job.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(job)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
        InputSampler.<K, V>writePartitionFile(job, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, conf);
    }
    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from " + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}

16 Source : Import.java
with Apache License 2.0
from fengchen8086

/**
 * Sets up the actual job.
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    TableName tableName = TableName.valueOf(args[0]);
    conf.set(TABLE_NAME, tableName.getNameAsString());
    Path inputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Importer.class);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    // make sure we get the filter in the jars
    try {
        Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
        if (filter != null) {
            TableMapReduceUtil.addDependencyJars(conf, filter);
        }
    } catch (Exception e) {
        throw new IOException(e);
    }
    if (hfileOutPath != null) {
        job.setMapperClass(KeyValueImporter.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
            Table table = conn.getTable(tableName);
            RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            job.setReducerClass(KeyValueSortReducer.class);
            Path outputDir = new Path(hfileOutPath);
            FileOutputFormat.setOutputPath(job, outputDir);
            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            job.setMapOutputValueClass(KeyValue.class);
            HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Preconditions.class);
        }
    } else {
        // No reducers.  Just write straight to table.  Call initTableReducerJob
        // because it sets up the TableOutputFormat.
        job.setMapperClass(Importer.class);
        TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
        job.setNumReduceTasks(0);
    }
    return job;
}

15 Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB

public static Job createJob(Configuration conf, Path inDir, Path outDir, int numInputFiles, int numReds, String input) throws IOException {
    Job job = Job.getInstance(conf);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    if (fs.exists(inDir)) {
        fs.delete(inDir, true);
    }
    fs.mkdirs(inDir);
    for (int i = 0; i < numInputFiles; ++i) {
        DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
        file.writeBytes(input);
        file.close();
    }
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);
    job.setNumReduceTasks(numReds);
    return job;
}

15 Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB

/**
 * Creates a simple copy job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    conf.setInt(MRJobConfig.NUM_MAPS, 3);
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("DataMoveJob");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(DataCopyMapper.class);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    theJob.setReducerClass(DataCopyReducer.class);
    theJob.setNumReduceTasks(1);
    return theJob;
}

14 Source : MR_WLA.java
with GNU General Public License v3.0
from monsonlee

public int run(String[] args) throws Exception {
    String jobName = "wla_baidu";
    String inputPath = args[0];
    String outputPath = args[1];
    Path path = new Path(outputPath);
    // Delete the output directory
    path.getFileSystem(getConf()).delete(path, true);
    // 1. Organize all the code into a Topology-like class
    Job job = Job.getInstance(getConf(), jobName);
    // 2. To run as a packaged jar, the following line is required
    job.setJarByClass(MR_WLA.class);
    // 3. Specify the HDFS input
    FileInputFormat.setInputPaths(job, inputPath);
    // 4. Specify the map class
    job.setMapperClass(WLA_Mapper.class);
    // 5. Specify the <key,value> types of the map output
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // 6. Specify the reduce class
    job.setReducerClass(WLA_Reducer.class);
    // 7. Specify the <key,value> types of the reduce output
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // 8. Specify the HDFS output
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job.waitForCompletion(true) ? 0 : 1;
}

14 Source : FlinkUtil.java
with Apache License 2.0
from Kyligence

public static DataSet parseInputPath(String inputPath, FileSystem fs, ExecutionEnvironment env, Class keyClass, Class valueClass) throws IOException {
    List<String> inputFolders = Lists.newArrayList();
    Path inputHDFSPath = new Path(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputHDFSPath);
    boolean hasDir = false;
    for (FileStatus stat : fileStatuses) {
        if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
            hasDir = true;
            inputFolders.add(stat.getPath().toString());
        }
    }
    if (!hasDir) {
        return env.createInput(HadoopInputs.readSequenceFile(keyClass, valueClass, inputHDFSPath.toString()));
    }
    Job job = Job.getInstance();
    FileInputFormat.setInputPaths(job, StringUtil.join(inputFolders, ","));
    return env.createInput(HadoopInputs.createHadoopInput(new SequenceFileInputFormat(), keyClass, valueClass, job));
}

14 Source : WALPlayer.java
with Apache License 2.0
from fengchen8086

/**
 * Sets up the actual job.
 *
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public Job createSubmittableJob(String[] args) throws IOException {
    Configuration conf = getConf();
    setupTime(conf, HLogInputFormat.START_TIME_KEY);
    setupTime(conf, HLogInputFormat.END_TIME_KEY);
    Path inputDir = new Path(args[0]);
    String[] tables = args[1].split(",");
    String[] tableMap;
    if (args.length > 2) {
        tableMap = args[2].split(",");
        if (tableMap.length != tables.length) {
            throw new IOException("The same number of tables and mapping must be provided.");
        }
    } else {
        // if no mapping is specified, map each table to itself
        tableMap = tables;
    }
    conf.setStrings(TABLES_KEY, tables);
    conf.setStrings(TABLE_MAP_KEY, tableMap);
    Job job = new Job(conf, NAME + "_" + inputDir);
    job.setJarByClass(WALPlayer.class);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(WALInputFormat.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        // the bulk HFile case
        if (tables.length != 1) {
            throw new IOException("Exactly one table must be specified for the bulk export option");
        }
        TableName tableName = TableName.valueOf(tables[0]);
        job.setMapperClass(WALKeyValueMapper.class);
        job.setReducerClass(KeyValueSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputValueClass(KeyValue.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
            Table table = conn.getTable(tableName);
            RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
        }
        TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Preconditions.class);
    } else {
        // output to live cluster
        job.setMapperClass(WALMapper.class);
        job.setOutputFormatClass(MultiTableOutputFormat.class);
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        // No reducers.
        job.setNumReduceTasks(0);
    }
    return job;
}

14 Source : ImportTsv.java
with Apache License 2.0
from fengchen8086

/**
 * Sets up the actual job.
 *
 * @param conf  The current configuration.
 * @param args  The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException, ClassNotFoundException {
    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }
            // See if a non-default Mapper was set
            String mapperClassName = conf.get(MAPPER_CONF_KEY);
            Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;
            TableName tableName = TableName.valueOf(args[0]);
            Path inputDir = new Path(args[1]);
            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(mapperClass);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(mapperClass);
            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            String[] columns = conf.getStrings(COLUMNS_CONF_KEY);
            if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
                String fileLoc = conf.get(CREDENTIALS_LOCATION);
                Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
                job.getCredentials().addAll(cred);
            }
            if (hfileOutPath != null) {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
                        LOG.warn(errorMsg);
                        // TODO: this is backwards. Instead of depending on the existence of a table,
                        // create a sane splits file for HFileOutputFormat based on data sampling.
                        createTable(admin, tableName, columns);
                    } else {
                        LOG.error(errorMsg);
                        throw new TableNotFoundException(errorMsg);
                    }
                }
                try (Table table = connection.getTable(tableName);
                    RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
                    boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
                    // if no.strict is false then check column family
                    if (!noStrict) {
                        ArrayList<String> unmatchedFamilies = new ArrayList<String>();
                        Set<String> cfSet = getColumnFamilies(columns);
                        HTableDescriptor tDesc = table.getTableDescriptor();
                        for (String cf : cfSet) {
                            if (tDesc.getFamily(Bytes.toBytes(cf)) == null) {
                                unmatchedFamilies.add(cf);
                            }
                        }
                        if (unmatchedFamilies.size() > 0) {
                            ArrayList<String> familyNames = new ArrayList<String>();
                            for (HColumnDescriptor family : table.getTableDescriptor().getFamilies()) {
                                familyNames.add(family.getNameAsString());
                            }
                            String msg = "Column Families " + unmatchedFamilies + " specified in " + COLUMNS_CONF_KEY + " does not match with any of the table " + tableName + " column families " + familyNames + ".\n" + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY + "=true.\n";
                            usage(msg);
                            System.exit(-1);
                        }
                    }
                    job.setReducerClass(PutSortReducer.class);
                    Path outputDir = new Path(hfileOutPath);
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    if (mapperClass.equals(TsvImporterTextMapper.class)) {
                        job.setMapOutputValueClass(Text.class);
                        job.setReducerClass(TextSortReducer.class);
                    } else {
                        job.setMapOutputValueClass(Put.class);
                        job.setCombinerClass(PutCombiner.class);
                    }
                    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
                }
            } else {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    LOG.error(errorMsg);
                    throw new TableNotFoundException(errorMsg);
                }
                if (mapperClass.equals(TsvImporterTextMapper.class)) {
                    usage(TsvImporterTextMapper.class.toString() + " should not be used for non bulkloading case. use " + TsvImporterMapper.class.toString() + " or custom mapper whose value type is Put.");
                    System.exit(-1);
                }
                // No reducers. Just write straight to table. Call initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);
            }
            TableMapReduceUtil.addDependencyJars(job);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Function.class);
        }
    }
    return job;
}

13 Source : QuasiMonteCarlo.java
with Apache License 2.0
from NJUJYB

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(QmcMapper.class);
    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);
    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);
    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);
    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException("Tmp directory " + fs.makeQualified(tmpDir) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }
    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }
        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");
        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }
        // compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())).divide(numTotal, RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}

13 Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB

/**
 * Creates a simple fail job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple fail job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createFailJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    FileSystem fs = outdir.getFileSystem(conf);
    if (fs.exists(outdir)) {
        fs.delete(outdir, true);
    }
    conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("Fail-Job");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(FailMapper.class);
    theJob.setReducerClass(Reducer.class);
    theJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    return theJob;
}

13 Source : QuasiMonteCarlo.java
with Apache License 2.0
from naver

/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(QmcMapper.class);
    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);
    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);
    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);
    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException("Tmp directory " + fs.makeQualified(tmpDir) + " already exists.  Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }
    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }
        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");
        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }
        // compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())).divide(numTotal, RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}

13 Source : ColumnToRowJob.java
with Apache License 2.0
from Kyligence

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);
        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(kylinConfig);
        CubeInstance cube = cubeMgr.getCube(cubeName);
        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        setJobClasspath(job, cube.getConfig());
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        job.setMapperClass(ColumnToRowMapper.class);
        job.setInputFormatClass(ColumnarSplitDataInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(ColumnToRowReducer.class);
        job.setNumReduceTasks(calReducerNum(input));
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.getConfiguration().set("dfs.block.size", cube.getConfig().getStreamingBasicCuboidJobDFSBlockSize());
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
        CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
        attachSegmentMetadataWithDict(segment, job.getConfiguration());
        this.deletePath(job.getConfiguration(), output);
        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}

12 Source : TestSpeculativeExecution.java
with Apache License 2.0
from NJUJYB

private Job runSpecTest(boolean mapspec, boolean redspec) throws IOException, ClassNotFoundException, InterruptedException {
    Path first = createTempFile("specexec_map_input1", "a\nz");
    Path secnd = createTempFile("specexec_map_input2", "a\nz");
    Configuration conf = mrCluster.getConfig();
    conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, mapspec);
    conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, redspec);
    conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR, TestSpecEstimator.class, TaskRuntimeEstimator.class);
    Job job = Job.getInstance(conf);
    job.setJarByClass(TestSpeculativeExecution.class);
    job.setMapperClass(SpeculativeMapper.class);
    job.setReducerClass(SpeculativeReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);
    FileInputFormat.setInputPaths(job, first);
    FileInputFormat.addInputPath(job, secnd);
    FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);
    // Delete output directory if it exists.
    try {
        localFs.delete(TEST_OUT_DIR, true);
    } catch (IOException e) {
    // ignore
    }
    // Creates the Job Configuration
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.setMaxMapAttempts(2);
    job.submit();
    return job;
}

12 Source : HalyardPreSplit.java
with Apache License 2.0
from Merck

@Override
protected int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    try (Connection con = ConnectionFactory.createConnection(getConf())) {
        try (Admin admin = con.getAdmin()) {
            if (admin.tableExists(TableName.valueOf(target))) {
                LOG.log(Level.WARNING, "Pre-split cannot modify already existing table {0}", target);
                return -1;
            }
        }
    }
    getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
    if (cmd.hasOption('g'))
        getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
    getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
    TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, AbstractRDFHandler.class, RDFFormat.class, RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    getConf().setInt(DECIMATION_FACTOR_PROPERTY, Integer.parseInt(cmd.getOptionValue('d', String.valueOf(DEFAULT_DECIMATION_FACTOR))));
    getConf().setLong(SPLIT_LIMIT_PROPERTY, Long.parseLong(cmd.getOptionValue('l', String.valueOf(DEFAULT_SPLIT_LIMIT))));
    Job job = Job.getInstance(getConf(), "HalyardPreSplit -> " + target);
    job.getConfiguration().set(TABLE_PROPERTY, target);
    job.setJarByClass(HalyardPreSplit.class);
    job.setMapperClass(RDFDecimatingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, source);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    job.setReducerClass(PreSplitReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("PreSplit Calculation Completed..");
        return 0;
    }
    return -1;
}

12 Source : InMemCuboidFromBaseCuboidJob.java
with Apache License 2.0
from Kyligence

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_CUBING_JOB_ID);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_CUBOID_MODE);
        options.addOption(OPTION_NEED_UPDATE_BASE_CUBOID_SHARD);
        parseOptions(options, args);
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentID = getOptionValue(OPTION_SEGMENT_ID);
        String output = getOptionValue(OPTION_OUTPUT_PATH);
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment cubeSeg = cube.getSegmentById(segmentID);
        String cubingJobId = getOptionValue(OPTION_CUBING_JOB_ID);
        String cuboidModeName = getOptionValue(OPTION_CUBOID_MODE);
        if (cuboidModeName == null) {
            cuboidModeName = CuboidModeEnum.CURRENT.toString();
        }
        String ifNeedUpdateBaseCuboidShard = getOptionValue(OPTION_NEED_UPDATE_BASE_CUBOID_SHARD);
        if (ifNeedUpdateBaseCuboidShard == null) {
            ifNeedUpdateBaseCuboidShard = "false";
        }
        CuboidScheduler cuboidScheduler = CuboidSchedulerUtil.getCuboidSchedulerByMode(cubeSeg, cuboidModeName);
        if (checkSkip(cubingJobId)) {
            logger.info("Skip job " + getOptionValue(OPTION_JOB_NAME) + " for " + cubeSeg);
            return 0;
        }
        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        logger.info("Starting: " + job.getJobName());
        setJobClasspath(job, cube.getConfig());
        // add metadata to distributed cache
        attachSegmentMetadataWithAll(cubeSeg, job.getConfiguration());
        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
        job.getConfiguration().set(BatchConstants.CFG_CUBOID_MODE, cuboidModeName);
        job.getConfiguration().set(BatchConstants.CFG_UPDATE_SHARD, ifNeedUpdateBaseCuboidShard);
        String input = getOptionValue(OPTION_INPUT_PATH);
        FileInputFormat.setInputPaths(job, new Path(input));
        job.setInputFormatClass(SequenceFileInputFormat.class);
        // set mapper
        job.setMapperClass(InMemCuboidFromBaseCuboidMapper.class);
        job.setMapOutputKeyClass(ByteArrayWritable.class);
        job.setMapOutputValueClass(ByteArrayWritable.class);
        // set output
        job.setReducerClass(InMemCuboidFromBaseCuboidReducer.class);
        job.setNumReduceTasks(MapReduceUtil.getInmemCubingReduceTaskNum(cubeSeg, cuboidScheduler));
        // the cuboid file and KV class must be compatible with 0.7 version for smooth upgrade
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        Path outputPath = new Path(output);
        FileOutputFormat.setOutputPath(job, outputPath);
        HadoopUtil.deletePath(job.getConfiguration(), outputPath);
        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}

12 Source : StepTwoJob.java
with Apache License 2.0
from junneyang

public int run(String[] args) throws Exception {
    /*Configuration conf = getConf();
	    JobClient client = new JobClient(conf);
	    ClusterStatus cluster = client.getClusterStatus();
	    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
	    String join_reduces = conf.get(REDUCES_PER_HOST);
	    if (join_reduces != null) {
	       num_reduces = cluster.getTaskTrackers() *
	                       Integer.parseInt(join_reduces);
	    }
	    // Set user-supplied (possibly default) job configs
	    job.setNumReduceTasks(num_reduces);*/
    Configuration conf = new Configuration();
    // conf.set("fs.defaultFS", "hdfs://node-01:9000");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    String commaSeparatedPaths = null;
    String outputDir = null;
    if (otherArgs.length == 2) {
        commaSeparatedPaths = otherArgs[0];
        outputDir = otherArgs[1];
    } else {
        System.err.println("Usage: <in>[,<in>...] <out>");
        // System.exit(-1);
        return -1;
    }
    Job job = Job.getInstance(conf);
    job.setJobName("StepTwoJob");
    job.setJarByClass(StepTwoJob.class);
    // job.setInputFormatClass(TextInputFormat.class);
    // job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(StepTwoMapper.class);
    // job.setCombinerClass(StepOneReducer.class);
    job.setReducerClass(StepTwoReducer.class);
    // job.setPartitionerClass(FlowPartition.class);
    // job.setNumReduceTasks(5);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(job, commaSeparatedPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    return job.waitForCompletion(true) ? 0 : 1;
}
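
Note: the junneyang jobs in this group pass otherArgs[0], a single command-line argument that may contain several comma-separated directories, directly to setInputPaths(Job, String). A minimal sketch of the two ways to accumulate input paths, with purely illustrative directory names:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class InputPathVariantsSketch {

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "input-path-variants");
        // setInputPaths replaces whatever input paths were configured before this call.
        FileInputFormat.setInputPaths(job, new Path("/logs/2019"), new Path("/logs/2020"));
        // addInputPath appends one more path to the list instead of replacing it.
        FileInputFormat.addInputPath(job, new Path("/logs/2021"));
    }
}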

12 Source : StepOneJob.java
with Apache License 2.0
from junneyang

public int run(String[] args) throws Exception {
    /*Configuration conf = getConf();
	    JobClient client = new JobClient(conf);
	    ClusterStatus cluster = client.getClusterStatus();
	    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
	    String join_reduces = conf.get(REDUCES_PER_HOST);
	    if (join_reduces != null) {
	       num_reduces = cluster.getTaskTrackers() *
	                       Integer.parseInt(join_reduces);
	    }
	    // Set user-supplied (possibly default) job configs
	    job.setNumReduceTasks(num_reduces);*/
    Configuration conf = new Configuration();
    // conf.set("fs.defaultFS", "hdfs://node-01:9000");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    String commaSeparatedPaths = null;
    String outputDir = null;
    if (otherArgs.length == 2) {
        commaSeparatedPaths = otherArgs[0];
        outputDir = otherArgs[1];
    } else {
        System.err.println("Usage: <in>[,<in>...] <out>");
        // System.exit(-1);
        return -1;
    }
    Job job = Job.getInstance(conf);
    job.setJobName("StepOneJob");
    job.setJarByClass(StepOneJob.class);
    // job.setInputFormatClass(TextInputFormat.class);
    // job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(StepOneMapper.class);
    job.setCombinerClass(StepOneReducer.class);
    job.setReducerClass(StepOneReducer.class);
    // job.setPartitionerClass(FlowPartition.class);
    // job.setNumReduceTasks(5);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, commaSeparatedPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    return job.waitForCompletion(true) ? 0 : 1;
}

12 Source : FlowSumJob.java
with Apache License 2.0
from junneyang

public int run(String[] args) throws Exception {
    /*Configuration conf = getConf();
	    JobClient client = new JobClient(conf);
	    ClusterStatus cluster = client.getClusterStatus();
	    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
	    String join_reduces = conf.get(REDUCES_PER_HOST);
	    if (join_reduces != null) {
	       num_reduces = cluster.getTaskTrackers() *
	                       Integer.parseInt(join_reduces);
	    }
	    // Set user-supplied (possibly default) job configs
	    job.setNumReduceTasks(num_reduces);*/
    Configuration conf = new Configuration();
    // conf.set("fs.defaultFS", "hdfs://node-01:9000");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    String commaSeparatedPaths = null;
    String outputDir = null;
    if (otherArgs.length == 2) {
        commaSeparatedPaths = otherArgs[0];
        outputDir = otherArgs[1];
    } else {
        System.err.println("Usage: <in>[,<in>...] <out>");
        // System.exit(-1);
        return -1;
    }
    Job job = Job.getInstance(conf);
    job.setJobName("FlowSumJob");
    job.setJarByClass(FlowSumJob.class);
    job.setMapperClass(FlowSumMapper.class);
    // job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(FlowSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FlowBean.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(FlowBean.class);
    FileInputFormat.setInputPaths(job, commaSeparatedPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    return job.waitForCompletion(true) ? 0 : 1;
}

12 Source : FlowSortJob.java
with Apache License 2.0
from junneyang

public int run(String[] args) throws Exception {
    /*Configuration conf = getConf();
	    JobClient client = new JobClient(conf);
	    ClusterStatus cluster = client.getClusterStatus();
	    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
	    String join_reduces = conf.get(REDUCES_PER_HOST);
	    if (join_reduces != null) {
	       num_reduces = cluster.getTaskTrackers() *
	                       Integer.parseInt(join_reduces);
	    }
	    // Set user-supplied (possibly default) job configs
	    job.setNumReduceTasks(num_reduces);*/
    Configuration conf = new Configuration();
    // conf.set("fs.defaultFS", "hdfs://node-01:9000");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    String commaSeparatedPaths = null;
    String outputDir = null;
    if (otherArgs.length == 2) {
        commaSeparatedPaths = otherArgs[0];
        outputDir = otherArgs[1];
    } else {
        System.err.println("Usage: <in>[,<in>...] <out>");
        // System.exit(-1);
        return -1;
    }
    Job job = Job.getInstance(conf);
    job.setJobName("FlowSortJob");
    job.setJarByClass(FlowSortJob.class);
    job.setMapperClass(FlowSortMapper.class);
    // job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(FlowSortReducer.class);
    job.setOutputKeyClass(FlowBean.class);
    job.setOutputValueClass(NullWritable.class);
    job.setMapOutputKeyClass(FlowBean.class);
    job.setMapOutputValueClass(NullWritable.class);
    FileInputFormat.setInputPaths(job, commaSeparatedPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    return job.waitForCompletion(true) ? 0 : 1;
}

11 Source : LindenJob.java
with Apache License 2.0
from XiaoMi

@Override
public int run(String[] strings) throws Exception {
    Configuration conf = getConf();
    String dir = conf.get(LindenJobConfig.INPUT_DIR, null);
    logger.info("input dir:" + dir);
    Path inputPath = new Path(StringUtils.unEscapeString(dir));
    Path outputPath = new Path(conf.get(LindenJobConfig.OUTPUT_DIR));
    String indexPath = conf.get(LindenJobConfig.INDEX_PATH);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    if (fs.exists(new Path(indexPath))) {
        fs.delete(new Path(indexPath), true);
    }
    int numShards = conf.getInt(LindenJobConfig.NUM_SHARDS, 1);
    Shard[] shards = createShards(indexPath, numShards);
    Shard.setIndexShards(conf, shards);
    // empty trash;
    (new Trash(conf)).expunge();
    Job job = Job.getInstance(conf, "linden-hadoop-indexing");
    job.setJarByClass(LindenJob.class);
    job.setMapperClass(LindenMapper.class);
    job.setCombinerClass(LindenCombiner.class);
    job.setReducerClass(LindenReducer.class);
    job.setMapOutputKeyClass(Shard.class);
    job.setMapOutputValueClass(IntermediateForm.class);
    job.setOutputKeyClass(Shard.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(IndexUpdateOutputFormat.class);
    job.setReduceSpeculativeExecution(false);
    job.setNumReduceTasks(numShards);
    String lindenSchemaFile = conf.get(LindenJobConfig.SCHEMA_FILE_URL);
    if (lindenSchemaFile == null) {
        throw new IOException("no schema file is found");
    }
    logger.info("Adding schema file: " + lindenSchemaFile);
    job.addCacheFile(new URI(lindenSchemaFile + "#lindenSchema"));
    String lindenPropertiesFile = conf.get(LindenJobConfig.LINDEN_PROPERTIES_FILE_URL);
    if (lindenPropertiesFile == null) {
        throw new IOException("no linden properties file is found");
    }
    logger.info("Adding linden properties file: " + lindenPropertiesFile);
    job.addCacheFile(new URI(lindenPropertiesFile + "#lindenProperties"));
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outputPath);
    Path[] inputs = FileInputFormat.getInputPaths(job);
    StringBuilder buffer = new StringBuilder(inputs[0].toString());
    for (int i = 1; i < inputs.length; i++) {
        buffer.append(",");
        buffer.append(inputs[i].toString());
    }
    logger.info("mapreduce.input.dir = " + buffer.toString());
    logger.info("mapreduce.output.dir = " + FileOutputFormat.getOutputPath(job).toString());
    logger.info("mapreduce.job.num.reduce.tasks = " + job.getNumReduceTasks());
    logger.info(shards.length + " shards = " + conf.get(LindenJobConfig.INDEX_SHARDS));
    logger.info("mapreduce.input.format.clreplaced = " + job.getInputFormatClreplaced());
    logger.info("mapreduce.output.format.clreplaced = " + job.getOutputFormatClreplaced());
    logger.info("mapreduce.cluster.temp.dir = " + conf.get(MRJobConfig.TEMP_DIR));
    job.waitForCompletion(true);
    if (!job.isSuccessful()) {
        throw new RuntimeException("Job failed");
    }
    return 0;
}
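
Note: LindenJob reads the configured paths back with FileInputFormat.getInputPaths(job) just to log them. A stripped-down sketch of that round trip, assuming a hypothetical pair of input directories:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class GetInputPathsSketch {

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "get-input-paths-sketch");
        FileInputFormat.setInputPaths(job, "/index/input/a,/index/input/b");
        // getInputPaths returns the Path[] that the call above registered on the job.
        for (Path p : FileInputFormat.getInputPaths(job)) {
            System.out.println("configured input: " + p);
        }
    }
}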

11 Source : MapJoin.java
with Apache License 2.0
from whirlys

public static void main(String[] args) {
    try {
        // create the configuration
        Configuration conf = new Configuration();
        // get the command-line arguments
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        // abort the program when the arguments are invalid
        if (otherArgs.length != 3) {
            System.err.println("Usage:MyMapJoin<in1> <in2> <out>");
            System.exit(1);
        }
        // assign the paths
        INPUT_PATH1 = otherArgs[0];
        INPUT_PATH2 = otherArgs[1];
        OUT_PATH = otherArgs[2];
        // create the file system
        FileSystem fileSystem = FileSystem.get(new URI(OUT_PATH), conf);
        // if the output directory exists, delete it
        if (fileSystem.exists(new Path(OUT_PATH))) {
            fileSystem.delete(new Path(OUT_PATH), true);
        }
        // file to load into memory via the distributed cache (any number of files can be added)
        DistributedCache.addCacheFile(new Path(INPUT_PATH2).toUri(), conf);
        // create the job
        Job job = new Job(conf, MapJoin.class.getName());
        // package as a jar to run on the cluster; this line is the key
        job.setJarByClass(MapJoin.class);
        // 1.1 set the input directory and the input format class
        FileInputFormat.setInputPaths(job, INPUT_PATH1);
        job.setInputFormatClass(TextInputFormat.class);
        // 1.2 set the custom Mapper class and the key/value types of the map output
        job.setMapperClass(MapJoinMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Emp_Dep.class);
        // 1.3 set the partitioner and the number of reducers
        job.setPartitionerClass(HashPartitioner.class);
        job.setNumReduceTasks(0);
        FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
        // submit the job and exit
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } catch (Exception e) {
        e.printStackTrace();
    }
}

11 Source : JiduRunner.java
with Apache License 2.0
from whirlys

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(JiduRunner.class);
    job.setMapperClass(JiduMapper.class);
    job.setReducerClass(JiduReducer.class);
    job.setCombinerClass(JiduReducer.class);
    // set a custom partitioner class; if not set, HashPartitioner is the default
    job.setPartitionerClass(JiduPartitioner.class);
    // set the number of reduce tasks to 4 + 1
    job.setNumReduceTasks(5);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    Path out = new Path(args[1]);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(out)) {
        fs.delete(out, true);
    }
    FileOutputFormat.setOutputPath(job, out);
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

11 Source : CommonFriendStep2.java
with Apache License 2.0
from whirlys

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    job.setJarByClass(CommonFriendStep2.class);
    // set the job's mapper and reducer classes
    job.setMapperClass(CommonFansStep2Mapper.class);
    job.setReducerClass(CommonFansStep2Reducer.class);
    // set the key/value types output by the map phase
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // set the key/value types output by the reduce phase
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // check whether the output directory already exists and delete it if so, to avoid having to delete it by hand repeatedly while testing
    FileSystem fs = FileSystem.get(conf);
    Path out = new Path(args[1]);
    if (fs.exists(out)) {
        fs.delete(out, true);
    }
    // set the data input and output directories
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, out);
    // submit the job to YARN or the local runner for execution
    job.waitForCompletion(true);
}

11 Source : WordCount2Application.java
with Apache License 2.0
from ukihsoroy

/**
 * Define the Driver: it encapsulates all the information of the MapReduce job
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. create the Configuration
    Configuration configuration = new Configuration();
    // 1.1 prepare to clean up the existing output directory
    Path outputPath = new Path(args[1]);
    FileSystem fs = FileSystem.get(configuration);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
        _LOGGER.info("rm -rf output path success.");
    }
    // 2. create a Job
    Job job = Job.getInstance(configuration, "wordcount");
    // 3. set the job's main class
    job.setJarByClass(WordCount2Application.class);
    // 4. set the input path for the job
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. set the Map-related parameters
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. set the Reduce-related parameters
    job.setReducerClass(WordCountReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. set the output path for the job
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

11 Source : CombinerApplication.java
with Apache License 2.0
from ukihsoroy

/**
 * Define the Driver: it encapsulates all the information of the MapReduce job
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. create the Configuration
    Configuration configuration = new Configuration();
    // 1.1 prepare to clean up the existing output directory
    Path outputPath = new Path(args[1]);
    FileSystem fs = FileSystem.get(configuration);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
        _LOGGER.info("rm -rf output path success.");
    }
    // 2. create a Job
    Job job = Job.getInstance(configuration, "wordcount");
    // 3. set the job's main class
    job.setJarByClass(CombinerApplication.class);
    // 4. set the input path for the job
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. set the Map-related parameters
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. set the Reduce-related parameters
    job.setReducerClass(WordCountReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. set the combiner class on the Job; logically it is exactly the same as our Reduce
    job.setCombinerClass(WordCountReduce.class);
    // 7. set the output path for the job
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

11 Source : LogAnalysisApplication.java
with Apache License 2.0
from ukihsoroy

/**
 * Define the Driver: it encapsulates all the information of the MapReduce job
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. create the Configuration
    Configuration configuration = new Configuration();
    // 1.1 prepare to clean up the existing output directory
    Path outputPath = new Path(args[1]);
    FileSystem fs = FileSystem.get(configuration);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
        _LOGGER.info("rm -rf output path success.");
    }
    // 2. create a Job
    Job job = Job.getInstance(configuration, "loganalysis");
    // 3. set the job's main class
    job.setJarByClass(LogAnalysisApplication.class);
    // 4. set the input path for the job
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. set the Map-related parameters
    job.setMapperClass(LogAnalysisApplication.LogAnalysisMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. set the Reduce-related parameters
    job.setReducerClass(LogAnalysisApplication.LogAnalysisReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. set the output path for the job
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

11 Source : DistributedPentomino.java
with Apache License 2.0
from NJUJYB

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    if (args.length == 0) {
        System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    // check for passed parameters, otherwise use defaults
    int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
    int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
    int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
    for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-depth")) {
            depth = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-height")) {
            height = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-width")) {
            width = Integer.parseInt(args[++i].trim());
        }
    }
    // now set the values within conf for M/R tasks to read, this
    // will ensure values are set preventing MAPREDUCE-4678
    conf.setInt(Pentomino.WIDTH, width);
    conf.setInt(Pentomino.HEIGHT, height);
    conf.setInt(Pentomino.DEPTH, depth);
    Class<? extends Pentomino> pentClass = conf.getClass(Pentomino.CLASS, OneSidedPentomino.class, Pentomino.class);
    int numMaps = conf.getInt(MRJobConfig.NUM_MAPS, DEFAULT_MAPS);
    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        Job job = new Job(conf);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        job.setJarByClass(PentMap.class);
        job.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        long inputSize = createInputDirectory(fileSys, input, pent, depth);
        // for forcing the number of maps
        FileInputFormat.setMaxInputSplitSize(job, (inputSize / numMaps));
        // the keys are the prefix strings
        job.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        job.setOutputValueClass(Text.class);
        job.setMapperClass(PentMap.class);
        job.setReducerClass(Reducer.class);
        job.setNumReduceTasks(1);
        return (job.waitForCompletion(true) ? 0 : 1);
    } finally {
        fileSys.delete(input, true);
    }
}

11 Source : DistributedPentomino.java
with Apache License 2.0
from naver

public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    if (args.length == 0) {
        System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    // check for passed parameters, otherwise use defaults
    int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
    int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
    int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
    for (int i = 0; i < args.length; i++) {
        if (args[i].equalsIgnoreCase("-depth")) {
            depth = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-height")) {
            height = Integer.parseInt(args[++i].trim());
        } else if (args[i].equalsIgnoreCase("-width")) {
            width = Integer.parseInt(args[++i].trim());
        }
    }
    // now set the values within conf for M/R tasks to read, this
    // will ensure values are set preventing MAPREDUCE-4678
    conf.setInt(Pentomino.WIDTH, width);
    conf.setInt(Pentomino.HEIGHT, height);
    conf.setInt(Pentomino.DEPTH, depth);
    Class<? extends Pentomino> pentClass = conf.getClass(Pentomino.CLASS, OneSidedPentomino.class, Pentomino.class);
    int numMaps = conf.getInt(MRJobConfig.NUM_MAPS, DEFAULT_MAPS);
    Path output = new Path(args[0]);
    Path input = new Path(output + "_input");
    FileSystem fileSys = FileSystem.get(conf);
    try {
        Job job = Job.getInstance(conf);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        job.setJarByClass(PentMap.class);
        job.setJobName("dancingElephant");
        Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
        pent.initialize(width, height);
        long inputSize = createInputDirectory(fileSys, input, pent, depth);
        // for forcing the number of maps
        FileInputFormat.setMaxInputSplitSize(job, (inputSize / numMaps));
        // the keys are the prefix strings
        job.setOutputKeyClass(Text.class);
        // the values are puzzle solutions
        job.setOutputValueClass(Text.class);
        job.setMapperClass(PentMap.class);
        job.setReducerClass(Reducer.class);
        job.setNumReduceTasks(1);
        return (job.waitForCompletion(true) ? 0 : 1);
    } finally {
        fileSys.delete(input, true);
    }
}
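
Note: both DistributedPentomino variants above combine setInputPaths with setMaxInputSplitSize(job, inputSize / numMaps) to force roughly numMaps splits out of the generated input. A minimal sketch of the same pattern, with an invented input size and map count:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class SplitSizeSketch {

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "split-size-sketch");
        FileInputFormat.setInputPaths(job, new Path("/pentomino/generated_input"));
        // Illustrative numbers only: cap each split so ~10 MB of input yields about 20 map tasks.
        long totalInputBytes = 10L * 1024 * 1024;
        int desiredMaps = 20;
        FileInputFormat.setMaxInputSplitSize(job, totalInputBytes / desiredMaps);
    }
}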

11 Source : FlowPartitionJob.java
with Apache License 2.0
from junneyang

public int run(String[] args) throws Exception {
    /*Configuration conf = getConf();
	    JobClient client = new JobClient(conf);
	    ClusterStatus cluster = client.getClusterStatus();
	    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
	    String join_reduces = conf.get(REDUCES_PER_HOST);
	    if (join_reduces != null) {
	       num_reduces = cluster.getTaskTrackers() *
	                       Integer.parseInt(join_reduces);
	    }
	    // Set user-supplied (possibly default) job configs
	    job.setNumReduceTasks(num_reduces);*/
    Configuration conf = new Configuration();
    // conf.set("fs.defaultFS", "hdfs://node-01:9000");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    String commaSeparatedPaths = null;
    String outputDir = null;
    if (otherArgs.length == 2) {
        commaSeparatedPaths = otherArgs[0];
        outputDir = otherArgs[1];
    } else {
        System.err.println("Usage: <in>[,<in>...] <out>");
        // System.exit(-1);
        return -1;
    }
    Job job = Job.getInstance(conf);
    job.setJobName("FlowParreplacedionJob");
    job.setJarByClreplaced(FlowParreplacedionJob.clreplaced);
    // job.setInputFormatClreplaced(TextInputFormat.clreplaced);
    // job.setOutputFormatClreplaced(TextOutputFormat.clreplaced);
    job.setMapperClreplaced(FlowParreplacedionMapper.clreplaced);
    // job.setCombinerClreplaced(WordCountReducer.clreplaced);
    job.setReducerClreplaced(FlowParreplacedionReducer.clreplaced);
    job.setParreplacedionerClreplaced(FlowParreplacedion.clreplaced);
    job.setNumReduceTasks(5);
    job.setOutputKeyClreplaced(Text.clreplaced);
    job.setOutputValueClreplaced(FlowBean.clreplaced);
    job.setMapOutputKeyClreplaced(Text.clreplaced);
    job.setMapOutputValueClreplaced(FlowBean.clreplaced);
    FileInputFormat.setInputPaths(job, commaSeparatedPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    return job.waitForCompletion(true) ? 0 : 1;
}

11 Source : Step4.java
with MIT License
from josonle

public static boolean run(Configuration config, Map<String, String> paths) throws IOException, ClassNotFoundException, InterruptedException {
    String jobName = "step4";
    Job job = Job.getInstance(config, jobName);
    job.setJarByClass(Step4.class);
    job.setJar("export\\ItemCF.jar");
    job.setMapperClass(Step4_Mapper.class);
    job.setReducerClass(Step4_Reducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    Path[] inPaths = new Path[] { new Path(paths.get("Step4Input1")), new Path(paths.get("Step4Input2")) };
    Path outpath = new Path(paths.get("Step4Output"));
    FileInputFormat.setInputPaths(job, inPaths);
    FileOutputFormat.setOutputPath(job, outpath);
    FileSystem fs = FileSystem.get(config);
    if (fs.exists(outpath)) {
        fs.delete(outpath, true);
    }
    return job.waitForCompletion(true);
}
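
Note: Step4 builds a Path[] from the outputs of two earlier steps and hands the whole array to the varargs overload of setInputPaths. A minimal sketch of the same idea, with made-up step output directories:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class MultiStepInputSketch {

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "multi-step-input-sketch");
        // The Path... overload accepts a plain array, so several upstream outputs
        // can be fed into one downstream job.
        Path[] inputs = new Path[] { new Path("/itemcf/step2/output"), new Path("/itemcf/step3/output") };
        FileInputFormat.setInputPaths(job, inputs);
    }
}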

10 Source : PartitionerApplication.java
with Apache License 2.0
from ukihsoroy

/**
 * Define the Driver: it encapsulates all the information of the MapReduce job
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. create the Configuration
    Configuration configuration = new Configuration();
    // 1.1 prepare to clean up the existing output directory
    Path outputPath = new Path(args[1]);
    FileSystem fs = FileSystem.get(configuration);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
        _LOGGER.info("rm -rf output path success.");
    }
    // 2. create a Job
    Job job = Job.getInstance(configuration, "wordcount");
    // 3. set the job's main class
    job.setJarByClass(PartitionerApplication.class);
    // 4. set the input path for the job
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. set the Map-related parameters
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. set the Reduce-related parameters
    job.setReducerClass(WordCountReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. set the job's Partitioner
    job.setPartitionerClass(PhonePartitioner.class);
    // 8. set 4 reducers, one per partition
    job.setNumReduceTasks(4);
    // 9. set the output path for the job
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 10. submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

10 Source : MergeDictJob.java
with Apache License 2.0
from Kyligence

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String jobName = getOptionValue(OPTION_JOB_NAME);
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        setJobClasspath(job, cube.getConfig());
        job.setJobName(jobName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        logger.info("MergeDictReducer output path: {}", output);
        // Mapper
        job.setMapperClass(MergeDictMapper.class);
        job.setInputFormatClass(ColumnarSplitDictInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // Reducer
        job.setReducerClass(MergeDictReducer.class);
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        attachCubeMetadata(cube, job.getConfiguration());
        deletePath(job.getConfiguration(), output);
        return waitForCompletion(job);
    } catch (Exception e) {
        printUsage(options);
        logger.error("job {} failed. ", job.getJobName(), e);
        throw e;
    }
}

10 Source : WordCountJob.java
with Apache License 2.0
from junneyang

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    // conf.set("fs.defaultFS", "hdfs://node-01:9000");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    String commaSeparatedPaths = null;
    String outputDir = null;
    if (otherArgs.length == 2) {
        commaSeparatedPaths = otherArgs[0];
        outputDir = otherArgs[1];
    } else {
        System.err.println("Usage: <in>[,<in>...] <out>");
        System.exit(-1);
    }
    LOGGER.info("==========job start");
    Job job = Job.getInstance(conf);
    job.setJobName("WordCountJob");
    job.setJarByClass(WordCountJob.class);
    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(WordCountReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, commaSeparatedPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    if (job.waitForCompletion(true)) {
        LOGGER.info("==========job success");
    } else {
        LOGGER.info("==========job failed");
    }
}

9 Source : HalyardBulkLoad.java
with Apache License 2.0
from Merck

@Override
protected int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String workdir = cmd.getOptionValue('w');
    String target = cmd.getOptionValue('t');
    getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
    getConf().setBoolean(VERIFY_DATATYPE_VALUES_PROPERTY, cmd.hasOption('d'));
    getConf().setBoolean(TRUNCATE_PROPERTY, cmd.hasOption('r'));
    getConf().setInt(SPLIT_BITS_PROPERTY, Integer.parseInt(cmd.getOptionValue('b', "3")));
    if (cmd.hasOption('g'))
        getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
    getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, Long.parseLong(cmd.getOptionValue('e', String.valueOf(System.currentTimeMillis()))));
    if (cmd.hasOption('m'))
        getConf().setLong("mapreduce.input.fileinputformat.split.maxsize", Long.parseLong(cmd.getOptionValue('m')));
    TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, AbstractRDFHandler.class, RDFFormat.class, RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + workdir + " -> " + target);
    job.setJarByClass(HalyardBulkLoad.class);
    job.setMapperClass(RDFMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(KeyValue.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    job.setSpeculativeExecution(false);
    job.setReduceSpeculativeExecution(false);
    try (HTable hTable = HalyardTableUtils.getTable(getConf(), target, true, getConf().getInt(SPLIT_BITS_PROPERTY, 3))) {
        HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
        FileInputFormat.setInputDirRecursive(job, true);
        FileInputFormat.setInputPaths(job, source);
        FileOutputFormat.setOutputPath(job, new Path(workdir));
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            if (getConf().getBoolean(TRUNCATE_PROPERTY, false)) {
                HalyardTableUtils.truncateTable(hTable).close();
            }
            new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(workdir), hTable);
            LOG.info("Bulk Load Completed..");
            return 0;
        }
    }
    return -1;
}

9 Source : MergeJob.java
with MIT License
from josonle

public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. set the HDFS configuration
    String namenode_ip = "192.168.17.10";
    String hdfs = "hdfs://" + namenode_ip + ":9000";
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", hdfs);
    conf.set("mapreduce.app-submission.cross-platform", "true");
    // 2. set the MapReduce job configuration
    // job name
    String jobName = "MergeMultipleFiles";
    Job job = Job.getInstance(conf, jobName);
    // specify the runtime job class
    job.setJarByClass(MultiInOutput.class);
    // specify the local jar package
    job.setJar("export\\MergeMultipleFiles.jar");
    // set the Mapper output key type
    job.setMapOutputKeyClass(Text.class);
    // set the Mapper output value type
    job.setMapOutputValueClass(BytesWritable.class);
    job.setMapperClass(MergeMapper.class);
    // input data format
    job.setInputFormatClass(MyInputFormat.class);
    // output as files, using the sequence file output format class
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    // set the job output path
    String inputDir = "/workspace/mergeFiles/data";
    // output directory
    String outputDir = "/workspace/mergeFiles/output";
    Path outPath = new Path(hdfs + outputDir);
    Path inputPath = new Path(hdfs + inputDir);
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, outPath);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outPath)) {
        fs.delete(outPath, true);
    }
    // run the job
    System.out.println("Job: " + jobName + " is running...");
    if (job.waitForCompletion(true)) {
        System.out.println("success!");
        System.exit(0);
    } else {
        System.out.println("failed!");
        System.exit(1);
    }
}

8 Source : TestMapReduceAggregates.java
with Apache License 2.0
from NJUJYB

public static void launch() throws Exception {
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    int numOfInputLines = 20;
    Path OUTPUT_DIR = new Path("build/test/output_for_aggregates_test");
    Path INPUT_DIR = new Path("build/test/input_for_aggregates_test");
    String inputFile = "input.txt";
    fs.delete(INPUT_DIR, true);
    fs.mkdirs(INPUT_DIR);
    fs.delete(OUTPUT_DIR, true);
    StringBuffer inputData = new StringBuffer();
    StringBuffer expectedOutput = new StringBuffer();
    expectedOutput.append("max\t19\n");
    expectedOutput.append("min\t1\n");
    FSDataOutputStream fileOut = fs.create(new Path(INPUT_DIR, inputFile));
    for (int i = 1; i < numOfInputLines; i++) {
        expectedOutput.append("count_").append(idFormat.format(i));
        expectedOutput.append("\t").append(i).append("\n");
        inputData.append(idFormat.format(i));
        for (int j = 1; j < i; j++) {
            inputData.append(" ").append(idFormat.format(i));
        }
        inputData.append("\n");
    }
    expectedOutput.append("value_as_string_max\t9\n");
    expectedOutput.append("value_as_string_min\t1\n");
    expectedOutput.append("uniq_count\t15\n");
    fileOut.write(inputData.toString().getBytes("utf-8"));
    fileOut.close();
    System.out.println("inputData:");
    System.out.println(inputData.toString());
    conf.setInt(ValueAggregatorJobBase.DESCRIPTOR_NUM, 1);
    conf.set(ValueAggregatorJobBase.DESCRIPTOR + ".0", "UserDefined,org.apache.hadoop.mapreduce.lib.aggregate.AggregatorTests");
    conf.setLong(UniqValueCount.MAX_NUM_UNIQUE_VALUES, 14);
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, INPUT_DIR);
    job.setInputFormatClass(TextInputFormat.class);
    FileOutputFormat.setOutputPath(job, OUTPUT_DIR);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1);
    job.setMapperClass(ValueAggregatorMapper.class);
    job.setReducerClass(ValueAggregatorReducer.class);
    job.setCombinerClass(ValueAggregatorCombiner.class);
    job.waitForCompletion(true);
    assertTrue(job.isSuccessful());
    // 
    // Finally, we compare the reconstructed answer key with the
    // original one.  Remember, we need to ignore zero-count items
    // in the original key.
    // 
    String outdata = MapReduceTestUtil.readOutput(OUTPUT_DIR, conf);
    System.out.println("full out data:");
    System.out.println(outdata.toString());
    outdata = outdata.substring(0, expectedOutput.toString().length());
    assertEquals(expectedOutput.toString(), outdata);
    fs.delete(OUTPUT_DIR, true);
    fs.delete(INPUT_DIR, true);
}
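
Note: the aggregates test above writes input.txt under INPUT_DIR and then points setInputPaths at the directory itself; by default FileInputFormat picks up the regular files directly beneath a directory path, skipping names that start with "_" or ".". A minimal sketch of that directory-as-input usage, with the same illustrative local path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class DirectoryInputSketch {

    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "directory-input-sketch");
        // Every regular file directly under this directory becomes job input;
        // subdirectories are only picked up when setInputDirRecursive(job, true) is enabled.
        FileInputFormat.setInputPaths(job, new Path("build/test/input_for_aggregates_test"));
    }
}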

8 Source : FilterRecommendCuboidDataJob.java
with Apache License 2.0
from Kyligence

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);
        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentID = getOptionValue(OPTION_SEGMENT_ID);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment optSegment = cube.getSegmentById(segmentID);
        CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment);
        logger.info("Starting: " + job.getJobName());
        setJobClasspath(job, cube.getConfig());
        // Mapper
        job.setMapperClass(FilterRecommendCuboidDataMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // Input
        job.setInputFormatClass(SequenceFileInputFormat.class);
        FileInputFormat.setInputPaths(job, input);
        // Reducer
        ConvergeCuboidDataUtil.setupReducer(job, originalSegment, output);
        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
        // add metadata to distributed cache
        attachSegmentMetadata(originalSegment, job.getConfiguration(), false, false);
        this.deletePath(job.getConfiguration(), output);
        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
