Here are examples of the Java API org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(), taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
52 Examples
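Before the examples, note that setInputPaths has two overloads in the new MapReduce API: one taking one or more Path objects, the other a single comma-separated String of paths. A minimal sketch of both, assuming an already configured Job named job (the input directories are illustrative):

// Path varargs overload: replaces the job's input list with these paths
FileInputFormat.setInputPaths(job, new Path("/data/in1"), new Path("/data/in2"));
// String overload: equivalent, parsed as a comma-separated list of paths
FileInputFormat.setInputPaths(job, "/data/in1,/data/in2");

The examples below use one or the other form.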
19
Source : WordCount1Application.java
with Apache License 2.0
from ukihsoroy
/**
 * Defines the driver: encapsulates all the information of the MapReduce job.
 * @param args
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // 1. Create the Configuration
    Configuration configuration = new Configuration();
    // 2. Create a Job
    Job job = Job.getInstance(configuration, "wordcount");
    // 3. Set the job's driver class
    job.setJarByClass(WordCount1Application.class);
    // 4. Set the input path for the job
    FileInputFormat.setInputPaths(job, new Path(args[0]));
    // 5. Set the map-side parameters
    job.setMapperClass(WordCountMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // 6. Set the reduce-side parameters
    job.setReducerClass(WordCountReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // 7. Set the output path for the job
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    // 8. Submit
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
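A driver like the one above is typically launched with the hadoop jar command. A hypothetical invocation (the jar name and HDFS paths are illustrative, not from the source):

hadoop jar wordcount.jar WordCount1Application /input/words.txt /output/wc

Since args[0] feeds setInputPaths and args[1] feeds setOutputPath, the first path must already exist and the second must not.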
19
Source : Main.java
with Apache License 2.0
from lfz757077613
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(new Configuration(), "wordCount");
    // Set the main class of the jar
    job.setJarByClass(Main.class);
    // Set the mapper
    job.setMapperClass(MyMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    // Set the reducer
    job.setReducerClass(MyReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    // Run a local reduce first (combiner) to cut down the amount of data sent
    job.setCombinerClass(MyReducer.class);
    // Set the partitioner
    // Not used for now
    // job.setPartitionerClass(MyPartitioner.class);
    // job.setNumReduceTasks(2);
    // Set the job input and output paths. Note that the output must not already exist;
    // the output is a directory holding the results and the job completion status.
    FileInputFormat.setInputPaths(job, new Path("hdfs://localhost:8020/access.log"));
    FileOutputFormat.setOutputPath(job, new Path("hdfs://localhost:8020/output"));
    System.exit(job.waitForCompletion(true) ? 0 : -1);
}
19
Source : CalculateStatsFromBaseCuboidJob.java
with Apache License 2.0
from Kyligence
private void setupMapper(Path input) throws IOException {
    FileInputFormat.setInputPaths(job, input);
    job.setMapperClass(CalculateStatsFromBaseCuboidMapper.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
}
19
Source : SampleUploader.java
with Apache License 2.0
from fengchen8086
/**
 * Job configuration.
 */
public static Job configureJob(Configuration conf, String[] args) throws IOException {
    Path inputPath = new Path(args[0]);
    String tableName = args[1];
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Uploader.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setMapperClass(Uploader.class);
    // No reducers. Just write straight to table. Call initTableReducerJob
    // because it sets up the TableOutputFormat.
    TableMapReduceUtil.initTableReducerJob(tableName, null, job);
    job.setNumReduceTasks(0);
    return job;
}
18
Source : TestMRJobs.java
with Apache License 2.0
from NJUJYB
public void _testDistributedCache(String jobJarPath) throws Exception {
    if (!(new File(MiniMRYarnCluster.APPJAR)).exists()) {
        LOG.info("MRAppJar " + MiniMRYarnCluster.APPJAR + " not found. Not running test.");
        return;
    }
    // Create a temporary file of length 1.
    Path first = createTempFile("distributed.first", "x");
    // Create two jars with a single file inside them.
    Path second = makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
    Path third = makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
    Path fourth = makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);
    Job job = Job.getInstance(mrCluster.getConfig());
    // Set the job jar to a new "dummy" jar so we can check that it's extracted
    // properly
    job.setJar(jobJarPath);
    // Because the job jar is a "dummy" jar, we need to include the jar with
    // DistributedCacheChecker or it won't be able to find it
    Path distributedCacheCheckerJar = new Path(JarFinder.getJar(DistributedCacheChecker.class));
    job.addFileToClassPath(distributedCacheCheckerJar.makeQualified(localFs.getUri(), distributedCacheCheckerJar.getParent()));
    job.setMapperClass(DistributedCacheChecker.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    FileInputFormat.setInputPaths(job, first);
    // Creates the Job Configuration
    job.addCacheFile(new URI(first.toUri().toString() + "#distributed.first.symlink"));
    job.addFileToClassPath(second);
    // The AppMaster jar itself
    job.addFileToClassPath(APP_JAR.makeQualified(localFs.getUri(), APP_JAR.getParent()));
    job.addArchiveToClassPath(third);
    job.addCacheArchive(fourth.toUri());
    // speed up failures
    job.setMaxMapAttempts(1);
    job.submit();
    String trackingUrl = job.getTrackingURL();
    String jobId = job.getJobID().toString();
    Assert.assertTrue(job.waitForCompletion(false));
    Assert.assertTrue("Tracking URL was " + trackingUrl + " but didn't Match Job ID " + jobId, trackingUrl.endsWith(jobId.substring(jobId.lastIndexOf("_")) + "/"));
}
18
Source : TestMapReduceLazyOutput.java
with Apache License 2.0
from NJUJYB
private static void runTestLazyOutput(Configuration conf, Path output, int numReducers, boolean createLazily) throws Exception {
    Job job = Job.getInstance(conf, "Test-Lazy-Output");
    FileInputFormat.setInputPaths(job, INPUT);
    FileOutputFormat.setOutputPath(job, output);
    job.setJarByClass(TestMapReduceLazyOutput.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(numReducers);
    job.setMapperClass(TestMapper.class);
    job.setReducerClass(TestReducer.class);
    if (createLazily) {
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
    } else {
        job.setOutputFormatClass(TextOutputFormat.class);
    }
    assertTrue(job.waitForCompletion(true));
}
17
Source : Grep.java
with Apache License 2.0
from NJUJYB
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4)
        conf.set(RegexMapper.GROUP, args[3]);
    Job grepJob = new Job(conf);
    try {
        grepJob.setJobName("grep-search");
        FileInputFormat.setInputPaths(grepJob, args[0]);
        grepJob.setMapperClass(RegexMapper.class);
        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);
        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);
        grepJob.waitForCompletion(true);
        Job sortJob = new Job(conf);
        sortJob.setJobName("grep-sort");
        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);
        sortJob.setMapperClass(InverseMapper.class);
        // write a single file
        sortJob.setNumReduceTasks(1);
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        // sort by decreasing freq
        sortJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
        sortJob.waitForCompletion(true);
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
    return 0;
}
17
Source : TestMRWithDistributedCache.java
with Apache License 2.0
from NJUJYB
private void testWithConf(Configuration conf) throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    // Create a temporary file of length 1.
    Path first = createTempFile("distributed.first", "x");
    // Create two jars with a single file inside them.
    Path second = makeJar(new Path(TEST_ROOT_DIR, "distributed.second.jar"), 2);
    Path third = makeJar(new Path(TEST_ROOT_DIR, "distributed.third.jar"), 3);
    Path fourth = makeJar(new Path(TEST_ROOT_DIR, "distributed.fourth.jar"), 4);
    Job job = Job.getInstance(conf);
    job.setMapperClass(DistributedCacheCheckerMapper.class);
    job.setReducerClass(DistributedCacheCheckerReducer.class);
    job.setOutputFormatClass(NullOutputFormat.class);
    FileInputFormat.setInputPaths(job, first);
    // Creates the Job Configuration
    job.addCacheFile(new URI(first.toUri().toString() + "#distributed.first.symlink"));
    job.addFileToClassPath(second);
    job.addArchiveToClassPath(third);
    job.addCacheArchive(fourth.toUri());
    // speed up failures
    job.setMaxMapAttempts(1);
    job.submit();
    assertTrue(job.waitForCompletion(false));
}
17
Source : Grep.java
with Apache License 2.0
from naver
public int run(String[] args) throws Exception {
    if (args.length < 3) {
        System.out.println("Grep <inDir> <outDir> <regex> [<group>]");
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    Path tempDir = new Path("grep-temp-" + Integer.toString(new Random().nextInt(Integer.MAX_VALUE)));
    Configuration conf = getConf();
    conf.set(RegexMapper.PATTERN, args[2]);
    if (args.length == 4)
        conf.set(RegexMapper.GROUP, args[3]);
    Job grepJob = Job.getInstance(conf);
    try {
        grepJob.setJobName("grep-search");
        grepJob.setJarByClass(Grep.class);
        FileInputFormat.setInputPaths(grepJob, args[0]);
        grepJob.setMapperClass(RegexMapper.class);
        grepJob.setCombinerClass(LongSumReducer.class);
        grepJob.setReducerClass(LongSumReducer.class);
        FileOutputFormat.setOutputPath(grepJob, tempDir);
        grepJob.setOutputFormatClass(SequenceFileOutputFormat.class);
        grepJob.setOutputKeyClass(Text.class);
        grepJob.setOutputValueClass(LongWritable.class);
        grepJob.waitForCompletion(true);
        Job sortJob = Job.getInstance(conf);
        sortJob.setJobName("grep-sort");
        sortJob.setJarByClass(Grep.class);
        FileInputFormat.setInputPaths(sortJob, tempDir);
        sortJob.setInputFormatClass(SequenceFileInputFormat.class);
        sortJob.setMapperClass(InverseMapper.class);
        // write a single file
        sortJob.setNumReduceTasks(1);
        FileOutputFormat.setOutputPath(sortJob, new Path(args[1]));
        // sort by decreasing freq
        sortJob.setSortComparatorClass(LongWritable.DecreasingComparator.class);
        sortJob.waitForCompletion(true);
    } finally {
        FileSystem.get(conf).delete(tempDir, true);
    }
    return 0;
}
17
Source : CuboidJob.java
with Apache License 2.0
from Kyligence
private void configureMapperInputFormat(CubeSegment cubeSeg) throws Exception {
    String input = getOptionValue(OPTION_INPUT_PATH);
    if ("FLAT_TABLE".equals(input)) {
        // base cuboid case
        IMRTableInputFormat flatTableInputFormat = MRUtil.getBatchCubingInputSide(cubeSeg).getFlatTableInputFormat();
        flatTableInputFormat.configureJob(job);
    } else {
        // n-dimension cuboid case
        IMROutput2.IMROutputFormat outputFormat = MRUtil.getBatchCubingOutputSide2(cubeSeg).getOutputFormat();
        outputFormat.configureJobInput(job, input);
        FileInputFormat.setInputPaths(job, new Path(input));
    }
}
17
Source : HCatalogTestUtils.java
with Apache License 2.0
from dkhadoop
public List<HCatRecord> loadHCatTable(String dbName, String tableName, Map<String, String> partKeyMap, HCatSchema tblSchema, List<HCatRecord> records) throws Exception {
    Job job = new Job(conf, "HCat load job");
    job.setJarByClass(this.getClass());
    job.setMapperClass(HCatWriterMapper.class);
    // Just write 10 lines to the file to drive the mapper
    Path path = new Path(fs.getWorkingDirectory(), "mapreduce/HCatTableIndexInput");
    job.getConfiguration().setInt(ConfigurationConstants.PROP_MAPRED_MAP_TASKS, 1);
    int writeCount = records.size();
    recsToLoad.clear();
    recsToLoad.addAll(records);
    createInputFile(path, writeCount);
    // input/output settings
    HCatWriterMapper.setWrittenRecordCount(0);
    FileInputFormat.setInputPaths(job, path);
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(HCatOutputFormat.class);
    OutputJobInfo outputJobInfo = OutputJobInfo.create(dbName, tableName, partKeyMap);
    HCatOutputFormat.setOutput(job, outputJobInfo);
    HCatOutputFormat.setSchema(job, tblSchema);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(DefaultHCatRecord.class);
    job.setNumReduceTasks(0);
    SqoopHCatUtilities.addJars(job, new SqoopOptions());
    boolean success = job.waitForCompletion(true);
    if (!success) {
        throw new IOException("Loading HCatalog table with test records failed");
    }
    utils.invokeOutputCommitterForLocalMode(job);
    LOG.info("Loaded " + HCatWriterMapper.writtenRecordCount + " records");
    return recsToLoad;
}
16
Source : Sort.java
with Apache License 2.0
from NJUJYB
/**
 * The main driver for the sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 * job tracker.
 */
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = conf.get(REDUCES_PER_HOST);
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            // exits
            return printUsage();
        }
    }
    // Set user-supplied (possibly default) job configs
    job = new Job(conf);
    job.setJobName("sorter");
    job.setJarByClass(Sort.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(num_reduces);
    job.setInputFormatClass(inputFormatClass);
    job.setOutputFormatClass(outputFormatClass);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(job, otherArgs.get(0));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));
    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        job.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(job)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
        InputSampler.<K, V>writePartitionFile(job, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, conf);
    }
    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from " + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
16
Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB
/**
 * Creates a simple kill job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple kill job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createKillJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("Kill-Job");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(KillMapper.class);
    theJob.setReducerClass(Reducer.class);
    theJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    return theJob;
}
16
Source : Sort.java
with Apache License 2.0
from naver
/**
 * The main driver for the sort program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 * job tracker.
 */
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = conf.get(REDUCES_PER_HOST);
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = BytesWritable.class;
    List<String> otherArgs = new ArrayList<String>();
    InputSampler.Sampler<K, V> sampler = null;
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-totalOrder".equals(args[i])) {
                double pcnt = Double.parseDouble(args[++i]);
                int numSamples = Integer.parseInt(args[++i]);
                int maxSplits = Integer.parseInt(args[++i]);
                if (0 >= maxSplits)
                    maxSplits = Integer.MAX_VALUE;
                sampler = new InputSampler.RandomSampler<K, V>(pcnt, numSamples, maxSplits);
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            // exits
            return printUsage();
        }
    }
    // Set user-supplied (possibly default) job configs
    job = Job.getInstance(conf);
    job.setJobName("sorter");
    job.setJarByClass(Sort.class);
    job.setMapperClass(Mapper.class);
    job.setReducerClass(Reducer.class);
    job.setNumReduceTasks(num_reduces);
    job.setInputFormatClass(inputFormatClass);
    job.setOutputFormatClass(outputFormatClass);
    job.setOutputKeyClass(outputKeyClass);
    job.setOutputValueClass(outputValueClass);
    // Make sure there are exactly 2 parameters left.
    if (otherArgs.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + otherArgs.size() + " instead of 2.");
        return printUsage();
    }
    FileInputFormat.setInputPaths(job, otherArgs.get(0));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs.get(1)));
    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        job.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(job)[0];
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(conf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(conf, partitionFile);
        InputSampler.<K, V>writePartitionFile(job, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, conf);
    }
    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from " + FileInputFormat.getInputPaths(job)[0] + " into " + FileOutputFormat.getOutputPath(job) + " with " + num_reduces + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    int ret = job.waitForCompletion(true) ? 0 : 1;
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return ret;
}
16
Source : Import.java
with Apache License 2.0
from fengchen8086
/**
 * Sets up the actual job.
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException {
    TableName tableName = TableName.valueOf(args[0]);
    conf.set(TABLE_NAME, tableName.getNameAsString());
    Path inputDir = new Path(args[1]);
    Job job = new Job(conf, NAME + "_" + tableName);
    job.setJarByClass(Importer.class);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    // make sure we get the filter in the jars
    try {
        Class<? extends Filter> filter = conf.getClass(FILTER_CLASS_CONF_KEY, null, Filter.class);
        if (filter != null) {
            TableMapReduceUtil.addDependencyJars(conf, filter);
        }
    } catch (Exception e) {
        throw new IOException(e);
    }
    if (hfileOutPath != null) {
        job.setMapperClass(KeyValueImporter.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
            Table table = conn.getTable(tableName);
            RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            job.setReducerClass(KeyValueSortReducer.class);
            Path outputDir = new Path(hfileOutPath);
            FileOutputFormat.setOutputPath(job, outputDir);
            job.setMapOutputKeyClass(ImmutableBytesWritable.class);
            job.setMapOutputValueClass(KeyValue.class);
            HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Preconditions.class);
        }
    } else {
        // No reducers. Just write straight to table. Call initTableReducerJob
        // because it sets up the TableOutputFormat.
        job.setMapperClass(Importer.class);
        TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
        job.setNumReduceTasks(0);
    }
    return job;
}
15
Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB
public static Job createJob(Configuration conf, Path inDir, Path outDir, int numInputFiles, int numReds, String input) throws IOException {
    Job job = Job.getInstance(conf);
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(outDir)) {
        fs.delete(outDir, true);
    }
    if (fs.exists(inDir)) {
        fs.delete(inDir, true);
    }
    fs.mkdirs(inDir);
    for (int i = 0; i < numInputFiles; ++i) {
        DataOutputStream file = fs.create(new Path(inDir, "part-" + i));
        file.writeBytes(input);
        file.close();
    }
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);
    job.setNumReduceTasks(numReds);
    return job;
}
15
Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB
/**
 * Creates a simple copy job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a data copy job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createCopyJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    conf.setInt(MRJobConfig.NUM_MAPS, 3);
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("DataMoveJob");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(DataCopyMapper.class);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    theJob.setReducerClass(DataCopyReducer.class);
    theJob.setNumReduceTasks(1);
    return theJob;
}
14
Source : MR_WLA.java
with GNU General Public License v3.0
from monsonlee
public int run(String[] args) throws Exception {
    String jobName = "wla_baidu";
    String inputPath = args[0];
    String outputPath = args[1];
    Path path = new Path(outputPath);
    // Delete the output directory
    path.getFileSystem(getConf()).delete(path, true);
    // 1. Organize all the code into a Topology-like class
    Job job = Job.getInstance(getConf(), jobName);
    // 2. This line is required when running from a packaged jar
    job.setJarByClass(MR_WLA.class);
    // 3. Specify the HDFS input
    FileInputFormat.setInputPaths(job, inputPath);
    // 4. Specify the map class
    job.setMapperClass(WLA_Mapper.class);
    // 5. Specify the types of the map output <key, value>
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    // 6. Specify the reduce class
    job.setReducerClass(WLA_Reducer.class);
    // 7. Specify the types of the reduce output <key, value>
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // 8. Specify the HDFS output
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    return job.waitForCompletion(true) ? 0 : 1;
}
14
Source : FlinkUtil.java
with Apache License 2.0
from Kyligence
public static DataSet parseInputPath(String inputPath, FileSystem fs, ExecutionEnvironment env, Class keyClass, Class valueClass) throws IOException {
    List<String> inputFolders = Lists.newArrayList();
    Path inputHDFSPath = new Path(inputPath);
    FileStatus[] fileStatuses = fs.listStatus(inputHDFSPath);
    boolean hasDir = false;
    for (FileStatus stat : fileStatuses) {
        if (stat.isDirectory() && !stat.getPath().getName().startsWith("_")) {
            hasDir = true;
            inputFolders.add(stat.getPath().toString());
        }
    }
    if (!hasDir) {
        return env.createInput(HadoopInputs.readSequenceFile(keyClass, valueClass, inputHDFSPath.toString()));
    }
    Job job = Job.getInstance();
    FileInputFormat.setInputPaths(job, StringUtil.join(inputFolders, ","));
    return env.createInput(HadoopInputs.createHadoopInput(new SequenceFileInputFormat(), keyClass, valueClass, job));
}
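Note the String overload at work here: setInputPaths(job, StringUtil.join(inputFolders, ",")) relies on the String variant parsing its argument as a comma-separated list, so each discovered folder is registered as a separate input path.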
14
Source : WALPlayer.java
with Apache License 2.0
from fengchen8086
/**
 * Sets up the actual job.
 *
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public Job createSubmittableJob(String[] args) throws IOException {
    Configuration conf = getConf();
    setupTime(conf, HLogInputFormat.START_TIME_KEY);
    setupTime(conf, HLogInputFormat.END_TIME_KEY);
    Path inputDir = new Path(args[0]);
    String[] tables = args[1].split(",");
    String[] tableMap;
    if (args.length > 2) {
        tableMap = args[2].split(",");
        if (tableMap.length != tables.length) {
            throw new IOException("The same number of tables and mapping must be provided.");
        }
    } else {
        // if no mapping is specified, map each table to itself
        tableMap = tables;
    }
    conf.setStrings(TABLES_KEY, tables);
    conf.setStrings(TABLE_MAP_KEY, tableMap);
    Job job = new Job(conf, NAME + "_" + inputDir);
    job.setJarByClass(WALPlayer.class);
    FileInputFormat.setInputPaths(job, inputDir);
    job.setInputFormatClass(WALInputFormat.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
    if (hfileOutPath != null) {
        // the bulk HFile case
        if (tables.length != 1) {
            throw new IOException("Exactly one table must be specified for the bulk export option");
        }
        TableName tableName = TableName.valueOf(tables[0]);
        job.setMapperClass(WALKeyValueMapper.class);
        job.setReducerClass(KeyValueSortReducer.class);
        Path outputDir = new Path(hfileOutPath);
        FileOutputFormat.setOutputPath(job, outputDir);
        job.setMapOutputValueClass(KeyValue.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
            Table table = conn.getTable(tableName);
            RegionLocator regionLocator = conn.getRegionLocator(tableName)) {
            HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
        }
        TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Preconditions.class);
    } else {
        // output to live cluster
        job.setMapperClass(WALMapper.class);
        job.setOutputFormatClass(MultiTableOutputFormat.class);
        TableMapReduceUtil.addDependencyJars(job);
        TableMapReduceUtil.initCredentials(job);
        // No reducers.
        job.setNumReduceTasks(0);
    }
    return job;
}
14
Source : ImportTsv.java
with Apache License 2.0
from fengchen8086
/**
 * Sets up the actual job.
 *
 * @param conf The current configuration.
 * @param args The command line parameters.
 * @return The newly created job.
 * @throws IOException When setting up the job fails.
 */
public static Job createSubmittableJob(Configuration conf, String[] args) throws IOException, ClassNotFoundException {
    Job job = null;
    try (Connection connection = ConnectionFactory.createConnection(conf)) {
        try (Admin admin = connection.getAdmin()) {
            // Support non-XML supported characters
            // by re-encoding the passed separator as a Base64 string.
            String actualSeparator = conf.get(SEPARATOR_CONF_KEY);
            if (actualSeparator != null) {
                conf.set(SEPARATOR_CONF_KEY, Base64.encodeBytes(actualSeparator.getBytes()));
            }
            // See if a non-default Mapper was set
            String mapperClassName = conf.get(MAPPER_CONF_KEY);
            Class mapperClass = mapperClassName != null ? Class.forName(mapperClassName) : DEFAULT_MAPPER;
            TableName tableName = TableName.valueOf(args[0]);
            Path inputDir = new Path(args[1]);
            String jobName = conf.get(JOB_NAME_CONF_KEY, NAME + "_" + tableName.getNameAsString());
            job = Job.getInstance(conf, jobName);
            job.setJarByClass(mapperClass);
            FileInputFormat.setInputPaths(job, inputDir);
            job.setInputFormatClass(TextInputFormat.class);
            job.setMapperClass(mapperClass);
            String hfileOutPath = conf.get(BULK_OUTPUT_CONF_KEY);
            String[] columns = conf.getStrings(COLUMNS_CONF_KEY);
            if (StringUtils.isNotEmpty(conf.get(CREDENTIALS_LOCATION))) {
                String fileLoc = conf.get(CREDENTIALS_LOCATION);
                Credentials cred = Credentials.readTokenStorageFile(new File(fileLoc), conf);
                job.getCredentials().addAll(cred);
            }
            if (hfileOutPath != null) {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    if ("yes".equalsIgnoreCase(conf.get(CREATE_TABLE_CONF_KEY, "yes"))) {
                        LOG.warn(errorMsg);
                        // TODO: this is backwards. Instead of depending on the existence of a table,
                        // create a sane splits file for HFileOutputFormat based on data sampling.
                        createTable(admin, tableName, columns);
                    } else {
                        LOG.error(errorMsg);
                        throw new TableNotFoundException(errorMsg);
                    }
                }
                try (Table table = connection.getTable(tableName);
                    RegionLocator regionLocator = connection.getRegionLocator(tableName)) {
                    boolean noStrict = conf.getBoolean(NO_STRICT_COL_FAMILY, false);
                    // if no.strict is false then check column family
                    if (!noStrict) {
                        ArrayList<String> unmatchedFamilies = new ArrayList<String>();
                        Set<String> cfSet = getColumnFamilies(columns);
                        HTableDescriptor tDesc = table.getTableDescriptor();
                        for (String cf : cfSet) {
                            if (tDesc.getFamily(Bytes.toBytes(cf)) == null) {
                                unmatchedFamilies.add(cf);
                            }
                        }
                        if (unmatchedFamilies.size() > 0) {
                            ArrayList<String> familyNames = new ArrayList<String>();
                            for (HColumnDescriptor family : table.getTableDescriptor().getFamilies()) {
                                familyNames.add(family.getNameAsString());
                            }
                            String msg = "Column Families " + unmatchedFamilies + " specified in " + COLUMNS_CONF_KEY + " does not match with any of the table " + tableName + " column families " + familyNames + ".\n" + "To disable column family check, use -D" + NO_STRICT_COL_FAMILY + "=true.\n";
                            usage(msg);
                            System.exit(-1);
                        }
                    }
                    job.setReducerClass(PutSortReducer.class);
                    Path outputDir = new Path(hfileOutPath);
                    FileOutputFormat.setOutputPath(job, outputDir);
                    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
                    if (mapperClass.equals(TsvImporterTextMapper.class)) {
                        job.setMapOutputValueClass(Text.class);
                        job.setReducerClass(TextSortReducer.class);
                    } else {
                        job.setMapOutputValueClass(Put.class);
                        job.setCombinerClass(PutCombiner.class);
                    }
                    HFileOutputFormat2.configureIncrementalLoad(job, table.getTableDescriptor(), regionLocator);
                }
            } else {
                if (!admin.tableExists(tableName)) {
                    String errorMsg = format("Table '%s' does not exist.", tableName);
                    LOG.error(errorMsg);
                    throw new TableNotFoundException(errorMsg);
                }
                if (mapperClass.equals(TsvImporterTextMapper.class)) {
                    usage(TsvImporterTextMapper.class.toString() + " should not be used for non bulkloading case. use " + TsvImporterMapper.class.toString() + " or custom mapper whose value type is Put.");
                    System.exit(-1);
                }
                // No reducers. Just write straight to table. Call initTableReducerJob
                // to set up the TableOutputFormat.
                TableMapReduceUtil.initTableReducerJob(tableName.getNameAsString(), null, job);
                job.setNumReduceTasks(0);
            }
            TableMapReduceUtil.addDependencyJars(job);
            TableMapReduceUtil.addDependencyJars(job.getConfiguration(), com.google.common.base.Function.class);
        }
    }
    return job;
}
13
Source : QuasiMonteCarlo.java
with Apache License 2.0
from NJUJYB
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = new Job(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(QmcMapper.class);
    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);
    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);
    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);
    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException("Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }
    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }
        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");
        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }
        // compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())).divide(numTotal, RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}
13
Source : MapReduceTestUtil.java
with Apache License 2.0
from NJUJYB
/**
 * Creates a simple fail job.
 *
 * @param conf Configuration object
 * @param outdir Output directory.
 * @param indirs Comma separated input directories.
 * @return Job initialized for a simple fail job.
 * @throws Exception If an error occurs creating job configuration.
 */
public static Job createFailJob(Configuration conf, Path outdir, Path... indirs) throws Exception {
    FileSystem fs = outdir.getFileSystem(conf);
    if (fs.exists(outdir)) {
        fs.delete(outdir, true);
    }
    conf.setInt(MRJobConfig.MAP_MAX_ATTEMPTS, 2);
    Job theJob = Job.getInstance(conf);
    theJob.setJobName("Fail-Job");
    FileInputFormat.setInputPaths(theJob, indirs);
    theJob.setMapperClass(FailMapper.class);
    theJob.setReducerClass(Reducer.class);
    theJob.setNumReduceTasks(0);
    FileOutputFormat.setOutputPath(theJob, outdir);
    theJob.setOutputKeyClass(Text.class);
    theJob.setOutputValueClass(Text.class);
    return theJob;
}
13
Source : QuasiMonteCarlo.java
with Apache License 2.0
from naver
/**
 * Run a map/reduce job for estimating Pi.
 *
 * @return the estimated value of Pi
 */
public static BigDecimal estimatePi(int numMaps, long numPoints, Path tmpDir, Configuration conf) throws IOException, ClassNotFoundException, InterruptedException {
    Job job = Job.getInstance(conf);
    // setup job conf
    job.setJobName(QuasiMonteCarlo.class.getSimpleName());
    job.setJarByClass(QuasiMonteCarlo.class);
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputKeyClass(BooleanWritable.class);
    job.setOutputValueClass(LongWritable.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapperClass(QmcMapper.class);
    job.setReducerClass(QmcReducer.class);
    job.setNumReduceTasks(1);
    // turn off speculative execution, because DFS doesn't handle
    // multiple writers to the same file.
    job.setSpeculativeExecution(false);
    // setup input/output directories
    final Path inDir = new Path(tmpDir, "in");
    final Path outDir = new Path(tmpDir, "out");
    FileInputFormat.setInputPaths(job, inDir);
    FileOutputFormat.setOutputPath(job, outDir);
    final FileSystem fs = FileSystem.get(conf);
    if (fs.exists(tmpDir)) {
        throw new IOException("Tmp directory " + fs.makeQualified(tmpDir) + " already exists. Please remove it first.");
    }
    if (!fs.mkdirs(inDir)) {
        throw new IOException("Cannot create input directory " + inDir);
    }
    try {
        // generate an input file for each map task
        for (int i = 0; i < numMaps; ++i) {
            final Path file = new Path(inDir, "part" + i);
            final LongWritable offset = new LongWritable(i * numPoints);
            final LongWritable size = new LongWritable(numPoints);
            final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class, LongWritable.class, CompressionType.NONE);
            try {
                writer.append(offset, size);
            } finally {
                writer.close();
            }
            System.out.println("Wrote input for Map #" + i);
        }
        // start a map/reduce job
        System.out.println("Starting Job");
        final long startTime = System.currentTimeMillis();
        job.waitForCompletion(true);
        final double duration = (System.currentTimeMillis() - startTime) / 1000.0;
        System.out.println("Job Finished in " + duration + " seconds");
        // read outputs
        Path inFile = new Path(outDir, "reduce-out");
        LongWritable numInside = new LongWritable();
        LongWritable numOutside = new LongWritable();
        SequenceFile.Reader reader = new SequenceFile.Reader(fs, inFile, conf);
        try {
            reader.next(numInside, numOutside);
        } finally {
            reader.close();
        }
        // compute estimated value
        final BigDecimal numTotal = BigDecimal.valueOf(numMaps).multiply(BigDecimal.valueOf(numPoints));
        return BigDecimal.valueOf(4).setScale(20).multiply(BigDecimal.valueOf(numInside.get())).divide(numTotal, RoundingMode.HALF_UP);
    } finally {
        fs.delete(tmpDir, true);
    }
}
13
Source : ColumnToRowJob.java
with Apache License 2.0
from Kyligence
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_NAME);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_OUTPUT_PATH);
        parseOptions(options, args);
        Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
        Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentName = getOptionValue(OPTION_SEGMENT_NAME);
        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
        CubeManager cubeMgr = CubeManager.getInstance(kylinConfig);
        CubeInstance cube = cubeMgr.getCube(cubeName);
        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        setJobClasspath(job, cube.getConfig());
        FileInputFormat.setInputPaths(job, input);
        FileOutputFormat.setOutputPath(job, output);
        job.setMapperClass(ColumnToRowMapper.class);
        job.setInputFormatClass(ColumnarSplitDataInputFormat.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(ColumnToRowReducer.class);
        job.setNumReduceTasks(calReducerNum(input));
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.getConfiguration().set("dfs.block.size", cube.getConfig().getStreamingBasicCuboidJobDFSBlockSize());
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
        CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
        attachSegmentMetadataWithDict(segment, job.getConfiguration());
        this.deletePath(job.getConfiguration(), output);
        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
12
Source : TestSpeculativeExecution.java
with Apache License 2.0
from NJUJYB
private Job runSpecTest(boolean mapspec, boolean redspec) throws IOException, ClassNotFoundException, InterruptedException {
    Path first = createTempFile("specexec_map_input1", "a\nz");
    Path secnd = createTempFile("specexec_map_input2", "a\nz");
    Configuration conf = mrCluster.getConfig();
    conf.setBoolean(MRJobConfig.MAP_SPECULATIVE, mapspec);
    conf.setBoolean(MRJobConfig.REDUCE_SPECULATIVE, redspec);
    conf.setClass(MRJobConfig.MR_AM_TASK_ESTIMATOR, TestSpecEstimator.class, TaskRuntimeEstimator.class);
    Job job = Job.getInstance(conf);
    job.setJarByClass(TestSpeculativeExecution.class);
    job.setMapperClass(SpeculativeMapper.class);
    job.setReducerClass(SpeculativeReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setNumReduceTasks(2);
    FileInputFormat.setInputPaths(job, first);
    FileInputFormat.addInputPath(job, secnd);
    FileOutputFormat.setOutputPath(job, TEST_OUT_DIR);
    // Delete output directory if it exists.
    try {
        localFs.delete(TEST_OUT_DIR, true);
    } catch (IOException e) {
        // ignore
    }
    // Creates the Job Configuration
    // The AppMaster jar itself.
    job.addFileToClassPath(APP_JAR);
    job.setMaxMapAttempts(2);
    job.submit();
    return job;
}
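This example combines both entry points for configuring inputs, which highlights the difference between them: setInputPaths replaces whatever input list is already configured on the job, while addInputPath appends to it. A minimal sketch, with illustrative directory names:

// After these two calls the job reads from both directories:
FileInputFormat.setInputPaths(job, new Path("/data/first"));   // resets the input list
FileInputFormat.addInputPath(job, new Path("/data/second"));   // appends a second input
// A second setInputPaths call here would discard both and start over.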
12
Source : HalyardPreSplit.java
with Apache License 2.0
from Merck
@Override
protected int run(CommandLine cmd) throws Exception {
    String source = cmd.getOptionValue('s');
    String target = cmd.getOptionValue('t');
    try (Connection con = ConnectionFactory.createConnection(getConf())) {
        try (Admin admin = con.getAdmin()) {
            if (admin.tableExists(TableName.valueOf(target))) {
                LOG.log(Level.WARNING, "Pre-split cannot modify already existing table {0}", target);
                return -1;
            }
        }
    }
    getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
    if (cmd.hasOption('g'))
        getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
    getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
    TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, AbstractRDFHandler.class, RDFFormat.class, RDFParser.class);
    HBaseConfiguration.addHbaseResources(getConf());
    getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, getConf().getLong(DEFAULT_TIMESTAMP_PROPERTY, System.currentTimeMillis()));
    getConf().setInt(DECIMATION_FACTOR_PROPERTY, Integer.parseInt(cmd.getOptionValue('d', String.valueOf(DEFAULT_DECIMATION_FACTOR))));
    getConf().setLong(SPLIT_LIMIT_PROPERTY, Long.parseLong(cmd.getOptionValue('l', String.valueOf(DEFAULT_SPLIT_LIMIT))));
    Job job = Job.getInstance(getConf(), "HalyardPreSplit -> " + target);
    job.getConfiguration().set(TABLE_PROPERTY, target);
    job.setJarByClass(HalyardPreSplit.class);
    job.setMapperClass(RDFDecimatingMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setInputFormatClass(RioFileInputFormat.class);
    FileInputFormat.setInputDirRecursive(job, true);
    FileInputFormat.setInputPaths(job, source);
    TableMapReduceUtil.addDependencyJars(job);
    TableMapReduceUtil.initCredentials(job);
    job.setReducerClass(PreSplitReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputFormatClass(NullOutputFormat.class);
    if (job.waitForCompletion(true)) {
        LOG.info("PreSplit Calculation Completed..");
        return 0;
    }
    return -1;
}
12
Source : InMemCuboidFromBaseCuboidJob.java
with Apache License 2.0
from Kyligence
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    try {
        options.addOption(OPTION_JOB_NAME);
        options.addOption(OPTION_CUBE_NAME);
        options.addOption(OPTION_SEGMENT_ID);
        options.addOption(OPTION_OUTPUT_PATH);
        options.addOption(OPTION_CUBING_JOB_ID);
        options.addOption(OPTION_INPUT_PATH);
        options.addOption(OPTION_CUBOID_MODE);
        options.addOption(OPTION_NEED_UPDATE_BASE_CUBOID_SHARD);
        parseOptions(options, args);
        String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
        String segmentID = getOptionValue(OPTION_SEGMENT_ID);
        String output = getOptionValue(OPTION_OUTPUT_PATH);
        CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
        CubeInstance cube = cubeMgr.getCube(cubeName);
        CubeSegment cubeSeg = cube.getSegmentById(segmentID);
        String cubingJobId = getOptionValue(OPTION_CUBING_JOB_ID);
        String cuboidModeName = getOptionValue(OPTION_CUBOID_MODE);
        if (cuboidModeName == null) {
            cuboidModeName = CuboidModeEnum.CURRENT.toString();
        }
        String ifNeedUpdateBaseCuboidShard = getOptionValue(OPTION_NEED_UPDATE_BASE_CUBOID_SHARD);
        if (ifNeedUpdateBaseCuboidShard == null) {
            ifNeedUpdateBaseCuboidShard = "false";
        }
        CuboidScheduler cuboidScheduler = CuboidSchedulerUtil.getCuboidSchedulerByMode(cubeSeg, cuboidModeName);
        if (checkSkip(cubingJobId)) {
            logger.info("Skip job " + getOptionValue(OPTION_JOB_NAME) + " for " + cubeSeg);
            return 0;
        }
        job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
        logger.info("Starting: " + job.getJobName());
        setJobClasspath(job, cube.getConfig());
        // add metadata to distributed cache
        attachSegmentMetadataWithAll(cubeSeg, job.getConfiguration());
        // set job configuration
        job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
        job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
        job.getConfiguration().set(BatchConstants.CFG_CUBOID_MODE, cuboidModeName);
        job.getConfiguration().set(BatchConstants.CFG_UPDATE_SHARD, ifNeedUpdateBaseCuboidShard);
        String input = getOptionValue(OPTION_INPUT_PATH);
        FileInputFormat.setInputPaths(job, new Path(input));
        job.setInputFormatClass(SequenceFileInputFormat.class);
        // set mapper
        job.setMapperClass(InMemCuboidFromBaseCuboidMapper.class);
        job.setMapOutputKeyClass(ByteArrayWritable.class);
        job.setMapOutputValueClass(ByteArrayWritable.class);
        // set output
        job.setReducerClass(InMemCuboidFromBaseCuboidReducer.class);
        job.setNumReduceTasks(MapReduceUtil.getInmemCubingReduceTaskNum(cubeSeg, cuboidScheduler));
        // the cuboid file and KV class must be compatible with 0.7 version for smooth upgrade
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        Path outputPath = new Path(output);
        FileOutputFormat.setOutputPath(job, outputPath);
        HadoopUtil.deletePath(job.getConfiguration(), outputPath);
        return waitForCompletion(job);
    } catch (Exception e) {
        logger.error("error in CuboidJob", e);
        printUsage(options);
        throw e;
    } finally {
        if (job != null)
            cleanupTempConfFile(job.getConfiguration());
    }
}
12
Source : StepTwoJob.java
with Apache License 2.0
from junneyang
public int run(String[] args) throws Exception {
    /*Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String join_reduces = conf.get(REDUCES_PER_HOST);
    if (join_reduces != null) {
        num_reduces = cluster.getTaskTrackers() *
            Integer.parseInt(join_reduces);
    }
    // Set user-supplied (possibly default) job configs
    job.setNumReduceTasks(num_reduces);*/
    Configuration conf = new Configuration();
    // conf.set("fs.defaultFS", "hdfs://node-01:9000");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    String commaSeparatedPaths = null;
    String outputDir = null;
    if (otherArgs.length == 2) {
        commaSeparatedPaths = otherArgs[0];
        outputDir = otherArgs[1];
    } else {
        System.err.println("Usage: <in>[,<in>...] <out>");
        // System.exit(-1);
        return -1;
    }
    Job job = Job.getInstance(conf);
    job.setJobName("StepTwoJob");
    job.setJarByClass(StepTwoJob.class);
    // job.setInputFormatClass(TextInputFormat.class);
    // job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(StepTwoMapper.class);
    // job.setCombinerClass(StepOneReducer.class);
    job.setReducerClass(StepTwoReducer.class);
    // job.setPartitionerClass(FlowPartition.class);
    // job.setNumReduceTasks(5);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(job, commaSeparatedPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    return job.waitForCompletion(true) ? 0 : 1;
}
12
Source : StepOneJob.java
with Apache License 2.0
from junneyang
public int run(String[] args) throws Exception {
    /*Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String join_reduces = conf.get(REDUCES_PER_HOST);
    if (join_reduces != null) {
        num_reduces = cluster.getTaskTrackers() *
            Integer.parseInt(join_reduces);
    }
    // Set user-supplied (possibly default) job configs
    job.setNumReduceTasks(num_reduces);*/
    Configuration conf = new Configuration();
    // conf.set("fs.defaultFS", "hdfs://node-01:9000");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    String commaSeparatedPaths = null;
    String outputDir = null;
    if (otherArgs.length == 2) {
        commaSeparatedPaths = otherArgs[0];
        outputDir = otherArgs[1];
    } else {
        System.err.println("Usage: <in>[,<in>...] <out>");
        // System.exit(-1);
        return -1;
    }
    Job job = Job.getInstance(conf);
    job.setJobName("StepOneJob");
    job.setJarByClass(StepOneJob.class);
    // job.setInputFormatClass(TextInputFormat.class);
    // job.setOutputFormatClass(TextOutputFormat.class);
    job.setMapperClass(StepOneMapper.class);
    job.setCombinerClass(StepOneReducer.class);
    job.setReducerClass(StepOneReducer.class);
    // job.setPartitionerClass(FlowPartition.class);
    // job.setNumReduceTasks(5);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    FileInputFormat.setInputPaths(job, commaSeparatedPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    return job.waitForCompletion(true) ? 0 : 1;
}
12
Source : FlowSumJob.java
with Apache License 2.0
from junneyang
public int run(String[] args) throws Exception {
    /*Configuration conf = getConf();
    JobClient client = new JobClient(conf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String join_reduces = conf.get(REDUCES_PER_HOST);
    if (join_reduces != null) {
        num_reduces = cluster.getTaskTrackers() *
            Integer.parseInt(join_reduces);
    }
    // Set user-supplied (possibly default) job configs
    job.setNumReduceTasks(num_reduces);*/
    Configuration conf = new Configuration();
    // conf.set("fs.defaultFS", "hdfs://node-01:9000");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    String commaSeparatedPaths = null;
    String outputDir = null;
    if (otherArgs.length == 2) {
        commaSeparatedPaths = otherArgs[0];
        outputDir = otherArgs[1];
    } else {
        System.err.println("Usage: <in>[,<in>...] <out>");
        // System.exit(-1);
        return -1;
    }
    Job job = Job.getInstance(conf);
    job.setJobName("FlowSumJob");
    job.setJarByClass(FlowSumJob.class);
    job.setMapperClass(FlowSumMapper.class);
    // job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(FlowSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(FlowBean.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(FlowBean.class);
    FileInputFormat.setInputPaths(job, commaSeparatedPaths);
    FileOutputFormat.setOutputPath(job, new Path(outputDir));
    return job.waitForCompletion(true) ? 0 : 1;
}
12
Source : FlowSortJob.java
with Apache License 2.0
from junneyang
public int run(String[] args) throws Exception {
/*Configuration conf = getConf();
JobClient client = new JobClient(conf);
ClusterStatus cluster = client.getClusterStatus();
int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
String join_reduces = conf.get(REDUCES_PER_HOST);
if (join_reduces != null) {
num_reduces = cluster.getTaskTrackers() *
Integer.parseInt(join_reduces);
}
// Set user-supplied (possibly default) job configs
job.setNumReduceTasks(num_reduces);*/
Configuration conf = new Configuration();
// conf.set("fs.defaultFS", "hdfs://node-01:9000");
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
String commaSeparatedPaths = null;
String outputDir = null;
if (otherArgs.length == 2) {
commaSeparatedPaths = otherArgs[0];
outputDir = otherArgs[1];
} else {
System.err.println("Usage: <in>[,<in>...] <out>");
// System.exit(-1);
return -1;
}
Job job = Job.getInstance(conf);
job.setJobName("FlowSortJob");
job.setJarByClass(FlowSortJob.class);
job.setMapperClass(FlowSortMapper.class);
// job.setCombinerClass(WordCountReducer.class);
job.setReducerClass(FlowSortReducer.class);
job.setOutputKeyClass(FlowBean.class);
job.setOutputValueClass(NullWritable.class);
job.setMapOutputKeyClass(FlowBean.class);
job.setMapOutputValueClass(NullWritable.class);
FileInputFormat.setInputPaths(job, commaSeparatedPaths);
FileOutputFormat.setOutputPath(job, new Path(outputDir));
return job.waitForCompletion(true) ? 0 : 1;
}
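FlowSortJob uses FlowBean as the map output key, which means the shuffle sorts on the bean itself: the class must implement WritableComparable. A skeleton of that contract, with invented fields since the real FlowBean is not shown in this listing:

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.WritableComparable;

public class FlowBean implements WritableComparable<FlowBean> {
    private long upFlow;   // assumed field
    private long downFlow; // assumed field

    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upFlow);
        out.writeLong(downFlow);
    }

    @Override
    public void readFields(DataInput in) throws IOException {
        // Must read fields in exactly the order write() emitted them.
        upFlow = in.readLong();
        downFlow = in.readLong();
    }

    @Override
    public int compareTo(FlowBean other) {
        // Sort descending by total flow; the shuffle sort order becomes the output order.
        return Long.compare(other.upFlow + other.downFlow, this.upFlow + this.downFlow);
    }
}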
11
Source : LindenJob.java
with Apache License 2.0
from XiaoMi
@Override
public int run(String[] strings) throws Exception {
Configuration conf = getConf();
String dir = conf.get(LindenJobConfig.INPUT_DIR, null);
logger.info("input dir:" + dir);
Path inputPath = new Path(StringUtils.unEscapeString(dir));
Path outputPath = new Path(conf.get(LindenJobConfig.OUTPUT_DIR));
String indexPath = conf.get(LindenJobConfig.INDEX_PATH);
FileSystem fs = FileSystem.get(conf);
if (fs.exists(outputPath)) {
fs.delete(outputPath, true);
}
if (fs.exists(new Path(indexPath))) {
fs.delete(new Path(indexPath), true);
}
int numShards = conf.getInt(LindenJobConfig.NUM_SHARDS, 1);
Shard[] shards = createShards(indexPath, numShards);
Shard.setIndexShards(conf, shards);
// empty trash;
(new Trash(conf)).expunge();
Job job = Job.getInstance(conf, "linden-hadoop-indexing");
job.setJarByClass(LindenJob.class);
job.setMapperClass(LindenMapper.class);
job.setCombinerClass(LindenCombiner.class);
job.setReducerClass(LindenReducer.class);
job.setMapOutputKeyClass(Shard.class);
job.setMapOutputValueClass(IntermediateForm.class);
job.setOutputKeyClass(Shard.class);
job.setOutputValueClass(Text.class);
job.setInputFormatClass(TextInputFormat.class);
job.setOutputFormatClass(IndexUpdateOutputFormat.class);
job.setReduceSpeculativeExecution(false);
job.setNumReduceTasks(numShards);
String lindenSchemaFile = conf.get(LindenJobConfig.SCHEMA_FILE_URL);
if (lindenSchemaFile == null) {
throw new IOException("no schema file is found");
}
logger.info("Adding schema file: " + lindenSchemaFile);
job.addCacheFile(new URI(lindenSchemaFile + "#lindenSchema"));
String lindenPropertiesFile = conf.get(LindenJobConfig.LINDEN_PROPERTIES_FILE_URL);
if (lindenPropertiesFile == null) {
throw new IOException("no linden properties file is found");
}
logger.info("Adding linden properties file: " + lindenPropertiesFile);
job.addCacheFile(new URI(lindenPropertiesFile + "#lindenProperties"));
FileInputFormat.setInputPaths(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
Path[] inputs = FileInputFormat.getInputPaths(job);
StringBuilder buffer = new StringBuilder(inputs[0].toString());
for (int i = 1; i < inputs.length; i++) {
buffer.append(",");
buffer.append(inputs[i].toString());
}
logger.info("mapreduce.input.dir = " + buffer.toString());
logger.info("mapreduce.output.dir = " + FileOutputFormat.getOutputPath(job).toString());
logger.info("mapreduce.job.num.reduce.tasks = " + job.getNumReduceTasks());
logger.info(shards.length + " shards = " + conf.get(LindenJobConfig.INDEX_SHARDS));
logger.info("mapreduce.input.format.clreplaced = " + job.getInputFormatClreplaced());
logger.info("mapreduce.output.format.clreplaced = " + job.getOutputFormatClreplaced());
logger.info("mapreduce.cluster.temp.dir = " + conf.get(MRJobConfig.TEMP_DIR));
job.waitForCompletion(true);
if (!job.isSuccessful()) {
throw new RuntimeException("Job failed");
}
return 0;
}
11
Source : MapJoin.java
with Apache License 2.0
from whirlys
public static void main(String[] args) {
try {
// Create the configuration
Configuration conf = new Configuration();
// Parse the command-line arguments
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
// Abort if the arguments are invalid
if (otherArgs.length != 3) {
System.err.println("Usage: MyMapJoin <in1> <in2> <out>");
System.exit(1);
}
// Assign the paths
INPUT_PATH1 = otherArgs[0];
INPUT_PATH2 = otherArgs[1];
OUT_PATH = otherArgs[2];
// Create the file system handle
FileSystem fileSystem = FileSystem.get(new URI(OUT_PATH), conf);
// Delete the output directory if it already exists
if (fileSystem.exists(new Path(OUT_PATH))) {
fileSystem.delete(new Path(OUT_PATH), true);
}
// Add the file to be held in memory (any number of files can be added)
DistributedCache.addCacheFile(new Path(INPUT_PATH2).toUri(), conf);
// Create the job
Job job = new Job(conf, MapJoin.class.getName());
// Essential when running from a packaged jar
job.setJarByClass(MapJoin.class);
// 1.1 Set the input directory and the input format class
FileInputFormat.setInputPaths(job, INPUT_PATH1);
job.setInputFormatClass(TextInputFormat.class);
// 1.2 Set the custom Mapper class and the key/value types of the map output
job.setMapperClass(MapJoinMapper.class);
job.setMapOutputKeyClass(NullWritable.class);
job.setMapOutputValueClass(Emp_Dep.class);
// 1.3 Set the partitioner and the number of reducers (zero makes this a map-only job)
job.setPartitionerClass(HashPartitioner.class);
job.setNumReduceTasks(0);
FileOutputFormat.setOutputPath(job, new Path(OUT_PATH));
// Submit the job and exit
System.exit(job.waitForCompletion(true) ? 0 : 1);
} catch (Exception e) {
e.printStackTrace();
}
}
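The file cached with DistributedCache.addCacheFile above is normally read back in the mapper's setup method, building an in-memory map that map() then joins against. A minimal sketch of that counterpart, using the same old-style DistributedCache API as the example; the joinMap field, record layout, and tab delimiter are all assumptions:

// Fragment of a Mapper subclass; joinMap is an assumed field holding the small side of the join.
private final Map<String, String> joinMap = new HashMap<>();

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    // Resolve the local copies of the files the driver cached.
    Path[] cacheFiles = DistributedCache.getLocalCacheFiles(context.getConfiguration());
    if (cacheFiles != null && cacheFiles.length > 0) {
        try (BufferedReader reader = new BufferedReader(new FileReader(cacheFiles[0].toString()))) {
            String line;
            while ((line = reader.readLine()) != null) {
                String[] fields = line.split("\t"); // assumed "key<TAB>value" records
                joinMap.put(fields[0], fields[1]);
            }
        }
    }
}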
11
Source : JiduRunner.java
with Apache License 2.0
from whirlys
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(JiduRunner.class);
job.setMapperClass(JiduMapper.class);
job.setReducerClass(JiduReducer.class);
job.setCombinerClass(JiduReducer.class);
// Set the custom partitioner class; the default is HashPartitioner
job.setPartitionerClass(JiduPartitioner.class);
// Set the number of reduce tasks to 4 + 1
job.setNumReduceTasks(5);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
FileInputFormat.setInputPaths(job, new Path(args[0]));
Path out = new Path(args[1]);
FileSystem fs = FileSystem.get(conf);
if (fs.exists(out)) {
fs.delete(out, true);
}
FileOutputFormat.setOutputPath(job, out);
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
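The five reduce tasks above only make sense together with JiduPartitioner, which routes each key to one of the five partitions. The partitioner itself is not part of this example, so here is a hedged sketch of the shape such a class takes (the quarter-based routing rule is invented purely for illustration):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Partitioner;

public class QuarterPartitioner extends Partitioner<Text, IntWritable> {
    @Override
    public int getPartition(Text key, IntWritable value, int numPartitions) {
        String k = key.toString();
        // Keys "Q1".."Q4" go to partitions 0..3; everything else goes to the last partition.
        if (k.length() >= 2 && k.charAt(0) == 'Q' && Character.isDigit(k.charAt(1))) {
            int quarter = k.charAt(1) - '0';
            if (quarter >= 1 && quarter <= 4) {
                return quarter - 1;
            }
        }
        return numPartitions - 1;
    }
}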
11
Source : CommonFriendStep2.java
with Apache License 2.0
from whirlys
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
Job job = Job.getInstance(conf);
job.setJarByClass(CommonFriendStep2.class);
// Set the job's mapper and reducer classes
job.setMapperClass(CommonFansStep2Mapper.class);
job.setReducerClass(CommonFansStep2Reducer.class);
// Set the key/value types of the map output
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// Set the key/value types of the reduce output
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
// Check whether the output directory already exists and delete it if so,
// to avoid removing it by hand on every test run
FileSystem fs = FileSystem.get(conf);
Path out = new Path(args[1]);
if (fs.exists(out)) {
fs.delete(out, true);
}
// Set the input and output directories
FileInputFormat.setInputPaths(job, new Path(args[0]));
FileOutputFormat.setOutputPath(job, out);
// Submit the job to YARN or the local runner
job.waitForCompletion(true);
}
11
Source : WordCount2Application.java
with Apache License 2.0
from ukihsoroy
/**
* Driver definition: encapsulates all the information for the MapReduce job
* @param args
*/
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
// 1. Create the Configuration
Configuration configuration = new Configuration();
// 1.1 Clean up the output directory if it already exists
Path outputPath = new Path(args[1]);
FileSystem fs = FileSystem.get(configuration);
if (fs.exists(outputPath)) {
fs.delete(outputPath, true);
_LOGGER.info("rm -rf output path success.");
}
// 2. Create a Job
Job job = Job.getInstance(configuration, "wordcount");
// 3. Set the job's driver class
job.setJarByClass(WordCount2Application.class);
// 4. Set the job's input path
FileInputFormat.setInputPaths(job, new Path(args[0]));
// 5. Set the map-side parameters
job.setMapperClass(WordCountMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
// 6. Set the reduce-side parameters
job.setReducerClass(WordCountReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
// 7. Set the job's output path
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// 8. Submit
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
11
Source : CombinerApplication.java
with Apache License 2.0
from ukihsoroy
/**
* Driver definition: encapsulates all the information for the MapReduce job
* @param args
*/
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
// 1. Create the Configuration
Configuration configuration = new Configuration();
// 1.1 Clean up the output directory if it already exists
Path outputPath = new Path(args[1]);
FileSystem fs = FileSystem.get(configuration);
if (fs.exists(outputPath)) {
fs.delete(outputPath, true);
_LOGGER.info("rm -rf output path success.");
}
// 2. Create a Job
Job job = Job.getInstance(configuration, "wordcount");
// 3. Set the job's driver class
job.setJarByClass(CombinerApplication.class);
// 4. Set the job's input path
FileInputFormat.setInputPaths(job, new Path(args[0]));
// 5. Set the map-side parameters
job.setMapperClass(WordCountMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
// 6. Set the reduce-side parameters
job.setReducerClass(WordCountReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
// 7. Set the combiner class; its logic is identical to our reducer's
job.setCombinerClass(WordCountReduce.class);
// 8. Set the job's output path
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// 9. Submit
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
11
Source : LogAnalysisApplication.java
with Apache License 2.0
from ukihsoroy
/**
* Driver definition: encapsulates all the information for the MapReduce job
* @param args
*/
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
// 1. Create the Configuration
Configuration configuration = new Configuration();
// 1.1 Clean up the output directory if it already exists
Path outputPath = new Path(args[1]);
FileSystem fs = FileSystem.get(configuration);
if (fs.exists(outputPath)) {
fs.delete(outputPath, true);
_LOGGER.info("rm -rf output path success.");
}
// 2. Create a Job
Job job = Job.getInstance(configuration, "loganalysis");
// 3. Set the job's driver class
job.setJarByClass(LogAnalysisApplication.class);
// 4. Set the job's input path
FileInputFormat.setInputPaths(job, new Path(args[0]));
// 5. Set the map-side parameters
job.setMapperClass(LogAnalysisApplication.LogAnalysisMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
// 6. Set the reduce-side parameters
job.setReducerClass(LogAnalysisApplication.LogAnalysisReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
// 7. Set the job's output path
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// 8. Submit
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
11
Source : DistributedPentomino.java
with Apache License 2.0
from NJUJYB
public int run(String[] args) throws Exception {
Configuration conf = getConf();
if (args.length == 0) {
System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
ToolRunner.printGenericCommandUsage(System.out);
return 2;
}
// check for passed parameters, otherwise use defaults
int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
for (int i = 0; i < args.length; i++) {
if (args[i].equalsIgnoreCase("-depth")) {
depth = Integer.parseInt(args[++i].trim());
} else if (args[i].equalsIgnoreCase("-height")) {
height = Integer.parseInt(args[++i].trim());
} else if (args[i].equalsIgnoreCase("-width")) {
width = Integer.parseInt(args[++i].trim());
}
}
// now set the values within conf for M/R tasks to read, this
// will ensure values are set preventing MAPREDUCE-4678
conf.setInt(Pentomino.WIDTH, width);
conf.setInt(Pentomino.HEIGHT, height);
conf.setInt(Pentomino.DEPTH, depth);
Clreplaced<? extends Pentomino> pentClreplaced = conf.getClreplaced(Pentomino.CLreplaced, OneSidedPentomino.clreplaced, Pentomino.clreplaced);
int numMaps = conf.getInt(MRJobConfig.NUM_MAPS, DEFAULT_MAPS);
Path output = new Path(args[0]);
Path input = new Path(output + "_input");
FileSystem fileSys = FileSystem.get(conf);
try {
Job job = new Job(conf);
FileInputFormat.setInputPaths(job, input);
FileOutputFormat.setOutputPath(job, output);
job.setJarByClass(PentMap.class);
job.setJobName("dancingElephant");
Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
pent.initialize(width, height);
long inputSize = createInputDirectory(fileSys, input, pent, depth);
// for forcing the number of maps
FileInputFormat.setMaxInputSplitSize(job, (inputSize / numMaps));
// the keys are the prefix strings
job.setOutputKeyClreplaced(Text.clreplaced);
// the values are puzzle solutions
job.setOutputValueClreplaced(Text.clreplaced);
job.setMapperClreplaced(PentMap.clreplaced);
job.setReducerClreplaced(Reducer.clreplaced);
job.setNumReduceTasks(1);
return (job.waitForCompletion(true) ? 0 : 1);
} finally {
fileSys.delete(input, true);
}
}
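The setMaxInputSplitSize call is how this job forces roughly numMaps map tasks: capping the split size at inputSize / numMaps guarantees at least that many splits. FileInputFormat also exposes the symmetric lower bound; a short sketch (the 16 MB floor is just an illustration, not from the example):

// Cap split size so the input fans out over at least numMaps mappers:
FileInputFormat.setMaxInputSplitSize(job, inputSize / numMaps);
// Optionally keep splits from getting pathologically small:
FileInputFormat.setMinInputSplitSize(job, 16 * 1024 * 1024L);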
11
Source : DistributedPentomino.java
with Apache License 2.0
from naver
public int run(String[] args) throws Exception {
Configuration conf = getConf();
if (args.length == 0) {
System.out.println("Usage: pentomino <output> [-depth #] [-height #] [-width #]");
ToolRunner.printGenericCommandUsage(System.out);
return 2;
}
// check for passed parameters, otherwise use defaults
int width = conf.getInt(Pentomino.WIDTH, PENT_WIDTH);
int height = conf.getInt(Pentomino.HEIGHT, PENT_HEIGHT);
int depth = conf.getInt(Pentomino.DEPTH, PENT_DEPTH);
for (int i = 0; i < args.length; i++) {
if (args[i].equalsIgnoreCase("-depth")) {
depth = Integer.parseInt(args[++i].trim());
} else if (args[i].equalsIgnoreCase("-height")) {
height = Integer.parseInt(args[++i].trim());
} else if (args[i].equalsIgnoreCase("-width")) {
width = Integer.parseInt(args[++i].trim());
}
}
// now set the values within conf for M/R tasks to read, this
// will ensure values are set preventing MAPREDUCE-4678
conf.setInt(Pentomino.WIDTH, width);
conf.setInt(Pentomino.HEIGHT, height);
conf.setInt(Pentomino.DEPTH, depth);
Clreplaced<? extends Pentomino> pentClreplaced = conf.getClreplaced(Pentomino.CLreplaced, OneSidedPentomino.clreplaced, Pentomino.clreplaced);
int numMaps = conf.getInt(MRJobConfig.NUM_MAPS, DEFAULT_MAPS);
Path output = new Path(args[0]);
Path input = new Path(output + "_input");
FileSystem fileSys = FileSystem.get(conf);
try {
Job job = Job.getInstance(conf);
FileInputFormat.setInputPaths(job, input);
FileOutputFormat.setOutputPath(job, output);
job.setJarByClass(PentMap.class);
job.setJobName("dancingElephant");
Pentomino pent = ReflectionUtils.newInstance(pentClass, conf);
pent.initialize(width, height);
long inputSize = createInputDirectory(fileSys, input, pent, depth);
// for forcing the number of maps
FileInputFormat.setMaxInputSplitSize(job, (inputSize / numMaps));
// the keys are the prefix strings
job.setOutputKeyClreplaced(Text.clreplaced);
// the values are puzzle solutions
job.setOutputValueClreplaced(Text.clreplaced);
job.setMapperClreplaced(PentMap.clreplaced);
job.setReducerClreplaced(Reducer.clreplaced);
job.setNumReduceTasks(1);
return (job.waitForCompletion(true) ? 0 : 1);
} finally {
fileSys.delete(input, true);
}
}
11
Source : FlowPartitionJob.java
with Apache License 2.0
from junneyang
public int run(String[] args) throws Exception {
/*Configuration conf = getConf();
JobClient client = new JobClient(conf);
ClusterStatus cluster = client.getClusterStatus();
int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
String join_reduces = conf.get(REDUCES_PER_HOST);
if (join_reduces != null) {
num_reduces = cluster.getTaskTrackers() *
Integer.parseInt(join_reduces);
}
// Set user-supplied (possibly default) job configs
job.setNumReduceTasks(num_reduces);*/
Configuration conf = new Configuration();
// conf.set("fs.defaultFS", "hdfs://node-01:9000");
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
String commaSeparatedPaths = null;
String outputDir = null;
if (otherArgs.length == 2) {
commaSeparatedPaths = otherArgs[0];
outputDir = otherArgs[1];
} else {
System.err.println("Usage: <in>[,<in>...] <out>");
// System.exit(-1);
return -1;
}
Job job = Job.getInstance(conf);
job.setJobName("FlowParreplacedionJob");
job.setJarByClreplaced(FlowParreplacedionJob.clreplaced);
// job.setInputFormatClreplaced(TextInputFormat.clreplaced);
// job.setOutputFormatClreplaced(TextOutputFormat.clreplaced);
job.setMapperClreplaced(FlowParreplacedionMapper.clreplaced);
// job.setCombinerClreplaced(WordCountReducer.clreplaced);
job.setReducerClreplaced(FlowParreplacedionReducer.clreplaced);
job.setParreplacedionerClreplaced(FlowParreplacedion.clreplaced);
job.setNumReduceTasks(5);
job.setOutputKeyClreplaced(Text.clreplaced);
job.setOutputValueClreplaced(FlowBean.clreplaced);
job.setMapOutputKeyClreplaced(Text.clreplaced);
job.setMapOutputValueClreplaced(FlowBean.clreplaced);
FileInputFormat.setInputPaths(job, commaSeparatedPaths);
FileOutputFormat.setOutputPath(job, new Path(outputDir));
return job.waitForCompletion(true) ? 0 : 1;
}
11
Source : Step4.java
with MIT License
from josonle
public static boolean run(Configuration config, Map<String, String> paths) throws IOException, ClassNotFoundException, InterruptedException {
String jobName = "step4";
Job job = Job.getInstance(config, jobName);
job.setJarByClass(Step4.class);
job.setJar("export\\ItemCF.jar");
job.setMapperClass(Step4_Mapper.class);
job.setReducerClass(Step4_Reducer.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
Path[] inPaths = new Path[] { new Path(paths.get("Step4Input1")), new Path(paths.get("Step4Input2")) };
Path outpath = new Path(paths.get("Step4Output"));
FileInputFormat.setInputPaths(job, inPaths);
FileOutputFormat.setOutputPath(job, outpath);
FileSystem fs = FileSystem.get(config);
if (fs.exists(outpath)) {
fs.delete(outpath, true);
}
return job.waitForCompletion(true);
}
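Step4 hands setInputPaths an array, which replaces any previously configured inputs in a single call. When inputs are discovered incrementally, addInputPath appends instead; a brief sketch (the paths and the discoveredPaths collection are hypothetical):

// Replace-all style, as in Step4:
FileInputFormat.setInputPaths(job, new Path("/step4/in1"), new Path("/step4/in2"));
// Append style, useful when inputs come from a loop:
for (Path p : discoveredPaths) {
    FileInputFormat.addInputPath(job, p);
}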
10
Source : PartitionerApplication.java
with Apache License 2.0
from ukihsoroy
/**
* Driver definition: encapsulates all the information for the MapReduce job
* @param args
*/
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
// 1. Create the Configuration
Configuration configuration = new Configuration();
// 1.1 Clean up the output directory if it already exists
Path outputPath = new Path(args[1]);
FileSystem fs = FileSystem.get(configuration);
if (fs.exists(outputPath)) {
fs.delete(outputPath, true);
_LOGGER.info("rm -rf output path success.");
}
// 2. Create a Job
Job job = Job.getInstance(configuration, "wordcount");
// 3. Set the job's driver class
job.setJarByClass(PartitionerApplication.class);
// 4. Set the job's input path
FileInputFormat.setInputPaths(job, new Path(args[0]));
// 5. Set the map-side parameters
job.setMapperClass(WordCountMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
// 6. Set the reduce-side parameters
job.setReducerClass(WordCountReduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
// 7. Set the job's partitioner
job.setPartitionerClass(PhonePartitioner.class);
// 8. Use 4 reducers, one per partition
job.setNumReduceTasks(4);
// 9. Set the job's output path
FileOutputFormat.setOutputPath(job, new Path(args[1]));
// 10. Submit
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
10
Source : MergeDictJob.java
with Apache License 2.0
from Kyligence
@Override
public int run(String[] args) throws Exception {
Options options = new Options();
try {
options.addOption(OPTION_JOB_NAME);
options.addOption(OPTION_CUBE_NAME);
options.addOption(OPTION_SEGMENT_NAME);
options.addOption(OPTION_INPUT_PATH);
options.addOption(OPTION_OUTPUT_PATH);
parseOptions(options, args);
Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
String jobName = getOptionValue(OPTION_JOB_NAME);
String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
String segmentName = getOptionValue(OPTION_SEGMENT_NAME);
CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
CubeInstance cube = cubeMgr.getCube(cubeName);
CubeSegment segment = cube.getSegment(segmentName, SegmentStatusEnum.NEW);
job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
setJobClasspath(job, cube.getConfig());
job.setJobName(jobName);
job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_NAME, segmentName);
FileInputFormat.setInputPaths(job, input);
FileOutputFormat.setOutputPath(job, output);
logger.info("MergeDictReducer output path: {}", output);
// Mapper
job.setMapperClass(MergeDictMapper.class);
job.setInputFormatClass(ColumnarSplitDictInputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// Reducer
job.setReducerClass(MergeDictReducer.class);
job.setOutputFormatClass(SequenceFileOutputFormat.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
attachCubeMetadata(cube, job.getConfiguration());
deletePath(job.getConfiguration(), output);
return waitForCompletion(job);
} catch (Exception e) {
printUsage(options);
logger.error("job {} failed. ", job.getJobName(), e);
throw e;
}
}
10
Source : WordCountJob.java
with Apache License 2.0
from junneyang
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration conf = new Configuration();
// conf.set("fs.defaultFS", "hdfs://node-01:9000");
String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
String commaSeparatedPaths = null;
String outputDir = null;
if (otherArgs.length == 2) {
commaSeparatedPaths = otherArgs[0];
outputDir = otherArgs[1];
} else {
System.err.println("Usage: <in>[,<in>...] <out>");
System.exit(-1);
}
LOGGER.info("==========job start");
Job job = Job.getInstance(conf);
job.setJobName("WordCountJob");
job.setJarByClass(WordCountJob.class);
job.setMapperClass(WordCountMapper.class);
job.setCombinerClass(WordCountReducer.class);
job.setReducerClass(WordCountReducer.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(LongWritable.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
FileInputFormat.setInputPaths(job, commaSeparatedPaths);
FileOutputFormat.setOutputPath(job, new Path(outputDir));
if (job.waitForCompletion(true)) {
LOGGER.info("==========job success");
} else {
LOGGER.info("==========job failed");
}
}
9
Source : HalyardBulkLoad.java
with Apache License 2.0
from Merck
@Override
protected int run(CommandLine cmd) throws Exception {
String source = cmd.getOptionValue('s');
String workdir = cmd.getOptionValue('w');
String target = cmd.getOptionValue('t');
getConf().setBoolean(SKIP_INVALID_PROPERTY, cmd.hasOption('i'));
getConf().setBoolean(VERIFY_DATATYPE_VALUES_PROPERTY, cmd.hasOption('d'));
getConf().setBoolean(TRUNCATE_PROPERTY, cmd.hasOption('r'));
getConf().setInt(SPLIT_BITS_PROPERTY, Integer.parseInt(cmd.getOptionValue('b', "3")));
if (cmd.hasOption('g'))
getConf().set(DEFAULT_CONTEXT_PROPERTY, cmd.getOptionValue('g'));
getConf().setBoolean(OVERRIDE_CONTEXT_PROPERTY, cmd.hasOption('o'));
getConf().setLong(DEFAULT_TIMESTAMP_PROPERTY, Long.parseLong(cmd.getOptionValue('e', String.valueOf(System.currentTimeMillis()))));
if (cmd.hasOption('m'))
getConf().setLong("mapreduce.input.fileinputformat.split.maxsize", Long.parseLong(cmd.getOptionValue('m')));
TableMapReduceUtil.addDependencyJars(getConf(), NTriplesUtil.class, Rio.class, AbstractRDFHandler.class, RDFFormat.class, RDFParser.class);
HBaseConfiguration.addHbaseResources(getConf());
Job job = Job.getInstance(getConf(), "HalyardBulkLoad -> " + workdir + " -> " + target);
job.setJarByClass(HalyardBulkLoad.class);
job.setMapperClass(RDFMapper.class);
job.setMapOutputKeyClass(ImmutableBytesWritable.class);
job.setMapOutputValueClass(KeyValue.class);
job.setInputFormatClass(RioFileInputFormat.class);
job.setSpeculativeExecution(false);
job.setReduceSpeculativeExecution(false);
try (HTable hTable = HalyardTableUtils.getTable(getConf(), target, true, getConf().getInt(SPLIT_BITS_PROPERTY, 3))) {
HFileOutputFormat2.configureIncrementalLoad(job, hTable.getTableDescriptor(), hTable.getRegionLocator());
FileInputFormat.setInputDirRecursive(job, true);
FileInputFormat.setInputPaths(job, source);
FileOutputFormat.setOutputPath(job, new Path(workdir));
TableMapReduceUtil.addDependencyJars(job);
TableMapReduceUtil.initCredentials(job);
if (job.waitForCompletion(true)) {
if (getConf().getBoolean(TRUNCATE_PROPERTY, false)) {
HalyardTableUtils.truncateTable(hTable).close();
}
new LoadIncrementalHFiles(getConf()).doBulkLoad(new Path(workdir), hTable);
LOG.info("Bulk Load Completed..");
return 0;
}
}
return -1;
}
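One detail worth calling out in this example: setInputDirRecursive(job, true) makes FileInputFormat descend into subdirectories of the input paths rather than rejecting them. The same switch is exposed as a plain configuration property, so it can also be set without the helper; a one-line sketch:

// Equivalent to FileInputFormat.setInputDirRecursive(job, true):
job.getConfiguration().setBoolean("mapreduce.input.fileinputformat.input.dir.recursive", true);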
9
Source : MergeJob.java
with MIT License
from josonle
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
// 1. Configure the HDFS connection
String namenode_ip = "192.168.17.10";
String hdfs = "hdfs://" + namenode_ip + ":9000";
Configuration conf = new Configuration();
conf.set("fs.defaultFS", hdfs);
conf.set("mapreduce.app-submission.cross-platform", "true");
// 2. Configure the MapReduce job
// Job name
String jobName = "MergeMultipleFiles";
Job job = Job.getInstance(conf, jobName);
// Set the job class used at runtime
job.setJarByClass(MultiInOutput.class);
// Point to the local jar file
job.setJar("export\\MergeMultipleFiles.jar");
// Set the mapper output key type
job.setMapOutputKeyClass(Text.class);
// Set the mapper output value type
job.setMapOutputValueClass(BytesWritable.class);
job.setMapperClass(MergeMapper.class);
// Input data format
job.setInputFormatClass(MyInputFormat.class);
// Write the output as a sequence file
job.setOutputFormatClass(SequenceFileOutputFormat.class);
// Set the job's input and output paths
String inputDir = "/workspace/mergeFiles/data";
String outputDir = "/workspace/mergeFiles/output";
Path outPath = new Path(hdfs + outputDir);
Path inputPath = new Path(hdfs + inputDir);
FileInputFormat.setInputPaths(job, inputPath);
FileOutputFormat.setOutputPath(job, outPath);
FileSystem fs = FileSystem.get(conf);
if (fs.exists(outPath)) {
fs.delete(outPath, true);
}
// Run the job
System.out.println("Job: " + jobName + " is running...");
if (job.waitForCompletion(true)) {
System.out.println("success!");
System.exit(0);
} else {
System.out.println("failed!");
System.exit(1);
}
}
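The custom MyInputFormat above is what lets each small file reach the mapper whole; such formats typically override isSplitable to return false. A hedged skeleton of that idea (this is not the original MyInputFormat, and the record reader is stubbed out):

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;

public class WholeFileInputFormat extends FileInputFormat<Text, BytesWritable> {
    @Override
    protected boolean isSplitable(JobContext context, Path filename) {
        return false; // one record per file: never split
    }

    @Override
    public RecordReader<Text, BytesWritable> createRecordReader(InputSplit split, TaskAttemptContext context) {
        // A real implementation returns a reader emitting (file name, file bytes);
        // omitted here to keep the sketch short.
        throw new UnsupportedOperationException("sketch only");
    }
}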
8
Source : TestMapReduceAggregates.java
with Apache License 2.0
from NJUJYB
public static void launch() throws Exception {
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(conf);
int numOfInputLines = 20;
Path OUTPUT_DIR = new Path("build/test/output_for_aggregates_test");
Path INPUT_DIR = new Path("build/test/input_for_aggregates_test");
String inputFile = "input.txt";
fs.delete(INPUT_DIR, true);
fs.mkdirs(INPUT_DIR);
fs.delete(OUTPUT_DIR, true);
StringBuffer inputData = new StringBuffer();
StringBuffer expectedOutput = new StringBuffer();
expectedOutput.append("max\t19\n");
expectedOutput.append("min\t1\n");
FSDataOutputStream fileOut = fs.create(new Path(INPUT_DIR, inputFile));
for (int i = 1; i < numOfInputLines; i++) {
expectedOutput.append("count_").append(idFormat.format(i));
expectedOutput.append("\t").append(i).append("\n");
inputData.append(idFormat.format(i));
for (int j = 1; j < i; j++) {
inputData.append(" ").append(idFormat.format(i));
}
inputData.append("\n");
}
expectedOutput.append("value_as_string_max\t9\n");
expectedOutput.append("value_as_string_min\t1\n");
expectedOutput.append("uniq_count\t15\n");
fileOut.write(inputData.toString().getBytes("utf-8"));
fileOut.close();
System.out.println("inputData:");
System.out.println(inputData.toString());
conf.setInt(ValueAggregatorJobBase.DESCRIPTOR_NUM, 1);
conf.set(ValueAggregatorJobBase.DESCRIPTOR + ".0", "UserDefined,org.apache.hadoop.mapreduce.lib.aggregate.AggregatorTests");
conf.setLong(UniqValueCount.MAX_NUM_UNIQUE_VALUES, 14);
Job job = Job.getInstance(conf);
FileInputFormat.setInputPaths(job, INPUT_DIR);
job.setInputFormatClass(TextInputFormat.class);
FileOutputFormat.setOutputPath(job, OUTPUT_DIR);
job.setOutputFormatClass(TextOutputFormat.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(Text.class);
job.setNumReduceTasks(1);
job.setMapperClass(ValueAggregatorMapper.class);
job.setReducerClass(ValueAggregatorReducer.class);
job.setCombinerClass(ValueAggregatorCombiner.class);
job.waitForCompletion(true);
assertTrue(job.isSuccessful());
//
// Finally, we compare the reconstructed answer key with the
// original one. Remember, we need to ignore zero-count items
// in the original key.
//
String outdata = MapReduceTestUtil.readOutput(OUTPUT_DIR, conf);
System.out.println("full out data:");
System.out.println(outdata.toString());
outdata = outdata.substring(0, expectedOutput.toString().length());
assertEquals(expectedOutput.toString(), outdata);
fs.delete(OUTPUT_DIR, true);
fs.delete(INPUT_DIR, true);
}
8
Source : FilterRecommendCuboidDataJob.java
with Apache License 2.0
from Kyligence
@Override
public int run(String[] args) throws Exception {
Options options = new Options();
try {
options.addOption(OPTION_JOB_NAME);
options.addOption(OPTION_CUBE_NAME);
options.addOption(OPTION_SEGMENT_ID);
options.addOption(OPTION_INPUT_PATH);
options.addOption(OPTION_OUTPUT_PATH);
parseOptions(options, args);
job = Job.getInstance(getConf(), getOptionValue(OPTION_JOB_NAME));
String cubeName = getOptionValue(OPTION_CUBE_NAME).toUpperCase(Locale.ROOT);
String segmentID = getOptionValue(OPTION_SEGMENT_ID);
Path input = new Path(getOptionValue(OPTION_INPUT_PATH));
Path output = new Path(getOptionValue(OPTION_OUTPUT_PATH));
CubeManager cubeMgr = CubeManager.getInstance(KylinConfig.getInstanceFromEnv());
CubeInstance cube = cubeMgr.getCube(cubeName);
CubeSegment optSegment = cube.getSegmentById(segmentID);
CubeSegment originalSegment = cube.getOriginalSegmentToOptimize(optSegment);
logger.info("Starting: " + job.getJobName());
setJobClasspath(job, cube.getConfig());
// Mapper
job.setMapperClass(FilterRecommendCuboidDataMapper.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
// Input
job.setInputFormatClass(SequenceFileInputFormat.class);
FileInputFormat.setInputPaths(job, input);
// Reducer
ConvergeCuboidDataUtil.setupReducer(job, originalSegment, output);
// set job configuration
job.getConfiguration().set(BatchConstants.CFG_CUBE_NAME, cubeName);
job.getConfiguration().set(BatchConstants.CFG_CUBE_SEGMENT_ID, segmentID);
// add metadata to distributed cache
attachSegmentMetadata(originalSegment, job.getConfiguration(), false, false);
this.deletePath(job.getConfiguration(), output);
return waitForCompletion(job);
} catch (Exception e) {
logger.error("error in CuboidJob", e);
printUsage(options);
throw e;
} finally {
if (job != null)
cleanupTempConfFile(job.getConfiguration());
}
}
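Finally, a pattern that recurs across many of the examples above is the delete-if-exists guard on the output directory, since FileOutputFormat fails when the path already exists. A small reusable sketch (the helper name is ours, not from any listed source):

private static void prepareOutputDir(Configuration conf, Path out) throws IOException {
    FileSystem fs = FileSystem.get(conf);
    if (fs.exists(out)) {
        // MapReduce refuses to start against an existing output directory,
        // so remove it recursively before submitting the job.
        fs.delete(out, true);
    }
}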