org.apache.hadoop.fs.ContentSummary

Here are examples of the Java API class org.apache.hadoop.fs.ContentSummary, taken from open source projects.
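
All of the examples that follow share the same basic pattern: obtain a ContentSummary from a FileSystem for some path, then read aggregate counters from it. Below is a minimal, self-contained sketch of that pattern (the path used here is just a placeholder); the getters shown are the ones exercised throughout the examples on this page.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ContentSummaryExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // Placeholder path; point this at any existing file or directory.
        Path dir = new Path("/tmp/example");
        ContentSummary cs = fs.getContentSummary(dir);
        System.out.println("length          = " + cs.getLength());         // total logical size of all files, in bytes
        System.out.println("file count      = " + cs.getFileCount());
        System.out.println("directory count = " + cs.getDirectoryCount());
        System.out.println("space consumed  = " + cs.getSpaceConsumed());  // physical bytes, including replication
        System.out.println("name quota      = " + cs.getQuota());
        System.out.println("space quota     = " + cs.getSpaceQuota());
    }
}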

1. TestAvatarAPI#checkPrimary()

Project: hadoop-20
Source File: TestAvatarAPI.java
private void checkPrimary() throws Exception {
    FileStatus fs = dafs.getFileStatus(path, false);
    FileStatus[] dir = dafs.listStatus(dirPath, false);
    RemoteIterator<Path> cfb = dafs.listCorruptFileBlocks(dirPath, false);
    assertTrue("DAFS file status has the wrong length", fs != null && fs.getLen() == FILE_LEN);
    assertTrue("DAFS directory listing has the wrong length", dir != null && dir.length == 1);
    assertTrue("DAFS expected 0 corrupt file blocks", countPaths(cfb) == 0);
    ContentSummary cs = dafs.getContentSummary(path, false);
    DatanodeInfo[] di = dafs.getDataNodeStats(false);
    assertTrue("DAFS datanode info should contain 3 data nodes", di.length == 3);
}

2. TestINodeDirectoryReplaceChild#testSetQuota()

/**
   * Test to make sure that after setting a quota, all the parent-child relationships are wired up correctly.
   */
@Test
public void testSetQuota() throws IOException {
    int consFileSpace = 2048;
    FSDirectory fsd = cluster.getNameNode().namesystem.dir;
    Path dir = new Path("/qdir1/qdir2/qdir3");
    assertTrue(dfs.mkdirs(dir));
    dir = new Path("/qdir1/qdir2/qdir4");
    assertTrue(dfs.mkdirs(dir));
    Path quotaDir = new Path("/qdir1/qdir2");
    dfs.setQuota(quotaDir, FSConstants.QUOTA_DONT_SET, 4 * consFileSpace);
    ContentSummary c = dfs.getContentSummary(quotaDir);
    assertEquals(c.getDirectoryCount(), 3);
    assertEquals(c.getSpaceQuota(), 4 * consFileSpace);
    INodeDirectory qdir2 = (INodeDirectory) fsd.getINode("/qdir1/qdir2");
    INode qdir3 = fsd.getINode("/qdir1/qdir2/qdir3");
    INode qdir4 = fsd.getINode("/qdir1/qdir2/qdir4");
    assertSame(qdir2, qdir3.parent);
    assertSame(qdir2.getChild("qdir3"), qdir3);
    assertSame(qdir2.getChild("qdir4"), qdir4);
}

3. HCatProcessTest#twoHCatInputTwoHCatOutput()

Project: falcon
Source File: HCatProcessTest.java
@Test(dataProvider = "generateSeparators")
public void twoHCatInputTwoHCatOutput(String separator) throws Exception {
    /* upload data and create partition */
    final String datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH" }, separator);
    List<String> dataDates = getDatesList(startDate, endDate, datePattern, 60);
    final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir, dataDates);
    final List<String> dataset2 = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir2, dataDates);
    ArrayList<HCatFieldSchema> cols = new ArrayList<>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>();
    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName, cols).partCols(partitionCols).ifNotExists(true).isTableExternal(true).location(inputHDFSDir).build());
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName2, cols).partCols(partitionCols).ifNotExists(true).isTableExternal(true).location(inputHDFSDir2).build());
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName, cols).partCols(partitionCols).ifNotExists(true).isTableExternal(true).location(outputHDFSDir).build());
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName2, cols).partCols(partitionCols).ifNotExists(true).isTableExternal(true).location(outputHDFSDir2).build());
    addPartitionsToTable(dataDates, dataset, "dt", dbName, inputTableName);
    addPartitionsToTable(dataDates, dataset2, "dt", dbName, inputTableName2);
    final String tableUriPartitionFragment = StringUtils.join(new String[] { "#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}" }, separator);
    String inputTableUri = "catalog:" + dbName + ":" + inputTableName + tableUriPartitionFragment;
    String inputTableUri2 = "catalog:" + dbName + ":" + inputTableName2 + tableUriPartitionFragment;
    bundles[0].setInputFeedTableUri(inputTableUri);
    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    final String inputFeed1 = bundles[0].getInputFeedFromBundle();
    final String inputFeed2Name = Util.readEntityName(inputFeed1) + "-second";
    FeedMerlin feedObj = new FeedMerlin(inputFeed1);
    feedObj.setName(inputFeed2Name);
    feedObj.getTable().setUri(inputTableUri2);
    bundles[0].addInputFeedToBundle("inputData2", feedObj);
    String outputTableUri = "catalog:" + dbName + ":" + outputTableName + tableUriPartitionFragment;
    String outputTableUri2 = "catalog:" + dbName + ":" + outputTableName2 + tableUriPartitionFragment;
    bundles[0].setOutputFeedTableUri(outputTableUri);
    bundles[0].setOutputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setOutputFeedValidity(startDate, endDate);
    final String outputFeed1 = bundles[0].getOutputFeedFromBundle();
    final String outputFeed2Name = Util.readEntityName(outputFeed1) + "-second";
    FeedMerlin feedObj2 = new FeedMerlin(outputFeed1);
    feedObj2.setName(outputFeed2Name);
    feedObj2.getTable().setUri(outputTableUri2);
    bundles[0].addOutputFeedToBundle("outputData2", feedObj2);
    bundles[0].setProcessValidity(startDate, endDate);
    bundles[0].setProcessPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setProcessInputStartEnd("now(0,0)", "now(0,0)");
    bundles[0].setProcessWorkflow(hiveScriptTwoHCatInputTwoHCatOutput, EngineType.HIVE);
    bundles[0].submitFeedsScheduleProcess();
    InstanceUtil.waitTillInstanceReachState(clusterOC, bundles[0].getProcessName(), 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);
    final ContentSummary inputContentSummary = clusterFS.getContentSummary(new Path(inputHDFSDir + "/" + dataDates.get(0)));
    final ContentSummary inputContentSummary2 = clusterFS.getContentSummary(new Path(inputHDFSDir2 + "/" + dataDates.get(0)));
    final ContentSummary outputContentSummary = clusterFS.getContentSummary(new Path(outputHDFSDir + "/dt=" + dataDates.get(0)));
    final ContentSummary outputContentSummary2 = clusterFS.getContentSummary(new Path(outputHDFSDir2 + "/dt=" + dataDates.get(0)));
    LOGGER.info("inputContentSummary = " + inputContentSummary.toString(false));
    LOGGER.info("inputContentSummary2 = " + inputContentSummary2.toString(false));
    LOGGER.info("outputContentSummary = " + outputContentSummary.toString(false));
    LOGGER.info("outputContentSummary2 = " + outputContentSummary2.toString(false));
    Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(), outputContentSummary.getLength(), "Unexpected size of the output.");
    Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(), outputContentSummary2.getLength(), "Unexpected size of the output.");
}

4. TestMergeFile#mergeFile()

Project: hadoop-20
Source File: TestMergeFile.java
public void mergeFile(int numBlocks, int parityBlocks, short sourceRepl, String codecId) throws Exception {
    LOG.info("RUNNING testMergeFile numBlocks=" + numBlocks + " parityBlocks=" + parityBlocks + " sourceRepl=" + sourceRepl + " codecId=" + codecId);
    id++;
    long fileLen = blockSize * numBlocks;
    long parityLen = blockSize * parityBlocks;
    Path dir = new Path("/user/facebook" + id);
    assertTrue(dfs.mkdirs(dir));
    Path source = new Path(dir, "1");
    Path dest = new Path(dir, "2");
    long crc = DFSTestUtil.createFile(dfs, source, fileLen, sourceRepl, 1);
    Path parityDir = new Path("/raid/user/facebook" + id);
    assertTrue(dfs.mkdirs(parityDir));
    RaidCodec codec = RaidCodec.getCodec(codecId);
    Path parity = new Path(parityDir, "1");
    DFSTestUtil.createFile(dfs, parity, parityLen, codec.parityReplication, 1);
    int[] checksums = new int[numBlocks];
    for (int i = 0; i < numBlocks; i++) {
        checksums[i] = rand.nextInt();
    }
    ContentSummary cBefore = dfs.getContentSummary(dir);
    ContentSummary cParityBefore = dfs.getContentSummary(parityDir);
    FileStatus statBefore = dfs.getFileStatus(source);
    LocatedBlocks lbsBefore = dfs.getLocatedBlocks(source, 0, fileLen);
    dfs.setTimes(parity, statBefore.getModificationTime(), 0);
    // now merge
    dfs.merge(parity, source, codecId, checksums);
    ContentSummary cAfter = dfs.getContentSummary(dir);
    ContentSummary cParityAfter = dfs.getContentSummary(parityDir);
    // verify directory stat
    assertEquals("File count doesn't change", cBefore.getFileCount(), cAfter.getFileCount());
    assertEquals("Space consumed is increased", cBefore.getSpaceConsumed() + parityLen * codec.parityReplication, cAfter.getSpaceConsumed());
    assertEquals("Parity file is removed", cParityBefore.getFileCount() - 1, cParityAfter.getFileCount());
    assertEquals("Space consumed is 0", 0, cParityAfter.getSpaceConsumed());
    // Verify parity is removed
    assertTrue(!dfs.exists(parity));
    verifyMergeFiles(dfs, statBefore, lbsBefore, source, fileLen, crc);
    LocatedBlocks lbsAfter = dfs.getLocatedBlocks(source, blockSize, fileLen);
    assertEquals(numBlocks - 1, lbsAfter.getLocatedBlocks().size());
    for (int i = 0; i < numBlocks - 1; i++) {
        assertEquals(lbsBefore.get(i + 1).getBlock(), lbsAfter.get(i).getBlock());
    }
    assertTrue("Should not be able to hardlink a raided file", !dfs.hardLink(source, dest));
}

5. HCatProcessTest#twoHCatInputOneHCatOutput()

Project: falcon
Source File: HCatProcessTest.java
@Test(dataProvider = "generateSeparators")
public void twoHCatInputOneHCatOutput(String separator) throws Exception {
    /* upload data and create partition */
    final String datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH" }, separator);
    List<String> dataDates = getDatesList(startDate, endDate, datePattern, 60);
    final List<String> dataset = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir, dataDates);
    final List<String> dataset2 = HadoopUtil.flattenAndPutDataInFolder(clusterFS, localHCatData, inputHDFSDir2, dataDates);
    ArrayList<HCatFieldSchema> cols = new ArrayList<>();
    cols.add(HCatUtil.getStringSchema(col1Name, col1Name + " comment"));
    cols.add(HCatUtil.getStringSchema(col2Name, col2Name + " comment"));
    ArrayList<HCatFieldSchema> partitionCols = new ArrayList<>();
    partitionCols.add(HCatUtil.getStringSchema(partitionColumn, partitionColumn + " partition"));
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName, cols).partCols(partitionCols).ifNotExists(true).isTableExternal(true).location(inputHDFSDir).build());
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, inputTableName2, cols).partCols(partitionCols).ifNotExists(true).isTableExternal(true).location(inputHDFSDir2).build());
    clusterHC.createTable(HCatCreateTableDesc.create(dbName, outputTableName, cols).partCols(partitionCols).ifNotExists(true).isTableExternal(true).location(outputHDFSDir).build());
    addPartitionsToTable(dataDates, dataset, "dt", dbName, inputTableName);
    addPartitionsToTable(dataDates, dataset2, "dt", dbName, inputTableName2);
    final String tableUriPartitionFragment = StringUtils.join(new String[] { "#dt=${YEAR}", "${MONTH}", "${DAY}", "${HOUR}" }, separator);
    String inputTableUri = "catalog:" + dbName + ":" + inputTableName + tableUriPartitionFragment;
    String inputTableUri2 = "catalog:" + dbName + ":" + inputTableName2 + tableUriPartitionFragment;
    bundles[0].setInputFeedTableUri(inputTableUri);
    bundles[0].setInputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setInputFeedValidity(startDate, endDate);
    final String inputFeed1 = bundles[0].getInputFeedFromBundle();
    final String inputFeed2Name = Util.readEntityName(inputFeed1) + "-second";
    FeedMerlin feedObj = new FeedMerlin(inputFeed1);
    feedObj.setName(inputFeed2Name);
    feedObj.getTable().setUri(inputTableUri2);
    bundles[0].addInputFeedToBundle("inputData2", feedObj);
    String outputTableUri = "catalog:" + dbName + ":" + outputTableName + tableUriPartitionFragment;
    bundles[0].setOutputFeedTableUri(outputTableUri);
    bundles[0].setOutputFeedPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setOutputFeedValidity(startDate, endDate);
    bundles[0].setProcessValidity(startDate, endDate);
    bundles[0].setProcessPeriodicity(1, Frequency.TimeUnit.hours);
    bundles[0].setProcessInputStartEnd("now(0,0)", "now(0,0)");
    bundles[0].setProcessWorkflow(hiveScriptTwoHCatInputOneHCatOutput, EngineType.HIVE);
    bundles[0].submitFeedsScheduleProcess();
    InstanceUtil.waitTillInstanceReachState(clusterOC, bundles[0].getProcessName(), 1, CoordinatorAction.Status.SUCCEEDED, EntityType.PROCESS);
    final ContentSummary inputContentSummary = clusterFS.getContentSummary(new Path(inputHDFSDir + "/" + dataDates.get(0)));
    final ContentSummary inputContentSummary2 = clusterFS.getContentSummary(new Path(inputHDFSDir2 + "/" + dataDates.get(0)));
    final ContentSummary outputContentSummary = clusterFS.getContentSummary(new Path(outputHDFSDir + "/dt=" + dataDates.get(0)));
    LOGGER.info("inputContentSummary = " + inputContentSummary.toString(false));
    LOGGER.info("inputContentSummary2 = " + inputContentSummary2.toString(false));
    LOGGER.info("outputContentSummary = " + outputContentSummary.toString(false));
    Assert.assertEquals(inputContentSummary.getLength() + inputContentSummary2.getLength(), outputContentSummary.getLength(), "Unexpected size of the output.");
}

6. ExternalFSTest#replicateToExternalFS()

Project: falcon
Source File: ExternalFSTest.java
@Test(dataProvider = "getData")
public void replicateToExternalFS(final FileSystem externalFS, final String separator, final boolean withData) throws Exception {
    final String endpoint = externalFS.getUri().toString();
    Bundle.submitCluster(bundles[0], externalBundle);
    String startTime = TimeUtil.getTimeWrtSystemTime(0);
    String endTime = TimeUtil.addMinsToTime(startTime, 5);
    LOGGER.info("Time range between : " + startTime + " and " + endTime);
    String datePattern = StringUtils.join(new String[] { "${YEAR}", "${MONTH}", "${DAY}", "${HOUR}", "${MINUTE}" }, separator);
    //configure feed
    FeedMerlin feed = new FeedMerlin(bundles[0].getDataSets().get(0));
    String targetDataLocation = endpoint + testWasbTargetDir + datePattern;
    feed.setFilePath(sourcePath + '/' + datePattern);
    //erase all clusters from feed definition
    feed.clearFeedClusters();
    //set local cluster as source
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(Util.readEntityName(bundles[0].getClusters().get(0))).withRetention("days(1000000)", ActionType.DELETE).withValidity(startTime, endTime).withClusterType(ClusterType.SOURCE).build());
    //set externalFS cluster as target
    feed.addFeedCluster(new FeedMerlin.FeedClusterBuilder(Util.readEntityName(externalBundle.getClusters().get(0))).withRetention("days(1000000)", ActionType.DELETE).withValidity(startTime, endTime).withClusterType(ClusterType.TARGET).withDataLocation(targetDataLocation).build());
    //submit and schedule feed
    LOGGER.info("Feed : " + Util.prettyPrintXml(feed.toString()));
    AssertUtil.assertSucceeded(prism.getFeedHelper().submitAndSchedule(feed.toString()));
    datePattern = StringUtils.join(new String[] { "yyyy", "MM", "dd", "HH", "mm" }, separator);
    //upload necessary data
    DateTime date = new DateTime(startTime, DateTimeZone.UTC);
    DateTimeFormatter fmt = DateTimeFormat.forPattern(datePattern);
    String timePattern = fmt.print(date);
    HadoopUtil.recreateDir(clusterFS, sourcePath + '/' + timePattern);
    if (withData) {
        HadoopUtil.copyDataToFolder(clusterFS, sourcePath + '/' + timePattern, OSUtil.SINGLE_FILE);
    }
    Path srcPath = new Path(sourcePath + '/' + timePattern);
    Path dstPath = new Path(endpoint + testWasbTargetDir + '/' + timePattern);
    //check if coordinator exists
    TimeUtil.sleepSeconds(10);
    InstanceUtil.waitTillInstancesAreCreated(clusterOC, feed.toString(), 0);
    Assert.assertEquals(OozieUtil.checkIfFeedCoordExist(clusterOC, feed.getName(), "REPLICATION"), 1);
    //replication should start, wait while it ends
    InstanceUtil.waitTillInstanceReachState(clusterOC, Util.readEntityName(feed.toString()), 1, CoordinatorAction.Status.SUCCEEDED, EntityType.FEED);
    //check if data has been replicated correctly
    List<Path> cluster1ReplicatedData = HadoopUtil.getAllFilesRecursivelyHDFS(clusterFS, srcPath);
    List<Path> cluster2ReplicatedData = HadoopUtil.getAllFilesRecursivelyHDFS(externalFS, dstPath);
    AssertUtil.checkForListSizes(cluster1ReplicatedData, cluster2ReplicatedData);
    final ContentSummary srcSummary = clusterFS.getContentSummary(srcPath);
    final ContentSummary dstSummary = externalFS.getContentSummary(dstPath);
    Assert.assertEquals(dstSummary.getLength(), srcSummary.getLength());
}

7. AssertUtil#checkContentSize()

Project: falcon
Source File: AssertUtil.java
/**
     * Checks that the content at two locations has the same size.
     *
     * @param firstPath  path to the first location
     * @param secondPath path to the second location
     * @param fs         hadoop file system for the locations
     * @throws IOException
     */
public static void checkContentSize(String firstPath, String secondPath, FileSystem fs) throws IOException {
    final ContentSummary firstSummary = fs.getContentSummary(new Path(firstPath));
    final ContentSummary secondSummary = fs.getContentSummary(new Path(secondPath));
    LOGGER.info(firstPath + " : firstSummary = " + firstSummary.toString(false));
    LOGGER.info(secondPath + " : secondSummary = " + secondSummary.toString(false));
    Assert.assertEquals(firstSummary.getLength(), secondSummary.getLength(), "Contents at the two locations don't have same size.");
}
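
A hypothetical call site for this helper might look like the following; the two paths are placeholders standing in for a source location and a copy whose sizes are expected to match.

// Hypothetical usage of the helper above; paths are placeholders.
FileSystem fs = FileSystem.get(new Configuration());
AssertUtil.checkContentSize("/data/source/2015-01-01", "/data/replica/2015-01-01", fs);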

8. HdfsUtils#getDirectorySize()

Project: elephant-bird
Source File: HdfsUtils.java
/**
   * Calculates the total size of all the contents of a directory,
   * including the contents of all of its subdirectories.
   * Does not include the size of directories themselves
   * (which are 0 in HDFS but may not be 0 on local file systems)
   *
   * @param path path to recursively walk
   * @param fs FileSystem for this path
   * @return size of the directory's contents in bytes
   * @throws IOException
   */
public static long getDirectorySize(Path path, FileSystem fs) throws IOException {
    ContentSummary cs = fs.getContentSummary(path);
    return cs.getLength();
}
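
getLength() above reports the logical size of the files. ContentSummary also exposes getSpaceConsumed(), which reports the physical bytes on disk including block replication. A companion helper (hypothetical name getDirectoryRawSize), written as a sketch under the same assumptions as getDirectorySize, could look like this:

/**
 * Sketch of a companion helper: returns the raw space consumed by the
 * directory's contents (bytes on disk, including block replication)
 * rather than the logical length returned by getDirectorySize above.
 */
public static long getDirectoryRawSize(Path path, FileSystem fs) throws IOException {
    ContentSummary cs = fs.getContentSummary(path);
    return cs.getSpaceConsumed();
}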

9. TestRaidFile#raidFile()

Project: hadoop-20
Source File: TestRaidFile.java
/**
   * 1. Create a file
   * 2. Fill fake checksums in it
   * 3. Call raidFile to convert it into Raid format; it returns false and the namenode
   *    starts scheduling raiding
   * 4. Verify we can still read the file
   * 5. Datanodes receive a RaidTaskCommand from the namenode and jump into
   *    FakeBlockGeneratorInjectionHandler; this handler creates a fake parity
   *    block on the datanode and notifies the namenode
   * 6. Keep calling raidFile until all parity blocks are generated, at which point
   *    raidFile succeeds in reducing the replication and returns true
   */
private void raidFile(int numBlocks, short expectedSourceRepl, String codecId) throws Exception {
    LOG.info("RUNNING testRaidFile numBlocks=" + numBlocks + " sourceRepl=" + expectedSourceRepl + " codecId=" + codecId);
    id++;
    long fileLen = blockSize * numBlocks;
    Path dir = new Path("/user/facebook" + id);
    assertTrue(dfs.mkdirs(dir));
    Path source = new Path(dir, "1");
    long crc = DFSTestUtil.createFile(dfs, source, fileLen, (short) 3, 1);
    LOG.info("Fill fake checksums to the file");
    fillChecksums(source);
    ContentSummary cBefore = dfs.getContentSummary(dir);
    FileStatus statBefore = dfs.getFileStatus(source);
    LocatedBlocks lbsBefore = dfs.getLocatedBlocks(source, 0, fileLen);
    // now raid the file 
    boolean result = dfs.raidFile(source, codecId, expectedSourceRepl);
    assertTrue("raidFile should return false", !result);
    ContentSummary cAfter = dfs.getContentSummary(dir);
    // verify directory stat
    assertEquals("File count doesn't change", cBefore.getFileCount(), cAfter.getFileCount());
    verifyRaidFiles(nn, dfs, statBefore, lbsBefore, source, fileLen, crc, statBefore.getReplication(), codecId, false);
    LocatedBlocks lbsAfter = dfs.getLocatedBlocks(source, blockSize, fileLen);
    assertEquals(numBlocks - 1, lbsAfter.getLocatedBlocks().size());
    for (int i = 0; i < numBlocks - 1; i++) {
        assertEquals(lbsBefore.get(i + 1).getBlock(), lbsAfter.get(i).getBlock());
    }
    String otherCodec = codecId.equals("xor") ? "rs" : "xor";
    raidFile(dfs, source, otherCodec, (short) 2, "raidFile: couldn't raid a raided file");
    RaidCodec codec = RaidCodec.getCodec(codecId);
    long startTime = System.currentTimeMillis();
    result = false;
    while (System.currentTimeMillis() - startTime < 70000 && !result) {
        DFSTestUtil.waitNSecond(3);
        result = dfs.raidFile(source, codecId, expectedSourceRepl);
    }
    assertTrue("Finish raiding", result);
    verifyRaidFiles(nn, dfs, statBefore, lbsBefore, source, fileLen, crc, expectedSourceRepl, codecId, true);
    if (codec.minSourceReplication >= 2) {
        try {
            dfs.setReplication(source, (short) (codec.minSourceReplication - 1));
            assertTrue("setReplication should fail", false);
        } catch (IOException ioe) {
            assertTrue("fail to setReplication", ioe.getMessage().contains("Couldn't set replication smaller than "));
        }
    }
}

10. TestHDFSConcat#runTestConcat()

Project: hadoop-20
Source File: TestHDFSConcat.java
private void runTestConcat(boolean restricted, boolean violate) throws IOException, InterruptedException {
    LOG.info("RUNNING testConcat restricted= " + restricted + ", violate= " + violate);
    final int numFiles = 10;
    long fileLen = blockSize * 3;
    FileStatus fStatus;
    FSDataInputStream stm;
    String trg = "/trg";
    Path trgPath = new Path(trg);
    DFSTestUtil.createFile(dfs, trgPath, fileLen, REPL_FACTOR, 1);
    fStatus = nn.getFileInfo(trg);
    long trgLen = fStatus.getLen();
    long trgBlocks = nn.getBlockLocations(trg, 0, trgLen).locatedBlockCount();
    Path[] files = new Path[numFiles];
    long initLens[] = new long[numFiles];
    Random rand = new Random();
    for (int i = 0; i < numFiles; i++) {
        initLens[i] = fileLen + ((restricted && !violate) ? 0 : rand.nextInt((int) blockSize));
    }
    byte[][] bytes = new byte[numFiles + 1][];
    bytes[0] = new byte[(int) trgLen];
    for (int i = 1; i <= numFiles; i++) {
        bytes[i] = new byte[(int) initLens[i - 1]];
    }
    stm = dfs.open(trgPath);
    stm.readFully(0, bytes[0]);
    stm.close();
    LocatedBlocks[] lblocks = new LocatedBlocks[numFiles];
    long[] lens = new long[numFiles];
    int i = 0;
    for (i = 0; i < files.length; i++) {
        files[i] = new Path("/file" + i);
        Path path = files[i];
        DFSTestUtil.createFile(dfs, path, initLens[i], REPL_FACTOR, 1);
        fStatus = nn.getFileInfo(path.toUri().getPath());
        lens[i] = fStatus.getLen();
        // file of the same length.
        assertEquals(initLens[i], lens[i]);
        lblocks[i] = nn.getBlockLocations(path.toUri().getPath(), 0, lens[i]);
        //read the file
        stm = dfs.open(path);
        stm.readFully(0, bytes[i + 1]);
        stm.close();
    }
    // check permissions -try the operation with the "wrong" user
    //final UserGroupInformation user1 = UserGroupInformation.createUserForTesting(
    //    "theDoctor", new String[] { "tardis" });
    //DistributedFileSystem hdfs = 
    //  (DistributedFileSystem)DFSTestUtil.getFileSystemAs(user1, conf);
    //try {
    // hdfs.concat(trgPath, files);
    //  fail("Permission exception expected");
    //} catch (IOException ie) {
    //  System.out.println("Got expected exception for permissions:"
    //      + ie.getLocalizedMessage());
    // expected
    //}
    System.out.println("Skipping concat with file permissions");
    // check count update
    ContentSummary cBefore = dfs.getContentSummary(trgPath.getParent());
    // now concatenate
    dfs.concat(trgPath, files, restricted);
    // verify  count
    ContentSummary cAfter = dfs.getContentSummary(trgPath.getParent());
    assertEquals(cBefore.getFileCount(), cAfter.getFileCount() + files.length);
    // verify other stuff
    long totalLen = trgLen;
    long totalBlocks = trgBlocks;
    for (i = 0; i < files.length; i++) {
        totalLen += lens[i];
        totalBlocks += lblocks[i].locatedBlockCount();
    }
    LOG.info("total len=" + totalLen + "; totalBlocks=" + totalBlocks);
    fStatus = nn.getFileInfo(trg);
    // new length
    trgLen = fStatus.getLen();
    // read the resulting file
    stm = dfs.open(trgPath);
    System.out.println("targetLen " + trgLen);
    byte[] byteFileConcat = new byte[(int) trgLen];
    stm.readFully(0, byteFileConcat);
    stm.close();
    trgBlocks = nn.getBlockLocations(trg, 0, trgLen).locatedBlockCount();
    //verifications
    // 1. number of blocks
    assertEquals(trgBlocks, totalBlocks);
    // 2. file lengths
    assertEquals(trgLen, totalLen);
    // 3. removal of the src file
    for (Path p : files) {
        fStatus = nn.getFileInfo(p.toUri().getPath());
        // file shouldn't exist
        assertNull("File " + p + " still exists", fStatus);
        // try to create a file with the same name
        DFSTestUtil.createFile(dfs, p, fileLen, REPL_FACTOR, 1);
    }
    // 4. content
    checkFileContent(byteFileConcat, bytes);
    // 5. read in parts
    int start = 0;
    byte[] total = new byte[(int) trgLen];
    while (start < trgLen) {
        byte[] part = new byte[(int) Math.min(1000, trgLen - start)];
        stm = dfs.open(trgPath);
        stm.readFully(start, part);
        System.arraycopy(part, 0, total, start, (int) Math.min(part.length, trgLen - start));
        start += 1000;
        stm.close();
    }
    checkFileContent(total, bytes);
    // 6. read by seeking
    start = 0;
    total = new byte[(int) trgLen];
    stm = dfs.open(trgPath);
    while (start < trgLen) {
        stm.seek(start);
        total[start++] = stm.readByte();
    }
    stm.close();
    checkFileContent(total, bytes);
    // 7. positioned reading
    start = 0;
    total = new byte[(int) trgLen];
    stm = dfs.open(trgPath);
    while (start < trgLen) {
        stm.read(start, total, start++, 1);
    }
    stm.close();
    checkFileContent(total, bytes);
    ////////
    // add a small file (less than a block)
    Path smallFile = new Path("/sfile");
    int sFileLen = 10;
    DFSTestUtil.createFile(dfs, smallFile, sFileLen, REPL_FACTOR, 1);
    LOG.info("Trying the second concat operation.");
    dfs.concat(trgPath, new Path[] { smallFile }, restricted);
    LOG.info("Second concat operation successful.");
    fStatus = nn.getFileInfo(trg);
    // new length
    trgLen = fStatus.getLen();
    // check number of blocks
    trgBlocks = nn.getBlockLocations(trg, 0, trgLen).locatedBlockCount();
    assertEquals(trgBlocks, totalBlocks + 1);
    // and length
    assertEquals(trgLen, totalLen + sFileLen);
}

11. JobFilePartitioner#processHDFSSources()

Project: hraven
Source File: JobFilePartitioner.java
/**
   * Process HDFS source directory.
   *
   * @param inputPath input path to process
   * @throws IOException
   */
private void processHDFSSources(Path inputPath) throws IOException {
    // Try to get the fileStatus only if we're reasonably confident that this
    // is an HDFS path.
    FileStatus inputFileStatus = hdfs.getFileStatus(inputPath);
    // Check if input is a directory
    if (!inputFileStatus.isDir()) {
        throw new IOException("Input is not a directory in HDFS: " + input);
    }
    // Accept only jobFiles and only those that fall in the desired range of
    // modification time.
    JobFileModifiedRangePathFilter jobFileModifiedRangePathFilter = new JobFileModifiedRangePathFilter(myConf, 0L);
    ContentSummary contentSummary = hdfs.getContentSummary(inputPath);
    LOG.info("Listing / filtering (" + contentSummary.getFileCount() + ") files in: " + inputPath);
    // get the files in the done folder,
    // need to traverse dirs under done recursively for versions
    // that include MAPREDUCE-323: on/after hadoop 0.20.203.0
    // on/after cdh3u5
    FileStatus[] jobFileStatusses = FileLister.listFiles(true, hdfs, inputPath, jobFileModifiedRangePathFilter);
    LOG.info("Sorting " + jobFileStatusses.length + " job files.");
    Arrays.sort(jobFileStatusses, new FileStatusModificationComparator());
    int processedCount = 0;
    try {
        for (int i = 0; i < jobFileStatusses.length; i++) {
            FileStatus jobFileStatus = jobFileStatusses[i];
            boolean retain = BatchUtil.shouldRetain(i, maXretention, jobFileStatusses.length);
            processHDFSSource(hdfs, jobFileStatus, outputPath, myConf, skipExisting, retain);
            processedCount++;
            // Print something every 1,000 files to show progress.
            if ((i % 1000) == 0) {
                LOG.info("Processed " + i + " files.");
            }
        }
    } finally {
        LOG.info("Processed " + processedCount + " files.");
    }
}

12. TestQuota#testNamespaceCommands()

Project: hadoop-20
Source File: TestQuota.java
/** Test commands that change the size of the name space:
   *  mkdirs, rename, and delete */
@Test
public void testNamespaceCommands() throws Exception {
    setUp(false, false);
    // 1: create directory /nqdir0/qdir1/qdir20/nqdir30
    assertTrue(dfs.mkdirs(new Path("/nqdir0/qdir1/qdir20/nqdir30")));
    // 2: set the quota of /nqdir0/qdir1 to be 6
    final Path quotaDir1 = new Path("/nqdir0/qdir1");
    dfs.setQuota(quotaDir1, 6, FSConstants.QUOTA_DONT_SET);
    ContentSummary c = dfs.getContentSummary(quotaDir1);
    assertEquals(c.getDirectoryCount(), 3);
    assertEquals(c.getQuota(), 6);
    // 3: set the quota of /nqdir0/qdir1/qdir20 to be 7
    final Path quotaDir2 = new Path("/nqdir0/qdir1/qdir20");
    dfs.setQuota(quotaDir2, 7, FSConstants.QUOTA_DONT_SET);
    c = dfs.getContentSummary(quotaDir2);
    assertEquals(c.getDirectoryCount(), 2);
    assertEquals(c.getQuota(), 7);
    // 4: Create directory /nqdir0/qdir1/qdir21 and set its quota to 2
    final Path quotaDir3 = new Path("/nqdir0/qdir1/qdir21");
    assertTrue(dfs.mkdirs(quotaDir3));
    dfs.setQuota(quotaDir3, 2, FSConstants.QUOTA_DONT_SET);
    c = dfs.getContentSummary(quotaDir3);
    assertEquals(c.getDirectoryCount(), 1);
    assertEquals(c.getQuota(), 2);
    // 5: Create directory /nqdir0/qdir1/qdir21/nqdir32
    Path tempPath = new Path(quotaDir3, "nqdir32");
    assertTrue(dfs.mkdirs(tempPath));
    c = dfs.getContentSummary(quotaDir3);
    assertEquals(c.getDirectoryCount(), 2);
    assertEquals(c.getQuota(), 2);
    // 6: Create directory /nqdir0/qdir1/qdir21/nqdir33
    tempPath = new Path(quotaDir3, "nqdir33");
    boolean hasException = false;
    try {
        assertFalse(dfs.mkdirs(tempPath));
    } catch (NSQuotaExceededException e) {
        hasException = true;
    }
    assertTrue(hasException);
    c = dfs.getContentSummary(quotaDir3);
    assertEquals(c.getDirectoryCount(), 2);
    assertEquals(c.getQuota(), 2);
    // 7: Create directory /nqdir0/qdir1/qdir20/nqdir31
    tempPath = new Path(quotaDir2, "nqdir31");
    assertTrue(dfs.mkdirs(tempPath));
    c = dfs.getContentSummary(quotaDir2);
    assertEquals(c.getDirectoryCount(), 3);
    assertEquals(c.getQuota(), 7);
    c = dfs.getContentSummary(quotaDir1);
    assertEquals(c.getDirectoryCount(), 6);
    assertEquals(c.getQuota(), 6);
    // 8: Create directory /nqdir0/qdir1/qdir20/nqdir33
    tempPath = new Path(quotaDir2, "nqdir33");
    hasException = false;
    try {
        assertFalse(dfs.mkdirs(tempPath));
    } catch (NSQuotaExceededException e) {
        hasException = true;
    }
    assertTrue(hasException);
    // 9: Move /nqdir0/qdir1/qdir21/nqdir32 /nqdir0/qdir1/qdir20/nqdir30
    tempPath = new Path(quotaDir2, "nqdir30");
    dfs.rename(new Path(quotaDir3, "nqdir32"), tempPath);
    c = dfs.getContentSummary(quotaDir2);
    assertEquals(c.getDirectoryCount(), 4);
    assertEquals(c.getQuota(), 7);
    c = dfs.getContentSummary(quotaDir1);
    assertEquals(c.getDirectoryCount(), 6);
    assertEquals(c.getQuota(), 6);
    // 10: Move /nqdir0/qdir1/qdir20/nqdir30 to /nqdir0/qdir1/qdir21
    hasException = false;
    try {
        assertFalse(dfs.rename(tempPath, quotaDir3));
    } catch (NSQuotaExceededException e) {
        hasException = true;
    }
    assertTrue(hasException);
    assertTrue(dfs.exists(tempPath));
    assertFalse(dfs.exists(new Path(quotaDir3, "nqdir30")));
    // 10.a: Rename /nqdir0/qdir1/qdir20/nqdir30 to /nqdir0/qdir1/qdir21/nqdir32
    hasException = false;
    try {
        assertFalse(dfs.rename(tempPath, new Path(quotaDir3, "nqdir32")));
    } catch (NSQuotaExceededException e) {
        hasException = true;
    }
    assertTrue(hasException);
    assertTrue(dfs.exists(tempPath));
    assertFalse(dfs.exists(new Path(quotaDir3, "nqdir32")));
    // 11: Move /nqdir0/qdir1/qdir20/nqdir30 to /nqdir0
    assertTrue(dfs.rename(tempPath, new Path("/nqdir0")));
    c = dfs.getContentSummary(quotaDir2);
    assertEquals(c.getDirectoryCount(), 2);
    assertEquals(c.getQuota(), 7);
    c = dfs.getContentSummary(quotaDir1);
    assertEquals(c.getDirectoryCount(), 4);
    assertEquals(c.getQuota(), 6);
    // 12: Create directory /nqdir0/nqdir30/nqdir33
    assertTrue(dfs.mkdirs(new Path("/nqdir0/nqdir30/nqdir33")));
    // 13: Move /nqdir0/nqdir30 /nqdir0/qdir1/qdir20/qdir30
    hasException = false;
    try {
        assertFalse(dfs.rename(new Path("/nqdir0/nqdir30"), tempPath));
    } catch (NSQuotaExceededException e) {
        hasException = true;
    }
    assertTrue(hasException);
    // 14: Move /nqdir0/qdir1/qdir21 /nqdir0/qdir1/qdir20
    assertTrue(dfs.rename(quotaDir3, quotaDir2));
    c = dfs.getContentSummary(quotaDir1);
    assertEquals(c.getDirectoryCount(), 4);
    assertEquals(c.getQuota(), 6);
    c = dfs.getContentSummary(quotaDir2);
    assertEquals(c.getDirectoryCount(), 3);
    assertEquals(c.getQuota(), 7);
    tempPath = new Path(quotaDir2, "qdir21");
    c = dfs.getContentSummary(tempPath);
    assertEquals(c.getDirectoryCount(), 1);
    assertEquals(c.getQuota(), 2);
    // 15: Delete /nqdir0/qdir1/qdir20/qdir21
    dfs.delete(tempPath, true);
    c = dfs.getContentSummary(quotaDir2);
    assertEquals(c.getDirectoryCount(), 2);
    assertEquals(c.getQuota(), 7);
    c = dfs.getContentSummary(quotaDir1);
    assertEquals(c.getDirectoryCount(), 3);
    assertEquals(c.getQuota(), 6);
    // 16: Move /nqdir0/qdir30 /nqdir0/qdir1/qdir20
    assertTrue(dfs.rename(new Path("/nqdir0/nqdir30"), quotaDir2));
    c = dfs.getContentSummary(quotaDir2);
    assertEquals(c.getDirectoryCount(), 5);
    assertEquals(c.getQuota(), 7);
    c = dfs.getContentSummary(quotaDir1);
    assertEquals(c.getDirectoryCount(), 6);
    assertEquals(c.getQuota(), 6);
}

13. TestFileHardLink#verifyDSQuotaSetting()

Project: hadoop-20
Source File: TestFileHardLink.java
private static void verifyDSQuotaSetting(DistributedFileSystem dfs, Path path, long dsCount, long diskComsumed) throws IOException {
    ContentSummary c = dfs.getContentSummary(path);
    assertEquals(dsCount, c.getSpaceQuota());
    assertEquals(diskComsumed, c.getSpaceConsumed());
}

14. TestFileHardLink#verifyNSQuotaSetting()

Project: hadoop-20
Source File: TestFileHardLink.java
private static void verifyNSQuotaSetting(DistributedFileSystem dfs, Path path, int nsQuota, int nsComsumed) throws IOException {
    ContentSummary c = dfs.getContentSummary(path);
    assertEquals(nsQuota, c.getQuota());
    assertEquals(nsComsumed, c.getFileCount() + c.getDirectoryCount());
}
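
Together, these two helpers cover the quota-related getters on ContentSummary (getQuota/getSpaceQuota plus the consumed counts). A hedged sketch of how a test might combine them after setting quotas, using the same hadoop-20 style APIs as the surrounding examples (the path and quota values are placeholders):

// Sketch only: path and quota values are placeholders.
Path quotaDir = new Path("/user/quotaTest");
assertTrue(dfs.mkdirs(quotaDir));
// name quota of 10 objects, space quota of 4 MB
dfs.setQuota(quotaDir, 10, 4L * 1024 * 1024);
// the empty directory itself counts as one namespace object and consumes no space yet
verifyNSQuotaSetting(dfs, quotaDir, 10, 1);
verifyDSQuotaSetting(dfs, quotaDir, 4L * 1024 * 1024, 0);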

15. TestAvatarAPI#checkStandby()

Project: hadoop-20
Source File: TestAvatarAPI.java
private void checkStandby() throws Exception {
    FileStatus fs = null;
    FileStatus[] dir = null;
    long startTime = System.currentTimeMillis();
    while ((fs == null || dir == null || dir.length == 0 || dafs.getDataNodeStats(true).length != 3) && (System.currentTimeMillis() < startTime + MAX_WAIT)) {
        try {
            fs = dafs.getFileStatus(path, true);
            dir = dafs.listStatus(dirPath, true);
        } catch (FileNotFoundException fe) {
            LOG.info("DAFS File not found on standby avatar, retrying.");
        }
        if (fs == null) {
            LOG.info("DAFS file " + path.toString() + " not found");
        }
        if (dir == null) {
            LOG.info("DAFS dir " + dirPath.toString() + " not found");
        } else {
            if (dir.length == 0) {
                LOG.info("DAFS dir " + dirPath.toString() + " is empty");
            }
        }
        try {
            Thread.sleep(100);
        } catch (InterruptedException ignore) {
        }
    }
    assertTrue("DAFS file status has the wrong length", fs != null && fs.getLen() == FILE_LEN);
    assertTrue("DAFS directory listing has the wrong length", dir != null && dir.length == 1);
    ContentSummary cs = dafs.getContentSummary(path, true);
    DatanodeInfo[] di = dafs.getDataNodeStats(true);
    assertEquals("DAFS datanode info should contain 3 data nodes", 3, di.length);
}

16. StageOSMToHDFSCommand#getHDFSFileSummary()

Project: geowave
Source File: StageOSMToHDFSCommand.java
private static ContentSummary getHDFSFileSummary(Configuration conf, String filename) throws IOException {
    org.apache.hadoop.fs.Path path = new org.apache.hadoop.fs.Path(filename);
    FileSystem file = path.getFileSystem(conf);
    ContentSummary cs = file.getContentSummary(path);
    file.close();
    return cs;
}

17. StageOSMToHDFSCommand#execute()

Project: geowave
Source File: StageOSMToHDFSCommand.java
@Override
public void execute(OperationParams params) throws Exception {
    // Ensure we have all the required arguments
    if (parameters.size() != 3) {
        throw new ParameterException("Requires arguments: <file or directory> <hdfs host:port> <path to base directory to write to>");
    }
    String inputPath = parameters.get(0);
    String hdfsHostPort = parameters.get(1);
    String basePath = parameters.get(2);
    // Ensures that the url starts with hdfs://
    if (!hdfsHostPort.contains("://")) {
        hdfsHostPort = "hdfs://" + hdfsHostPort;
    }
    if (!basePath.startsWith("/")) {
        throw new ParameterException("HDFS Base path must start with forward slash /");
    }
    // These are set as main parameter arguments, to keep consistency with
    // GeoWave.
    parserOptions.setIngestDirectory(inputPath);
    parserOptions.setHdfsBasePath(basePath);
    parserOptions.setNameNode(hdfsHostPort);
    OsmPbfParser osmPbfParser = new OsmPbfParser();
    Configuration conf = osmPbfParser.stageData(parserOptions);
    ContentSummary cs = getHDFSFileSummary(conf, basePath);
    System.out.println("**************************************************");
    System.out.println("Directories: " + cs.getDirectoryCount());
    System.out.println("Files: " + cs.getFileCount());
    System.out.println("Nodes size: " + getHDFSFileSummary(conf, parserOptions.getNodesBasePath()).getLength());
    System.out.println("Ways size: " + getHDFSFileSummary(conf, parserOptions.getWaysBasePath()).getLength());
    System.out.println("Relations size: " + getHDFSFileSummary(conf, parserOptions.getRelationsBasePath()).getLength());
    System.out.println("**************************************************");
    System.out.println("finished osmpbf ingest");
}