org.apache.hadoop.hdfs.MiniDFSCluster.transitionToActive()

Here are examples of the Java API org.apache.hadoop.hdfs.MiniDFSCluster.transitionToActive(), taken from open source projects.

94 Examples
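
Before the per-project examples, here is a minimal sketch of the pattern most of them share: build an HA MiniDFSCluster, promote one NameNode with transitionToActive(), and fail over by demoting it and promoting the other. This sketch is illustrative only (the class name TransitionToActiveSketch is made up for this page); it reuses the MiniDFSCluster, MiniDFSNNTopology, and HATestUtil calls that appear in the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;
import org.apache.hadoop.hdfs.server.namenode.ha.HATestUtil;

// Illustrative sketch (not taken from the examples below): promote NN0, then fail over to NN1.
public class TransitionToActiveSketch {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1).build();
        try {
            cluster.waitActive();
            // Both NameNodes start in standby; promote index 0 so the cluster can serve requests.
            cluster.transitionToActive(0);
            FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
            fs.mkdirs(new Path("/example"));
            // Graceful failover: demote NN0, then promote NN1.
            cluster.transitionToStandby(0);
            cluster.transitionToActive(1);
        } finally {
            cluster.shutdown();
        }
    }
}

HATestUtil.configureFailoverFs gives the client a failover-aware FileSystem, which is why the same fs handle keeps working across the transitionToStandby/transitionToActive pair in the examples below.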

19 Source : TestDFSHAAdminMiniCluster.java
with Apache License 2.0
from NJUJYB

@Test
public void testGetServiceState() throws Exception {
    assertEquals(0, runTool("-getServiceState", "nn1"));
    assertEquals(0, runTool("-getServiceState", "nn2"));
    cluster.transitionToActive(0);
    assertEquals(0, runTool("-getServiceState", "nn1"));
    NameNodeAdapter.enterSafeMode(cluster.getNameNode(0), false);
    assertEquals(0, runTool("-getServiceState", "nn1"));
}

19 Source : TestFailureToReadEdits.java
with Apache License 2.0
from NJUJYB

/**
 * Ensure that the standby fails to become active if it cannot read all
 * available edits in the shared edits dir when it is transitioning to active
 * state.
 */
@Test
public void testFailureToReadEditsOnTransitionToActive() throws Exception {
    assertTrue(fs.mkdirs(new Path(TEST_DIR1)));
    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
    // It should also upload it back to the active.
    HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 3));
    causeFailureOnEditLogRead();
    assertTrue(fs.mkdirs(new Path(TEST_DIR2)));
    assertTrue(fs.mkdirs(new Path(TEST_DIR3)));
    try {
        HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
        fail("Standby fully caught up, but should not have been able to");
    } catch (HATestUtil.CouldNotCatchUpException e) {
    // Expected. The NN did not exit.
    }
    // Shutdown the active NN.
    cluster.shutdownNameNode(0);
    try {
        // Transition the standby to active.
        cluster.transitionToActive(1);
        fail("Standby transitioned to active, but should not have been able to");
    } catch (ExitException ee) {
        GenericTestUtils.assertExceptionContains("Error replaying edit log", ee);
    }
}

17 Source : TestWebHDFSForHA.java
with Apache License 2.0
from NJUJYB

/**
 * Make sure the WebHdfsFileSystem will retry based on RetriableException when
 * rpcServer is null in NamenodeWebHdfsMethods while NameNode starts up.
 */
@Test(timeout = 120000)
public void testRetryWhileNNStartup() throws Exception {
    final Configuration conf = DFSTestUtil.newHAConfiguration(LOGICAL_NAME);
    MiniDFSCluster cluster = null;
    final Map<String, Boolean> resultMap = new HashMap<String, Boolean>();
    try {
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo).numDataNodes(0).build();
        HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);
        cluster.waitActive();
        cluster.transitionToActive(0);
        final NameNode namenode = cluster.getNameNode(0);
        final NamenodeProtocols rpcServer = namenode.getRpcServer();
        Whitebox.setInternalState(namenode, "rpcServer", null);
        new Thread() {

            @Override
            public void run() {
                boolean result = false;
                FileSystem fs = null;
                try {
                    fs = FileSystem.get(WEBHDFS_URI, conf);
                    final Path dir = new Path("/test");
                    result = fs.mkdirs(dir);
                } catch (IOException e) {
                    result = false;
                } finally {
                    IOUtils.cleanup(null, fs);
                }
                synchronized (TestWebHDFSForHA.this) {
                    resultMap.put("mkdirs", result);
                    TestWebHDFSForHA.this.notifyAll();
                }
            }
        }.start();
        Thread.sleep(1000);
        Whitebox.setInternalState(namenode, "rpcServer", rpcServer);
        synchronized (this) {
            while (!resultMap.containsKey("mkdirs")) {
                this.wait();
            }
            Assert.assertTrue(resultMap.get("mkdirs"));
        }
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}

17 Source : TestNamenodeRetryCache.java
with Apache License 2.0
from NJUJYB

/**
 * Make sure a retry call does not hang because of the exception thrown in the
 * first call.
 */
@Test(timeout = 60000)
public void testUpdatePipelineWithFailOver() throws Exception {
    cluster.shutdown();
    namesystem = null;
    filesystem = null;
    cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1).build();
    FSNamesystem ns0 = cluster.getNamesystem(0);
    ExtendedBlock oldBlock = new ExtendedBlock();
    ExtendedBlock newBlock = new ExtendedBlock();
    DatanodeID[] newNodes = new DatanodeID[2];
    String[] newStorages = new String[2];
    newCall();
    try {
        ns0.updatePipeline("testClient", oldBlock, newBlock, newNodes, newStorages);
        fail("Expect StandbyException from the updatePipeline call");
    } catch (StandbyException e) {
        // expected, since in the beginning both nn are in standby state
        GenericTestUtils.assertExceptionContains(HAServiceState.STANDBY.toString(), e);
    }
    cluster.transitionToActive(0);
    try {
        ns0.updatePipeline("testClient", oldBlock, newBlock, newNodes, newStorages);
    } catch (IOException e) {
    // ignore; the call should not hang.
    }
}

17 Source : TestXAttrsWithHA.java
with Apache License 2.0
from NJUJYB

@Before
public void setupCluster() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    HAUtil.setAllowStandbyReads(conf, true);
    cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1).waitSafeMode(false).build();
    cluster.waitActive();
    nn0 = cluster.getNameNode(0);
    nn1 = cluster.getNameNode(1);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    cluster.transitionToActive(0);
}

17 Source : TestFailoverWithBlockTokensEnabled.java
with Apache License 2.0
from NJUJYB

@Test
public void ensureInvalidBlockTokensAreRejected() throws IOException, URISyntaxException {
    cluster.transitionToActive(0);
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    DFSTestUtil.writeFile(fs, TEST_PATH, TEST_DATA);
    assertEquals(TEST_DATA, DFSTestUtil.readFile(fs, TEST_PATH));
    DFSClient dfsClient = DFSClientAdapter.getDFSClient((DistributedFileSystem) fs);
    DFSClient spyDfsClient = Mockito.spy(dfsClient);
    Mockito.doAnswer(new Answer<LocatedBlocks>() {

        @Override
        public LocatedBlocks answer(InvocationOnMock arg0) throws Throwable {
            LocatedBlocks locatedBlocks = (LocatedBlocks) arg0.callRealMethod();
            for (LocatedBlock lb : locatedBlocks.getLocatedBlocks()) {
                Token<BlockTokenIdentifier> token = lb.getBlockToken();
                BlockTokenIdentifier id = lb.getBlockToken().decodeIdentifier();
                // This will make the token invalid, since the password
                // won't match anymore
                id.setExpiryDate(Time.now() + 10);
                Token<BlockTokenIdentifier> newToken = new Token<BlockTokenIdentifier>(id.getBytes(), token.getPassword(), token.getKind(), token.getService());
                lb.setBlockToken(newToken);
            }
            return locatedBlocks;
        }
    }).when(spyDfsClient).getLocatedBlocks(Mockito.anyString(), Mockito.anyLong(), Mockito.anyLong());
    DFSClientAdapter.setDFSClient((DistributedFileSystem) fs, spyDfsClient);
    try {
        assertEquals(TEST_DATA, DFSTestUtil.readFile(fs, TEST_PATH));
        fail("Shouldn't have been able to read a file with invalid block tokens");
    } catch (IOException ioe) {
        GenericTestUtils.assertExceptionContains("Could not obtain block", ioe);
    }
}

17 Source : TestEditLogsDuringFailover.java
with Apache License 2.0
from NJUJYB

private void testFailoverFinalizesAndReadsInProgress(boolean partialTxAtEnd) throws Exception {
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
    try {
        // Create a fake in-progress edit-log in the shared directory
        URI sharedUri = cluster.getSharedEditsDir(0, 1);
        File sharedDir = new File(sharedUri.getPath(), "current");
        FSNamesystem fsn = cluster.getNamesystem(0);
        FSImageTestUtil.createAbortedLogWithMkdirs(sharedDir, NUM_DIRS_IN_LOG, 1, fsn.getLastInodeId() + 1);
        assertEditFiles(Collections.singletonList(sharedUri), NNStorage.getInProgressEditsFileName(1));
        if (partialTxAtEnd) {
            FileOutputStream outs = null;
            try {
                File editLogFile = new File(sharedDir, NNStorage.getInProgressEditsFileName(1));
                outs = new FileOutputStream(editLogFile, true);
                outs.write(new byte[] { 0x18, 0x00, 0x00, 0x00 });
                LOG.error("editLogFile = " + editLogFile);
            } finally {
                IOUtils.cleanup(LOG, outs);
            }
        }
        // Transition one of the NNs to active
        cluster.transitionToActive(0);
        // In the transition to active, it should have read the log -- and
        // hence see one of the dirs we made in the fake log.
        String testPath = "/dir" + NUM_DIRS_IN_LOG;
        assertNotNull(cluster.getNameNode(0).getRpcServer().getFileInfo(testPath));
        // It also should have finalized that log in the shared directory and started
        // writing to a new one at the next txid.
        assertEditFiles(Collections.singletonList(sharedUri), NNStorage.getFinalizedEditsFileName(1, NUM_DIRS_IN_LOG + 1), NNStorage.getInProgressEditsFileName(NUM_DIRS_IN_LOG + 2));
    } finally {
        cluster.shutdown();
    }
}

17 Source : TestBootstrapStandby.java
with Apache License 2.0
from NJUJYB

@Before
public void setupCluster() throws IOException {
    Configuration conf = new Configuration();
    MiniDFSNNTopology topology = new MiniDFSNNTopology().addNameservice(new MiniDFSNNTopology.NSConf("ns1").addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(20001)).addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(20002)));
    cluster = new MiniDFSCluster.Builder(conf).nnTopology(topology).numDataNodes(0).build();
    cluster.waitActive();
    nn0 = cluster.getNameNode(0);
    cluster.transitionToActive(0);
    cluster.shutdownNameNode(1);
}

17 Source : TestEditLogsDuringFailover.java
with Apache License 2.0
from naver

private void testFailoverFinalizesAndReadsInProgress(boolean partialTxAtEnd) throws Exception {
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
    try {
        // Create a fake in-progress edit-log in the shared directory
        URI sharedUri = cluster.getSharedEditsDir(0, 1);
        File sharedDir = new File(sharedUri.getPath(), "current");
        FSNamesystem fsn = cluster.getNamesystem(0);
        FSImageTestUtil.createAbortedLogWithMkdirs(sharedDir, NUM_DIRS_IN_LOG, 1, fsn.getFSDirectory().getLastInodeId() + 1);
        assertEditFiles(Collections.singletonList(sharedUri), NNStorage.getInProgressEditsFileName(1));
        if (partialTxAtEnd) {
            FileOutputStream outs = null;
            try {
                File editLogFile = new File(sharedDir, NNStorage.getInProgressEditsFileName(1));
                outs = new FileOutputStream(editLogFile, true);
                outs.write(new byte[] { 0x18, 0x00, 0x00, 0x00 });
                LOG.error("editLogFile = " + editLogFile);
            } finally {
                IOUtils.cleanup(LOG, outs);
            }
        }
        // Transition one of the NNs to active
        cluster.transitionToActive(0);
        // In the transition to active, it should have read the log -- and
        // hence see one of the dirs we made in the fake log.
        String testPath = "/dir" + NUM_DIRS_IN_LOG;
        assertNotNull(cluster.getNameNode(0).getRpcServer().getFileInfo(testPath));
        // It also should have finalized that log in the shared directory and started
        // writing to a new one at the next txid.
        assertEditFiles(Collections.singletonList(sharedUri), NNStorage.getFinalizedEditsFileName(1, NUM_DIRS_IN_LOG + 1), NNStorage.getInProgressEditsFileName(NUM_DIRS_IN_LOG + 2));
    } finally {
        cluster.shutdown();
    }
}

16 Source : TestLossyRetryInvocationHandler.java
with Apache License 2.0
from NJUJYB

@Test
public void testStartNNWithTrashEmptier() throws Exception {
    MiniDFSCluster cluster = null;
    Configuration conf = new HdfsConfiguration();
    // enable both trash emptier and dropping response
    conf.setLong("fs.trash.interval", 360);
    conf.setInt(DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, 2);
    try {
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
        cluster.waitActive();
        cluster.transitionToActive(0);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}

16 Source : TestHAWebUI.java
with Apache License 2.0
from NJUJYB

/**
 * Tests that the web UI of the name node provides a link to browse the file
 * system and summary of under-replicated blocks only in active state
 */
@Test
public void testLinkAndClusterSummary() throws Exception {
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
    try {
        cluster.waitActive();
        cluster.transitionToActive(0);
        String pageContents = DFSTestUtil.urlGet(new URL("http://localhost:" + NameNode.getHttpAddress(cluster.getConfiguration(0)).getPort() + "/dfshealth.jsp"));
        assertTrue(pageContents.contains("Browse the filesystem"));
        assertTrue(pageContents.contains("Number of Under-Replicated Blocks"));
        cluster.transitionToStandby(0);
        pageContents = DFSTestUtil.urlGet(new URL("http://localhost:" + NameNode.getHttpAddress(cluster.getConfiguration(0)).getPort() + "/dfshealth.jsp"));
        assertFalse(pageContents.contains("Browse the filesystem"));
        assertFalse(pageContents.contains("Number of Under-Replicated Blocks"));
        cluster.transitionToActive(0);
        pageContents = DFSTestUtil.urlGet(new URL("http://localhost:" + NameNode.getHttpAddress(cluster.getConfiguration(0)).getPort() + "/dfshealth.jsp"));
        assertTrue(pageContents.contains("Browse the filesystem"));
        assertTrue(pageContents.contains("Number of Under-Replicated Blocks"));
    } finally {
        cluster.shutdown();
    }
}

16 Source : TestHASafeMode.java
with Apache License 2.0
from NJUJYB

/**
 * Set up a namesystem with several edits, both deletions and
 * additions, and failover to a new NN while that NN is in
 * safemode. Ensure that it will exit safemode.
 */
@Test
public void testComplexFailoverIntoSafemode() throws Exception {
    banner("Starting with NN0 active and NN1 standby, creating some blocks");
    DFSTestUtil.createFile(fs, new Path("/test"), 3 * BLOCK_SIZE, (short) 3, 1L);
    // Roll edit log so that, when the SBN restarts, it will load
    // the namespace during startup and enter safemode.
    nn0.getRpcServer().rollEditLog();
    banner("Creating some blocks that won't be in the edit log");
    DFSTestUtil.createFile(fs, new Path("/test2"), 5 * BLOCK_SIZE, (short) 3, 1L);
    banner("Deleting the original blocks");
    fs.delete(new Path("/test"), true);
    banner("Restarting standby");
    restartStandby();
    // We expect it to be on its way out of safemode, since all of the blocks
    // from the edit log have been reported.
    assertSafeMode(nn1, 3, 3, 3, 0);
    // Initiate a failover into it while it's in safemode
    banner("Initiating a failover into NN1 in safemode");
    NameNodeAdapter.abortEditLogs(nn0);
    cluster.transitionToActive(1);
    assertSafeMode(nn1, 5, 5, 3, 0);
}

16 Source : TestHASafeMode.java
with Apache License 2.0
from NJUJYB

/**
 * Make sure that when we transition to active in safe mode that we don't
 * prematurely consider blocks missing just because not all DNs have reported
 * yet.
 *
 * This is a regression test for HDFS-3921.
 */
@Test
public void testNoPopulatingReplQueuesWhenStartingActiveInSafeMode() throws IOException {
    DFSTestUtil.createFile(fs, new Path("/test"), 15 * BLOCK_SIZE, (short) 3, 1L);
    // Stop the DN so that when the NN restarts not all blocks will be reported
    // and the NN won't leave safe mode.
    cluster.stopDataNode(1);
    // Restart the namenode but don't wait for it to hear from all DNs (since
    // one DN is deliberately shut down.)
    cluster.restartNameNode(0, false);
    cluster.transitionToActive(0);
    assertTrue(cluster.getNameNode(0).isInSafeMode());
    // We shouldn't yet consider any blocks "missing" since we're in startup
    // safemode, i.e. not all DNs may have reported.
    assertEquals(0, cluster.getNamesystem(0).getMissingBlocksCount());
}

16 Source : TestHarFileSystemWithHA.java
with Apache License 2.0
from NJUJYB

/**
 * Test that the HarFileSystem works with underlying HDFS URIs that have no
 * port specified, as is often the case with an HA setup.
 */
@Test
public void testHarUriWithHaUriWithNoPort() throws Exception {
    Configuration conf = new HdfsConfiguration();
    MiniDFSCluster cluster = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).nnTopology(MiniDFSNNTopology.simpleHATopology()).build();
        cluster.transitionToActive(0);
        HATestUtil.setFailoverConfigurations(cluster, conf);
        createEmptyHarArchive(HATestUtil.configureFailoverFs(cluster, conf), TEST_HAR_PATH);
        URI failoverUri = FileSystem.getDefaultUri(conf);
        Path p = new Path("har://hdfs-" + failoverUri.getAuthority() + TEST_HAR_PATH);
        p.getFileSystem(conf);
    } finally {
        cluster.shutdown();
    }
}

16 Source : TestFailoverWithBlockTokensEnabled.java
with Apache License 2.0
from NJUJYB

private void writeUsingBothNameNodes() throws ServiceFailedException, IOException, URISyntaxException {
    cluster.transitionToActive(0);
    FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
    DFSTestUtil.writeFile(fs, TEST_PATH, TEST_DATA);
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    fs.delete(TEST_PATH, false);
    DFSTestUtil.writeFile(fs, TEST_PATH, TEST_DATA);
}

16 Source : TestEditLogsDuringFailover.java
with Apache License 2.0
from NJUJYB

@Test
public void testStartup() throws Exception {
    Configuration conf = new Configuration();
    HAUtil.setAllowStandbyReads(conf, true);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
    try {
        // During HA startup, both nodes should be in
        // standby and we shouldn't have any edits files
        // in any edits directory!
        List<URI> allDirs = Lists.newArrayList();
        allDirs.addAll(cluster.getNameDirs(0));
        allDirs.addAll(cluster.getNameDirs(1));
        allDirs.add(cluster.getSharedEditsDir(0, 1));
        assertNoEditFiles(allDirs);
        // Set the first NN to active, make sure it creates edits
        // in its own dirs and the shared dir. The standby
        // should still have no edits!
        cluster.transitionToActive(0);
        assertEditFiles(cluster.getNameDirs(0), NNStorage.getInProgressEditsFileName(1));
        assertEditFiles(Collections.singletonList(cluster.getSharedEditsDir(0, 1)), NNStorage.getInProgressEditsFileName(1));
        assertNoEditFiles(cluster.getNameDirs(1));
        cluster.getNameNode(0).getRpcServer().mkdirs("/test", FsPermission.createImmutable((short) 0755), true);
        // Restarting the standby should not finalize any edits files
        // in the shared directory when it starts up!
        cluster.restartNameNode(1);
        assertEditFiles(cluster.getNameDirs(0), NNStorage.getInProgressEditsFileName(1));
        assertEditFiles(Collections.singletonList(cluster.getSharedEditsDir(0, 1)), NNStorage.getInProgressEditsFileName(1));
        assertNoEditFiles(cluster.getNameDirs(1));
        // Additionally it should not have applied any in-progress logs
        // at start-up -- otherwise, it would have read half-way into
        // the current log segment, and on the next roll, it would have to
        // either replay starting in the middle of the segment (not allowed)
        // or double-replay the edits (incorrect).
        assertNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test", true));
        cluster.getNameNode(0).getRpcServer().mkdirs("/test2", FsPermission.createImmutable((short) 0755), true);
        // If we restart NN0, it'll come back as standby, and we can
        // transition NN1 to active and make sure it reads edits correctly at this point.
        cluster.restartNameNode(0);
        cluster.transitionToActive(1);
        // NN1 should have both the edits that came before its restart, and the edits that
        // came after its restart.
        assertNotNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test", true));
        assertNotNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test2", true));
    } finally {
        cluster.shutdown();
    }
}

16 Source : TestDFSUpgradeWithHA.java
with Apache License 2.0
from NJUJYB

/**
 * Test rollback with NFS shared dir.
 */
@Test
public void testRollbackWithNfs() throws Exception {
    MiniDFSCluster cluster = null;
    FileSystem fs = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
        File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
        // No upgrade is in progress at the moment.
        checkClusterPreviousDirExistence(cluster, false);
        assertCTimesEqual(cluster);
        checkPreviousDirExistence(sharedDir, false);
        // Transition NN0 to active and do some FS ops.
        cluster.transitionToActive(0);
        fs = HATestUtil.configureFailoverFs(cluster, conf);
        assertTrue(fs.mkdirs(new Path("/foo1")));
        // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
        // flag.
        cluster.shutdownNameNode(1);
        cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
        cluster.restartNameNode(0, false);
        checkNnPreviousDirExistence(cluster, 0, true);
        checkNnPreviousDirExistence(cluster, 1, false);
        checkPreviousDirExistence(sharedDir, true);
        // NN0 should come up in the active state when given the -upgrade option,
        // so no need to transition it to active.
        assertTrue(fs.mkdirs(new Path("/foo2")));
        // Now bootstrap the standby with the upgraded info.
        int rc = BootstrapStandby.run(new String[] { "-force" }, cluster.getConfiguration(1));
        assertEquals(0, rc);
        cluster.restartNameNode(1);
        checkNnPreviousDirExistence(cluster, 0, true);
        checkNnPreviousDirExistence(cluster, 1, true);
        checkPreviousDirExistence(sharedDir, true);
        assertCTimesEqual(cluster);
        // Now shut down the cluster and do the rollback.
        Collection<URI> nn1NameDirs = cluster.getNameDirs(0);
        cluster.shutdown();
        conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs));
        NameNode.doRollback(conf, false);
        // The rollback operation should have rolled back the first NN's local
        // dirs, and the shared dir, but not the other NN's dirs. Those have to be
        // done by bootstrapping the standby.
        checkNnPreviousDirExistence(cluster, 0, false);
        checkPreviousDirExistence(sharedDir, false);
    } finally {
        if (fs != null) {
            fs.close();
        }
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}

16 Source : TestDFSUpgradeWithHA.java
with Apache License 2.0
from NJUJYB

/**
 * Make sure that starting a second NN with the -upgrade flag fails if the
 * other NN has already done that.
 */
@Test
public void testCannotUpgradeSecondNameNode() throws IOException, URISyntaxException {
    MiniDFSCluster cluster = null;
    FileSystem fs = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
        File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
        // No upgrade is in progress at the moment.
        checkClusterPreviousDirExistence(cluster, false);
        assertCTimesEqual(cluster);
        checkPreviousDirExistence(sharedDir, false);
        // Transition NN0 to active and do some FS ops.
        cluster.transitionToActive(0);
        fs = HATestUtil.configureFailoverFs(cluster, conf);
        assertTrue(fs.mkdirs(new Path("/foo1")));
        // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
        // flag.
        cluster.shutdownNameNode(1);
        cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
        cluster.restartNameNode(0, false);
        checkNnPreviousDirExistence(cluster, 0, true);
        checkNnPreviousDirExistence(cluster, 1, false);
        checkPreviousDirExistence(sharedDir, true);
        // NN0 should come up in the active state when given the -upgrade option,
        // so no need to transition it to active.
        assertTrue(fs.mkdirs(new Path("/foo2")));
        // Restart NN0 without the -upgrade flag, to make sure that works.
        cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
        cluster.restartNameNode(0, false);
        // Make sure we can still do FS ops after upgrading.
        cluster.transitionToActive(0);
        assertTrue(fs.mkdirs(new Path("/foo3")));
        // Make sure that starting the second NN with the -upgrade flag fails.
        cluster.getNameNodeInfos()[1].setStartOpt(StartupOption.UPGRADE);
        try {
            cluster.restartNameNode(1, false);
            fail("Should not have been able to start second NN with -upgrade");
        } catch (IOException ioe) {
            GenericTestUtils.assertExceptionContains("It looks like the shared log is already being upgraded", ioe);
        }
    } finally {
        if (fs != null) {
            fs.close();
        }
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}

16 Source : TestDFSUpgradeWithHA.java
with Apache License 2.0
from NJUJYB

/**
 * Ensure that an admin cannot finalize an HA upgrade without at least one NN
 * being active.
 */
@Test
public void testCannotFinalizeIfNoActive() throws IOException, URISyntaxException {
    MiniDFSCluster cluster = null;
    FileSystem fs = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
        File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
        // No upgrade is in progress at the moment.
        checkClusterPreviousDirExistence(cluster, false);
        assertCTimesEqual(cluster);
        checkPreviousDirExistence(sharedDir, false);
        // Transition NN0 to active and do some FS ops.
        cluster.transitionToActive(0);
        fs = HATestUtil.configureFailoverFs(cluster, conf);
        assertTrue(fs.mkdirs(new Path("/foo1")));
        // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
        // flag.
        cluster.shutdownNameNode(1);
        cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
        cluster.restartNameNode(0, false);
        checkNnPreviousDirExistence(cluster, 0, true);
        checkNnPreviousDirExistence(cluster, 1, false);
        checkPreviousDirExistence(sharedDir, true);
        // NN0 should come up in the active state when given the -upgrade option,
        // so no need to transition it to active.
        assertTrue(fs.mkdirs(new Path("/foo2")));
        // Restart NN0 without the -upgrade flag, to make sure that works.
        cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
        cluster.restartNameNode(0, false);
        // Make sure we can still do FS ops after upgrading.
        cluster.transitionToActive(0);
        assertTrue(fs.mkdirs(new Path("/foo3")));
        // Now bootstrap the standby with the upgraded info.
        int rc = BootstrapStandby.run(new String[] { "-force" }, cluster.getConfiguration(1));
        assertEquals(0, rc);
        // Now restart NN1 and make sure that we can do ops against that as well.
        cluster.restartNameNode(1);
        cluster.transitionToStandby(0);
        cluster.transitionToActive(1);
        assertTrue(fs.mkdirs(new Path("/foo4")));
        assertCTimesEqual(cluster);
        // Now there's no active NN.
        cluster.transitionToStandby(1);
        try {
            runFinalizeCommand(cluster);
            fail("Should not have been able to finalize upgrade with no NN active");
        } catch (IOException ioe) {
            GenericTestUtils.assertExceptionContains("Cannot finalize with no NameNode active", ioe);
        }
    } finally {
        if (fs != null) {
            fs.close();
        }
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}

16 Source : TestDFSUpgradeWithHA.java
with Apache License 2.0
from NJUJYB

/**
 * Make sure that an HA NN with NFS-based HA can successfully start and
 * upgrade.
 */
@Test
public void testNfsUpgrade() throws IOException, URISyntaxException {
    MiniDFSCluster cluster = null;
    FileSystem fs = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
        File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
        // No upgrade is in progress at the moment.
        checkClusterPreviousDirExistence(cluster, false);
        assertCTimesEqual(cluster);
        checkPreviousDirExistence(sharedDir, false);
        // Transition NN0 to active and do some FS ops.
        cluster.transitionToActive(0);
        fs = HATestUtil.configureFailoverFs(cluster, conf);
        assertTrue(fs.mkdirs(new Path("/foo1")));
        // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
        // flag.
        cluster.shutdownNameNode(1);
        cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
        cluster.restartNameNode(0, false);
        checkNnPreviousDirExistence(cluster, 0, true);
        checkNnPreviousDirExistence(cluster, 1, false);
        checkPreviousDirExistence(sharedDir, true);
        // NN0 should come up in the active state when given the -upgrade option,
        // so no need to transition it to active.
        assertTrue(fs.mkdirs(new Path("/foo2")));
        // Restart NN0 without the -upgrade flag, to make sure that works.
        cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
        cluster.restartNameNode(0, false);
        // Make sure we can still do FS ops after upgrading.
        cluster.transitionToActive(0);
        assertTrue(fs.mkdirs(new Path("/foo3")));
        // Now bootstrap the standby with the upgraded info.
        int rc = BootstrapStandby.run(new String[] { "-force" }, cluster.getConfiguration(1));
        assertEquals(0, rc);
        // Now restart NN1 and make sure that we can do ops against that as well.
        cluster.restartNameNode(1);
        cluster.transitionToStandby(0);
        cluster.transitionToActive(1);
        assertTrue(fs.mkdirs(new Path("/foo4")));
        assertCTimesEqual(cluster);
    } finally {
        if (fs != null) {
            fs.close();
        }
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}

16 Source : TestBootstrapStandbyWithQJM.java
with Apache License 2.0
from NJUJYB

/**
 * BootstrapStandby when the existing NN is active
 */
@Test
public void testBootstrapStandbyWithActiveNN() throws Exception {
    // make the first NN in active state
    cluster.transitionToActive(0);
    Configuration confNN1 = cluster.getConfiguration(1);
    // shut down nn1
    cluster.shutdownNameNode(1);
    int rc = BootstrapStandby.run(new String[] { "-force" }, confNN1);
    assertEquals(0, rc);
    // Should have copied over the namespace from the standby
    FSImageTestUtil.assertNNHasCheckpoints(cluster, 1, ImmutableList.of(0));
    FSImageTestUtil.assertNNFilesMatch(cluster);
}

16 Source : TestBootstrapStandbyWithQJM.java
with Apache License 2.0
from NJUJYB

private void testUpgrade(UpgradeState state) throws Exception {
    cluster.transitionToActive(0);
    final Configuration confNN1 = cluster.getConfiguration(1);
    final File current = cluster.getNameNode(1).getFSImage().getStorage().getStorageDir(0).getCurrentDir();
    final File tmp = cluster.getNameNode(1).getFSImage().getStorage().getStorageDir(0).getPreviousTmp();
    // shut down nn1
    cluster.shutdownNameNode(1);
    // make NN0 in upgrade state
    FSImage fsImage0 = cluster.getNameNode(0).getNamesystem().getFSImage();
    Whitebox.setInternalState(fsImage0, "isUpgradeFinalized", false);
    switch(state) {
        case RECOVER:
            // rename the current directory to previous.tmp in nn1
            NNStorage.rename(current, tmp);
            break;
        case FORMAT:
            // rename the current directory to a random name so it's not formatted
            final File wrongPath = new File(current.getParentFile(), "wrong");
            NNStorage.rename(current, wrongPath);
            break;
        default:
            break;
    }
    int rc = BootstrapStandby.run(new String[] { "-force" }, confNN1);
    assertEquals(0, rc);
    // Should have copied over the namespace from the standby
    FSImageTestUtil.assertNNHasCheckpoints(cluster, 1, ImmutableList.of(0));
    FSImageTestUtil.assertNNFilesMatch(cluster);
    // make sure the NN1 is in upgrade state, i.e., the previous directory has
    // been successfully created
    cluster.restartNameNode(1);
    assertFalse(cluster.getNameNode(1).getNamesystem().isUpgradeFinalized());
}

16 Source : TestBootstrapStandbyWithQJM.java
with Apache License 2.0
from NJUJYB

@Before
public void setup() throws Exception {
    Configuration conf = new Configuration();
    // Turn off IPC client caching, so that the suite can handle
    // the restart of the daemons between test cases.
    conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY, 0);
    MiniQJMHACluster miniQjmHaCluster = new MiniQJMHACluster.Builder(conf).build();
    cluster = miniQjmHaCluster.getDfsCluster();
    jCluster = miniQjmHaCluster.getJournalCluster();
    // make nn0 active
    cluster.transitionToActive(0);
    // do something to generate in-progress edit log data
    DistributedFileSystem dfs = (DistributedFileSystem) HATestUtil.configureFailoverFs(cluster, conf);
    dfs.mkdirs(new Path("/test2"));
    dfs.close();
}

15 Source : TestWebHDFSForHA.java
with Apache License 2.0
from NJUJYB

@Test
public void testHA() throws IOException {
    Configuration conf = DFSTestUtil.newHAConfiguration(LOGICAL_NAME);
    MiniDFSCluster cluster = null;
    FileSystem fs = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo).numDataNodes(0).build();
        HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);
        cluster.waitActive();
        fs = FileSystem.get(WEBHDFS_URI, conf);
        cluster.transitionToActive(0);
        final Path dir = new Path("/test");
        Assert.assertTrue(fs.mkdirs(dir));
        cluster.shutdownNameNode(0);
        cluster.transitionToActive(1);
        final Path dir2 = new Path("/test2");
        Assert.assertTrue(fs.mkdirs(dir2));
    } finally {
        IOUtils.cleanup(null, fs);
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}

15 Source : TestXAttrsWithHA.java
with Apache License 2.0
from NJUJYB

/**
 * Test that xattrs are properly tracked by the standby
 */
@Test(timeout = 60000)
public void testXAttrsTrackedOnStandby() throws Exception {
    fs.create(path).close();
    fs.setXAttr(path, name1, value1, EnumSet.of(XAttrSetFlag.CREATE));
    fs.setXAttr(path, name2, value2, EnumSet.of(XAttrSetFlag.CREATE));
    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
    List<XAttr> xAttrs = nn1.getRpcServer().getXAttrs("/file", null);
    assertEquals(2, xAttrs.size());
    cluster.shutdownNameNode(0);
    // Failover the current standby to active.
    cluster.shutdownNameNode(0);
    cluster.transitionToActive(1);
    Map<String, byte[]> xattrs = fs.getXAttrs(path);
    Assert.assertEquals(xattrs.size(), 2);
    Assert.assertArrayEquals(value1, xattrs.get(name1));
    Assert.assertArrayEquals(value2, xattrs.get(name2));
    fs.delete(path, true);
}

15 Source : TestStateTransitionFailure.java
with Apache License 2.0
from NJUJYB

/**
 * Ensure that a failure to fully transition to the active state causes a
 * shutdown of the NameNode.
 */
@Test
public void testFailureToTransitionCausesShutdown() throws IOException {
    MiniDFSCluster cluster = null;
    try {
        Configuration conf = new Configuration();
        // Set an illegal value for the trash emptier interval. This will cause
        // the NN to fail to transition to the active state.
        conf.setLong(CommonConfigurationKeys.FS_TRASH_INTERVAL_KEY, -1);
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).checkExitOnShutdown(false).build();
        cluster.waitActive();
        try {
            cluster.transitionToActive(0);
            fail("Transitioned to active but should not have been able to.");
        } catch (ExitException ee) {
            assertExceptionContains("Cannot start trash emptier with negative interval", ee);
        }
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}

15 Source : TestQuotasWithHA.java
with Apache License 2.0
from NJUJYB

@Before
public void setupCluster() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    HAUtil.setAllowStandbyReads(conf, true);
    cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1).waitSafeMode(false).build();
    cluster.waitActive();
    nn0 = cluster.getNameNode(0);
    nn1 = cluster.getNameNode(1);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    cluster.transitionToActive(0);
}

15 Source : TestDNFencing.java
with Apache License 2.0
from NJUJYB

/**
 * Regression test for HDFS-2742. The issue in this bug was:
 * - DN does a block report while file is open. This BR contains
 *   the block in RBW state.
 * - Standby queues the RBW state in PendingDatanodeMessages
 * - Standby processes edit logs during failover. Before fixing
 *   this bug, it was mistakenly applying the RBW reported state
 *   after the block had been completed, causing the block to get
 *   marked corrupt. Instead, we should now be applying the RBW
 *   message on OP_ADD, and then the FINALIZED message on OP_CLOSE.
 */
@Test
public void testBlockReportsWhileFileBeingWritten() throws Exception {
    FSDataOutputStream out = fs.create(TEST_FILE_PATH);
    try {
        AppendTestUtil.write(out, 0, 10);
        out.hflush();
        // Block report will include the RBW replica, but will be
        // queued on the StandbyNode.
        cluster.triggerBlockReports();
    } finally {
        IOUtils.closeStream(out);
    }
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    // Verify that no replicas are marked corrupt, and that the
    // file is readable from the failed-over standby.
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
    assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
    assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
    DFSTestUtil.readFile(fs, TEST_FILE_PATH);
}

15 Source : TestBookKeeperAsHASharedDir.java
with Apache License 2.0
from NJUJYB

/**
 * NameNode should load the edits correctly if the applicable edits are
 * present in the BKJM.
 */
@Test
public void testNameNodeMultipleSwitchesUsingBKJM() throws Exception {
    MiniDFSCluster cluster = null;
    try {
        Configuration conf = new Configuration();
        conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
        conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, BKJMUtil.createJournalURI("/correctEditLogSelection").toString());
        BKJMUtil.addJournalManagerDefinition(conf);
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).manageNameDfsSharedDirs(false).build();
        NameNode nn1 = cluster.getNameNode(0);
        NameNode nn2 = cluster.getNameNode(1);
        cluster.waitActive();
        cluster.transitionToActive(0);
        // Roll Edits from current Active.
        nn1.getRpcServer().rollEditLog();
        // Transition the current active to standby gracefully.
        cluster.transitionToStandby(0);
        // Make the other Active and Roll edits multiple times
        cluster.transitionToActive(1);
        nn2.getRpcServer().rollEditLog();
        nn2.getRpcServer().rollEditLog();
        // Now one more failover; NN1 should be able to fail over successfully.
        cluster.transitionToStandby(1);
        cluster.transitionToActive(0);
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}

15 Source : HdfsTestUtil.java
with Apache License 2.0
from apache

public static MiniDFSCluster setupClass(String dir, boolean safeModeTesting, boolean haTesting) throws Exception {
    checkAssumptions();
    if (!HA_TESTING_ENABLED)
        haTesting = false;
    DefaultMetricsSystem.setInstance(new FakeMetricsSystem());
    Configuration conf = getBasicConfiguration(new Configuration());
    conf.set("hdfs.minidfs.basedir", dir + File.separator + "hdfsBaseDir");
    conf.set("dfs.namenode.name.dir", dir + File.separator + "nameNodeNameDir");
    // Disable metrics logging for HDFS
    conf.setInt("dfs.namenode.metrics.logger.period.seconds", 0);
    conf.setInt("dfs.datanode.metrics.logger.period.seconds", 0);
    System.setProperty("test.build.data", dir + File.separator + "hdfs" + File.separator + "build");
    System.setProperty("test.cache.data", dir + File.separator + "hdfs" + File.separator + "cache");
    System.setProperty("solr.lock.type", DirectoryFactory.LOCK_TYPE_HDFS);
    // test-files/solr/solr.xml sets this to be 15000. This isn't long enough for HDFS in some cases.
    System.setProperty("socketTimeout", "90000");
    String blockcacheGlobal = System.getProperty("solr.hdfs.blockcache.global", Boolean.toString(random().nextBoolean()));
    System.setProperty("solr.hdfs.blockcache.global", blockcacheGlobal);
    // Limit memory usage for HDFS tests
    if (Boolean.parseBoolean(blockcacheGlobal)) {
        System.setProperty("solr.hdfs.blockcache.blocksperbank", "4096");
    } else {
        System.setProperty("solr.hdfs.blockcache.blocksperbank", "512");
        System.setProperty("tests.hdfs.numdatanodes", "1");
    }
    int dataNodes = Integer.getInteger("tests.hdfs.numdatanodes", 2);
    final MiniDFSCluster.Builder dfsClusterBuilder = new MiniDFSCluster.Builder(conf).numDataNodes(dataNodes).format(true);
    if (haTesting) {
        dfsClusterBuilder.nnTopology(MiniDFSNNTopology.simpleHATopology());
    }
    MiniDFSCluster dfsCluster = dfsClusterBuilder.build();
    HdfsUtil.TEST_CONF = getClientConfiguration(dfsCluster);
    System.setProperty("solr.hdfs.home", getDataDir(dfsCluster, "solr_hdfs_home"));
    dfsCluster.waitActive();
    if (haTesting)
        dfsCluster.transitionToActive(0);
    int rndMode = random().nextInt(3);
    if (safeModeTesting && rndMode == 1) {
        NameNodeAdapter.enterSafeMode(dfsCluster.getNameNode(), false);
        int rnd = random().nextInt(10000);
        Timer timer = new Timer();
        synchronized (TIMERS_LOCK) {
            if (timers == null) {
                timers = new HashMap<>();
            }
            timers.put(dfsCluster, timer);
        }
        timer.schedule(new TimerTask() {

            @Override
            public void run() {
                NameNodeAdapter.leaveSafeMode(dfsCluster.getNameNode());
            }
        }, rnd);
    } else if (haTesting && rndMode == 2) {
        int rnd = random().nextInt(30000);
        Timer timer = new Timer();
        synchronized (TIMERS_LOCK) {
            if (timers == null) {
                timers = new HashMap<>();
            }
            timers.put(dfsCluster, timer);
        }
        timer.schedule(new TimerTask() {

            @Override
            public void run() {
            // TODO: randomly transition to standby
            // try {
            // dfsCluster.transitionToStandby(0);
            // dfsCluster.transitionToActive(1);
            // } catch (IOException e) {
            // throw new RuntimeException();
            // }
            }
        }, rnd);
    } else {
        // TODO: we could do much better at testing this
        // force a lease recovery by creating a tlog file and not closing it
        URI uri = dfsCluster.getURI();
        Path hdfsDirPath = new Path(uri.toString() + "/solr/collection1/core_node1/data/tlog/tlog.0000000000000000000");
        // simulate a transaction log that is already being created
        badTlogOutStreamFs = FileSystem.get(hdfsDirPath.toUri(), getClientConfiguration(dfsCluster));
        badTlogOutStream = badTlogOutStreamFs.create(hdfsDirPath);
    }
    SolrTestCaseJ4.useFactory("org.apache.solr.core.HdfsDirectoryFactory");
    return dfsCluster;
}

14 Source : TestWebHDFSForHA.java
with Apache License 2.0
from NJUJYB

@Test
public void testSecureHAToken() throws IOException, InterruptedException {
    Configuration conf = DFSTestUtil.newHAConfiguration(LOGICAL_NAME);
    conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);
    MiniDFSCluster cluster = null;
    WebHdfsFileSystem fs = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo).numDataNodes(0).build();
        HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);
        cluster.waitActive();
        fs = spy((WebHdfsFileSystem) FileSystem.get(WEBHDFS_URI, conf));
        FileSystemTestHelper.addFileSystemForTesting(WEBHDFS_URI, conf, fs);
        cluster.transitionToActive(0);
        Token<?> token = fs.getDelegationToken(null);
        cluster.shutdownNameNode(0);
        cluster.transitionToActive(1);
        token.renew(conf);
        token.cancel(conf);
        verify(fs).renewDelegationToken(token);
        verify(fs).cancelDelegationToken(token);
    } finally {
        IOUtils.cleanup(null, fs);
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}

14 Source : TestWebHDFSForHA.java
with Apache License 2.0
from NJUJYB

@Test
public void testFailoverAfterOpen() throws IOException {
    Configuration conf = DFSTestUtil.newHAConfiguration(LOGICAL_NAME);
    conf.set(FS_DEFAULT_NAME_KEY, HdfsConstants.HDFS_URI_SCHEME + "://" + LOGICAL_NAME);
    MiniDFSCluster cluster = null;
    FileSystem fs = null;
    final Path p = new Path("/test");
    final byte[] data = "Hello".getBytes();
    try {
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo).numDataNodes(1).build();
        HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);
        cluster.waitActive();
        fs = FileSystem.get(WEBHDFS_URI, conf);
        cluster.transitionToActive(1);
        FSDataOutputStream out = fs.create(p);
        cluster.shutdownNameNode(1);
        cluster.transitionToActive(0);
        out.write(data);
        out.close();
        FSDataInputStream in = fs.open(p);
        byte[] buf = new byte[data.length];
        IOUtils.readFully(in, buf, 0, buf.length);
        Assert.assertArrayEquals(data, buf);
    } finally {
        IOUtils.cleanup(null, fs);
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}

14 Source : TestNameNodeRetryCacheMetrics.java
with Apache License 2.0
from NJUJYB

/**
 * Start a cluster
 */
@Before
public void setup() throws Exception {
    conf = new HdfsConfiguration();
    conf.setBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY, true);
    conf.setInt(DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, 2);
    cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(3).build();
    cluster.waitActive();
    cluster.transitionToActive(namenodeId);
    HATestUtil.setFailoverConfigurations(cluster, conf);
    filesystem = (DistributedFileSystem) HATestUtil.configureFailoverFs(cluster, conf);
    namesystem = cluster.getNamesystem(namenodeId);
    metrics = namesystem.getRetryCache().getMetricsForTests();
}

14 Source : TestStandbyCheckpoints.java
with Apache License 2.0
from NJUJYB

/**
 * Test cancellation of ongoing checkpoints when failover happens
 * mid-checkpoint.
 */
@Test(timeout = 120000)
public void testCheckpointCancellation() throws Exception {
    cluster.transitionToStandby(0);
    // Create an edit log in the shared edits dir with a lot
    // of mkdirs operations. This is solely so that the image is
    // large enough to take a non-trivial amount of time to load.
    // (only ~15MB)
    URI sharedUri = cluster.getSharedEditsDir(0, 1);
    File sharedDir = new File(sharedUri.getPath(), "current");
    File tmpDir = new File(MiniDFSCluster.getBaseDirectory(), "testCheckpointCancellation-tmp");
    FSNamesystem fsn = cluster.getNamesystem(0);
    FSImageTestUtil.createAbortedLogWithMkdirs(tmpDir, NUM_DIRS_IN_LOG, 3, fsn.getLastInodeId() + 1);
    String fname = NNStorage.getInProgressEditsFileName(3);
    new File(tmpDir, fname).renameTo(new File(sharedDir, fname));
    // Checkpoint as fast as we can, in a tight loop.
    cluster.getConfiguration(1).setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0);
    cluster.restartNameNode(1);
    nn1 = cluster.getNameNode(1);
    cluster.transitionToActive(0);
    boolean canceledOne = false;
    for (int i = 0; i < 10 && !canceledOne; i++) {
        doEdits(i * 10, i * 10 + 10);
        cluster.transitionToStandby(0);
        cluster.transitionToActive(1);
        cluster.transitionToStandby(1);
        cluster.transitionToActive(0);
        canceledOne = StandbyCheckpointer.getCanceledCount() > 0;
    }
    assertTrue(canceledOne);
}

14 Source : TestRetryCacheWithHA.java
with Apache License 2.0
from NJUJYB

/**
 * Add a list of cache pools, list cache pools,
 * switch active NN, and list cache pools again.
 */
@Test(timeout = 60000)
public void testListCachePools() throws Exception {
    final int poolCount = 7;
    HashSet<String> poolNames = new HashSet<String>(poolCount);
    for (int i = 0; i < poolCount; i++) {
        String poolName = "testListCachePools-" + i;
        dfs.addCachePool(new CachePoolInfo(poolName));
        poolNames.add(poolName);
    }
    listCachePools(poolNames, 0);
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    cluster.waitActive(1);
    listCachePools(poolNames, 1);
}

14 Source : TestRetryCacheWithHA.java
with Apache License 2.0
from NJUJYB

/**
 * When an NN failover happens, if the client does not receive the response and
 * sends a retry request to the other NN, the same response should be received
 * based on the retry cache.
 */
public void testClientRetryWithFailover(final AtMostOnceOp op) throws Exception {
    final Map<String, Object> results = new HashMap<String, Object>();
    op.prepare();
    // set DummyRetryInvocationHandler#block to true
    DummyRetryInvocationHandler.block.set(true);
    new Thread() {

        @Override
        public void run() {
            try {
                op.invoke();
                Object result = op.getResult();
                LOG.info("Operation " + op.name + " finished");
                synchronized (TestRetryCacheWithHA.this) {
                    results.put(op.name, result == null ? "SUCCESS" : result);
                    TestRetryCacheWithHA.this.notifyAll();
                }
            } catch (Exception e) {
                LOG.info("Got Exception while calling " + op.name, e);
            } finally {
                IOUtils.cleanup(null, op.client);
            }
        }
    }.start();
    // make sure the client's call has actually been handled by the active NN
    assertTrue("After waiting the operation " + op.name + " still has not taken effect on NN yet", op.checkNamenodeBeforeReturn());
    // force the failover
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    // disable the block in DummyHandler
    LOG.info("Setting block to false");
    DummyRetryInvocationHandler.block.set(false);
    synchronized (this) {
        while (!results.containsKey(op.name)) {
            this.wait();
        }
        LOG.info("Got the result of " + op.name + ": " + results.get(op.name));
    }
    // Waiting for failover.
    while (cluster.getNamesystem(1).isInStandbyState()) {
        Thread.sleep(10);
    }
    long hitNN0 = cluster.getNamesystem(0).getRetryCache().getMetricsForTests().getCacheHit();
    long hitNN1 = cluster.getNamesystem(1).getRetryCache().getMetricsForTests().getCacheHit();
    assertTrue("CacheHit: " + hitNN0 + ", " + hitNN1, hitNN0 + hitNN1 > 0);
    long updatedNN0 = cluster.getNamesystem(0).getRetryCache().getMetricsForTests().getCacheUpdated();
    long updatedNN1 = cluster.getNamesystem(1).getRetryCache().getMetricsForTests().getCacheUpdated();
    // Cache updated metrics on NN0 should be > 0 since the op was processed on NN0
    assertTrue("CacheUpdated on NN0: " + updatedNN0, updatedNN0 > 0);
    // Cache updated metrics on NN1 should be > 0 since NN1 applied the edit log
    assertTrue("CacheUpdated on NN1: " + updatedNN1, updatedNN1 > 0);
    long expectedUpdateCount = op.getExpectedCacheUpdateCount();
    if (expectedUpdateCount > 0) {
        assertEquals("CacheUpdated on NN0: " + updatedNN0, expectedUpdateCount, updatedNN0);
        assertEquals("CacheUpdated on NN1: " + updatedNN1, expectedUpdateCount, updatedNN1);
    }
}

14 Source : TestPendingCorruptDnMessages.java
with Apache License 2.0
from NJUJYB

@Test
public void testChangedStorageId() throws IOException, URISyntaxException, InterruptedException {
    HdfsConfiguration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).nnTopology(MiniDFSNNTopology.simpleHATopology()).build();
    try {
        cluster.transitionToActive(0);
        FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
        OutputStream out = fs.create(filePath);
        out.write("foo bar baz".getBytes());
        out.close();
        HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0), cluster.getNameNode(1));
        // Change the gen stamp of the block on datanode to go back in time (gen
        // stamps start at 1000)
        ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
        assertTrue(MiniDFSCluster.changeGenStampOfBlock(0, block, 900));
        // Stop the DN so the replica with the changed gen stamp will be reported
        // when this DN starts up.
        DataNodeProperties dnProps = cluster.stopDataNode(0);
        // Restart the namenode so that when the DN comes up it will see an initial
        // block report.
        cluster.restartNameNode(1, false);
        assertTrue(cluster.restartDataNode(dnProps, true));
        // Wait until the standby NN queues up the corrupt block in the pending DN
        // message queue.
        while (cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount() < 1) {
            ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
        }
        assertEquals(1, cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount());
        String oldStorageId = getRegisteredDatanodeUid(cluster, 1);
        // Reformat/restart the DN.
        assertTrue(wipeAndRestartDn(cluster, 0));
        // Give the DN time to start up and register, which will cause the
        // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
        String newStorageId = "";
        do {
            ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
            newStorageId = getRegisteredDatanodeUid(cluster, 1);
            System.out.println("====> oldStorageId: " + oldStorageId + " newStorageId: " + newStorageId);
        } while (newStorageId.equals(oldStorageId));
        assertEquals(0, cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount());
        // Now try to fail over.
        cluster.transitionToStandby(0);
        cluster.transitionToActive(1);
    } finally {
        cluster.shutdown();
    }
}
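
A note on the polling in the example above: the bare while loops around getPendingDataNodeMessageCount() never time out, so the test hangs indefinitely if the condition is never met. A bounded alternative is sketched below using GenericTestUtils.waitFor, the same helper used in the TestStandbyCheckpoints example further down this page. The 1-second poll interval and 60-second timeout are illustrative assumptions, capturing the local cluster variable assumes it is (effectively) final, and the enclosing test would also need to declare or handle the TimeoutException that waitFor can throw.

// Bounded-wait sketch: fail after 60 seconds instead of spinning forever.
GenericTestUtils.waitFor(new Supplier<Boolean>() {

    @Override
    public Boolean get() {
        return cluster.getNamesystem(1).getBlockManager()
                .getPendingDataNodeMessageCount() >= 1;
    }
}, 1000, 60000);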

14 Source : TestDNFencing.java
with Apache License 2.0
from NJUJYB

/**
 * Test that, when a block is re-opened for append, the related
 * datanode messages are correctly queued by the SBN because
 * they have future states and genstamps.
 */
@Test
public void testQueueingWithAppend() throws Exception {
    int numQueued = 0;
    int numDN = cluster.getDataNodes().size();
    // case 1: create file and call hflush after write
    FSDataOutputStream out = fs.create(TEST_FILE_PATH);
    try {
        AppendTestUtil.write(out, 0, 10);
        out.hflush();
        // Opening the file will report RBW replicas, but will be
        // queued on the StandbyNode.
        // However, the delivery of RBW messages is delayed by HDFS-7217 fix.
        // Apply cluster.triggerBlockReports() to trigger the reporting sooner.
        // 
        cluster.triggerBlockReports();
        // RBW messages
        numQueued += numDN;
        // The cluster.triggerBlockReports() call above does a full
        // block report that incurs 3 extra RBW messages
        // RBW messages
        numQueued += numDN;
    } finally {
        IOUtils.closeStream(out);
        // blockReceived messages
        numQueued += numDN;
    }
    cluster.triggerBlockReports();
    numQueued += numDN;
    assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount());
    // case 2: append to file and call hflush after write
    try {
        out = fs.append(TEST_FILE_PATH);
        AppendTestUtil.write(out, 10, 10);
        out.hflush();
        cluster.triggerBlockReports();
        // RBW messages, see comments in case 1
        numQueued += numDN * 2;
    } finally {
        IOUtils.closeStream(out);
        // blockReceived
        numQueued += numDN;
    }
    assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount());
    // case 3: similar to case 2, except no hflush is called.
    try {
        out = fs.append(TEST_FILE_PATH);
        AppendTestUtil.write(out, 20, 10);
    } finally {
        // The write operation in the try block is buffered, thus no RBW message
        // is reported yet until the closeStream call here. When closeStream is
        // called, before HDFS-7217 fix, there would be three RBW messages
        // (blockReceiving), plus three FINALIZED messages (blockReceived)
        // delivered to NN. However, because of HDFS-7217 fix, the reporting of
        // RBW  messages is postponed. In this case, they are even overwritten
        // by the blockReceived messages of the same block when they are waiting
        // to be delivered. All this happens within the closeStream() call.
        // What's delivered to NN is the three blockReceived messages. See
        // BPServiceActor#addPendingReplicationBlockInfo
        // 
        IOUtils.closeStream(out);
        // blockReceived
        numQueued += numDN;
    }
    cluster.triggerBlockReports();
    numQueued += numDN;
    LOG.info("Expect " + numQueued + " and got: " + cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount());
    assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount());
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    // Verify that no replicas are marked corrupt, and that the
    // file is readable from the failed-over standby.
    BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
    BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
    assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
    assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
    AppendTestUtil.check(fs, TEST_FILE_PATH, 30);
}

14 Source : TestDFSUpgradeWithHA.java
with Apache License 2.0
from NJUJYB

@Test
public void testFinalizeWithJournalNodes() throws IOException, URISyntaxException {
    MiniQJMHACluster qjCluster = null;
    FileSystem fs = null;
    try {
        Builder builder = new MiniQJMHACluster.Builder(conf);
        builder.getDfsBuilder().numDataNodes(0);
        qjCluster = builder.build();
        MiniDFSCluster cluster = qjCluster.getDfsCluster();
        // No upgrade is in progress at the moment.
        checkJnPreviousDirExistence(qjCluster, false);
        checkClusterPreviousDirExistence(cluster, false);
        assertCTimesEqual(cluster);
        // Transition NN0 to active and do some FS ops.
        cluster.transitionToActive(0);
        fs = HATestUtil.configureFailoverFs(cluster, conf);
        replacedertTrue(fs.mkdirs(new Path("/foo1")));
        final long cidBeforeUpgrade = getCommittedTxnIdValue(qjCluster);
        // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
        // flag.
        cluster.shutdownNameNode(1);
        cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
        cluster.restartNameNode(0, false);
        assertTrue(cidBeforeUpgrade <= getCommittedTxnIdValue(qjCluster));
        assertTrue(fs.mkdirs(new Path("/foo2")));
        checkNnPreviousDirExistence(cluster, 0, true);
        checkNnPreviousDirExistence(cluster, 1, false);
        checkJnPreviousDirExistence(qjCluster, true);
        // Now bootstrap the standby with the upgraded info.
        int rc = BootstrapStandby.run(new String[] { "-force" }, cluster.getConfiguration(1));
        assertEquals(0, rc);
        cluster.restartNameNode(1);
        final long cidDuringUpgrade = getCommittedTxnIdValue(qjCluster);
        assertTrue(cidDuringUpgrade > cidBeforeUpgrade);
        runFinalizeCommand(cluster);
        assertEquals(cidDuringUpgrade, getCommittedTxnIdValue(qjCluster));
        checkClusterPreviousDirExistence(cluster, false);
        checkJnPreviousDirExistence(qjCluster, false);
        assertCTimesEqual(cluster);
    } finally {
        if (fs != null) {
            fs.close();
        }
        if (qjCluster != null) {
            qjCluster.shutdown();
        }
    }
}

14 Source : TestDFSUpgradeWithHA.java
with Apache License 2.0
from NJUJYB

/**
 * Make sure that we're allowed to finalize even if the NN which initiated the
 * upgrade is in the standby state.
 */
@Test
public void testFinalizeFromSecondNameNodeWithJournalNodes() throws IOException, URISyntaxException {
    MiniQJMHACluster qjCluster = null;
    FileSystem fs = null;
    try {
        Builder builder = new MiniQJMHACluster.Builder(conf);
        builder.getDfsBuilder().numDataNodes(0);
        qjCluster = builder.build();
        MiniDFSCluster cluster = qjCluster.getDfsCluster();
        // No upgrade is in progress at the moment.
        checkJnPreviousDirExistence(qjCluster, false);
        checkClusterPreviousDirExistence(cluster, false);
        assertCTimesEqual(cluster);
        // Transition NN0 to active and do some FS ops.
        cluster.transitionToActive(0);
        fs = HATestUtil.configureFailoverFs(cluster, conf);
        replacedertTrue(fs.mkdirs(new Path("/foo1")));
        // Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
        // flag.
        cluster.shutdownNameNode(1);
        cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
        cluster.restartNameNode(0, false);
        checkNnPreviousDirExistence(cluster, 0, true);
        checkNnPreviousDirExistence(cluster, 1, false);
        checkJnPreviousDirExistence(qjCluster, true);
        // Now bootstrap the standby with the upgraded info.
        int rc = BootstrapStandby.run(new String[] { "-force" }, cluster.getConfiguration(1));
        assertEquals(0, rc);
        cluster.restartNameNode(1);
        // Make the second NN (not the one that initiated the upgrade) active when
        // the finalize command is run.
        cluster.transitionToStandby(0);
        cluster.transitionToActive(1);
        runFinalizeCommand(cluster);
        checkClusterPreviousDirExistence(cluster, false);
        checkJnPreviousDirExistence(qjCluster, false);
        assertCTimesEqual(cluster);
    } finally {
        if (fs != null) {
            fs.close();
        }
        if (qjCluster != null) {
            qjCluster.shutdown();
        }
    }
}

14 Source : TestBookKeeperAsHASharedDir.java
with Apache License 2.0
from NJUJYB

/**
 * Test a simple HA failover use case with BK.
 */
@Test
public void testFailoverWithBK() throws Exception {
    MiniDFSCluster cluster = null;
    try {
        Configuration conf = new Configuration();
        conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
        conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, BKJMUtil.createJournalURI("/hotfailover").toString());
        BKJMUtil.addJournalManagerDefinition(conf);
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).manageNameDfsSharedDirs(false).build();
        NameNode nn1 = cluster.getNameNode(0);
        NameNode nn2 = cluster.getNameNode(1);
        cluster.waitActive();
        cluster.transitionToActive(0);
        Path p = new Path("/testBKJMfailover");
        FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
        fs.mkdirs(p);
        cluster.shutdownNameNode(0);
        cluster.transitionToActive(1);
        assertTrue(fs.exists(p));
    } finally {
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}

14 Source : TestStandbyCheckpoints.java
with Apache License 2.0
from naver

/**
 * Test cancellation of ongoing checkpoints when failover happens
 * mid-checkpoint.
 */
@Test(timeout = 120000)
public void testCheckpointCancellation() throws Exception {
    cluster.transitionToStandby(0);
    // Create an edit log in the shared edits dir with a lot
    // of mkdirs operations. This is solely so that the image is
    // large enough to take a non-trivial amount of time to load.
    // (only ~15MB)
    URI sharedUri = cluster.getSharedEditsDir(0, 1);
    File sharedDir = new File(sharedUri.getPath(), "current");
    File tmpDir = new File(MiniDFSCluster.getBaseDirectory(), "testCheckpointCancellation-tmp");
    FSNamesystem fsn = cluster.getNamesystem(0);
    FSImageTestUtil.createAbortedLogWithMkdirs(tmpDir, NUM_DIRS_IN_LOG, 3, fsn.getFSDirectory().getLastInodeId() + 1);
    String fname = NNStorage.getInProgressEditsFileName(3);
    new File(tmpDir, fname).renameTo(new File(sharedDir, fname));
    // Checkpoint as fast as we can, in a tight loop.
    cluster.getConfiguration(1).setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0);
    cluster.restartNameNode(1);
    nn1 = cluster.getNameNode(1);
    cluster.transitionToActive(0);
    boolean canceledOne = false;
    for (int i = 0; i < 10 && !canceledOne; i++) {
        doEdits(i * 10, i * 10 + 10);
        cluster.transitionToStandby(0);
        cluster.transitionToActive(1);
        cluster.transitionToStandby(1);
        cluster.transitionToActive(0);
        canceledOne = StandbyCheckpointer.getCanceledCount() > 0;
    }
    assertTrue(canceledOne);
}

14 Source : TestRetryCacheWithHA.java
with Apache License 2.0
from naver

/**
 * When an NN failover happens, if the client did not receive the response and
 * sends a retry request to the other NN, the same response should be received
 * based on the retry cache.
 */
public void testClientRetryWithFailover(final AtMostOnceOp op) throws Exception {
    final Map<String, Object> results = new HashMap<String, Object>();
    op.prepare();
    // set DummyRetryInvocationHandler#block to true
    DummyRetryInvocationHandler.block.set(true);
    new Thread() {

        @Override
        public void run() {
            try {
                op.invoke();
                Object result = op.getResult();
                LOG.info("Operation " + op.name + " finished");
                synchronized (TestRetryCacheWithHA.this) {
                    results.put(op.name, result == null ? "SUCCESS" : result);
                    TestRetryCacheWithHA.this.notifyAll();
                }
            } catch (Exception e) {
                LOG.info("Got Exception while calling " + op.name, e);
            } finally {
                IOUtils.cleanup(null, op.client);
            }
        }
    }.start();
    // make sure the client's call has actually been handled by the active NN
    replacedertTrue("After waiting the operation " + op.name + " still has not taken effect on NN yet", op.checkNamenodeBeforeReturn());
    // force the failover
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    // disable the block in DummyHandler
    LOG.info("Setting block to false");
    DummyRetryInvocationHandler.block.set(false);
    synchronized (this) {
        while (!results.containsKey(op.name)) {
            this.wait();
        }
        LOG.info("Got the result of " + op.name + ": " + results.get(op.name));
    }
    // Waiting for failover.
    while (cluster.getNamesystem(1).isInStandbyState()) {
        Thread.sleep(10);
    }
    long hitNN0 = cluster.getNamesystem(0).getRetryCache().getMetricsForTests().getCacheHit();
    long hitNN1 = cluster.getNamesystem(1).getRetryCache().getMetricsForTests().getCacheHit();
    replacedertTrue("CacheHit: " + hitNN0 + ", " + hitNN1, hitNN0 + hitNN1 > 0);
    long updatedNN0 = cluster.getNamesystem(0).getRetryCache().getMetricsForTests().getCacheUpdated();
    long updatedNN1 = cluster.getNamesystem(1).getRetryCache().getMetricsForTests().getCacheUpdated();
    // Cache updated metrics on NN0 should be >0 since the op was processed on NN0
    assertTrue("CacheUpdated on NN0: " + updatedNN0, updatedNN0 > 0);
    // Cache updated metrics on NN1 should be >0 since NN1 applied the editlog
    assertTrue("CacheUpdated on NN1: " + updatedNN1, updatedNN1 > 0);
}

14 Source : TestPendingCorruptDnMessages.java
with Apache License 2.0
from naver

@Test
public void testChangedStorageId() throws IOException, URISyntaxException, InterruptedException {
    HdfsConfiguration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).nnTopology(MiniDFSNNTopology.simpleHATopology()).build();
    try {
        cluster.transitionToActive(0);
        FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
        OutputStream out = fs.create(filePath);
        out.write("foo bar baz".getBytes());
        out.close();
        HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0), cluster.getNameNode(1));
        // Change the gen stamp of the block on datanode to go back in time (gen
        // stamps start at 1000)
        ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
        assertTrue(cluster.changeGenStampOfBlock(0, block, 900));
        // Stop the DN so the replica with the changed gen stamp will be reported
        // when this DN starts up.
        DataNodeProperties dnProps = cluster.stopDataNode(0);
        // Restart the namenode so that when the DN comes up it will see an initial
        // block report.
        cluster.restartNameNode(1, false);
        assertTrue(cluster.restartDataNode(dnProps, true));
        // Wait until the standby NN queues up the corrupt block in the pending DN
        // message queue.
        while (cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount() < 1) {
            ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
        }
        assertEquals(1, cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount());
        String oldStorageId = getRegisteredDatanodeUid(cluster, 1);
        // Reformat/restart the DN.
        assertTrue(wipeAndRestartDn(cluster, 0));
        // Give the DN time to start up and register, which will cause the
        // DatanodeManager to dissociate the old storage ID from the DN xfer addr.
        String newStorageId = "";
        do {
            ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
            newStorageId = getRegisteredDatanodeUid(cluster, 1);
            System.out.println("====> oldStorageId: " + oldStorageId + " newStorageId: " + newStorageId);
        } while (newStorageId.equals(oldStorageId));
        assertEquals(0, cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount());
        // Now try to fail over.
        cluster.transitionToStandby(0);
        cluster.transitionToActive(1);
    } finally {
        cluster.shutdown();
    }
}

14 Source : TestHAAppend.java
with Apache License 2.0
from naver

/**
 * Test to verify the processing of PendingDataNodeMessageQueue in case of
 * append. One block will be marked as corrupt if the OP_ADD, OP_UPDATE_BLOCKS
 * comes in one edit log segment and OP_CLOSE edit comes in next log segment
 * which is loaded during failover. Regression test for HDFS-3605.
 */
@Test
public void testMultipleAppendsDuringCatchupTailing() throws Exception {
    Configuration conf = new Configuration();
    // Set a length edits tailing period, and explicit rolling, so we can
    // control the ingest of edits by the standby for this test.
    conf.set(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, "5000");
    conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, -1);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(3).build();
    FileSystem fs = null;
    try {
        cluster.transitionToActive(0);
        fs = HATestUtil.configureFailoverFs(cluster, conf);
        Path fileToAppend = new Path("/FileToAppend");
        Path fileToTruncate = new Path("/FileToTruncate");
        final byte[] data = new byte[1 << 16];
        DFSUtil.getRandom().nextBytes(data);
        final int[] appendPos = AppendTestUtil.randomFilePartition(data.length, COUNT);
        final int[] truncatePos = AppendTestUtil.randomFilePartition(data.length, 1);
        // Create file, write some data, and hflush so that the first
        // block is in the edit log prior to roll.
        FSDataOutputStream out = createAndHflush(fs, fileToAppend, data, appendPos[0]);
        FSDataOutputStream out4Truncate = createAndHflush(fs, fileToTruncate, data, data.length);
        // Let the StandbyNode catch the creation of the file.
        cluster.getNameNode(0).getRpcServer().rollEditLog();
        cluster.getNameNode(1).getNamesystem().getEditLogTailer().doTailEdits();
        out.close();
        out4Truncate.close();
        // Append and re-close a few time, so that many block entries are queued.
        for (int i = 0; i < COUNT; i++) {
            int end = i < COUNT - 1 ? appendPos[i + 1] : data.length;
            out = fs.append(fileToAppend);
            out.write(data, appendPos[i], end - appendPos[i]);
            out.close();
        }
        boolean isTruncateReady = fs.truncate(fileToTruncate, truncatePos[0]);
        // Ensure that blocks have been reported to the SBN ahead of the edits
        // arriving.
        cluster.triggerBlockReports();
        // Failover the current standby to active.
        cluster.shutdownNameNode(0);
        cluster.transitionToActive(1);
        // Check the FSCK doesn't detect any bad blocks on the SBN.
        int rc = ToolRunner.run(new DFSck(cluster.getConfiguration(1)), new String[] { "/", "-files", "-blocks" });
        assertEquals(0, rc);
        assertEquals("CorruptBlocks should be empty.", 0, cluster.getNameNode(1).getNamesystem().getCorruptReplicaBlocks());
        AppendTestUtil.checkFullFile(fs, fileToAppend, data.length, data, fileToAppend.toString());
        if (!isTruncateReady) {
            TestFileTruncate.checkBlockRecovery(fileToTruncate, cluster.getFileSystem(1));
        }
        AppendTestUtil.checkFullFile(fs, fileToTruncate, truncatePos[0], data, fileToTruncate.toString());
    } finally {
        if (null != cluster) {
            cluster.shutdown();
        }
        if (null != fs) {
            fs.close();
        }
    }
}

13 Source : TestStandbyCheckpoints.java
with Apache License 2.0
from NJUJYB

/**
 * Test cancellation of ongoing checkpoints when failover happens
 * mid-checkpoint during image upload from standby to active NN.
 */
@Test(timeout = 60000)
public void testCheckpointCancellationDuringUpload() throws Exception {
    // don't compress, we want a big image
    cluster.getConfiguration(0).setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, false);
    cluster.getConfiguration(1).setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, false);
    // Throttle SBN upload to make it hang during upload to ANN
    cluster.getConfiguration(1).setLong(DFSConfigKeys.DFS_IMAGE_TRANSFER_RATE_KEY, 100);
    cluster.restartNameNode(0);
    cluster.restartNameNode(1);
    nn0 = cluster.getNameNode(0);
    nn1 = cluster.getNameNode(1);
    cluster.transitionToActive(0);
    doEdits(0, 100);
    HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
    HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(104));
    cluster.transitionToStandby(0);
    cluster.transitionToActive(1);
    // Wait to make sure background TransferFsImageUpload thread was cancelled.
    // This needs to be done before the next test in the suite starts, so that a
    // file descriptor is not held open during the next cluster init.
    cluster.shutdown();
    cluster = null;
    GenericTestUtils.waitFor(new Supplier<Boolean>() {

        @Override
        public Boolean get() {
            ThreadMXBean threadBean = ManagementFactory.getThreadMXBean();
            ThreadInfo[] threads = threadBean.getThreadInfo(threadBean.getAllThreadIds(), 1);
            for (ThreadInfo thread : threads) {
                if (thread.getThreadName().startsWith("TransferFsImageUpload")) {
                    return false;
                }
            }
            return true;
        }
    }, 1000, 30000);
    // Assert that former active did not accept the canceled checkpoint file.
    assertEquals(0, nn0.getFSImage().getMostRecentCheckpointTxId());
}
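
The anonymous Supplier in the wait above can be written as a lambda when the test module builds against Java 8 or later, since the Supplier interface has a single abstract method. The sketch below restates the same check, with an added null guard because ThreadMXBean.getThreadInfo may return null entries for threads that have already exited; it is a stylistic alternative, not part of the original test.

// Same TransferFsImageUpload wait, sketched as a lambda (assumes Java 8+).
GenericTestUtils.waitFor(() -> {
    ThreadMXBean threadBean = ManagementFactory.getThreadMXBean();
    for (ThreadInfo thread : threadBean.getThreadInfo(threadBean.getAllThreadIds(), 1)) {
        if (thread != null && thread.getThreadName().startsWith("TransferFsImageUpload")) {
            return false;
        }
    }
    return true;
}, 1000, 30000);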

13 Source : TestHASafeMode.java
with Apache License 2.0
from NJUJYB

@Before
public void setupCluster() throws Exception {
    Configuration conf = new Configuration();
    conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
    conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(3).waitSafeMode(false).build();
    cluster.waitActive();
    nn0 = cluster.getNameNode(0);
    nn1 = cluster.getNameNode(1);
    fs = HATestUtil.configureFailoverFs(cluster, conf);
    cluster.transitionToActive(0);
}
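
The excerpt shows only the @Before half of the fixture; in the full test class a matching @After typically shuts the MiniDFSCluster down so the next test starts from a clean slate. A minimal teardown sketch follows, assuming the same cluster field; the method name shutdownCluster is a hypothetical placeholder, not taken from the excerpt.

@After
public void shutdownCluster() throws Exception {
    // Tear down the cluster started in setupCluster().
    if (cluster != null) {
        cluster.shutdown();
        cluster = null;
    }
}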

13 Source : TestFailureOfSharedDir.java
with Apache License 2.0
from NJUJYB

/**
 * Test that marking the shared edits dir as being "required" causes the NN to
 * fail if that dir can't be accessed.
 */
@Test
public void testFailureOfSharedDir() throws Exception {
    Configuration conf = new Configuration();
    conf.setLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, 2000);
    // The shared edits dir will automatically be marked required.
    MiniDFSCluster cluster = null;
    File sharedEditsDir = null;
    try {
        cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).checkExitOnShutdown(false).build();
        cluster.waitActive();
        cluster.transitionToActive(0);
        FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
        replacedertTrue(fs.mkdirs(new Path("/test1")));
        // Blow away the shared edits dir.
        URI sharedEditsUri = cluster.getSharedEditsDir(0, 1);
        sharedEditsDir = new File(sharedEditsUri);
        assertEquals(0, FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w", true));
        Thread.sleep(conf.getLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT) * 2);
        NameNode nn1 = cluster.getNameNode(1);
        assertTrue(nn1.isStandbyState());
        assertFalse("StandBy NameNode should not go to SafeMode on resource unavailability", nn1.isInSafeMode());
        NameNode nn0 = cluster.getNameNode(0);
        try {
            // Make sure that subsequent operations on the NN fail.
            nn0.getRpcServer().rollEditLog();
            fail("Succeeded in rolling edit log despite shared dir being deleted");
        } catch (ExitException ee) {
            GenericTestUtils.assertExceptionContains("finalize log segment 1, 3 failed for required journal", ee);
        }
        // Check that none of the edits dirs rolled, since the shared edits
        // dir didn't roll. Regression test for HDFS-2874.
        for (URI editsUri : cluster.getNameEditsDirs(0)) {
            if (editsUri.equals(sharedEditsUri)) {
                continue;
            }
            File editsDir = new File(editsUri.getPath());
            File curDir = new File(editsDir, "current");
            GenericTestUtils.assertGlobEquals(curDir, "edits_.*", NNStorage.getInProgressEditsFileName(1));
        }
    } finally {
        if (sharedEditsDir != null) {
            // without this test cleanup will fail
            FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "+w", true);
        }
        if (cluster != null) {
            cluster.shutdown();
        }
    }
}

13 Source : TestEditLogTailer.java
with Apache License 2.0
from NJUJYB

private static void testStandbyTriggersLogRolls(int activeIndex) throws Exception {
    Configuration conf = new Configuration();
    // Roll every 1s
    conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, 1);
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    // Have to specify IPC ports so the NNs can talk to each other.
    MiniDFSNNTopology topology = new MiniDFSNNTopology().addNameservice(new MiniDFSNNTopology.NSConf("ns1").addNN(new MiniDFSNNTopology.NNConf("nn1").setIpcPort(10031)).addNN(new MiniDFSNNTopology.NNConf("nn2").setIpcPort(10032)));
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(topology).numDataNodes(0).build();
    try {
        cluster.transitionToActive(activeIndex);
        waitForLogRollInSharedDir(cluster, 3);
    } finally {
        cluster.shutdown();
    }
}

13 Source : TestEditLogTailer.java
with Apache License 2.0
from NJUJYB

@Test
public void testTailer() throws IOException, InterruptedException, ServiceFailedException {
    Configuration conf = new HdfsConfiguration();
    conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
    HAUtil.setAllowStandbyReads(conf, true);
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
    cluster.waitActive();
    cluster.transitionToActive(0);
    NameNode nn1 = cluster.getNameNode(0);
    NameNode nn2 = cluster.getNameNode(1);
    try {
        for (int i = 0; i < DIRS_TO_MAKE / 2; i++) {
            NameNodeAdapter.mkdirs(nn1, getDirPath(i), new PermissionStatus("test", "test", new FsPermission((short) 00755)), true);
        }
        HATestUtil.waitForStandbyToCatchUp(nn1, nn2);
        for (int i = 0; i < DIRS_TO_MAKE / 2; i++) {
            assertTrue(NameNodeAdapter.getFileInfo(nn2, getDirPath(i), false).isDir());
        }
        for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) {
            NameNodeAdapter.mkdirs(nn1, getDirPath(i), new PermissionStatus("test", "test", new FsPermission((short) 00755)), true);
        }
        HATestUtil.waitForStandbyToCatchUp(nn1, nn2);
        for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) {
            assertTrue(NameNodeAdapter.getFileInfo(nn2, getDirPath(i), false).isDir());
        }
    } finally {
        cluster.shutdown();
    }
}
