Here are examples of the Java API org.apache.hadoop.hdfs.MiniDFSCluster.transitionToActive() taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
94 Examples
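Before the individual examples, here is a minimal sketch of the pattern they all share: build a MiniDFSCluster with a two-NameNode HA topology (both NameNodes start in standby), call transitionToActive(int) to pick the active one, and optionally fail over later with transitionToStandby()/transitionToActive(). The class name and comments are illustrative assumptions, not taken from any of the projects below; the API calls themselves all appear in the examples.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.hdfs.MiniDFSNNTopology;

// Hypothetical demo class, for illustration only.
public class TransitionToActiveSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Two NameNodes in one nameservice; both come up in standby state.
        MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf)
                .nnTopology(MiniDFSNNTopology.simpleHATopology())
                .numDataNodes(1)
                .build();
        try {
            cluster.waitActive();
            // Make NN0 active; NN1 remains standby.
            cluster.transitionToActive(0);
            // ... exercise the cluster through the active NameNode ...
            // Graceful failover: demote NN0, then promote NN1.
            cluster.transitionToStandby(0);
            cluster.transitionToActive(1);
        } finally {
            cluster.shutdown();
        }
    }
}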
19
Source : TestDFSHAAdminMiniCluster.java
with Apache License 2.0
from NJUJYB
@Test
public void testGetServiceState() throws Exception {
assertEquals(0, runTool("-getServiceState", "nn1"));
assertEquals(0, runTool("-getServiceState", "nn2"));
cluster.transitionToActive(0);
assertEquals(0, runTool("-getServiceState", "nn1"));
NameNodeAdapter.enterSafeMode(cluster.getNameNode(0), false);
assertEquals(0, runTool("-getServiceState", "nn1"));
}
19
Source : TestFailureToReadEdits.java
with Apache License 2.0
from NJUJYB
/**
* Ensure that the standby fails to become active if it cannot read all
* available edits in the shared edits dir when it is transitioning to active
* state.
*/
@Test
public void testFailureToReadEditsOnTransitionToActive() throws Exception {
assertTrue(fs.mkdirs(new Path(TEST_DIR1)));
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
// It should also upload it back to the active.
HATestUtil.waitForCheckpoint(cluster, 0, ImmutableList.of(0, 3));
causeFailureOnEditLogRead();
assertTrue(fs.mkdirs(new Path(TEST_DIR2)));
assertTrue(fs.mkdirs(new Path(TEST_DIR3)));
try {
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
fail("Standby fully caught up, but should not have been able to");
} catch (HATestUtil.CouldNotCatchUpException e) {
// Expected. The NN did not exit.
}
// Shutdown the active NN.
cluster.shutdownNameNode(0);
try {
// Transition the standby to active.
cluster.transitionToActive(1);
fail("Standby transitioned to active, but should not have been able to");
} catch (ExitException ee) {
GenericTestUtils.replacedertExceptionContains("Error replaying edit log", ee);
}
}
17
Source : TestWebHDFSForHA.java
with Apache License 2.0
from NJUJYB
/**
* Make sure the WebHdfsFileSystem will retry based on RetriableException when
* rpcServer is null in NamenodeWebHdfsMethods while NameNode starts up.
*/
@Test(timeout = 120000)
public void testRetryWhileNNStartup() throws Exception {
final Configuration conf = DFSTestUtil.newHAConfiguration(LOGICAL_NAME);
MiniDFSCluster cluster = null;
final Map<String, Boolean> resultMap = new HashMap<String, Boolean>();
try {
cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo).numDataNodes(0).build();
HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);
cluster.waitActive();
cluster.transitionToActive(0);
final NameNode namenode = cluster.getNameNode(0);
final NamenodeProtocols rpcServer = namenode.getRpcServer();
Whitebox.setInternalState(namenode, "rpcServer", null);
new Thread() {
@Override
public void run() {
boolean result = false;
FileSystem fs = null;
try {
fs = FileSystem.get(WEBHDFS_URI, conf);
final Path dir = new Path("/test");
result = fs.mkdirs(dir);
} catch (IOException e) {
result = false;
} finally {
IOUtils.cleanup(null, fs);
}
synchronized (TestWebHDFSForHA.this) {
resultMap.put("mkdirs", result);
TestWebHDFSForHA.this.notifyAll();
}
}
}.start();
Thread.sleep(1000);
Whitebox.setInternalState(namenode, "rpcServer", rpcServer);
synchronized (this) {
while (!resultMap.containsKey("mkdirs")) {
this.wait();
}
Assert.assertTrue(resultMap.get("mkdirs"));
}
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
17
Source : TestNamenodeRetryCache.java
with Apache License 2.0
from NJUJYB
/**
* Make sure a retry call does not hang because of the exception thrown in the
* first call.
*/
@Test(timeout = 60000)
public void testUpdatePipelineWithFailOver() throws Exception {
cluster.shutdown();
namesystem = null;
filesystem = null;
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1).build();
FSNamesystem ns0 = cluster.getNamesystem(0);
ExtendedBlock oldBlock = new ExtendedBlock();
ExtendedBlock newBlock = new ExtendedBlock();
DatanodeID[] newNodes = new DatanodeID[2];
String[] newStorages = new String[2];
newCall();
try {
ns0.updatePipeline("testClient", oldBlock, newBlock, newNodes, newStorages);
fail("Expect StandbyException from the updatePipeline call");
} catch (StandbyException e) {
// expected, since in the beginning both nn are in standby state
GenericTestUtils.assertExceptionContains(HAServiceState.STANDBY.toString(), e);
}
cluster.transitionToActive(0);
try {
ns0.updatePipeline("testClient", oldBlock, newBlock, newNodes, newStorages);
} catch (IOException e) {
// ignore call should not hang.
}
}
17
Source : TestXAttrsWithHA.java
with Apache License 2.0
from NJUJYB
@Before
public void setupCluster() throws Exception {
Configuration conf = new Configuration();
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
HAUtil.setAllowStandbyReads(conf, true);
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1).waitSafeMode(false).build();
cluster.waitActive();
nn0 = cluster.getNameNode(0);
nn1 = cluster.getNameNode(1);
fs = HATestUtil.configureFailoverFs(cluster, conf);
cluster.transitionToActive(0);
}
17
Source : TestFailoverWithBlockTokensEnabled.java
with Apache License 2.0
from NJUJYB
@Test
public void ensureInvalidBlockTokensAreRejected() throws IOException, URISyntaxException {
cluster.transitionToActive(0);
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
DFSTestUtil.writeFile(fs, TEST_PATH, TEST_DATA);
assertEquals(TEST_DATA, DFSTestUtil.readFile(fs, TEST_PATH));
DFSClient dfsClient = DFSClientAdapter.getDFSClient((DistributedFileSystem) fs);
DFSClient spyDfsClient = Mockito.spy(dfsClient);
Mockito.doAnswer(new Answer<LocatedBlocks>() {
@Override
public LocatedBlocks answer(InvocationOnMock arg0) throws Throwable {
LocatedBlocks locatedBlocks = (LocatedBlocks) arg0.callRealMethod();
for (LocatedBlock lb : locatedBlocks.getLocatedBlocks()) {
Token<BlockTokenIdentifier> token = lb.getBlockToken();
BlockTokenIdentifier id = lb.getBlockToken().decodeIdentifier();
// This will make the token invalid, since the password
// won't match anymore
id.setExpiryDate(Time.now() + 10);
Token<BlockTokenIdentifier> newToken = new Token<BlockTokenIdentifier>(id.getBytes(), token.getPassword(), token.getKind(), token.getService());
lb.setBlockToken(newToken);
}
return locatedBlocks;
}
}).when(spyDfsClient).getLocatedBlocks(Mockito.anyString(), Mockito.anyLong(), Mockito.anyLong());
DFSClientAdapter.setDFSClient((DistributedFileSystem) fs, spyDfsClient);
try {
assertEquals(TEST_DATA, DFSTestUtil.readFile(fs, TEST_PATH));
fail("Shouldn't have been able to read a file with invalid block tokens");
} catch (IOException ioe) {
GenericTestUtils.assertExceptionContains("Could not obtain block", ioe);
}
}
17
Source : TestEditLogsDuringFailover.java
with Apache License 2.0
from NJUJYB
private void testFailoverFinalizesAndReadsInProgress(boolean partialTxAtEnd) throws Exception {
Configuration conf = new Configuration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
try {
// Create a fake in-progress edit-log in the shared directory
URI sharedUri = cluster.getSharedEditsDir(0, 1);
File sharedDir = new File(sharedUri.getPath(), "current");
FSNamesystem fsn = cluster.getNamesystem(0);
FSImageTestUtil.createAbortedLogWithMkdirs(sharedDir, NUM_DIRS_IN_LOG, 1, fsn.getLastInodeId() + 1);
assertEditFiles(Collections.singletonList(sharedUri), NNStorage.getInProgressEditsFileName(1));
if (partialTxAtEnd) {
FileOutputStream outs = null;
try {
File editLogFile = new File(sharedDir, NNStorage.getInProgressEditsFileName(1));
outs = new FileOutputStream(editLogFile, true);
outs.write(new byte[] { 0x18, 0x00, 0x00, 0x00 });
LOG.error("editLogFile = " + editLogFile);
} finally {
IOUtils.cleanup(LOG, outs);
}
}
// Transition one of the NNs to active
cluster.transitionToActive(0);
// In the transition to active, it should have read the log -- and
// hence see one of the dirs we made in the fake log.
String testPath = "/dir" + NUM_DIRS_IN_LOG;
assertNotNull(cluster.getNameNode(0).getRpcServer().getFileInfo(testPath));
// It also should have finalized that log in the shared directory and started
// writing to a new one at the next txid.
assertEditFiles(Collections.singletonList(sharedUri), NNStorage.getFinalizedEditsFileName(1, NUM_DIRS_IN_LOG + 1), NNStorage.getInProgressEditsFileName(NUM_DIRS_IN_LOG + 2));
} finally {
cluster.shutdown();
}
}
17
Source : TestBootstrapStandby.java
with Apache License 2.0
from NJUJYB
@Before
public void setupCluster() throws IOException {
Configuration conf = new Configuration();
MiniDFSNNTopology topology = new MiniDFSNNTopology().addNameservice(new MiniDFSNNTopology.NSConf("ns1").addNN(new MiniDFSNNTopology.NNConf("nn1").setHttpPort(20001)).addNN(new MiniDFSNNTopology.NNConf("nn2").setHttpPort(20002)));
cluster = new MiniDFSCluster.Builder(conf).nnTopology(topology).numDataNodes(0).build();
cluster.waitActive();
nn0 = cluster.getNameNode(0);
cluster.transitionToActive(0);
cluster.shutdownNameNode(1);
}
17
Source : TestEditLogsDuringFailover.java
with Apache License 2.0
from naver
private void testFailoverFinalizesAndReadsInProgress(boolean partialTxAtEnd) throws Exception {
Configuration conf = new Configuration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
try {
// Create a fake in-progress edit-log in the shared directory
URI sharedUri = cluster.getSharedEditsDir(0, 1);
File sharedDir = new File(sharedUri.getPath(), "current");
FSNamesystem fsn = cluster.getNamesystem(0);
FSImageTestUtil.createAbortedLogWithMkdirs(sharedDir, NUM_DIRS_IN_LOG, 1, fsn.getFSDirectory().getLastInodeId() + 1);
assertEditFiles(Collections.singletonList(sharedUri), NNStorage.getInProgressEditsFileName(1));
if (partialTxAtEnd) {
FileOutputStream outs = null;
try {
File editLogFile = new File(sharedDir, NNStorage.getInProgressEditsFileName(1));
outs = new FileOutputStream(editLogFile, true);
outs.write(new byte[] { 0x18, 0x00, 0x00, 0x00 });
LOG.error("editLogFile = " + editLogFile);
} finally {
IOUtils.cleanup(LOG, outs);
}
}
// Transition one of the NNs to active
cluster.transitionToActive(0);
// In the transition to active, it should have read the log -- and
// hence see one of the dirs we made in the fake log.
String testPath = "/dir" + NUM_DIRS_IN_LOG;
assertNotNull(cluster.getNameNode(0).getRpcServer().getFileInfo(testPath));
// It also should have finalized that log in the shared directory and started
// writing to a new one at the next txid.
assertEditFiles(Collections.singletonList(sharedUri), NNStorage.getFinalizedEditsFileName(1, NUM_DIRS_IN_LOG + 1), NNStorage.getInProgressEditsFileName(NUM_DIRS_IN_LOG + 2));
} finally {
cluster.shutdown();
}
}
16
Source : TestLossyRetryInvocationHandler.java
with Apache License 2.0
from NJUJYB
@Test
public void testStartNNWithTrashEmptier() throws Exception {
MiniDFSCluster cluster = null;
Configuration conf = new HdfsConfiguration();
// enable both trash emptier and dropping response
conf.setLong("fs.trash.interval", 360);
conf.setInt(DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, 2);
try {
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
cluster.waitActive();
cluster.transitionToActive(0);
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
16
Source : TestHAWebUI.java
with Apache License 2.0
from NJUJYB
/**
* Tests that the web UI of the name node provides a link to browse the file
* system and summary of under-replicated blocks only in active state
*/
@Test
public void testLinkAndClusterSummary() throws Exception {
Configuration conf = new Configuration();
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
try {
cluster.waitActive();
cluster.transitionToActive(0);
String pageContents = DFSTestUtil.urlGet(new URL("http://localhost:" + NameNode.getHttpAddress(cluster.getConfiguration(0)).getPort() + "/dfshealth.jsp"));
replacedertTrue(pageContents.contains("Browse the filesystem"));
replacedertTrue(pageContents.contains("Number of Under-Replicated Blocks"));
cluster.transitionToStandby(0);
pageContents = DFSTestUtil.urlGet(new URL("http://localhost:" + NameNode.getHttpAddress(cluster.getConfiguration(0)).getPort() + "/dfshealth.jsp"));
replacedertFalse(pageContents.contains("Browse the filesystem"));
replacedertFalse(pageContents.contains("Number of Under-Replicated Blocks"));
cluster.transitionToActive(0);
pageContents = DFSTestUtil.urlGet(new URL("http://localhost:" + NameNode.getHttpAddress(cluster.getConfiguration(0)).getPort() + "/dfshealth.jsp"));
replacedertTrue(pageContents.contains("Browse the filesystem"));
replacedertTrue(pageContents.contains("Number of Under-Replicated Blocks"));
} finally {
cluster.shutdown();
}
}
16
Source : TestHASafeMode.java
with Apache License 2.0
from NJUJYB
/**
* Set up a namesystem with several edits, both deletions and
* additions, and failover to a new NN while that NN is in
* safemode. Ensure that it will exit safemode.
*/
@Test
public void testComplexFailoverIntoSafemode() throws Exception {
banner("Starting with NN0 active and NN1 standby, creating some blocks");
DFSTestUtil.createFile(fs, new Path("/test"), 3 * BLOCK_SIZE, (short) 3, 1L);
// Roll edit log so that, when the SBN restarts, it will load
// the namespace during startup and enter safemode.
nn0.getRpcServer().rollEditLog();
banner("Creating some blocks that won't be in the edit log");
DFSTestUtil.createFile(fs, new Path("/test2"), 5 * BLOCK_SIZE, (short) 3, 1L);
banner("Deleting the original blocks");
fs.delete(new Path("/test"), true);
banner("Restarting standby");
restartStandby();
// We expect it to be on its way out of safemode, since all of the blocks
// from the edit log have been reported.
assertSafeMode(nn1, 3, 3, 3, 0);
// Initiate a failover into it while it's in safemode
banner("Initiating a failover into NN1 in safemode");
NameNodeAdapter.abortEditLogs(nn0);
cluster.transitionToActive(1);
assertSafeMode(nn1, 5, 5, 3, 0);
}
16
Source : TestHASafeMode.java
with Apache License 2.0
from NJUJYB
/**
* Make sure that when we transition to active in safe mode that we don't
* prematurely consider blocks missing just because not all DNs have reported
* yet.
*
* This is a regression test for HDFS-3921.
*/
@Test
public void testNoPopulatingReplQueuesWhenStartingActiveInSafeMode() throws IOException {
DFSTestUtil.createFile(fs, new Path("/test"), 15 * BLOCK_SIZE, (short) 3, 1L);
// Stop the DN so that when the NN restarts not all blocks will be reported
// and the NN won't leave safe mode.
cluster.stopDataNode(1);
// Restart the namenode but don't wait for it to hear from all DNs (since
// one DN is deliberately shut down.)
cluster.restartNameNode(0, false);
cluster.transitionToActive(0);
assertTrue(cluster.getNameNode(0).isInSafeMode());
// We shouldn't yet consider any blocks "missing" since we're in startup
// safemode, i.e. not all DNs may have reported.
assertEquals(0, cluster.getNamesystem(0).getMissingBlocksCount());
}
16
Source : TestHarFileSystemWithHA.java
with Apache License 2.0
from NJUJYB
/**
* Test that the HarFileSystem works with underlying HDFS URIs that have no
* port specified, as is often the case with an HA setup.
*/
@Test
public void testHarUriWithHaUriWithNoPort() throws Exception {
Configuration conf = new HdfsConfiguration();
MiniDFSCluster cluster = null;
try {
cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).nnTopology(MiniDFSNNTopology.simpleHATopology()).build();
cluster.transitionToActive(0);
HATestUtil.setFailoverConfigurations(cluster, conf);
createEmptyHarArchive(HATestUtil.configureFailoverFs(cluster, conf), TEST_HAR_PATH);
URI failoverUri = FileSystem.getDefaultUri(conf);
Path p = new Path("har://hdfs-" + failoverUri.getAuthority() + TEST_HAR_PATH);
p.getFileSystem(conf);
} finally {
cluster.shutdown();
}
}
16
Source : TestFailoverWithBlockTokensEnabled.java
with Apache License 2.0
from NJUJYB
private void writeUsingBothNameNodes() throws ServiceFailedException, IOException, URISyntaxException {
cluster.transitionToActive(0);
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
DFSTestUtil.writeFile(fs, TEST_PATH, TEST_DATA);
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
fs.delete(TEST_PATH, false);
DFSTestUtil.writeFile(fs, TEST_PATH, TEST_DATA);
}
16
Source : TestEditLogsDuringFailover.java
with Apache License 2.0
from NJUJYB
@Test
public void testStartup() throws Exception {
Configuration conf = new Configuration();
HAUtil.setAllowStandbyReads(conf, true);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
try {
// During HA startup, both nodes should be in
// standby and we shouldn't have any edits files
// in any edits directory!
List<URI> allDirs = Lists.newArrayList();
allDirs.addAll(cluster.getNameDirs(0));
allDirs.addAll(cluster.getNameDirs(1));
allDirs.add(cluster.getSharedEditsDir(0, 1));
assertNoEditFiles(allDirs);
// Set the first NN to active, make sure it creates edits
// in its own dirs and the shared dir. The standby
// should still have no edits!
cluster.transitionToActive(0);
assertEditFiles(cluster.getNameDirs(0), NNStorage.getInProgressEditsFileName(1));
assertEditFiles(Collections.singletonList(cluster.getSharedEditsDir(0, 1)), NNStorage.getInProgressEditsFileName(1));
assertNoEditFiles(cluster.getNameDirs(1));
cluster.getNameNode(0).getRpcServer().mkdirs("/test", FsPermission.createImmutable((short) 0755), true);
// Restarting the standby should not finalize any edits files
// in the shared directory when it starts up!
cluster.restartNameNode(1);
assertEditFiles(cluster.getNameDirs(0), NNStorage.getInProgressEditsFileName(1));
assertEditFiles(Collections.singletonList(cluster.getSharedEditsDir(0, 1)), NNStorage.getInProgressEditsFileName(1));
assertNoEditFiles(cluster.getNameDirs(1));
// Additionally it should not have applied any in-progress logs
// at start-up -- otherwise, it would have read half-way into
// the current log segment, and on the next roll, it would have to
// either replay starting in the middle of the segment (not allowed)
// or double-replay the edits (incorrect).
assertNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test", true));
cluster.getNameNode(0).getRpcServer().mkdirs("/test2", FsPermission.createImmutable((short) 0755), true);
// If we restart NN0, it'll come back as standby, and we can
// transition NN1 to active and make sure it reads edits correctly at this point.
cluster.restartNameNode(0);
cluster.transitionToActive(1);
// NN1 should have both the edits that came before its restart, and the edits that
// came after its restart.
assertNotNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test", true));
assertNotNull(NameNodeAdapter.getFileInfo(cluster.getNameNode(1), "/test2", true));
} finally {
cluster.shutdown();
}
}
16
Source : TestDFSUpgradeWithHA.java
with Apache License 2.0
from NJUJYB
/**
* Test rollback with NFS shared dir.
*/
@Test
public void testRollbackWithNfs() throws Exception {
MiniDFSCluster cluster = null;
FileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
// No upgrade is in progress at the moment.
checkClusterPreviousDirExistence(cluster, false);
assertCTimesEqual(cluster);
checkPreviousDirExistence(sharedDir, false);
// Transition NN0 to active and do some FS ops.
cluster.transitionToActive(0);
fs = HATestUtil.configureFailoverFs(cluster, conf);
replacedertTrue(fs.mkdirs(new Path("/foo1")));
// Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
// flag.
cluster.shutdownNameNode(1);
cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
cluster.restartNameNode(0, false);
checkNnPreviousDirExistence(cluster, 0, true);
checkNnPreviousDirExistence(cluster, 1, false);
checkPreviousDirExistence(sharedDir, true);
// NN0 should come up in the active state when given the -upgrade option,
// so no need to transition it to active.
replacedertTrue(fs.mkdirs(new Path("/foo2")));
// Now bootstrap the standby with the upgraded info.
int rc = BootstrapStandby.run(new String[] { "-force" }, cluster.getConfiguration(1));
replacedertEquals(0, rc);
cluster.restartNameNode(1);
checkNnPreviousDirExistence(cluster, 0, true);
checkNnPreviousDirExistence(cluster, 1, true);
checkPreviousDirExistence(sharedDir, true);
assertCTimesEqual(cluster);
// Now shut down the cluster and do the rollback.
Collection<URI> nn1NameDirs = cluster.getNameDirs(0);
cluster.shutdown();
conf.setStrings(DFSConfigKeys.DFS_NAMENODE_NAME_DIR_KEY, Joiner.on(",").join(nn1NameDirs));
NameNode.doRollback(conf, false);
// The rollback operation should have rolled back the first NN's local
// dirs, and the shared dir, but not the other NN's dirs. Those have to be
// done by bootstrapping the standby.
checkNnPreviousDirExistence(cluster, 0, false);
checkPreviousDirExistence(sharedDir, false);
} finally {
if (fs != null) {
fs.close();
}
if (cluster != null) {
cluster.shutdown();
}
}
}
16
Source : TestDFSUpgradeWithHA.java
with Apache License 2.0
from NJUJYB
/**
* Make sure that starting a second NN with the -upgrade flag fails if the
* other NN has already done that.
*/
@Test
public void testCannotUpgradeSecondNameNode() throws IOException, URISyntaxException {
MiniDFSCluster cluster = null;
FileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
// No upgrade is in progress at the moment.
checkClusterPreviousDirExistence(cluster, false);
assertCTimesEqual(cluster);
checkPreviousDirExistence(sharedDir, false);
// Transition NN0 to active and do some FS ops.
cluster.transitionToActive(0);
fs = HATestUtil.configureFailoverFs(cluster, conf);
replacedertTrue(fs.mkdirs(new Path("/foo1")));
// Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
// flag.
cluster.shutdownNameNode(1);
cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
cluster.restartNameNode(0, false);
checkNnPreviousDirExistence(cluster, 0, true);
checkNnPreviousDirExistence(cluster, 1, false);
checkPreviousDirExistence(sharedDir, true);
// NN0 should come up in the active state when given the -upgrade option,
// so no need to transition it to active.
replacedertTrue(fs.mkdirs(new Path("/foo2")));
// Restart NN0 without the -upgrade flag, to make sure that works.
cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
cluster.restartNameNode(0, false);
// Make sure we can still do FS ops after upgrading.
cluster.transitionToActive(0);
replacedertTrue(fs.mkdirs(new Path("/foo3")));
// Make sure that starting the second NN with the -upgrade flag fails.
cluster.getNameNodeInfos()[1].setStartOpt(StartupOption.UPGRADE);
try {
cluster.restartNameNode(1, false);
fail("Should not have been able to start second NN with -upgrade");
} catch (IOException ioe) {
GenericTestUtils.replacedertExceptionContains("It looks like the shared log is already being upgraded", ioe);
}
} finally {
if (fs != null) {
fs.close();
}
if (cluster != null) {
cluster.shutdown();
}
}
}
16
Source : TestDFSUpgradeWithHA.java
with Apache License 2.0
from NJUJYB
/**
* Ensure that an admin cannot finalize an HA upgrade without at least one NN
* being active.
*/
@Test
public void testCannotFinalizeIfNoActive() throws IOException, URISyntaxException {
MiniDFSCluster cluster = null;
FileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
// No upgrade is in progress at the moment.
checkClusterPreviousDirExistence(cluster, false);
assertCTimesEqual(cluster);
checkPreviousDirExistence(sharedDir, false);
// Transition NN0 to active and do some FS ops.
cluster.transitionToActive(0);
fs = HATestUtil.configureFailoverFs(cluster, conf);
replacedertTrue(fs.mkdirs(new Path("/foo1")));
// Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
// flag.
cluster.shutdownNameNode(1);
cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
cluster.restartNameNode(0, false);
checkNnPreviousDirExistence(cluster, 0, true);
checkNnPreviousDirExistence(cluster, 1, false);
checkPreviousDirExistence(sharedDir, true);
// NN0 should come up in the active state when given the -upgrade option,
// so no need to transition it to active.
replacedertTrue(fs.mkdirs(new Path("/foo2")));
// Restart NN0 without the -upgrade flag, to make sure that works.
cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
cluster.restartNameNode(0, false);
// Make sure we can still do FS ops after upgrading.
cluster.transitionToActive(0);
replacedertTrue(fs.mkdirs(new Path("/foo3")));
// Now bootstrap the standby with the upgraded info.
int rc = BootstrapStandby.run(new String[] { "-force" }, cluster.getConfiguration(1));
replacedertEquals(0, rc);
// Now restart NN1 and make sure that we can do ops against that as well.
cluster.restartNameNode(1);
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
replacedertTrue(fs.mkdirs(new Path("/foo4")));
replacedertCTimesEqual(cluster);
// Now there's no active NN.
cluster.transitionToStandby(1);
try {
runFinalizeCommand(cluster);
fail("Should not have been able to finalize upgrade with no NN active");
} catch (IOException ioe) {
GenericTestUtils.replacedertExceptionContains("Cannot finalize with no NameNode active", ioe);
}
} finally {
if (fs != null) {
fs.close();
}
if (cluster != null) {
cluster.shutdown();
}
}
}
16
Source : TestDFSUpgradeWithHA.java
with Apache License 2.0
from NJUJYB
/**
* Make sure that an HA NN with NFS-based HA can successfully start and
* upgrade.
*/
@Test
public void testNfsUpgrade() throws IOException, URISyntaxException {
MiniDFSCluster cluster = null;
FileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
File sharedDir = new File(cluster.getSharedEditsDir(0, 1));
// No upgrade is in progress at the moment.
checkClusterPreviousDirExistence(cluster, false);
assertCTimesEqual(cluster);
checkPreviousDirExistence(sharedDir, false);
// Transition NN0 to active and do some FS ops.
cluster.transitionToActive(0);
fs = HATestUtil.configureFailoverFs(cluster, conf);
replacedertTrue(fs.mkdirs(new Path("/foo1")));
// Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
// flag.
cluster.shutdownNameNode(1);
cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
cluster.restartNameNode(0, false);
checkNnPreviousDirExistence(cluster, 0, true);
checkNnPreviousDirExistence(cluster, 1, false);
checkPreviousDirExistence(sharedDir, true);
// NN0 should come up in the active state when given the -upgrade option,
// so no need to transition it to active.
replacedertTrue(fs.mkdirs(new Path("/foo2")));
// Restart NN0 without the -upgrade flag, to make sure that works.
cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.REGULAR);
cluster.restartNameNode(0, false);
// Make sure we can still do FS ops after upgrading.
cluster.transitionToActive(0);
replacedertTrue(fs.mkdirs(new Path("/foo3")));
// Now bootstrap the standby with the upgraded info.
int rc = BootstrapStandby.run(new String[] { "-force" }, cluster.getConfiguration(1));
replacedertEquals(0, rc);
// Now restart NN1 and make sure that we can do ops against that as well.
cluster.restartNameNode(1);
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
replacedertTrue(fs.mkdirs(new Path("/foo4")));
replacedertCTimesEqual(cluster);
} finally {
if (fs != null) {
fs.close();
}
if (cluster != null) {
cluster.shutdown();
}
}
}
16
Source : TestBootstrapStandbyWithQJM.java
with Apache License 2.0
from NJUJYB
/**
* BootstrapStandby when the existing NN is active
*/
@Test
public void testBootstrapStandbyWithActiveNN() throws Exception {
// make the first NN in active state
cluster.transitionToActive(0);
Configuration confNN1 = cluster.getConfiguration(1);
// shut down nn1
cluster.shutdownNameNode(1);
int rc = BootstrapStandby.run(new String[] { "-force" }, confNN1);
assertEquals(0, rc);
// Should have copied over the namespace from the standby
FSImageTestUtil.assertNNHasCheckpoints(cluster, 1, ImmutableList.of(0));
FSImageTestUtil.assertNNFilesMatch(cluster);
}
16
Source : TestBootstrapStandbyWithQJM.java
with Apache License 2.0
from NJUJYB
private void testUpgrade(UpgradeState state) throws Exception {
cluster.transitionToActive(0);
final Configuration confNN1 = cluster.getConfiguration(1);
final File current = cluster.getNameNode(1).getFSImage().getStorage().getStorageDir(0).getCurrentDir();
final File tmp = cluster.getNameNode(1).getFSImage().getStorage().getStorageDir(0).getPreviousTmp();
// shut down nn1
cluster.shutdownNameNode(1);
// make NN0 in upgrade state
FSImage fsImage0 = cluster.getNameNode(0).getNamesystem().getFSImage();
Whitebox.setInternalState(fsImage0, "isUpgradeFinalized", false);
switch(state) {
case RECOVER:
// rename the current directory to previous.tmp in nn1
NNStorage.rename(current, tmp);
break;
case FORMAT:
// rename the current directory to a random name so it's not formatted
final File wrongPath = new File(current.getParentFile(), "wrong");
NNStorage.rename(current, wrongPath);
break;
default:
break;
}
int rc = BootstrapStandby.run(new String[] { "-force" }, confNN1);
assertEquals(0, rc);
// Should have copied over the namespace from the standby
FSImageTestUtil.assertNNHasCheckpoints(cluster, 1, ImmutableList.of(0));
FSImageTestUtil.assertNNFilesMatch(cluster);
// make sure the NN1 is in upgrade state, i.e., the previous directory has
// been successfully created
cluster.restartNameNode(1);
assertFalse(cluster.getNameNode(1).getNamesystem().isUpgradeFinalized());
}
16
Source : TestBootstrapStandbyWithQJM.java
with Apache License 2.0
from NJUJYB
@Before
public void setup() throws Exception {
Configuration conf = new Configuration();
// Turn off IPC client caching, so that the suite can handle
// the restart of the daemons between test cases.
conf.setInt(CommonConfigurationKeysPublic.IPC_CLIENT_CONNECTION_MAXIDLETIME_KEY, 0);
MiniQJMHACluster miniQjmHaCluster = new MiniQJMHACluster.Builder(conf).build();
cluster = miniQjmHaCluster.getDfsCluster();
jCluster = miniQjmHaCluster.getJournalCluster();
// make nn0 active
cluster.transitionToActive(0);
// do something to generate in-progress edit log data
DistributedFileSystem dfs = (DistributedFileSystem) HATestUtil.configureFailoverFs(cluster, conf);
dfs.mkdirs(new Path("/test2"));
dfs.close();
}
15
Source : TestWebHDFSForHA.java
with Apache License 2.0
from NJUJYB
@Test
public void testHA() throws IOException {
Configuration conf = DFSTestUtil.newHAConfiguration(LOGICAL_NAME);
MiniDFSCluster cluster = null;
FileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo).numDataNodes(0).build();
HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);
cluster.waitActive();
fs = FileSystem.get(WEBHDFS_URI, conf);
cluster.transitionToActive(0);
final Path dir = new Path("/test");
Assert.assertTrue(fs.mkdirs(dir));
cluster.shutdownNameNode(0);
cluster.transitionToActive(1);
final Path dir2 = new Path("/test2");
Assert.assertTrue(fs.mkdirs(dir2));
} finally {
IOUtils.cleanup(null, fs);
if (cluster != null) {
cluster.shutdown();
}
}
}
15
Source : TestXAttrsWithHA.java
with Apache License 2.0
from NJUJYB
/**
* Test that xattrs are properly tracked by the standby
*/
@Test(timeout = 60000)
public void testXAttrsTrackedOnStandby() throws Exception {
fs.create(path).close();
fs.setXAttr(path, name1, value1, EnumSet.of(XAttrSetFlag.CREATE));
fs.setXAttr(path, name2, value2, EnumSet.of(XAttrSetFlag.CREATE));
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
List<XAttr> xAttrs = nn1.getRpcServer().getXAttrs("/file", null);
assertEquals(2, xAttrs.size());
// Failover the current standby to active.
cluster.shutdownNameNode(0);
cluster.transitionToActive(1);
Map<String, byte[]> xattrs = fs.getXAttrs(path);
Assert.assertEquals(xattrs.size(), 2);
Assert.assertArrayEquals(value1, xattrs.get(name1));
Assert.assertArrayEquals(value2, xattrs.get(name2));
fs.delete(path, true);
}
15
Source : TestStateTransitionFailure.java
with Apache License 2.0
from NJUJYB
/**
* Ensure that a failure to fully transition to the active state causes a
* shutdown of the NameNode.
*/
@Test
public void testFailureToTransitionCausesShutdown() throws IOException {
MiniDFSCluster cluster = null;
try {
Configuration conf = new Configuration();
// Set an illegal value for the trash emptier interval. This will cause
// the NN to fail to transition to the active state.
conf.setLong(CommonConfigurationKeys.FS_TRASH_INTERVAL_KEY, -1);
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).checkExitOnShutdown(false).build();
cluster.waitActive();
try {
cluster.transitionToActive(0);
fail("Transitioned to active but should not have been able to.");
} catch (ExitException ee) {
replacedertExceptionContains("Cannot start trash emptier with negative interval", ee);
}
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
15
Source : TestQuotasWithHA.java
with Apache License 2.0
from NJUJYB
@Before
public void setupCluster() throws Exception {
Configuration conf = new Configuration();
conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
conf.setLong(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
HAUtil.setAllowStandbyReads(conf, true);
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(1).waitSafeMode(false).build();
cluster.waitActive();
nn0 = cluster.getNameNode(0);
nn1 = cluster.getNameNode(1);
fs = HATestUtil.configureFailoverFs(cluster, conf);
cluster.transitionToActive(0);
}
15
Source : TestDNFencing.java
with Apache License 2.0
from NJUJYB
/**
* Regression test for HDFS-2742. The issue in this bug was:
* - DN does a block report while file is open. This BR contains
* the block in RBW state.
* - Standby queues the RBW state in PendingDatanodeMessages
* - Standby processes edit logs during failover. Before fixing
* this bug, it was mistakenly applying the RBW reported state
* after the block had been completed, causing the block to get
* marked corrupt. Instead, we should now be applying the RBW
* message on OP_ADD, and then the FINALIZED message on OP_CLOSE.
*/
@Test
public void testBlockReportsWhileFileBeingWritten() throws Exception {
FSDataOutputStream out = fs.create(TEST_FILE_PATH);
try {
AppendTestUtil.write(out, 0, 10);
out.hflush();
// Block report will include the RBW replica, but will be
// queued on the StandbyNode.
cluster.triggerBlockReports();
} finally {
IOUtils.closeStream(out);
}
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
// Verify that no replicas are marked corrupt, and that the
// file is readable from the failed-over standby.
BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
DFSTestUtil.readFile(fs, TEST_FILE_PATH);
}
15
Source : TestBookKeeperAsHASharedDir.java
with Apache License 2.0
from NJUJYB
/**
* NameNode should load the edits correctly if the applicable edits are
* present in the BKJM.
*/
@Test
public void testNameNodeMultipleSwitchesUsingBKJM() throws Exception {
MiniDFSCluster cluster = null;
try {
Configuration conf = new Configuration();
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, BKJMUtil.createJournalURI("/correctEditLogSelection").toString());
BKJMUtil.addJournalManagerDefinition(conf);
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).manageNameDfsSharedDirs(false).build();
NameNode nn1 = cluster.getNameNode(0);
NameNode nn2 = cluster.getNameNode(1);
cluster.waitActive();
cluster.transitionToActive(0);
// Roll Edits from current Active.
nn1.getRpcServer().rollEditLog();
// Transition to standby current active gracefully.
cluster.transitionToStandby(0);
// Make the other Active and Roll edits multiple times
cluster.transitionToActive(1);
nn2.getRpcServer().rollEditLog();
nn2.getRpcServer().rollEditLog();
// Now One more failover. So NN1 should be able to failover successfully.
cluster.transitionToStandby(1);
cluster.transitionToActive(0);
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
15
Source : HdfsTestUtil.java
with Apache License 2.0
from apache
public static MiniDFSCluster setupClass(String dir, boolean safeModeTesting, boolean haTesting) throws Exception {
checkAssumptions();
if (!HA_TESTING_ENABLED)
haTesting = false;
DefaultMetricsSystem.setInstance(new FakeMetricsSystem());
Configuration conf = getBasicConfiguration(new Configuration());
conf.set("hdfs.minidfs.basedir", dir + File.separator + "hdfsBaseDir");
conf.set("dfs.namenode.name.dir", dir + File.separator + "nameNodeNameDir");
// Disable metrics logging for HDFS
conf.setInt("dfs.namenode.metrics.logger.period.seconds", 0);
conf.setInt("dfs.datanode.metrics.logger.period.seconds", 0);
System.setProperty("test.build.data", dir + File.separator + "hdfs" + File.separator + "build");
System.setProperty("test.cache.data", dir + File.separator + "hdfs" + File.separator + "cache");
System.setProperty("solr.lock.type", DirectoryFactory.LOCK_TYPE_HDFS);
// test-files/solr/solr.xml sets this to be 15000. This isn't long enough for HDFS in some cases.
System.setProperty("socketTimeout", "90000");
String blockcacheGlobal = System.getProperty("solr.hdfs.blockcache.global", Boolean.toString(random().nextBoolean()));
System.setProperty("solr.hdfs.blockcache.global", blockcacheGlobal);
// Limit memory usage for HDFS tests
if (Boolean.parseBoolean(blockcacheGlobal)) {
System.setProperty("solr.hdfs.blockcache.blocksperbank", "4096");
} else {
System.setProperty("solr.hdfs.blockcache.blocksperbank", "512");
System.setProperty("tests.hdfs.numdatanodes", "1");
}
int dataNodes = Integer.getInteger("tests.hdfs.numdatanodes", 2);
final MiniDFSCluster.Builder dfsClusterBuilder = new MiniDFSCluster.Builder(conf).numDataNodes(dataNodes).format(true);
if (haTesting) {
dfsClusterBuilder.nnTopology(MiniDFSNNTopology.simpleHATopology());
}
MiniDFSCluster dfsCluster = dfsClusterBuilder.build();
HdfsUtil.TEST_CONF = getClientConfiguration(dfsCluster);
System.setProperty("solr.hdfs.home", getDataDir(dfsCluster, "solr_hdfs_home"));
dfsCluster.waitActive();
if (haTesting)
dfsCluster.transitionToActive(0);
int rndMode = random().nextInt(3);
if (safeModeTesting && rndMode == 1) {
NameNodeAdapter.enterSafeMode(dfsCluster.getNameNode(), false);
int rnd = random().nextInt(10000);
Timer timer = new Timer();
synchronized (TIMERS_LOCK) {
if (timers == null) {
timers = new HashMap<>();
}
timers.put(dfsCluster, timer);
}
timer.schedule(new TimerTask() {
@Override
public void run() {
NameNodeAdapter.leaveSafeMode(dfsCluster.getNameNode());
}
}, rnd);
} else if (haTesting && rndMode == 2) {
int rnd = random().nextInt(30000);
Timer timer = new Timer();
synchronized (TIMERS_LOCK) {
if (timers == null) {
timers = new HashMap<>();
}
timers.put(dfsCluster, timer);
}
timer.schedule(new TimerTask() {
@Override
public void run() {
// TODO: randomly transition to standby
// try {
// dfsCluster.transitionToStandby(0);
// dfsCluster.transitionToActive(1);
// } catch (IOException e) {
// throw new RuntimeException();
// }
}
}, rnd);
} else {
// TODO: we could do much better at testing this
// force a lease recovery by creating a tlog file and not closing it
URI uri = dfsCluster.getURI();
Path hdfsDirPath = new Path(uri.toString() + "/solr/collection1/core_node1/data/tlog/tlog.0000000000000000000");
// transaction log already being created, for testing
badTlogOutStreamFs = FileSystem.get(hdfsDirPath.toUri(), getClientConfiguration(dfsCluster));
badTlogOutStream = badTlogOutStreamFs.create(hdfsDirPath);
}
SolrTestCaseJ4.useFactory("org.apache.solr.core.HdfsDirectoryFactory");
return dfsCluster;
}
14
Source : TestWebHDFSForHA.java
with Apache License 2.0
from NJUJYB
@Test
public void testSecureHAToken() throws IOException, InterruptedException {
Configuration conf = DFSTestUtil.newHAConfiguration(LOGICAL_NAME);
conf.setBoolean(DFSConfigKeys.DFS_NAMENODE_DELEGATION_TOKEN_ALWAYS_USE_KEY, true);
MiniDFSCluster cluster = null;
WebHdfsFileSystem fs = null;
try {
cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo).numDataNodes(0).build();
HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);
cluster.waitActive();
fs = spy((WebHdfsFileSystem) FileSystem.get(WEBHDFS_URI, conf));
FileSystemTestHelper.addFileSystemForTesting(WEBHDFS_URI, conf, fs);
cluster.transitionToActive(0);
Token<?> token = fs.getDelegationToken(null);
cluster.shutdownNameNode(0);
cluster.transitionToActive(1);
token.renew(conf);
token.cancel(conf);
verify(fs).renewDelegationToken(token);
verify(fs).cancelDelegationToken(token);
} finally {
IOUtils.cleanup(null, fs);
if (cluster != null) {
cluster.shutdown();
}
}
}
14
Source : TestWebHDFSForHA.java
with Apache License 2.0
from NJUJYB
@Test
public void testFailoverAfterOpen() throws IOException {
Configuration conf = DFSTestUtil.newHAConfiguration(LOGICAL_NAME);
conf.set(FS_DEFAULT_NAME_KEY, HdfsConstants.HDFS_URI_SCHEME + "://" + LOGICAL_NAME);
MiniDFSCluster cluster = null;
FileSystem fs = null;
final Path p = new Path("/test");
final byte[] data = "Hello".getBytes();
try {
cluster = new MiniDFSCluster.Builder(conf).nnTopology(topo).numDataNodes(1).build();
HATestUtil.setFailoverConfigurations(cluster, conf, LOGICAL_NAME);
cluster.waitActive();
fs = FileSystem.get(WEBHDFS_URI, conf);
cluster.transitionToActive(1);
FSDataOutputStream out = fs.create(p);
cluster.shutdownNameNode(1);
cluster.transitionToActive(0);
out.write(data);
out.close();
FSDataInputStream in = fs.open(p);
byte[] buf = new byte[data.length];
IOUtils.readFully(in, buf, 0, buf.length);
Assert.assertArrayEquals(data, buf);
} finally {
IOUtils.cleanup(null, fs);
if (cluster != null) {
cluster.shutdown();
}
}
}
14
Source : TestNameNodeRetryCacheMetrics.java
with Apache License 2.0
from NJUJYB
/**
* Start a cluster
*/
@Before
public void setup() throws Exception {
conf = new HdfsConfiguration();
conf.setBoolean(DFS_NAMENODE_ENABLE_RETRY_CACHE_KEY, true);
conf.setInt(DFSConfigKeys.DFS_CLIENT_TEST_DROP_NAMENODE_RESPONSE_NUM_KEY, 2);
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(3).build();
cluster.waitActive();
cluster.transitionToActive(namenodeId);
HATestUtil.setFailoverConfigurations(cluster, conf);
filesystem = (DistributedFileSystem) HATestUtil.configureFailoverFs(cluster, conf);
namesystem = cluster.getNamesystem(namenodeId);
metrics = namesystem.getRetryCache().getMetricsForTests();
}
14
Source : TestStandbyCheckpoints.java
with Apache License 2.0
from NJUJYB
/**
* Test cancellation of ongoing checkpoints when failover happens
* mid-checkpoint.
*/
@Test(timeout = 120000)
public void testCheckpointCancellation() throws Exception {
cluster.transitionToStandby(0);
// Create an edit log in the shared edits dir with a lot
// of mkdirs operations. This is solely so that the image is
// large enough to take a non-trivial amount of time to load.
// (only ~15MB)
URI sharedUri = cluster.getSharedEditsDir(0, 1);
File sharedDir = new File(sharedUri.getPath(), "current");
File tmpDir = new File(MiniDFSCluster.getBaseDirectory(), "testCheckpointCancellation-tmp");
FSNamesystem fsn = cluster.getNamesystem(0);
FSImageTestUtil.createAbortedLogWithMkdirs(tmpDir, NUM_DIRS_IN_LOG, 3, fsn.getLastInodeId() + 1);
String fname = NNStorage.getInProgressEditsFileName(3);
new File(tmpDir, fname).renameTo(new File(sharedDir, fname));
// Checkpoint as fast as we can, in a tight loop.
cluster.getConfiguration(1).setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0);
cluster.restartNameNode(1);
nn1 = cluster.getNameNode(1);
cluster.transitionToActive(0);
boolean canceledOne = false;
for (int i = 0; i < 10 && !canceledOne; i++) {
doEdits(i * 10, i * 10 + 10);
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
cluster.transitionToStandby(1);
cluster.transitionToActive(0);
canceledOne = StandbyCheckpointer.getCanceledCount() > 0;
}
assertTrue(canceledOne);
}
14
Source : TestRetryCacheWithHA.java
with Apache License 2.0
from NJUJYB
/**
* Add a list of cache pools, list cache pools,
* switch active NN, and list cache pools again.
*/
@Test(timeout = 60000)
public void testListCachePools() throws Exception {
final int poolCount = 7;
HashSet<String> poolNames = new HashSet<String>(poolCount);
for (int i = 0; i < poolCount; i++) {
String poolName = "testListCachePools-" + i;
dfs.addCachePool(new CachePoolInfo(poolName));
poolNames.add(poolName);
}
listCachePools(poolNames, 0);
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
cluster.waitActive(1);
listCachePools(poolNames, 1);
}
14
Source : TestRetryCacheWithHA.java
with Apache License 2.0
from NJUJYB
/**
* When NN failover happens, if the client does not receive the response and
* sends a retry request to the other NN, the same response should be received
* based on the retry cache.
*/
public void testClientRetryWithFailover(final AtMostOnceOp op) throws Exception {
final Map<String, Object> results = new HashMap<String, Object>();
op.prepare();
// set DummyRetryInvocationHandler#block to true
DummyRetryInvocationHandler.block.set(true);
new Thread() {
@Override
public void run() {
try {
op.invoke();
Object result = op.getResult();
LOG.info("Operation " + op.name + " finished");
synchronized (TestRetryCacheWithHA.this) {
results.put(op.name, result == null ? "SUCCESS" : result);
TestRetryCacheWithHA.this.notifyAll();
}
} catch (Exception e) {
LOG.info("Got Exception while calling " + op.name, e);
} finally {
IOUtils.cleanup(null, op.client);
}
}
}.start();
// make sure the client's call has actually been handled by the active NN
replacedertTrue("After waiting the operation " + op.name + " still has not taken effect on NN yet", op.checkNamenodeBeforeReturn());
// force the failover
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
// disable the block in DummyHandler
LOG.info("Setting block to false");
DummyRetryInvocationHandler.block.set(false);
synchronized (this) {
while (!results.containsKey(op.name)) {
this.wait();
}
LOG.info("Got the result of " + op.name + ": " + results.get(op.name));
}
// Waiting for failover.
while (cluster.getNamesystem(1).isInStandbyState()) {
Thread.sleep(10);
}
long hitNN0 = cluster.getNamesystem(0).getRetryCache().getMetricsForTests().getCacheHit();
long hitNN1 = cluster.getNamesystem(1).getRetryCache().getMetricsForTests().getCacheHit();
replacedertTrue("CacheHit: " + hitNN0 + ", " + hitNN1, hitNN0 + hitNN1 > 0);
long updatedNN0 = cluster.getNamesystem(0).getRetryCache().getMetricsForTests().getCacheUpdated();
long updatedNN1 = cluster.getNamesystem(1).getRetryCache().getMetricsForTests().getCacheUpdated();
// Cache updated metrics on NN0 should be >0 since the op was processed on NN0
assertTrue("CacheUpdated on NN0: " + updatedNN0, updatedNN0 > 0);
// Cache updated metrics on NN1 should be >0 since NN1 applied the editlog
assertTrue("CacheUpdated on NN1: " + updatedNN1, updatedNN1 > 0);
long expectedUpdateCount = op.getExpectedCacheUpdateCount();
if (expectedUpdateCount > 0) {
replacedertEquals("CacheUpdated on NN0: " + updatedNN0, expectedUpdateCount, updatedNN0);
replacedertEquals("CacheUpdated on NN0: " + updatedNN1, expectedUpdateCount, updatedNN1);
}
}
14
Source : TestPendingCorruptDnMessages.java
with Apache License 2.0
from NJUJYB
@Test
public void testChangedStorageId() throws IOException, URISyntaxException, InterruptedException {
HdfsConfiguration conf = new HdfsConfiguration();
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).nnTopology(MiniDFSNNTopology.simpleHATopology()).build();
try {
cluster.transitionToActive(0);
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
OutputStream out = fs.create(filePath);
out.write("foo bar baz".getBytes());
out.close();
HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0), cluster.getNameNode(1));
// Change the gen stamp of the block on datanode to go back in time (gen
// stamps start at 1000)
ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
assertTrue(MiniDFSCluster.changeGenStampOfBlock(0, block, 900));
// Stop the DN so the replica with the changed gen stamp will be reported
// when this DN starts up.
DataNodeProperties dnProps = cluster.stopDataNode(0);
// Restart the namenode so that when the DN comes up it will see an initial
// block report.
cluster.restartNameNode(1, false);
assertTrue(cluster.restartDataNode(dnProps, true));
// Wait until the standby NN queues up the corrupt block in the pending DN
// message queue.
while (cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount() < 1) {
ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
}
assertEquals(1, cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount());
String oldStorageId = getRegisteredDatanodeUid(cluster, 1);
// Reformat/restart the DN.
assertTrue(wipeAndRestartDn(cluster, 0));
// Give the DN time to start up and register, which will cause the
// DatanodeManager to dissociate the old storage ID from the DN xfer addr.
String newStorageId = "";
do {
ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
newStorageId = getRegisteredDatanodeUid(cluster, 1);
System.out.println("====> oldStorageId: " + oldStorageId + " newStorageId: " + newStorageId);
} while (newStorageId.equals(oldStorageId));
assertEquals(0, cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount());
// Now try to fail over.
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
} finally {
cluster.shutdown();
}
}
14
Source : TestDNFencing.java
with Apache License 2.0
from NJUJYB
/**
* Test that, when a block is re-opened for append, the related
* datanode messages are correctly queued by the SBN because
* they have future states and genstamps.
*/
@Test
public void testQueueingWithAppend() throws Exception {
int numQueued = 0;
int numDN = cluster.getDataNodes().size();
// case 1: create file and call hflush after write
FSDataOutputStream out = fs.create(TEST_FILE_PATH);
try {
AppendTestUtil.write(out, 0, 10);
out.hflush();
// Opening the file will report RBW replicas, but will be
// queued on the StandbyNode.
// However, the delivery of RBW messages is delayed by HDFS-7217 fix.
// Apply cluster.triggerBlockReports() to trigger the reporting sooner.
//
cluster.triggerBlockReports();
// RBW messages
numQueued += numDN;
// The cluster.triggerBlockReports() call above does a full
// block report that incurs 3 extra RBW messages
// RBW messages
numQueued += numDN;
} finally {
IOUtils.closeStream(out);
// blockReceived messages
numQueued += numDN;
}
cluster.triggerBlockReports();
numQueued += numDN;
assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount());
// case 2: append to file and call hflush after write
try {
out = fs.append(TEST_FILE_PATH);
AppendTestUtil.write(out, 10, 10);
out.hflush();
cluster.triggerBlockReports();
// RBW messages, see comments in case 1
numQueued += numDN * 2;
} finally {
IOUtils.closeStream(out);
// blockReceived
numQueued += numDN;
}
assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount());
// case 3: similar to case 2, except no hflush is called.
try {
out = fs.append(TEST_FILE_PATH);
AppendTestUtil.write(out, 20, 10);
} finally {
// The write operation in the try block is buffered, thus no RBW message
// is reported yet until the closeStream call here. When closeStream is
// called, before HDFS-7217 fix, there would be three RBW messages
// (blockReceiving), plus three FINALIZED messages (blockReceived)
// delivered to NN. However, because of HDFS-7217 fix, the reporting of
// RBW messages is postponed. In this case, they are even overwritten
// by the blockReceived messages of the same block when they are waiting
// to be delivered. All this happens within the closeStream() call.
// What's delivered to NN is the three blockReceived messages. See
// BPServiceActor#addPendingReplicationBlockInfo
//
IOUtils.closeStream(out);
// blockReceived
numQueued += numDN;
}
cluster.triggerBlockReports();
numQueued += numDN;
LOG.info("Expect " + numQueued + " and got: " + cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount());
assertEquals(numQueued, cluster.getNameNode(1).getNamesystem().getPendingDataNodeMessageCount());
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
// Verify that no replicas are marked corrupt, and that the
// file is readable from the failed-over standby.
BlockManagerTestUtil.updateState(nn1.getNamesystem().getBlockManager());
BlockManagerTestUtil.updateState(nn2.getNamesystem().getBlockManager());
assertEquals(0, nn1.getNamesystem().getCorruptReplicaBlocks());
assertEquals(0, nn2.getNamesystem().getCorruptReplicaBlocks());
AppendTestUtil.check(fs, TEST_FILE_PATH, 30);
}
14
Source : TestDFSUpgradeWithHA.java
with Apache License 2.0
from NJUJYB
@Test
public void testFinalizeWithJournalNodes() throws IOException, URISyntaxException {
MiniQJMHACluster qjCluster = null;
FileSystem fs = null;
try {
Builder builder = new MiniQJMHACluster.Builder(conf);
builder.getDfsBuilder().numDataNodes(0);
qjCluster = builder.build();
MiniDFSCluster cluster = qjCluster.getDfsCluster();
// No upgrade is in progress at the moment.
checkJnPreviousDirExistence(qjCluster, false);
checkClusterPreviousDirExistence(cluster, false);
assertCTimesEqual(cluster);
// Transition NN0 to active and do some FS ops.
cluster.transitionToActive(0);
fs = HATestUtil.configureFailoverFs(cluster, conf);
replacedertTrue(fs.mkdirs(new Path("/foo1")));
final long cidBeforeUpgrade = getCommittedTxnIdValue(qjCluster);
// Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
// flag.
cluster.shutdownNameNode(1);
cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
cluster.restartNameNode(0, false);
assertTrue(cidBeforeUpgrade <= getCommittedTxnIdValue(qjCluster));
assertTrue(fs.mkdirs(new Path("/foo2")));
checkNnPreviousDirExistence(cluster, 0, true);
checkNnPreviousDirExistence(cluster, 1, false);
checkJnPreviousDirExistence(qjCluster, true);
// Now bootstrap the standby with the upgraded info.
int rc = BootstrapStandby.run(new String[] { "-force" }, cluster.getConfiguration(1));
assertEquals(0, rc);
cluster.restartNameNode(1);
final long cidDuringUpgrade = getCommittedTxnIdValue(qjCluster);
assertTrue(cidDuringUpgrade > cidBeforeUpgrade);
runFinalizeCommand(cluster);
assertEquals(cidDuringUpgrade, getCommittedTxnIdValue(qjCluster));
checkClusterPreviousDirExistence(cluster, false);
checkJnPreviousDirExistence(qjCluster, false);
assertCTimesEqual(cluster);
} finally {
if (fs != null) {
fs.close();
}
if (qjCluster != null) {
qjCluster.shutdown();
}
}
}
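The upgrade choreography above follows a fixed sequence that the next example repeats. A condensed sketch of just that sequence, under the assumption that qjCluster and cluster come from the same MiniQJMHACluster fixture and that runFinalizeCommand and checkJnPreviousDirExistence are the test class's own helpers:
private void upgradeAndFinalize(MiniQJMHACluster qjCluster, MiniDFSCluster cluster) throws Exception {
    // Shut down the standby, then restart the active with the -upgrade flag.
    cluster.shutdownNameNode(1);
    cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
    cluster.restartNameNode(0, false);
    // Bootstrap the standby from the upgraded active and bring it back up.
    assertEquals(0, BootstrapStandby.run(new String[] { "-force" }, cluster.getConfiguration(1)));
    cluster.restartNameNode(1);
    // Finalizing drops the "previous" directories on the NNs and JournalNodes.
    runFinalizeCommand(cluster);
    checkJnPreviousDirExistence(qjCluster, false);
}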
14
Source : TestDFSUpgradeWithHA.java
with Apache License 2.0
from NJUJYB
/**
* Make sure that we are allowed to finalize even if the NN that initiated
* the upgrade is in the standby state.
*/
@Test
public void testFinalizeFromSecondNameNodeWithJournalNodes() throws IOException, URISyntaxException {
MiniQJMHACluster qjCluster = null;
FileSystem fs = null;
try {
Builder builder = new MiniQJMHACluster.Builder(conf);
builder.getDfsBuilder().numDataNodes(0);
qjCluster = builder.build();
MiniDFSCluster cluster = qjCluster.getDfsCluster();
// No upgrade is in progress at the moment.
checkJnPreviousDirExistence(qjCluster, false);
checkClusterPreviousDirExistence(cluster, false);
assertCTimesEqual(cluster);
// Transition NN0 to active and do some FS ops.
cluster.transitionToActive(0);
fs = HATestUtil.configureFailoverFs(cluster, conf);
assertTrue(fs.mkdirs(new Path("/foo1")));
// Do the upgrade. Shut down NN1 and then restart NN0 with the upgrade
// flag.
cluster.shutdownNameNode(1);
cluster.getNameNodeInfos()[0].setStartOpt(StartupOption.UPGRADE);
cluster.restartNameNode(0, false);
checkNnPreviousDirExistence(cluster, 0, true);
checkNnPreviousDirExistence(cluster, 1, false);
checkJnPreviousDirExistence(qjCluster, true);
// Now bootstrap the standby with the upgraded info.
int rc = BootstrapStandby.run(new String[] { "-force" }, cluster.getConfiguration(1));
assertEquals(0, rc);
cluster.restartNameNode(1);
// Make the second NN (not the one that initiated the upgrade) active when
// the finalize command is run.
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
runFinalizeCommand(cluster);
checkClusterPreviousDirExistence(cluster, false);
checkJnPreviousDirExistence(qjCluster, false);
assertCTimesEqual(cluster);
} finally {
if (fs != null) {
fs.close();
}
if (qjCluster != null) {
qjCluster.shutdown();
}
}
}
14
Source : TestBookKeeperAsHASharedDir.java
with Apache License 2.0
from NJUJYB
/**
* Test a simple HA failover use case with BookKeeper (BK) as the shared edits journal.
*/
@Test
public void testFailoverWithBK() throws Exception {
MiniDFSCluster cluster = null;
try {
Configuration conf = new Configuration();
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
conf.set(DFSConfigKeys.DFS_NAMENODE_SHARED_EDITS_DIR_KEY, BKJMUtil.createJournalURI("/hotfailover").toString());
BKJMUtil.addJournalManagerDefinition(conf);
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).manageNameDfsSharedDirs(false).build();
NameNode nn1 = cluster.getNameNode(0);
NameNode nn2 = cluster.getNameNode(1);
cluster.waitActive();
cluster.transitionToActive(0);
Path p = new Path("/testBKJMfailover");
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
fs.mkdirs(p);
cluster.shutdownNameNode(0);
cluster.transitionToActive(1);
assertTrue(fs.exists(p));
} finally {
if (cluster != null) {
cluster.shutdown();
}
}
}
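Stripped of the BookKeeper journal configuration, the test above is the canonical transitionToActive() failover smoke test that most examples on this page elaborate. A minimal self-contained sketch, assuming only the stock HA topology (the test name is mine):
@Test
public void testBasicHaFailover() throws Exception {
    Configuration conf = new Configuration();
    MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
    try {
        cluster.waitActive();
        // NN0 becomes active; write a directory through the failover FS.
        cluster.transitionToActive(0);
        FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
        Path p = new Path("/failover-test");
        assertTrue(fs.mkdirs(p));
        // Kill NN0 and promote NN1; the edit must be visible after failover.
        cluster.shutdownNameNode(0);
        cluster.transitionToActive(1);
        assertTrue(fs.exists(p));
    } finally {
        cluster.shutdown();
    }
}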
14
Source : TestStandbyCheckpoints.java
with Apache License 2.0
from naver
/**
* Test cancellation of ongoing checkpoints when failover happens
* mid-checkpoint.
*/
@Test(timeout = 120000)
public void testCheckpointCancellation() throws Exception {
cluster.transitionToStandby(0);
// Create an edit log in the shared edits dir with a lot
// of mkdirs operations. This is solely so that the image is
// large enough to take a non-trivial amount of time to load.
// (only ~15MB)
URI sharedUri = cluster.getSharedEditsDir(0, 1);
File sharedDir = new File(sharedUri.getPath(), "current");
File tmpDir = new File(MiniDFSCluster.getBaseDirectory(), "testCheckpointCancellation-tmp");
FSNamesystem fsn = cluster.getNamesystem(0);
FSImageTestUtil.createAbortedLogWithMkdirs(tmpDir, NUM_DIRS_IN_LOG, 3, fsn.getFSDirectory().getLastInodeId() + 1);
String fname = NNStorage.getInProgressEditsFileName(3);
new File(tmpDir, fname).renameTo(new File(sharedDir, fname));
// Checkpoint as fast as we can, in a tight loop.
cluster.getConfiguration(1).setInt(DFSConfigKeys.DFS_NAMENODE_CHECKPOINT_PERIOD_KEY, 0);
cluster.restartNameNode(1);
nn1 = cluster.getNameNode(1);
cluster.transitionToActive(0);
boolean canceledOne = false;
for (int i = 0; i < 10 && !canceledOne; i++) {
doEdits(i * 10, i * 10 + 10);
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
cluster.transitionToStandby(1);
cluster.transitionToActive(0);
canceledOne = StandbyCheckpointer.getCanceledCount() > 0;
}
assertTrue(canceledOne);
}
14
Source : TestRetryCacheWithHA.java
with Apache License 2.0
from naver
/**
* When NN failover happens, if the client did not receive the response and
* sends a retry request to the other NN, the same response should be
* received based on the retry cache.
*/
public void testClientRetryWithFailover(final AtMostOnceOp op) throws Exception {
final Map<String, Object> results = new HashMap<String, Object>();
op.prepare();
// set DummyRetryInvocationHandler#block to true
DummyRetryInvocationHandler.block.set(true);
new Thread() {
@Override
public void run() {
try {
op.invoke();
Object result = op.getResult();
LOG.info("Operation " + op.name + " finished");
synchronized (TestRetryCacheWithHA.this) {
results.put(op.name, result == null ? "SUCCESS" : result);
TestRetryCacheWithHA.this.notifyAll();
}
} catch (Exception e) {
LOG.info("Got Exception while calling " + op.name, e);
} finally {
IOUtils.cleanup(null, op.client);
}
}
}.start();
// make sure the client's call has actually been handled by the active NN
replacedertTrue("After waiting the operation " + op.name + " still has not taken effect on NN yet", op.checkNamenodeBeforeReturn());
// force the failover
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
// disable the block in DummyHandler
LOG.info("Setting block to false");
DummyRetryInvocationHandler.block.set(false);
synchronized (this) {
while (!results.containsKey(op.name)) {
this.wait();
}
LOG.info("Got the result of " + op.name + ": " + results.get(op.name));
}
// Waiting for failover.
while (cluster.getNamesystem(1).isInStandbyState()) {
Thread.sleep(10);
}
long hitNN0 = cluster.getNamesystem(0).getRetryCache().getMetricsForTests().getCacheHit();
long hitNN1 = cluster.getNamesystem(1).getRetryCache().getMetricsForTests().getCacheHit();
replacedertTrue("CacheHit: " + hitNN0 + ", " + hitNN1, hitNN0 + hitNN1 > 0);
long updatedNN0 = cluster.getNamesystem(0).getRetryCache().getMetricsForTests().getCacheUpdated();
long updatedNN1 = cluster.getNamesystem(1).getRetryCache().getMetricsForTests().getCacheUpdated();
// Cache updated metrics on NN0 should be >0 since the op was process on NN0
replacedertTrue("CacheUpdated on NN0: " + updatedNN0, updatedNN0 > 0);
// Cache updated metrics on NN0 should be >0 since NN1 applied the editlog
replacedertTrue("CacheUpdated on NN1: " + updatedNN1, updatedNN1 > 0);
}
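The busy-wait on isInStandbyState() above is how several of these tests confirm that a failover has completed from the NN's own point of view. A sketch of that wait as a helper (the name, and the lack of a timeout, are mine, mirroring the loop above):
private static void waitUntilActive(MiniDFSCluster cluster, int nnIndex) throws InterruptedException {
    // Poll until the NN reports that it has left standby state.
    while (cluster.getNamesystem(nnIndex).isInStandbyState()) {
        Thread.sleep(10);
    }
}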
14
Source : TestPendingCorruptDnMessages.java
with Apache License 2.0
from naver
@Test
public void testChangedStorageId() throws IOException, URISyntaxException, InterruptedException {
HdfsConfiguration conf = new HdfsConfiguration();
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).numDataNodes(1).nnTopology(MiniDFSNNTopology.simpleHATopology()).build();
try {
cluster.transitionToActive(0);
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
OutputStream out = fs.create(filePath);
out.write("foo bar baz".getBytes());
out.close();
HATestUtil.waitForStandbyToCatchUp(cluster.getNameNode(0), cluster.getNameNode(1));
// Change the gen stamp of the block on datanode to go back in time (gen
// stamps start at 1000)
ExtendedBlock block = DFSTestUtil.getFirstBlock(fs, filePath);
assertTrue(cluster.changeGenStampOfBlock(0, block, 900));
// Stop the DN so the replica with the changed gen stamp will be reported
// when this DN starts up.
DataNodeProperties dnProps = cluster.stopDataNode(0);
// Restart the namenode so that when the DN comes up it will see an initial
// block report.
cluster.restartNameNode(1, false);
assertTrue(cluster.restartDataNode(dnProps, true));
// Wait until the standby NN queues up the corrupt block in the pending DN
// message queue.
while (cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount() < 1) {
ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
}
assertEquals(1, cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount());
String oldStorageId = getRegisteredDatanodeUid(cluster, 1);
// Reformat/restart the DN.
assertTrue(wipeAndRestartDn(cluster, 0));
// Give the DN time to start up and register, which will cause the
// DatanodeManager to dissociate the old storage ID from the DN xfer addr.
String newStorageId = "";
do {
ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
newStorageId = getRegisteredDatanodeUid(cluster, 1);
System.out.println("====> oldStorageId: " + oldStorageId + " newStorageId: " + newStorageId);
} while (newStorageId.equals(oldStorageId));
assertEquals(0, cluster.getNamesystem(1).getBlockManager().getPendingDataNodeMessageCount());
// Now try to fail over.
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
} finally {
cluster.shutdown();
}
}
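Both this test and the first example in this section poll getPendingDataNodeMessageCount() in a sleep loop. A sketch of that wait factored into a helper (name hypothetical), reusing the same ThreadUtil call as the test:
private static void waitForPendingDnMessages(MiniDFSCluster cluster, int nnIndex, int minCount) {
    // Block until the standby has queued at least minCount pending
    // datanode messages (e.g. a report for a stale-genstamp replica).
    while (cluster.getNamesystem(nnIndex).getBlockManager().getPendingDataNodeMessageCount() < minCount) {
        ThreadUtil.sleepAtLeastIgnoreInterrupts(1000);
    }
}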
14
Source : TestHAAppend.java
with Apache License 2.0
from naver
/**
* Test to verify the processing of PendingDataNodeMessageQueue in case of
* append. One block will be marked as corrupt if the OP_ADD and
* OP_UPDATE_BLOCKS edits come in one edit log segment and the OP_CLOSE edit
* comes in the next log segment, which is loaded during failover. Regression
* test for HDFS-3605.
*/
@Test
public void testMultipleAppendsDuringCatchupTailing() throws Exception {
Configuration conf = new Configuration();
// Set a lengthy edits tailing period, and explicit rolling, so we can
// control the ingest of edits by the standby for this test.
conf.set(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, "5000");
conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, -1);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(3).build();
FileSystem fs = null;
try {
cluster.transitionToActive(0);
fs = HATestUtil.configureFailoverFs(cluster, conf);
Path fileToAppend = new Path("/FileToAppend");
Path fileToTruncate = new Path("/FileToTruncate");
final byte[] data = new byte[1 << 16];
DFSUtil.getRandom().nextBytes(data);
final int[] appendPos = AppendTestUtil.randomFilePartition(data.length, COUNT);
final int[] truncatePos = AppendTestUtil.randomFilePartition(data.length, 1);
// Create file, write some data, and hflush so that the first
// block is in the edit log prior to roll.
FSDataOutputStream out = createAndHflush(fs, fileToAppend, data, appendPos[0]);
FSDataOutputStream out4Truncate = createAndHflush(fs, fileToTruncate, data, data.length);
// Let the StandbyNode catch the creation of the file.
cluster.getNameNode(0).getRpcServer().rollEditLog();
cluster.getNameNode(1).getNamesystem().getEditLogTailer().doTailEdits();
out.close();
out4Truncate.close();
// Append and re-close a few time, so that many block entries are queued.
for (int i = 0; i < COUNT; i++) {
int end = i < COUNT - 1 ? appendPos[i + 1] : data.length;
out = fs.append(fileToAppend);
out.write(data, appendPos[i], end - appendPos[i]);
out.close();
}
boolean isTruncateReady = fs.truncate(fileToTruncate, truncatePos[0]);
// Ensure that blocks have been reported to the SBN ahead of the edits
// arriving.
cluster.triggerBlockReports();
// Failover the current standby to active.
cluster.shutdownNameNode(0);
cluster.transitionToActive(1);
// Check the FSCK doesn't detect any bad blocks on the SBN.
int rc = ToolRunner.run(new DFSck(cluster.getConfiguration(1)), new String[] { "/", "-files", "-blocks" });
assertEquals(0, rc);
assertEquals("CorruptBlocks should be empty.", 0, cluster.getNameNode(1).getNamesystem().getCorruptReplicaBlocks());
AppendTestUtil.checkFullFile(fs, fileToAppend, data.length, data, fileToAppend.toString());
if (!isTruncateReady) {
TestFileTruncate.checkBlockRecovery(fileToTruncate, cluster.getFileSystem(1));
}
AppendTestUtil.checkFullFile(fs, fileToTruncate, truncatePos[0], data, fileToTruncate.toString());
} finally {
if (null != cluster) {
cluster.shutdown();
}
if (null != fs) {
fs.close();
}
}
}
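The test controls edit ingestion by hand: it rolls the active NN's log, then tells the standby's tailer to catch up, so it knows exactly which edits the standby has seen. A sketch of that two-step handshake as a helper (name mine), valid under the same assumptions as above (automatic rolling disabled, long tailing period):
private static void rollActiveAndTailStandby(MiniDFSCluster cluster) throws Exception {
    // Close the active NN's in-progress edit segment...
    cluster.getNameNode(0).getRpcServer().rollEditLog();
    // ...then have the standby ingest everything up to that roll point.
    cluster.getNameNode(1).getNamesystem().getEditLogTailer().doTailEdits();
}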
13
Source : TestStandbyCheckpoints.java
with Apache License 2.0
from NJUJYB
/**
* Test cancellation of ongoing checkpoints when failover happens
* mid-checkpoint during image upload from standby to active NN.
*/
@Test(timeout = 60000)
public void testCheckpointCancellationDuringUpload() throws Exception {
// don't compress, we want a big image
cluster.getConfiguration(0).setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, false);
cluster.getConfiguration(1).setBoolean(DFSConfigKeys.DFS_IMAGE_COMPRESS_KEY, false);
// Throttle SBN upload to make it hang during upload to ANN
cluster.getConfiguration(1).setLong(DFSConfigKeys.DFS_IMAGE_TRANSFER_RATE_KEY, 100);
cluster.restartNameNode(0);
cluster.restartNameNode(1);
nn0 = cluster.getNameNode(0);
nn1 = cluster.getNameNode(1);
cluster.transitionToActive(0);
doEdits(0, 100);
HATestUtil.waitForStandbyToCatchUp(nn0, nn1);
HATestUtil.waitForCheckpoint(cluster, 1, ImmutableList.of(104));
cluster.transitionToStandby(0);
cluster.transitionToActive(1);
// Wait to make sure background TransferFsImageUpload thread was cancelled.
// This needs to be done before the next test in the suite starts, so that a
// file descriptor is not held open during the next cluster init.
cluster.shutdown();
cluster = null;
GenericTestUtils.waitFor(new Supplier<Boolean>() {
@Override
public Boolean get() {
ThreadMXBean threadBean = ManagementFactory.getThreadMXBean();
ThreadInfo[] threads = threadBean.getThreadInfo(threadBean.getAllThreadIds(), 1);
for (ThreadInfo thread : threads) {
if (thread.getThreadName().startsWith("TransferFsImageUpload")) {
return false;
}
}
return true;
}
}, 1000, 30000);
// Assert that the former active did not accept the canceled checkpoint file.
assertEquals(0, nn0.getFSImage().getMostRecentCheckpointTxId());
}
13
Source : TestHASafeMode.java
with Apache License 2.0
from NJUJYB
@Before
public void setupCluster() throws Exception {
Configuration conf = new Configuration();
conf.setInt(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, BLOCK_SIZE);
conf.setInt(DFSConfigKeys.DFS_HEARTBEAT_INTERVAL_KEY, 1);
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(3).waitSafeMode(false).build();
cluster.waitActive();
nn0 = cluster.getNameNode(0);
nn1 = cluster.getNameNode(1);
fs = HATestUtil.configureFailoverFs(cluster, conf);
cluster.transitionToActive(0);
}
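This @Before fixture has no matching teardown in the excerpt; presumably the test class pairs it with something like the following @After sketch (hypothetical, but it matches the cluster.shutdown() calls seen throughout these examples):
@After
public void shutdownCluster() throws IOException {
    if (cluster != null) {
        // Releases the NN/DN threads and the cluster's storage directories.
        cluster.shutdown();
        cluster = null;
    }
}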
13
Source : TestFailureOfSharedDir.java
with Apache License 2.0
from NJUJYB
/**
* Test that marking the shared edits dir as being "required" causes the NN to
* fail if that dir can't be accessed.
*/
@Test
public void testFailureOfSharedDir() throws Exception {
Configuration conf = new Configuration();
conf.setLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, 2000);
// The shared edits dir will automatically be marked required.
MiniDFSCluster cluster = null;
File sharedEditsDir = null;
try {
cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).checkExitOnShutdown(false).build();
cluster.waitActive();
cluster.transitionToActive(0);
FileSystem fs = HATestUtil.configureFailoverFs(cluster, conf);
assertTrue(fs.mkdirs(new Path("/test1")));
// Blow away the shared edits dir.
URI sharedEditsUri = cluster.getSharedEditsDir(0, 1);
sharedEditsDir = new File(sharedEditsUri);
assertEquals(0, FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "-w", true));
Thread.sleep(conf.getLong(DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_KEY, DFS_NAMENODE_RESOURCE_CHECK_INTERVAL_DEFAULT) * 2);
NameNode nn1 = cluster.getNameNode(1);
assertTrue(nn1.isStandbyState());
assertFalse("StandBy NameNode should not go to SafeMode on resource unavailability", nn1.isInSafeMode());
NameNode nn0 = cluster.getNameNode(0);
try {
// Make sure that subsequent operations on the NN fail.
nn0.getRpcServer().rollEditLog();
fail("Succeeded in rolling edit log despite shared dir being deleted");
} catch (ExitException ee) {
GenericTestUtils.assertExceptionContains("finalize log segment 1, 3 failed for required journal", ee);
}
// Check that none of the edits dirs rolled, since the shared edits
// dir didn't roll. Regression test for HDFS-2874.
for (URI editsUri : cluster.getNameEditsDirs(0)) {
if (editsUri.equals(sharedEditsUri)) {
continue;
}
File editsDir = new File(editsUri.getPath());
File curDir = new File(editsDir, "current");
GenericTestUtils.assertGlobEquals(curDir, "edits_.*", NNStorage.getInProgressEditsFileName(1));
}
} finally {
if (sharedEditsDir != null) {
// without this test cleanup will fail
FileUtil.chmod(sharedEditsDir.getAbsolutePath(), "+w", true);
}
if (cluster != null) {
cluster.shutdown();
}
}
}
13
Source : TestEditLogTailer.java
with Apache License 2.0
from NJUJYB
private static void testStandbyTriggersLogRolls(int activeIndex) throws Exception {
Configuration conf = new Configuration();
// Roll every 1s
conf.setInt(DFSConfigKeys.DFS_HA_LOGROLL_PERIOD_KEY, 1);
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
// Have to specify IPC ports so the NNs can talk to each other.
MiniDFSNNTopology topology = new MiniDFSNNTopology().addNameservice(new MiniDFSNNTopology.NSConf("ns1").addNN(new MiniDFSNNTopology.NNConf("nn1").setIpcPort(10031)).addNN(new MiniDFSNNTopology.NNConf("nn2").setIpcPort(10032)));
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(topology).numDataNodes(0).build();
try {
cluster.transitionToActive(activeIndex);
waitForLogRollInSharedDir(cluster, 3);
} finally {
cluster.shutdown();
}
}
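The helper above is parameterized by which NN is active, so the suite can presumably cover both directions with two trivial @Test methods; a sketch of that usage (method names are guesses, not the original test's):
@Test
public void testStandbyTriggersLogRollsWhenNN0Active() throws Exception {
    testStandbyTriggersLogRolls(0);
}
@Test
public void testStandbyTriggersLogRollsWhenNN1Active() throws Exception {
    testStandbyTriggersLogRolls(1);
}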
13
Source : TestEditLogTailer.java
with Apache License 2.0
from NJUJYB
@Test
public void testTailer() throws IOException, InterruptedException, ServiceFailedException {
Configuration conf = new HdfsConfiguration();
conf.setInt(DFSConfigKeys.DFS_HA_TAILEDITS_PERIOD_KEY, 1);
HAUtil.setAllowStandbyReads(conf, true);
MiniDFSCluster cluster = new MiniDFSCluster.Builder(conf).nnTopology(MiniDFSNNTopology.simpleHATopology()).numDataNodes(0).build();
cluster.waitActive();
cluster.transitionToActive(0);
NameNode nn1 = cluster.getNameNode(0);
NameNode nn2 = cluster.getNameNode(1);
try {
for (int i = 0; i < DIRS_TO_MAKE / 2; i++) {
NameNodeAdapter.mkdirs(nn1, getDirPath(i), new PermissionStatus("test", "test", new FsPermission((short) 00755)), true);
}
HATestUtil.waitForStandbyToCatchUp(nn1, nn2);
for (int i = 0; i < DIRS_TO_MAKE / 2; i++) {
assertTrue(NameNodeAdapter.getFileInfo(nn2, getDirPath(i), false).isDir());
}
for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) {
NameNodeAdapter.mkdirs(nn1, getDirPath(i), new PermissionStatus("test", "test", new FsPermission((short) 00755)), true);
}
HATestUtil.waitForStandbyToCatchUp(nn1, nn2);
for (int i = DIRS_TO_MAKE / 2; i < DIRS_TO_MAKE; i++) {
assertTrue(NameNodeAdapter.getFileInfo(nn2, getDirPath(i), false).isDir());
}
} finally {
cluster.shutdown();
}
}