* Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *     http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
package org.apache.hadoop.hdfs;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;

import java.io.IOException;
import java.util.Random;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DFSTestUtil;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.MiniDFSCluster;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.LocatedBlocks;
import org.apache.hadoop.raid.RaidCodec;
import org.apache.hadoop.raid.RaidCodecBuilder;
import org.apache.hadoop.security.UnixUserGroupInformation;
import org.apache.hadoop.util.InjectionEventI;
import org.apache.hadoop.util.InjectionHandler;
import org.apache.hadoop.hdfs.server.datanode.BlockDataFile;
import org.apache.hadoop.hdfs.server.datanode.DataNode;
import org.apache.hadoop.hdfs.server.namenode.INode;
import org.apache.hadoop.hdfs.server.namenode.INodeFile;
import org.apache.hadoop.hdfs.server.namenode.NameNode;
import org.apache.hadoop.hdfs.server.namenode.BlocksMap.BlockInfo;
import org.apache.hadoop.hdfs.server.protocol.RaidTask;
import org.apache.hadoop.hdfs.server.protocol.RaidTaskCommand;
import org.apache.hadoop.hdfs.server.datanode.BlockInlineChecksumWriter;
import org.apache.hadoop.hdfs.util.InjectionEvent;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

/**
 * Tests for {@code DistributedFileSystem.raidFile}, run against a
 * {@link MiniDFSCluster}.
 */
public class TestRaidFile {
  public static final Log LOG = LogFactory.getLog(TestRaidFile.class);
  // Mini cluster under test; assigned in startUpCluster().
  private MiniDFSCluster cluster;
  // Name node obtained from the cluster in startUpCluster().
  private NameNode nn;
  // File system returned by cluster.getFileSystem().
  private DistributedFileSystem dfs;
  // Second file system, built in startUpCluster() from a copy of conf
  // ("login as ugi"); used for the permission case in
  // testRaidFileIllegalCases().
  private DistributedFileSystem userdfs;
  // Written to conf as "dfs.block.size"; also used to size the test files.
  private static long blockSize = 512;
  // Not referenced in the code shown here; presumably the stripe length in
  // data blocks -- TODO confirm.
  private static int numDataBlocks = 6;
  // Passed to RaidCodecBuilder.loadDefaultFullBlocksCodecs() in
  // startUpCluster().
  private static int numRSParityBlocks = 3;
  // Shared configuration, populated by the static initializer below.
  private static Configuration conf;
  // Created in startUpCluster() from ("foo", {"bar"}).
  private static UnixUserGroupInformation USER1;
  // Suffix appended to "/user/facebook" to form the working directory in
  // raidFile(int, short, String).
  private static int id = 0;
  // Not referenced in the code shown here.
  private static Random rand = new Random();
  // Buffer of blockSize bytes; not referenced in the code shown here.
  private static byte[] bytes = new byte[(int)blockSize];
  // Build the shared configuration: set the block size and turn
  // "dfs.permissions" on.
  static {
    conf = new Configuration();
    conf.setLong("dfs.block.size", blockSize);
    conf.setBoolean("dfs.permissions", true);
  /**
   * Injection handler that reacts to
   * {@code InjectionEvent.DATANODE_PROCESS_RAID_TASK}. For every task in the
   * received {@link RaidTaskCommand} it walks the task's {@code toRaidIdxs},
   * picks the matching entries of {@code stripeBlocks}, and calls
   * {@code finalizeAndNotifyNamenode} on the datanode for each of them.
   * It is a non-static inner class because it reads the enclosing test's
   * {@code nn} field.
   */
  class FakeBlockGeneratorInjectionHandler extends InjectionHandler {
    /**
     * @param event the injection event being processed; only
     *              DATANODE_PROCESS_RAID_TASK is handled here
     * @param args  for that event, args[0] is cast to DataNode and args[1]
     *              to RaidTaskCommand
     * @throws IOException declared by the signature
     */
    public void _processEventIO(InjectionEventI event, Object... args) 
        throws IOException {
      if (event == InjectionEvent.DATANODE_PROCESS_RAID_TASK) {
        // The namespace id is taken from the enclosing test's name node.
        int namespaceId = nn.getNamespaceID();
        DataNode dn = (DataNode)args[0];
        RaidTaskCommand rtc = (RaidTaskCommand)args[1];
        RaidTask[] tasks = rtc.tasks;
        for (RaidTask rw: tasks) {
          // Generate all parity blocks locally instead of sending them remotely
          try {
            for (int idx = 0; idx < rw.toRaidIdxs.length; idx++) {
              // toRaidIdxs holds positions into stripeBlocks.
              Block blk = rw.stripeBlocks[rw.toRaidIdxs[idx]];
              BlockDataFile.Writer dataOut = 
                      namespaceId, blk, blk, false, false, 1, 512)).getBlockDataFile()
              // Finalize the block on this datanode and notify the name node.
              dn.finalizeAndNotifyNamenode(namespaceId, blk); 
          } catch (IOException ioe) {
  /**
   * Test set-up: loads the raid codecs into {@code conf}, starts a
   * {@link MiniDFSCluster}, caches its file system and name node, builds a
   * second file system from a copy of the configuration for {@code USER1},
   * and creates the {@code FakeBlockGeneratorInjectionHandler}.
   *
   * @throws IOException declared by the signature
   */
  public void startUpCluster() throws IOException {
    RaidCodecBuilder.loadDefaultFullBlocksCodecs(conf, numRSParityBlocks,
    // NOTE(review): the literal 4 is presumably the number of datanodes --
    // confirm against the MiniDFSCluster constructor.
    cluster = new MiniDFSCluster(conf, 4, true, null);
    assertNotNull("Failed Cluster Creation", cluster);
    dfs = (DistributedFileSystem) cluster.getFileSystem();
    assertNotNull("Failed to get FileSystem", dfs);
    nn = cluster.getNameNode();
    assertNotNull("Failed to get NameNode", nn);
    // Work on a copy so the shared conf is not modified.
    Configuration newConf = new Configuration(conf);
    USER1 = new UnixUserGroupInformation("foo", new String[] {"bar" });
        UnixUserGroupInformation.UGI_PROPERTY_NAME, USER1);
    userdfs = (DistributedFileSystem)FileSystem.get(newConf); // login as ugi
    InjectionHandler h = new FakeBlockGeneratorInjectionHandler();

  /**
   * Test tear-down: null-checks {@code dfs}, {@code userdfs} and
   * {@code cluster}, in that order.
   *
   * @throws IOException declared by the signature
   */
  public void shutDownCluster() throws IOException {
    if(dfs != null) {
    if (userdfs != null) {
    if(cluster != null) {
  /**
   * Calls {@code fs.raidFile(source, codecId, expectedSourceRepl)} and checks
   * any exception it throws against an expected message.
   *
   * @param fs file system on which raidFile is invoked
   * @param source path handed to raidFile
   * @param codecId codec id handed to raidFile
   * @param expectedSourceRepl replication value handed to raidFile
   * @param exceptionMessage substring the exception message must contain;
   *        when null, a caught exception is rethrown
   * @throws Exception the exception from raidFile when
   *         {@code exceptionMessage} is null
   */
  public static void raidFile(DistributedFileSystem fs, Path source, 
      String codecId, short expectedSourceRepl, String exceptionMessage)
          throws Exception {
    try {
      fs.raidFile(source, codecId, expectedSourceRepl);
    } catch (Exception e) {
      if (exceptionMessage == null) {
        // This is not expected
        throw e;
      // NOTE(review): e.getMessage() may be null, in which case the
      // contains() call below throws a NullPointerException instead of
      // failing the assertion.
      assertTrue("Exception " + e.getMessage() + " doesn't match " + 
                 exceptionMessage, e.getMessage().contains(exceptionMessage));
   * Test that files with different numbers of blocks can be XOR-raided:
   * 1. Two full stripes
   * 2. One and a half stripes
   * 3. Less than one stripe
   * @throws Exception
  public void testRaidXORFile() throws Exception {
    // Arguments are (numBlocks, expectedSourceRepl, codecId).
    raidFile(12, (short)2, "xor");
    raidFile(9, (short)3, "xor");
    raidFile(3, (short)2, "xor");
   * Test that files with different numbers of blocks can be RS-raided:
   * 1. Two full stripes
   * 2. One and a half stripes
   * 3. Less than one stripe
   * @throws Exception
  public void testRaidRSFile() throws Exception {
    // Arguments are (numBlocks, expectedSourceRepl, codecId).
    raidFile(12, (short)1, "rs");
    raidFile(9, (short)2, "rs");
    raidFile(3, (short)3, "rs");
  /**
   * Looks up inodes through
   * {@code nn.getNamesystem().dir.getExistingPathINodes(...)} and returns the
   * last element of the result, cast to {@link INodeFile}.
   *
   * @param nn name node whose namesystem directory is queried
   * @param source path of the file (presumably the lookup key -- TODO confirm)
   * @return the last inode of the lookup result as an INodeFile
   */
  private static INodeFile getINodeFile(NameNode nn, Path source) {
    INode[] inodes = nn.getNamesystem().dir.getExistingPathINodes(
    return (INodeFile)inodes[inodes.length - 1];
  /**
   * Re-reads the status and block locations of {@code source} and compares
   * them with the values captured before raiding: block size and length must
   * be unchanged, replication must equal {@code expectedSourceRepl}, every
   * block in {@code lbsBefore} must still be at the same position, and the
   * file content must still validate against {@code crc}.
   *
   * @param nn name node (not used by the checks shown here)
   * @param fileSys file system used for the lookups and content validation
   * @param statBefore file status captured before raiding
   * @param lbsBefore located blocks captured before raiding
   * @param source file being verified
   * @param fileLen length passed to getLocatedBlocks
   * @param crc checksum handed to DFSTestUtil.validateFile
   * @param expectedSourceRepl replication the file is expected to have now
   * @param codecId codec id (not used by the checks shown here)
   * @param checkParityBlocks not used by the checks shown here
   * @return the file status read after raiding
   */
  private static FileStatus verifyRaidFiles(NameNode nn, 
      DistributedFileSystem fileSys, FileStatus statBefore, 
      LocatedBlocks lbsBefore, Path source, long fileLen, long crc,
      short expectedSourceRepl, String codecId, boolean checkParityBlocks)
          throws Exception {
    FileStatus statAfter  = fileSys.getFileStatus(source);
    LocatedBlocks lbsAfter = fileSys.getLocatedBlocks(source, 0, fileLen);
    // Verify file stat
    assertEquals(statBefore.getBlockSize(), statAfter.getBlockSize());
    assertEquals(statBefore.getLen(), statAfter.getLen());
    assertEquals(expectedSourceRepl, statAfter.getReplication());
    // Verify getLocatedBlocks
    for (int i = 0; i < lbsBefore.getLocatedBlocks().size(); i++) {
      assertEquals(lbsBefore.get(i).getBlock(), lbsAfter.get(i).getBlock());
    // Verify file content
    assertTrue("File content matches", DFSTestUtil.validateFile(fileSys, 
        statBefore.getPath(), statBefore.getLen(), crc));
    return statAfter;
  /**
   * Fetches the {@link INodeFile} for {@code source} from the name node and
   * iterates over its blocks.
   * NOTE(review): presumably assigns a checksum to each block, as the method
   * name suggests -- confirm against the loop body.
   *
   * @param source path of the file whose blocks are visited
   */
  private void fillChecksums(Path source) {
    INodeFile file = getINodeFile(nn, source);
    BlockInfo[] bis = file.getBlocks();
    for (int i = 0; i < bis.length; i++) {
   * 1. Create a file
   * 2. Fill fake checksums in it
   * 3. Call raidFile to convert it into Raid format; the call returns false and
   *  the namenode starts scheduling the raiding
   * 4. Verify that the file can still be read
   * 5. Datanodes receive a RaidTaskCommand from the namenode and jump into
   *  FakeBlockGeneratorInjectionHandler; this handler creates a fake parity
   *  block in the datanode and notifies the namenode
   * 6. Keep calling raidFile until all parity blocks are generated; raidFile
   *  then succeeds in reducing the replication and returns true
  private void raidFile(int numBlocks, short expectedSourceRepl, String codecId)
      throws Exception {
    // NOTE(review): the log text says "testMergeFile" although this method is
    // raidFile -- looks like a copy/paste leftover.
    LOG.info("RUNNING testMergeFile numBlocks=" + numBlocks + 
        " sourceRepl=" + expectedSourceRepl + " codecId=" + codecId);
    long fileLen = blockSize * numBlocks;
    // The working directory name carries the static id as a suffix.
    Path dir = new Path ("/user/facebook" + id);
    Path source = new Path(dir, "1");
    // The file is created with replication 3; crc is kept for later content
    // validation in verifyRaidFiles().
    long crc = DFSTestUtil.createFile(dfs, source, fileLen, (short)3, 1);
    LOG.info("Fill fake checksums to the file");
    // Snapshot directory summary, file status and block list before raiding.
    ContentSummary cBefore = dfs.getContentSummary(dir);
    FileStatus statBefore = dfs.getFileStatus(source);
    LocatedBlocks lbsBefore = dfs.getLocatedBlocks(source, 0, fileLen);
    // now raid the file 
    boolean result = dfs.raidFile(source, codecId, expectedSourceRepl);
    // The first call is expected to return false.
    assertTrue("raidFile should return false", !result);
    ContentSummary cAfter = dfs.getContentSummary(dir);
    // verify directory stat
    assertEquals("File count doesn't change", cBefore.getFileCount(),
    // At this point replication must still equal the pre-raid value.
    verifyRaidFiles(nn, dfs, statBefore, lbsBefore, source, fileLen, crc,
        statBefore.getReplication(), codecId, false);
    // Query from offset blockSize: the result must be the original block list
    // without its first block.
    LocatedBlocks lbsAfter = dfs.getLocatedBlocks(source, blockSize, fileLen);
    assertEquals(numBlocks - 1, lbsAfter.getLocatedBlocks().size());
    for (int i = 0; i < numBlocks - 1; i++) {
      assertEquals(lbsBefore.get(i + 1).getBlock(), lbsAfter.get(i).getBlock());
    // Raiding the same file with the other codec must be rejected.
    String otherCodec = codecId.equals("xor") ? "rs" : "xor";
    raidFile(dfs, source, otherCodec, (short)2,
        "raidFile: couldn't raid a raided file");
    RaidCodec codec = RaidCodec.getCodec(codecId);
    // Retry raidFile until it returns true, giving up after 70 seconds.
    long startTime = System.currentTimeMillis();
    result = false;
    while (System.currentTimeMillis() - startTime < 70000 && !result) {
      result = dfs.raidFile(source, codecId, expectedSourceRepl);
    assertTrue("Finish raiding", result);
    // Now the replication must equal expectedSourceRepl.
    verifyRaidFiles(nn, dfs, statBefore, lbsBefore, source, fileLen, crc,
        expectedSourceRepl, codecId, true);
    // Setting replication below the codec's minSourceReplication must fail
    // with an IOException carrying the expected message.
    if (codec.minSourceReplication >= 2) {
      try {
        dfs.setReplication(source, (short)(codec.minSourceReplication-1));
        assertTrue("setReplication should fail", false);
      } catch (IOException ioe) {
        assertTrue("fail to setReplication", 
            ioe.getMessage().contains("Couldn't set replication smaller than "));
   * Test that raidFile fails with the expected exception for illegal cases
   * such as an empty file, a directory, hardlinked files, files without
   * checksums, files without permission...
   * @throws Exception
  public void testRaidFileIllegalCases() throws Exception {
    LOG.info("Running testRaidFileIllegalCases");
    int numBlocks = 6;
    long fileLen = blockSize * numBlocks;
    Path dir = new Path ("/user/facebook");
    Path source = new Path(dir, "1");
    Path dest = new Path(dir, "2");
    // A six-block source file and a zero-length file, both with replication 3.
    DFSTestUtil.createFile(dfs, source, fileLen, (short)3, 1);
    Path emptyFile = new Path("/empty");
    DFSTestUtil.createFile(dfs, emptyFile, 0L, (short)3, 1);
    // Unknown codec id.
    raidFile(dfs, source, "nonexist", (short)2, 
        "raidFile: codec nonexist doesn't exist");
    // Expected source replication rejected as too small for "xor".
    raidFile(dfs, source, "xor", (short)1,
        "raidFile: expectedSourceRepl is smaller than ");
    // Hand the file to user "foo" / group "bar", then drop the owner's write
    // bit (octal 0577 = r-x rwx rwx) so the call through userdfs is denied.
    dfs.setOwner(source, "foo", "bar");
    LOG.info("Disallow write on " + source);
    dfs.setPermission(source, new FsPermission((short)0577));
    raidFile(userdfs, source, "xor", (short)2, "Permission denied");
    LOG.info("Enable write on " + source);
    dfs.setPermission(source, new FsPermission((short)0777));
    LOG.info("Test different types of files");
    // Missing path, directory, empty file, and a file without checksums.
    raidFile(dfs, new Path("/nonexist"), "rs", (short)1,
        "raidFile: source file doesn't exist");
    raidFile(dfs, dir, "rs", (short)1, "raidFile: source file is a directory");
    raidFile(dfs, emptyFile, "rs", (short)1, "raidFile: source file is empty");
    raidFile(dfs, source, "rs", (short)1, 
        "raidFile: not all source blocks have checksums");

    // After hard-linking, both the link and the original must be rejected.
    LOG.info("Hardlink the file to " + dest);
    dfs.hardLink(source, dest);
    raidFile(dfs, dest, "rs", (short)1, "raidFile: cannot raid a hardlinked file");
    raidFile(dfs, source, "rs", (short)1, "raidFile: cannot raid a hardlinked file");