org.apache.hadoop.fs.FileStatus

Here are examples of the Java API org.apache.hadoop.fs.FileStatus, taken from open-source projects. By voting up, you can indicate which examples are most useful and appropriate.

1310 Examples
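
Before the project-specific examples, here is a minimal, self-contained sketch of how a FileStatus is typically obtained and inspected. It is only an illustration: it assumes a default Hadoop Configuration is on the classpath, and the directory /tmp/data is a placeholder path, not something from the examples below.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileStatusExample {

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);
        // listStatus returns one FileStatus per child of the directory.
        for (FileStatus status : fs.listStatus(new Path("/tmp/data"))) {
            System.out.printf("%s\tdir=%b\tlen=%d\tmodified=%d%n",
                    status.getPath(),              // fully qualified path
                    status.isDirectory(),          // directory vs. regular file
                    status.getLen(),               // length in bytes
                    status.getModificationTime()); // epoch millis
        }
    }
}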

19 Source : BlockMapBuilder.java
with Apache License 2.0
from zpochen

private ImmutableRangeMap<Long, BlockLocation> getBlockMap(FileStatus status) throws IOException {
    ImmutableRangeMap<Long, BlockLocation> blockMap = blockMapMap.get(status.getPath());
    if (blockMap == null) {
        blockMap = buildBlockMap(status);
    }
    return blockMap;
}

19 Source : BlockMapBuilder.java
with Apache License 2.0
from zpochen

public List<CompleteFileWork> generateFileWork(List<FileStatus> files, boolean blockify) throws IOException {
    List<TimedRunnable<List<CompleteFileWork>>> readers = Lists.newArrayList();
    for (FileStatus status : files) {
        readers.add(new BlockMapReader(status, blockify));
    }
    List<List<CompleteFileWork>> work = TimedRunnable.run("Get block maps", logger, readers, 16);
    List<CompleteFileWork> singleList = Lists.newArrayList();
    for (List<CompleteFileWork> innerWorkList : work) {
        singleList.addAll(innerWorkList);
    }
    return singleList;
}

19 Source : BlockMapBuilder.java
with Apache License 2.0
from zpochen

private boolean compressed(FileStatus fileStatus) {
    return codecFactory.getCodec(fileStatus.getPath()) != null;
}

19 Source : BlockMapBuilder.java
with Apache License 2.0
from zpochen

private ImmutableRangeMap<Long, BlockLocation> buildBlockMap(Path path) throws IOException {
    FileStatus status = fs.getFileStatus(path);
    return buildBlockMap(status);
}

19 Source : ParquetGroupScan.java
with Apache License 2.0
from zpochen

/**
 * Create and return a new file selection based on reading the metadata cache file.
 *
 * This function also initializes a few of ParquetGroupScan's fields as appropriate.
 *
 * @param selection initial file selection
 * @param metaFilePath metadata cache file path
 * @return file selection read from cache
 *
 * @throws IOException
 * @throws UserException when the updated selection is empty, this happens if the user selects an empty folder.
 */
private FileSelection expandSelectionFromMetadataCache(FileSelection selection, Path metaFilePath) throws IOException {
    // get the metadata for the root directory by reading the metadata file
    // parquetTableMetadata contains the metadata for all files in the selection root folder, but we need to make sure
    // we only select the files that are part of selection (by setting fileSet appropriately)
    // get (and set internal field) the metadata for the directory by reading the metadata file
    parquetTableMetadata = Metadata.readBlockMeta(fs, metaFilePath, metaContext, formatConfig);
    if (ignoreExpandingSelection(parquetTableMetadata)) {
        return selection;
    }
    if (formatConfig.areCorruptDatesAutoCorrected()) {
        ParquetReaderUtility.correctDatesInMetadataCache(this.parquetTableMetadata);
    }
    List<FileStatus> fileStatuses = selection.getStatuses(fs);
    if (fileSet == null) {
        fileSet = Sets.newHashSet();
    }
    final Path first = fileStatuses.get(0).getPath();
    if (fileStatuses.size() == 1 && selection.getSelectionRoot().equals(first.toString())) {
        // we are selecting all files from selection root. Expand the file list from the cache
        for (Metadata.ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
            fileSet.add(file.getPath());
        }
    } else if (selection.isExpandedPartial() && !selection.hadWildcard() && cacheFileRoot != null) {
        if (selection.wasAllPartitionsPruned()) {
            // if all partitions were previously pruned, we only need to read 1 file (for the schema)
            fileSet.add(this.parquetTableMetadata.getFiles().get(0).getPath());
        } else {
            // we are here if the selection is in the expanded_partial state (i.e it has directories).  We get the
            // list of files from the metadata cache file that is present in the cacheFileRoot directory and populate
            // the fileSet. However, this is *not* the final list of files that will be scanned in execution since the
            // second phase of partition pruning will apply on the files and modify the file selection appropriately.
            for (Metadata.ParquetFileMetadata file : this.parquetTableMetadata.getFiles()) {
                fileSet.add(file.getPath());
            }
        }
    } else {
        // we need to expand the files from fileStatuses
        for (FileStatus status : fileStatuses) {
            Path cacheFileRoot = status.getPath();
            if (status.isDirectory()) {
                // TODO [DRILL-4496] read the metadata cache files in parallel
                final Path metaPath = new Path(cacheFileRoot, Metadata.METADATA_FILENAME);
                final Metadata.ParquetTableMetadataBase metadata = Metadata.readBlockMeta(fs, metaPath, metaContext, formatConfig);
                if (ignoreExpandingSelection(metadata)) {
                    return selection;
                }
                for (Metadata.ParquetFileMetadata file : metadata.getFiles()) {
                    fileSet.add(file.getPath());
                }
            } else {
                final Path path = Path.getPathWithoutSchemeAndAuthority(cacheFileRoot);
                fileSet.add(path.toString());
            }
        }
    }
    if (fileSet.isEmpty()) {
        // no files were found, most likely we tried to query some empty sub folders
        throw UserException.validationError().message("The table you tried to query is empty").build(logger);
    }
    List<String> fileNames = Lists.newArrayList(fileSet);
    // when creating the file selection, set the selection root without the URI prefix
    // The reason is that the file names above have been created in the form
    // /a/b/c.parquet and the format of the selection root must match that of the file names
    // otherwise downstream operations such as partition pruning can break.
    final Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(new Path(selection.getSelectionRoot()));
    this.selectionRoot = metaRootPath.toString();
    // Use the FileSelection constructor directly here instead of the FileSelection.create() method
    // because create() changes the root to include the scheme and authority; In future, if create()
    // is the preferred way to instantiate a file selection, we may need to do something different...
    // WARNING: file statuses and file names are inconsistent
    FileSelection newSelection = new FileSelection(selection.getStatuses(fs), fileNames, metaRootPath.toString(), cacheFileRoot, selection.wasAllPartitionsPruned());
    newSelection.setExpandedFully();
    newSelection.setMetaContext(metaContext);
    return newSelection;
}

19 Source : FooterGatherer.java
with Apache License 2.0
from zpochen

private static void checkMagicBytes(FileStatus status, byte[] data, int offset) throws IOException {
    for (int i = 0, v = offset; i < MAGIC_LENGTH; i++, v++) {
        if (ParquetFileWriter.MAGIC[i] != data[v]) {
            byte[] magic = ArrayUtils.subarray(data, offset, offset + MAGIC_LENGTH);
            throw new IOException(status.getPath() + " is not a Parquet file. expected magic number at tail " + Arrays.toString(ParquetFileWriter.MAGIC) + " but found " + Arrays.toString(magic));
        }
    }
}

19 Source : FileSelection.java
with Apache License 2.0
from zpochen

public List<String> getFiles() {
    if (files == null) {
        final List<String> newFiles = Lists.newArrayList();
        for (final FileStatus status : statuses) {
            newFiles.add(status.getPath().toString());
        }
        files = newFiles;
    }
    return files;
}

19 Source : FileSelection.java
with Apache License 2.0
from zpochen

private static String commonPath(final List<FileStatus> statuses) {
    if (statuses == null || statuses.isEmpty()) {
        return "";
    }
    final List<String> files = Lists.newArrayList();
    for (final FileStatus status : statuses) {
        files.add(status.getPath().toString());
    }
    return commonPathForFiles(files);
}

19 Source : FileSelection.java
with Apache License 2.0
from zpochen

public boolean containsDirectories(DrillFileSystem fs) throws IOException {
    if (dirStatus == StatusType.NOT_CHECKED) {
        dirStatus = StatusType.NO_DIRS;
        for (final FileStatus status : getStatuses(fs)) {
            if (status.isDirectory()) {
                dirStatus = StatusType.HAS_DIRS;
                break;
            }
        }
    }
    return dirStatus == StatusType.HAS_DIRS;
}

19 Source : DrillFileSystem.java
with Apache License 2.0
from zpochen

@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
    return underlyingFs.getFileBlockLocations(file, start, len);
}

19 Source : HdfsHelper.java
with Apache License 2.0
from wgzhao

/**
 * Lists all files under the given directory whose names start with fileName.
 *
 * @param dir the directory to scan
 * @param fileName the file or directory suffix to match; if empty, no pattern matching is applied
 * @return Path[]
 */
public Path[] hdfsDirList(String dir, String fileName) {
    Path path = new Path(dir);
    Path[] files;
    try {
        FileStatus[] status = fileSystem.listStatus(path);
        files = new Path[status.length];
        for (int i = 0; i < status.length; i++) {
            files[i] = status[i].getPath();
        }
    } catch (IOException e) {
        String message = String.format("Network I/O error occurred while listing files under directory [%s]; please check your network connection!", dir);
        LOG.error(message);
        throw DataXException.asDataXException(HdfsWriterErrorCode.CONNECT_HDFS_IO_ERROR, e);
    }
    return files;
}

19 Source : HdfsHelper.java
with Apache License 2.0
from wgzhao

/**
 * Lists the files under the given directory.
 *
 * @param dir the directory to search
 * @return the full paths of the files,
 * eg:hdfs://10.101.204.12:9000/user/hive/warehouse/writer.db/text/test.textfile
 */
public String[] hdfsDirList(String dir) {
    Path path = new Path(dir);
    String[] files;
    try {
        FileStatus[] status = fileSystem.listStatus(path);
        files = new String[status.length];
        for (int i = 0; i < status.length; i++) {
            files[i] = status[i].getPath().toString();
        }
    } catch (IOException e) {
        String message = String.format("Network I/O error occurred while listing files under directory [%s]; please check your network connection!", dir);
        LOG.error(message);
        throw DataXException.asDataXException(HdfsWriterErrorCode.CONNECT_HDFS_IO_ERROR, e);
    }
    return files;
}

19 Source : DFSUtil.java
with Apache License 2.0
from wgzhao

private void addSourceFileIfNotEmpty(FileStatus f) {
    if (f.isFile()) {
        String filePath = f.getPath().toString();
        if (f.getLen() > 0) {
            addSourceFileByType(filePath);
        } else {
            LOG.warn("File [{}] has length 0 and will be skipped!", filePath);
        }
    }
}

19 Source : HDFSFileSystem.java
with Apache License 2.0
from WeBankFinTech

@Override
public List<FsPath> list(FsPath path) throws IOException {
    FileStatus[] stat = fs.listStatus(new Path(checkHDFSPath(path.getPath())));
    List<FsPath> fsPaths = new ArrayList<FsPath>();
    for (FileStatus f : stat) {
        fsPaths.add(fillStorageFile(new FsPath(f.getPath().toUri().getPath()), f));
    }
    return fsPaths;
}

19 Source : HDFSFileSystem.java
with Apache License 2.0
from WeBankFinTech

@Override
public FsPathListWithError listPathWithError(FsPath path) throws IOException {
    FileStatus[] stat = fs.listStatus(new Path(checkHDFSPath(path.getPath())));
    List<FsPath> fsPaths = new ArrayList<FsPath>();
    for (FileStatus f : stat) {
        fsPaths.add(fillStorageFile(new FsPath(StorageUtils.HDFS_SCHEMA() + f.getPath().toUri().getPath()), f));
    }
    if (fsPaths.isEmpty()) {
        return null;
    }
    return new FsPathListWithError(fsPaths, "");
}

19 Source : ListHDFS.java
with Apache License 2.0
from wangrenlei

/**
 * Determines which of the given FileStatus objects describe a file that should be listed.
 *
 * @param statuses the eligible FileStatus objects that we could potentially list
 * @return a Set containing only those FileStatus objects that we want to list
 */
Set<FileStatus> determineListable(final Set<FileStatus> statuses) {
    final long minTimestamp = this.latestTimestampListed;
    final TreeMap<Long, List<FileStatus>> orderedEntries = new TreeMap<>();
    // Build a sorted map to determine the latest possible entries
    for (final FileStatus status : statuses) {
        if (status.getPath().getName().endsWith("_COPYING_")) {
            continue;
        }
        final long entityTimestamp = status.getModificationTime();
        if (entityTimestamp > latestTimestampListed) {
            latestTimestampListed = entityTimestamp;
        }
        // New entries are all those that occur at or after the associated timestamp
        final boolean newEntry = entityTimestamp >= minTimestamp && entityTimestamp > latestTimestampEmitted;
        if (newEntry) {
            List<FileStatus> entitiesForTimestamp = orderedEntries.get(status.getModificationTime());
            if (entitiesForTimestamp == null) {
                entitiesForTimestamp = new ArrayList<FileStatus>();
                orderedEntries.put(status.getModificationTime(), entitiesForTimestamp);
            }
            entitiesForTimestamp.add(status);
        }
    }
    final Set<FileStatus> toList = new HashSet<>();
    if (orderedEntries.size() > 0) {
        long latestListingTimestamp = orderedEntries.lastKey();
        // If the last listing time is equal to the newest entries previously seen,
        // another iteration has occurred without new files and special handling is needed to avoid starvation
        if (latestListingTimestamp == minTimestamp) {
            // We are done if the latest listing timestamp is equal to the last processed time,
            // meaning we handled those items originally passed over
            if (latestListingTimestamp == latestTimestampEmitted) {
                return Collections.emptySet();
            }
        } else {
            // Otherwise, newest entries are held back one cycle to avoid missing data from writes occurring exactly when the listing is being performed
            orderedEntries.remove(latestListingTimestamp);
        }
        for (List<FileStatus> timestampEntities : orderedEntries.values()) {
            for (FileStatus status : timestampEntities) {
                toList.add(status);
            }
        }
    }
    return toList;
}

19 Source : TestTrinoS3FileSystem.java
with Apache License 2.0
from trinodb

@Test
public void testSkipHadoopFolderMarkerObjectsEnabled() throws Exception {
    Configuration config = new Configuration(false);
    try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) {
        MockAmazonS3 s3 = new MockAmazonS3();
        s3.setHasHadoopFolderMarkerObjects(true);
        fs.initialize(new URI("s3n://test-bucket/"), config);
        fs.setS3Client(s3);
        FileStatus[] statuses = fs.listStatus(new Path("s3n://test-bucket/test"));
        assertEquals(statuses.length, 2);
    }
}

19 Source : TestTrinoS3FileSystem.java
with Apache License 2.0
from trinodb

private static void assertSkipGlacierObjects(boolean skipGlacierObjects) throws Exception {
    Configuration config = new Configuration(false);
    config.set(S3_SKIP_GLACIER_OBJECTS, String.valueOf(skipGlacierObjects));
    try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) {
        MockAmazonS3 s3 = new MockAmazonS3();
        s3.setHasGlacierObjects(true);
        fs.initialize(new URI("s3n://test-bucket/"), config);
        fs.setS3Client(s3);
        FileStatus[] statuses = fs.listStatus(new Path("s3n://test-bucket/test"));
        assertEquals(statuses.length, skipGlacierObjects ? 2 : 4);
    }
}

19 Source : TestTrinoS3FileSystem.java
with Apache License 2.0
from trinodb

@Test
public void testEmptyDirectory() throws Exception {
    try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) {
        MockAmazonS3 s3 = new MockAmazonS3() {

            @Override
            public ObjectMetadata getObjectMetadata(GetObjectMetadataRequest getObjectMetadataRequest) {
                if (getObjectMetadataRequest.getKey().equals("empty-dir/")) {
                    ObjectMetadata objectMetadata = new ObjectMetadata();
                    objectMetadata.setContentType(S3_DIRECTORY_OBJECT_CONTENT_TYPE);
                    return objectMetadata;
                }
                return super.getObjectMetadata(getObjectMetadataRequest);
            }
        };
        fs.initialize(new URI("s3n://test-bucket/"), new Configuration(false));
        fs.setS3Client(s3);
        FileStatus fileStatus = fs.getFileStatus(new Path("s3n://test-bucket/empty-dir/"));
        assertTrue(fileStatus.isDirectory());
        fileStatus = fs.getFileStatus(new Path("s3n://test-bucket/empty-dir"));
        assertTrue(fileStatus.isDirectory());
    }
}

19 Source : SyncPartitionMetadataProcedure.java
with Apache License 2.0
from trinodb

private static boolean isValidPartitionPath(FileStatus file, Column column, boolean caseSensitive) {
    String path = file.getPath().getName();
    if (!caseSensitive) {
        path = path.toLowerCase(ENGLISH);
    }
    String prefix = column.getName() + '=';
    return file.isDirectory() && path.startsWith(prefix);
}

19 Source : FileSegmentPool.java
with Apache License 2.0
from shunfei

private void refreshSegments(boolean force) {
    try {
        if (force || mustRefresh) {
            doRefreshSegments();
            return;
        }
        if (!fileSystem.exists(updateFilePath)) {
            return;
        }
        FileStatus fileStatus = fileSystem.getFileStatus(updateFilePath);
        long modifyTime = fileStatus != null ? fileStatus.getModificationTime() : 0;
        boolean modifyTimeOk = lastRefreshTime < modifyTime;
        if (modifyTimeOk) {
            if (doRefreshSegments()) {
                lastRefreshTime = modifyTime;
            }
        }
    } catch (Throwable e) {
        if (e instanceof ClosedByInterruptException) {
            logger.warn("Load segments of table [{}] failed by ClosedByInterruptException.", tableName);
            return;
        }
        String msg = e.getMessage();
        if (msg != null && Strings.equals(msg.trim(), "Filesystem closed")) {
            logger.warn("Load segments of table [{}] failed by Filesystem closed.", tableName);
            return;
        }
        logger.error("", e);
        logger.error("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<");
        logger.error("Load segments of table [{}] failed, system in inconsistent state", tableName);
        logger.error(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
    }
}

19 Source : FileSegmentPool.java
with Apache License 2.0
from shunfei

private String getSegmentName(FileStatus fileStatus) {
    if (!fileStatus.isFile()) {
        return null;
    }
    Path path = fileStatus.getPath();
    if (!SegmentHelper.checkSegmentByPath(path)) {
        return null;
    }
    return StringUtils.removeStart(path.toString(), segmentRootPathStr);
}

19 Source : FileSystemTableModel.java
with GNU General Public License v3.0
from sdadas

private void doFetchRows(int rows, boolean initial) {
    if (iterator == null)
        return;
    int startSize = this.children.size();
    int idx = 0;
    while (iterator.hasNext() && idx < rows) {
        FileStatus file = iterator.next();
        children.add(new FileItem(file));
        idx++;
    }
    if (!iterator.hasNext()) {
        this.hasMoreRows = false;
    }
    if (initial) {
        fireTableDataChanged();
        return;
    }
    int endSize = this.children.size();
    if (endSize != startSize) {
        fireTableRowsInserted(startSize, endSize - 1);
    }
}

19 Source : FileItem.java
with GNU General Public License v3.0
from sdadas

/**
 * @author Sławomir Dadas
 */
public class FileItem {

    private final FileStatus status;

    private String name;

    public static FileItem parent() {
        FileItem res = new FileItem();
        res.name = "..";
        return res;
    }

    public FileItem(FileStatus status) {
        this.status = status;
    }

    private FileItem() {
        this.status = null;
    }

    public FileStatus getStatus() {
        return status;
    }

    public String getName() {
        return status != null ? status.getPath().getName() : name;
    }

    public FileSize getSize() {
        return status != null && status.isFile() ? new FileSize(status.getLen()) : null;
    }

    public String getOwner() {
        return status != null ? status.getOwner() : null;
    }

    public String getGroup() {
        return status != null ? status.getGroup() : null;
    }

    public Long getBlockSize() {
        return status != null ? status.getBlockSize() : null;
    }

    public FileTimestamp getModificationTime() {
        return status != null ? new FileTimestamp(status.getModificationTime()) : null;
    }

    public FileTimestamp getAccessTime() {
        return status != null ? new FileTimestamp(status.getAccessTime()) : null;
    }

    public String getPermissions() {
        return status != null ? status.getPermission().toString() : null;
    }

    public Icon getIcon() {
        return IconFactory.getIcon(getIconName());
    }

    public boolean isFile() {
        return status != null && status.isFile();
    }

    public Path getPath() {
        return status != null ? status.getPath() : null;
    }

    public boolean isDirectory() {
        return status != null && status.isDirectory();
    }

    private String getIconName() {
        if (status == null)
            return "folder-open";
        if (status.isDirectory()) {
            if (StringUtils.endsWith(getName(), ".har")) {
                return "har";
            } else {
                return "folder";
            }
        } else {
            return "file";
        }
    }

    public static class FileTimestamp implements Comparable<FileTimestamp> {

        private final long timestamp;

        public FileTimestamp(long timestamp) {
            this.timestamp = timestamp;
        }

        public long getTimestamp() {
            return timestamp;
        }

        @Override
        public String toString() {
            return DateFormatUtils.format(timestamp, "dd-MM-yyyy HH:mm:ss");
        }

        @Override
        public int compareTo(FileTimestamp other) {
            return Longs.compare(this.timestamp, other.timestamp);
        }
    }

    public static class FileSize implements Comparable<FileSize> {

        private final long size;

        public FileSize(long size) {
            this.size = size;
        }

        public long getSize() {
            return size;
        }

        @Override
        public String toString() {
            return FileSystemUtils.formatByteCount(size);
        }

        @Override
        public int compareTo(FileSize other) {
            return Longs.compare(this.size, other.size);
        }
    }
}

19 Source : CleanupDialog.java
with GNU General Public License v3.0
from sdadas

private void addModelPath(String stringPath, CheckBoxListModel model, boolean checked, boolean ignoreErrors) {
    Path path = new Path(stringPath);
    try {
        if (this.connection.exists(path)) {
            FileStatus status = this.connection.status(path);
            if (status.isFile()) {
                error(ignoreErrors, stringPath + " is not a directory");
            } else {
                if (!paths.contains(stringPath)) {
                    model.addCheckBoxElement(path, checked);
                    paths.add(stringPath);
                }
            }
        } else {
            error(ignoreErrors, "Directory does not exist");
        }
    } catch (FsException ex) {
        error(ignoreErrors, ex.getMessage());
    }
}

19 Source : Utilities.java
with Apache License 2.0
from Qihoo360

public static List<Path> convertStatusToPath(List<FileStatus> fileStatuses) {
    List<Path> paths = new ArrayList<>();
    if (fileStatuses != null) {
        for (FileStatus fileStatus : fileStatuses) {
            paths.add(fileStatus.getPath());
        }
    }
    return paths;
}

19 Source : ParquetFileReader.java
with Apache License 2.0
from provectus

/**
 * read all the footers of the files provided
 * (not using summary files)
 * @param configuration the conf to access the File System
 * @param partFiles the files to read
 * @param skipRowGroups to skip the rowGroup info
 * @return the footers
 * @throws IOException if there is an exception while reading footers
 * @deprecated will be removed in 2.0.0;
 *             use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
 */
@Deprecated
public static List<Footer> readAllFootersInParallel(final Configuration configuration, List<FileStatus> partFiles, final boolean skipRowGroups) throws IOException {
    List<Callable<Footer>> footers = new ArrayList<Callable<Footer>>();
    for (final FileStatus currentFile : partFiles) {
        footers.add(new Callable<Footer>() {

            @Override
            public Footer call() throws Exception {
                try {
                    return new Footer(currentFile.getPath(), readFooter(configuration, currentFile, filter(skipRowGroups)));
                } catch (IOException e) {
                    throw new IOException("Could not read footer for file " + currentFile, e);
                }
            }
        });
    }
    try {
        return runAllInParallel(configuration.getInt(PARQUET_READ_PARALLELISM, 5), footers);
    } catch (ExecutionException e) {
        throw new IOException("Could not read footer: " + e.getMessage(), e.getCause());
    }
}

19 Source : ParquetFileReader.java
with Apache License 2.0
from provectus

/**
 * Specifically reads a given summary file
 * @param configuration a configuration
 * @param summaryStatus file status for a summary file
 * @return the metadata translated for each file
 * @throws IOException if an exception is thrown while reading the summary file
 * @deprecated metadata files are not recommended and will be removed in 2.0.0
 */
@Deprecated
public static List<Footer> readSummaryFile(Configuration configuration, FileStatus summaryStatus) throws IOException {
    final Path parent = summaryStatus.getPath().getParent();
    ParquetMetadata mergedFooters = readFooter(configuration, summaryStatus, filter(false));
    return footersFromSummaryFile(parent, mergedFooters);
}

19 Source : ParquetFileReader.java
with Apache License 2.0
from provectus

/**
 * Read the footers of all the files under that path (recursively)
 * not using summary files.
 *
 * @param configuration a configuration
 * @param fileStatus a file status to recursively list
 * @param skipRowGroups whether to skip reading row group metadata
 * @return a list of footers
 * @throws IOException if an exception is thrown while reading the footers
 * @deprecated will be removed in 2.0.0;
 *             use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
 */
@Deprecated
public static List<Footer> readAllFootersInParallel(Configuration configuration, FileStatus fileStatus, boolean skipRowGroups) throws IOException {
    List<FileStatus> statuses = listFiles(configuration, fileStatus);
    return readAllFootersInParallel(configuration, statuses, skipRowGroups);
}

19 Source : ParquetFileReader.java
with Apache License 2.0
from provectus

/**
 * Read the footers of all the files under that path (recursively)
 * using summary files if possible
 * @param configuration the configuration to access the FS
 * @param pathStatus the root dir
 * @param skipRowGroups whether to skip reading row group metadata
 * @return all the footers
 * @throws IOException if an exception is thrown while reading the footers
 * @deprecated will be removed in 2.0.0;
 *             use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
 */
@Deprecated
public static List<Footer> readFooters(Configuration configuration, FileStatus pathStatus, boolean skipRowGroups) throws IOException {
    List<FileStatus> files = listFiles(configuration, pathStatus);
    return readAllFootersInParallelUsingSummaryFiles(configuration, files, skipRowGroups);
}

19 Source : ParquetFileReader.java
with Apache License 2.0
from provectus

/**
 * Read the footers of all the files under that path (recursively)
 * not using summary files.
 * rowGroups are not skipped
 * @param configuration the configuration to access the FS
 * @param fileStatus the root dir
 * @return all the footers
 * @throws IOException if an exception is thrown while reading the footers
 * @deprecated will be removed in 2.0.0;
 *             use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
 */
@Deprecated
public static List<Footer> readAllFootersInParallel(Configuration configuration, FileStatus fileStatus) throws IOException {
    return readAllFootersInParallel(configuration, fileStatus, false);
}

19 Source : ParquetFileReader.java
with Apache License 2.0
from provectus

/**
 * @param configuration a configuration
 * @param file the Parquet File
 * @return the metadata with row groups.
 * @throws IOException  if an error occurs while reading the file
 * @deprecated will be removed in 2.0.0;
 *             use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
 */
@Deprecated
public static final ParquetMetadata readFooter(Configuration configuration, FileStatus file) throws IOException {
    return readFooter(configuration, file, NO_FILTER);
}

19 Source : ParquetFileReader.java
with Apache License 2.0
from provectus

/**
 * this always returns the row groups
 * @param configuration a configuration
 * @param pathStatus a file status to read footers from
 * @return a list of footers
 * @throws IOException if an exception is thrown while reading the footers
 * @deprecated will be removed in 2.0.0;
 *             use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
 */
@Deprecated
public static List<Footer> readFooters(Configuration configuration, FileStatus pathStatus) throws IOException {
    return readFooters(configuration, pathStatus, false);
}

19 Source : ParquetFileReader.java
with Apache License 2.0
from provectus

/**
 * for files provided, check if there's a summary file.
 * If a summary file is found it is used otherwise the file footer is used.
 * @param configuration the hadoop conf to connect to the file system;
 * @param partFiles the part files to read
 * @param skipRowGroups to skipRowGroups in the footers
 * @return the footers for those files using the summary file if possible.
 * @throws IOException if there is an exception while reading footers
 * @deprecated metadata files are not recommended and will be removed in 2.0.0
 */
@Deprecated
public static List<Footer> readAllFootersInParallelUsingSummaryFiles(final Configuration configuration, final Collection<FileStatus> partFiles, final boolean skipRowGroups) throws IOException {
    // figure out list of all parents to part files
    Set<Path> parents = new HashSet<Path>();
    for (FileStatus part : partFiles) {
        parents.add(part.getPath().getParent());
    }
    // read corresponding summary files if they exist
    List<Callable<Map<Path, Footer>>> summaries = new ArrayList<Callable<Map<Path, Footer>>>();
    for (final Path path : parents) {
        summaries.add(new Callable<Map<Path, Footer>>() {

            @Override
            public Map<Path, Footer> call() throws Exception {
                ParquetMetadata mergedMetadata = readSummaryMetadata(configuration, path, skipRowGroups);
                if (mergedMetadata != null) {
                    final List<Footer> footers;
                    if (skipRowGroups) {
                        footers = new ArrayList<Footer>();
                        for (FileStatus f : partFiles) {
                            footers.add(new Footer(f.getPath(), mergedMetadata));
                        }
                    } else {
                        footers = footersFromSummaryFile(path, mergedMetadata);
                    }
                    Map<Path, Footer> map = new HashMap<Path, Footer>();
                    for (Footer footer : footers) {
                        // the folder may have been moved
                        footer = new Footer(new Path(path, footer.getFile().getName()), footer.getParquetMetadata());
                        map.put(footer.getFile(), footer);
                    }
                    return map;
                } else {
                    return Collections.emptyMap();
                }
            }
        });
    }
    Map<Path, Footer> cache = new HashMap<Path, Footer>();
    try {
        List<Map<Path, Footer>> footersFromSummaries = runAllInParallel(configuration.getInt(PARQUET_READ_PARALLELISM, 5), summaries);
        for (Map<Path, Footer> footers : footersFromSummaries) {
            cache.putAll(footers);
        }
    } catch (ExecutionException e) {
        throw new IOException("Error reading summaries", e);
    }
    // keep only footers for files actually requested and read file footer if not found in summaries
    List<Footer> result = new ArrayList<Footer>(partFiles.size());
    List<FileStatus> toRead = new ArrayList<FileStatus>();
    for (FileStatus part : partFiles) {
        Footer f = cache.get(part.getPath());
        if (f != null) {
            result.add(f);
        } else {
            toRead.add(part);
        }
    }
    if (toRead.size() > 0) {
        // read the footers of the files that did not have a summary file
        LOG.info("reading another {} footers", toRead.size());
        result.addAll(readAllFootersInParallel(configuration, toRead, skipRowGroups));
    }
    return result;
}

19 Source : HDFSStorage.java
with Apache License 2.0
from pravega

/**
 * Gets the FileStatus representing the segment.
 *
 * @param segmentName      The name of the Segment to retrieve the status for.
 * @param enforceExistence If true, it will throw a FileNotFoundException if no files are found, otherwise null is returned.
 * @return FileStatus of the HDFS file.
 * @throws IOException If an exception occurred.
 */
private FileStatus findStatusForSegment(String segmentName, boolean enforceExistence) throws IOException {
    FileStatus[] rawFiles = findAllRaw(segmentName);
    if (rawFiles == null || rawFiles.length == 0) {
        if (enforceExistence) {
            throw HDFSExceptionHelpers.segmentNotExistsException(segmentName);
        }
        return null;
    }
    val result = Arrays.stream(rawFiles).sorted(this::compareFileStatus).collect(Collectors.toList());
    return result.get(result.size() - 1);
}

19 Source : HDFSStorage.java
with Apache License 2.0
from pravega

/**
 * Makes the file represented by the given FileStatus read-only.
 *
 * @param file The FileDescriptor of the file to set. If this method returns true, this FileDescriptor will
 *             also be updated to indicate the file is read-only.
 * @return True if the file was not read-only before (and it is now), or false if the file was already read-only.
 * @throws IOException If an exception occurred.
 */
private boolean makeReadOnly(FileStatus file) throws IOException {
    if (isReadOnly(file)) {
        return false;
    }
    this.fileSystem.setPermission(file.getPath(), READONLY_PERMISSION);
    log.debug("MakeReadOnly '{}'.", file.getPath());
    return true;
}

19 Source : HDFSStorage.java
with Apache License 2.0
from pravega

// endregion
// Region HDFS helper methods.
/**
 * Gets an array (not necessarily ordered) of FileStatus objects currently available for the given Segment.
 * These must be in the format specified by NAME_FORMAT (see EXAMPLE_NAME_FORMAT).
 */
private FileStatus[] findAllRaw(String segmentName) throws IOException {
    assert segmentName != null && segmentName.length() > 0 : "segmentName must be non-null and non-empty";
    String pattern = String.format(NAME_FORMAT, getPathPrefix(segmentName), SUFFIX_GLOB_REGEX);
    FileStatus[] files = this.fileSystem.globStatus(new Path(pattern));
    if (files.length > 1) {
        throw new IllegalArgumentException("More than one file");
    }
    return files;
}

19 Source : HDFSStorage.java
with Apache License 2.0
from pravega

private long getEpoch(FileStatus status) throws FileNameFormatException {
    return getEpochFromPath(status.getPath());
}

19 Source : HDFSStorage.java
with Apache License 2.0
from pravega

private int compareFileStatus(FileStatus f1, FileStatus f2) {
    try {
        return Long.compare(getEpoch(f1), getEpoch(f2));
    } catch (FileNameFormatException e) {
        throw new IllegalStateException(e);
    }
}

19 Source : HDFSStorage.java
with Apache License 2.0
from pravega

@Override
public SegmentHandle openWrite(String streamSegmentName) throws StreamSegmentException {
    ensureInitializedAndNotClosed();
    long traceId = LoggerHelpers.traceEnter(log, "openWrite", streamSegmentName);
    long fencedCount = 0;
    do {
        try {
            FileStatus fileStatus = findStatusForSegment(streamSegmentName, true);
            if (!isSealed(fileStatus.getPath())) {
                if (getEpochFromPath(fileStatus.getPath()) > this.epoch) {
                    throw new StorageNotPrimaryException(streamSegmentName);
                }
                Path targetPath = getFilePath(streamSegmentName, this.epoch);
                if (!targetPath.equals(fileStatus.getPath())) {
                    try {
                        this.fileSystem.rename(fileStatus.getPath(), targetPath);
                    } catch (FileNotFoundException e) {
                        // This happens when more than one host is trying to fence and only one of the hosts goes through.
                        // Retry the rename so that host with the highest epoch gets access.
                        // In the worst case, the current owner of the segment will win this race after a number of attempts
                        // equal to the number of Segment Stores in the race. The high bound for this number of attempts
                        // is the total number of Segment Store instances in the cluster.
                        // It is safe to retry for MAX_EPOCH times as we are sure that the loop will never go that long.
                        log.warn("Race in fencing. More than two hosts trying to own the segment. Retrying");
                        fencedCount++;
                        continue;
                    }
                }
            }
            // Ensure that file exists
            findStatusForSegment(streamSegmentName, true);
            return HDFSSegmentHandle.write(streamSegmentName);
        } catch (IOException e) {
            throw HDFSExceptionHelpers.convertException(streamSegmentName, e);
        }
    // Looping for the maximum possible number.
    } while (fencedCount <= this.epoch);
    LoggerHelpers.traceLeave(log, "openWrite", traceId, epoch);
    throw new StorageNotPrimaryException("Not able to fence out other writers.");
}

19 Source : HDFSStorage.java
with Apache License 2.0
from pravega

/**
 * Determines whether the given FileStatus indicates the file is read-only.
 *
 * @param fs The FileStatus to check.
 * @return True or false.
 */
private boolean isReadOnly(FileStatus fs) {
    return fs.getPermission().getUserAction() == FsAction.READ;
}

19 Source : HDFSStorage.java
with Apache License 2.0
from pravega

private boolean makeWrite(FileStatus file) throws IOException {
    this.fileSystem.setPermission(file.getPath(), READWRITE_PERMISSION);
    log.debug("MakeReadOnly '{}'.", file.getPath());
    return true;
}

19 Source : HDFSStorage.java
with Apache License 2.0
from pravega

@Override
public boolean exists(String streamSegmentName) {
    ensureInitializedAndNotClosed();
    long traceId = LoggerHelpers.traceEnter(log, "exists", streamSegmentName);
    FileStatus status = null;
    try {
        status = findStatusForSegment(streamSegmentName, false);
    } catch (IOException e) {
        // HDFS could not find the file. Returning false.
        log.warn("Got exception checking if file exists", e);
    }
    boolean exists = status != null;
    LoggerHelpers.traceLeave(log, "exists", traceId, streamSegmentName, exists);
    return exists;
}

19 Source : HDFSStorage.java
with Apache License 2.0
from pravega

@Override
public SegmentHandle create(String streamSegmentName) throws StreamSegmentException {
    // Creates a file with the lowest possible epoch (0).
    // There is a possible race during create where more than one segmentstore may be trying to create a streamsegment.
    // If one create is delayed, it is possible that other segmentstore will be able to create the file with
    // epoch (0) and then rename it using its epoch (segment_<epoch>).
    // 
    // To fix this, the create code checks whether a file with higher epoch exists.
    // If it does, it tries to remove the created file, and throws SegmentExistsException.
    ensureInitializedAndNotClosed();
    long traceId = LoggerHelpers.traceEnter(log, "create", streamSegmentName);
    // Create the segment using our own epoch.
    FileStatus[] status = null;
    try {
        status = findAllRaw(streamSegmentName);
    } catch (IOException e) {
        throw HDFSExceptionHelpers.convertException(streamSegmentName, e);
    }
    if (status != null && status.length > 0) {
        // Segment already exists; don't bother with anything else.
        throw HDFSExceptionHelpers.convertException(streamSegmentName, HDFSExceptionHelpers.segmentExistsException(streamSegmentName));
    }
    // Create the file for the segment with epoch 0.
    Path fullPath = getFilePath(streamSegmentName, 0);
    try {
        // Create the file, and then immediately close the returned OutputStream, so that HDFS may properly create the file.
        this.fileSystem.create(fullPath, READWRITE_PERMISSION, false, 0, this.config.getReplication(), this.config.getBlockSize(), null).close();
        HDFSMetrics.CREATE_COUNT.inc();
        log.debug("Created '{}'.", fullPath);
    } catch (IOException e) {
        throw HDFSExceptionHelpers.convertException(streamSegmentName, e);
    }
    // If there is a race during creation, delete the file with epoch 0 and throw exception.
    // It is safe to delete the file as a file with a higher epoch already exists. Any new operations will always
    // work on the file with an epoch higher than 0.
    try {
        status = findAllRaw(streamSegmentName);
        if (status != null && status.length > 1) {
            this.fileSystem.delete(fullPath, true);
            throw new StreamSegmentExistsException(streamSegmentName);
        }
    } catch (IOException e) {
        log.warn("Exception while deleting a file with epoch 0.", e);
    }
    LoggerHelpers.traceLeave(log, "create", traceId, streamSegmentName);
    // return handle
    return HDFSSegmentHandle.write(streamSegmentName);
}

19 Source : HDFSChunkStorage.java
with Apache License 2.0
from pravega

@Override
protected ChunkInfo doGetInfo(String chunkName) throws ChunkStorageException {
    ensureInitializedAndNotClosed();
    try {
        FileStatus status = fileSystem.getFileStatus(getFilePath(chunkName));
        return ChunkInfo.builder().name(chunkName).length(status.getLen()).build();
    } catch (IOException e) {
        throw convertException(chunkName, "doGetInfo", e);
    }
}

19 Source : HDFSWalker.java
with BSD 3-Clause "New" or "Revised" License
from osmlab

public static HDFSFile convert(final FileStatus status) {
    try {
        return new HDFSFile(status.getPath());
    } catch (final IOException oops) {
        throw new CoreException("Error when converting FileStatus to HDFSFile", oops);
    }
}

19 Source : TestPrestoS3FileSystem.java
with Apache License 2.0
from openlookeng

@Test
public void testEmptyDirectory() throws Exception {
    try (PrestoS3FileSystem fs = new PrestoS3FileSystem()) {
        MockAmazonS3 s3 = new MockAmazonS3() {

            @Override
            public ObjectMetadata getObjectMetadata(GetObjectMetadataRequest getObjectMetadataRequest) {
                if (getObjectMetadataRequest.getKey().equals("empty-dir/")) {
                    ObjectMetadata objectMetadata = new ObjectMetadata();
                    objectMetadata.setContentType(S3_DIRECTORY_OBJECT_CONTENT_TYPE);
                    return objectMetadata;
                }
                return super.getObjectMetadata(getObjectMetadataRequest);
            }
        };
        fs.initialize(new URI("s3n://test-bucket/"), new Configuration());
        fs.setS3Client(s3);
        FileStatus fileStatus = fs.getFileStatus(new Path("s3n://test-bucket/empty-dir/"));
        assertTrue(fileStatus.isDirectory());
        fileStatus = fs.getFileStatus(new Path("s3n://test-bucket/empty-dir"));
        assertTrue(fileStatus.isDirectory());
    }
}

19 Source : TestPrestoS3FileSystem.java
with Apache License 2.0
from openlookeng

private static void assertSkipGlacierObjects(boolean skipGlacierObjects) throws Exception {
    Configuration config = new Configuration();
    config.set(S3_SKIP_GLACIER_OBJECTS, String.valueOf(skipGlacierObjects));
    try (PrestoS3FileSystem fs = new PrestoS3FileSystem()) {
        MockAmazonS3 s3 = new MockAmazonS3();
        s3.setHasGlacierObjects(true);
        fs.initialize(new URI("s3n://test-bucket/"), config);
        fs.setS3Client(s3);
        FileStatus[] statuses = fs.listStatus(new Path("s3n://test-bucket/test"));
        assertEquals(statuses.length, skipGlacierObjects ? 1 : 2);
    }
}

19 Source : HCFSFuseFileSystem.java
with Apache License 2.0
from opendataio

/**
 * Reads the contents of a directory.
 *
 * @param path The FS path of the directory
 * @param buff The FUSE buffer to fill
 * @param filter FUSE filter
 * @param offset Ignored in fuse
 * @param fi FileInfo data structure kept by FUSE
 * @return 0 on success, a negative value on error
 */
@Override
public int readdir(String path, Pointer buff, FuseFillDir filter, @off_t long offset, FuseFileInfo fi) {
    final Path turi = mPathResolverCache.getUnchecked(path);
    LOG.trace("readdir({}) [target: {}]", path, turi);
    try {
        final FileStatus[] ls = mFileSystem.listStatus(turi);
        // standard . and .. entries
        filter.apply(buff, ".", null, 0);
        filter.apply(buff, "..", null, 0);
        for (FileStatus file : ls) {
            filter.apply(buff, file.getPath().getName(), null, 0);
        }
    } catch (FileNotFoundException | InvalidPathException e) {
        LOG.debug("Failed to read directory {}, path does not exist or is invalid", path);
        return -ErrorCodes.ENOENT();
    } catch (Throwable t) {
        LOG.error("Failed to read directory {}", path, t);
        return -1;
    }
    return 0;
}

19 Source : FileSystemRMStateStore.java
with Apache License 2.0
from NJUJYB

private void loadRMDTSecretManagerState(RMState rmState) throws Exception {
    checkAndResumeUpdateOperation(rmDTSecretManagerRoot);
    FileStatus[] childNodes = fs.listStatus(rmDTSecretManagerRoot);
    for (FileStatus childNodeStatus : childNodes) {
        assert childNodeStatus.isFile();
        String childNodeName = childNodeStatus.getPath().getName();
        if (checkAndRemovePartialRecord(childNodeStatus.getPath())) {
            continue;
        }
        if (childNodeName.startsWith(DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX)) {
            rmState.rmSecretManagerState.dtSequenceNumber = Integer.parseInt(childNodeName.split("_")[1]);
            continue;
        }
        Path childNodePath = getNodePath(rmDTSecretManagerRoot, childNodeName);
        byte[] childData = readFile(childNodePath, childNodeStatus.getLen());
        ByteArrayInputStream is = new ByteArrayInputStream(childData);
        DataInputStream fsIn = new DataInputStream(is);
        if (childNodeName.startsWith(DELEGATION_KEY_PREFIX)) {
            DelegationKey key = new DelegationKey();
            key.readFields(fsIn);
            rmState.rmSecretManagerState.masterKeyState.add(key);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Loaded delegation key: keyId=" + key.getKeyId() + ", expirationDate=" + key.getExpiryDate());
            }
        } else if (childNodeName.startsWith(DELEGATION_TOKEN_PREFIX)) {
            RMDelegationTokenIdentifierData identifierData = new RMDelegationTokenIdentifierData();
            identifierData.readFields(fsIn);
            RMDelegationTokenIdentifier identifier = identifierData.getTokenIdentifier();
            long renewDate = identifierData.getRenewDate();
            rmState.rmSecretManagerState.delegationTokenState.put(identifier, renewDate);
            if (LOG.isDebugEnabled()) {
                LOG.debug("Loaded RMDelegationTokenIdentifier: " + identifier + " renewDate=" + renewDate);
            }
        } else {
            LOG.warn("Unknown file for recovering RMDelegationTokenSecretManager");
        }
        fsIn.close();
    }
}
