Here are examples of the Java API org.apache.hadoop.fs.FileStatus, taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
1310 Examples
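Before the examples, a minimal sketch of how a FileStatus is typically obtained and inspected. It is not taken from any of the projects below, and the path is hypothetical:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileStatusDemo {
    public static void main(String[] args) throws Exception {
        // Obtain a FileSystem from the default configuration.
        FileSystem fs = FileSystem.get(new Configuration());
        // Hypothetical path; replace with a real file.
        FileStatus status = fs.getFileStatus(new Path("/tmp/example.txt"));
        // FileStatus exposes the file's metadata.
        System.out.println("path:      " + status.getPath());
        System.out.println("directory: " + status.isDirectory());
        System.out.println("length:    " + status.getLen());
        System.out.println("modified:  " + status.getModificationTime());
        System.out.println("owner:     " + status.getOwner());
        System.out.println("perms:     " + status.getPermission());
    }
}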
19
Source : BlockMapBuilder.java
with Apache License 2.0
from zpochen
private ImmutableRangeMap<Long, BlockLocation> getBlockMap(FileStatus status) throws IOException {
ImmutableRangeMap<Long, BlockLocation> blockMap = blockMapMap.get(status.getPath());
if (blockMap == null) {
blockMap = buildBlockMap(status);
}
return blockMap;
}
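Note that buildBlockMap's body is not shown in this excerpt. A hedged sketch of how such a map could plausibly be assembled, assuming Guava's ImmutableRangeMap/Range and the FileSystem field fs suggested by the surrounding code (the method name below is hypothetical):
// Assumed imports: com.google.common.collect.ImmutableRangeMap,
// com.google.common.collect.Range, org.apache.hadoop.fs.BlockLocation.
private ImmutableRangeMap<Long, BlockLocation> buildBlockMapSketch(FileStatus status) throws IOException {
    ImmutableRangeMap.Builder<Long, BlockLocation> builder = ImmutableRangeMap.builder();
    for (BlockLocation block : fs.getFileBlockLocations(status, 0, status.getLen())) {
        // Key each block by its half-open byte range [offset, offset + length).
        builder.put(Range.closedOpen(block.getOffset(), block.getOffset() + block.getLength()), block);
    }
    return builder.build();
}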
19
Source : BlockMapBuilder.java
with Apache License 2.0
from zpochen
public List<CompleteFileWork> generateFileWork(List<FileStatus> files, boolean blockify) throws IOException {
List<TimedRunnable<List<CompleteFileWork>>> readers = Lists.newArrayList();
for (FileStatus status : files) {
readers.add(new BlockMapReader(status, blockify));
}
List<List<CompleteFileWork>> work = TimedRunnable.run("Get block maps", logger, readers, 16);
List<CompleteFileWork> singleList = Lists.newArrayList();
for (List<CompleteFileWork> innerWorkList : work) {
singleList.addAll(innerWorkList);
}
return singleList;
}
19
Source : BlockMapBuilder.java
with Apache License 2.0
from zpochen
private boolean compressed(FileStatus fileStatus) {
return codecFactory.getCodec(fileStatus.getPath()) != null;
}
19
Source : BlockMapBuilder.java
with Apache License 2.0
from zpochen
private ImmutableRangeMap<Long, BlockLocation> buildBlockMap(Path path) throws IOException {
FileStatus status = fs.getFileStatus(path);
return buildBlockMap(status);
}
19
Source : ParquetGroupScan.java
with Apache License 2.0
from zpochen
/**
* Create and return a new file selection based on reading the metadata cache file.
*
* This function also initializes a few of ParquetGroupScan's fields as appropriate.
*
* @param selection initial file selection
* @param metaFilePath metadata cache file path
* @return file selection read from cache
*
* @throws IOException
* @throws UserException when the updated selection is empty; this happens if the user selects an empty folder.
*/
private FileSelection expandSelectionFromMetadataCache(FileSelection selection, Path metaFilePath) throws IOException {
// get the metadata for the root directory by reading the metadata file
// parquetTableMetadata contains the metadata for all files in the selection root folder, but we need to make sure
// we only select the files that are part of selection (by setting fileSet appropriately)
// get (and set internal field) the metadata for the directory by reading the metadata file
parquetTableMetadata = Metadata.readBlockMeta(fs, metaFilePath, metaContext, formatConfig);
if (ignoreExpandingSelection(parquetTableMetadata)) {
return selection;
}
if (formatConfig.areCorruptDatesAutoCorrected()) {
ParquetReaderUtility.correctDatesInMetadataCache(this.parquetTableMetadata);
}
List<FileStatus> fileStatuses = selection.getStatuses(fs);
if (fileSet == null) {
fileSet = Sets.newHashSet();
}
final Path first = fileStatuses.get(0).getPath();
if (fileStatuses.size() == 1 && selection.getSelectionRoot().equals(first.toString())) {
// we are selecting all files from selection root. Expand the file list from the cache
for (Metadata.ParquetFileMetadata file : parquetTableMetadata.getFiles()) {
fileSet.add(file.getPath());
}
} else if (selection.isExpandedPartial() && !selection.hadWildcard() && cacheFileRoot != null) {
if (selection.wasAllPartitionsPruned()) {
// if all partitions were previously pruned, we only need to read 1 file (for the schema)
fileSet.add(this.parquetTableMetadata.getFiles().get(0).getPath());
} else {
// we are here if the selection is in the expanded_partial state (i.e. it has directories). We get the
// list of files from the metadata cache file that is present in the cacheFileRoot directory and populate
// the fileSet. However, this is *not* the final list of files that will be scanned in execution since the
// second phase of partition pruning will apply on the files and modify the file selection appropriately.
for (Metadata.ParquetFileMetadata file : this.parquetTableMetadata.getFiles()) {
fileSet.add(file.getPath());
}
}
} else {
// we need to expand the files from fileStatuses
for (FileStatus status : fileStatuses) {
Path cacheFileRoot = status.getPath();
if (status.isDirectory()) {
// TODO [DRILL-4496] read the metadata cache files in parallel
final Path metaPath = new Path(cacheFileRoot, Metadata.METADATA_FILENAME);
final Metadata.ParquetTableMetadataBase metadata = Metadata.readBlockMeta(fs, metaPath, metaContext, formatConfig);
if (ignoreExpandingSelection(metadata)) {
return selection;
}
for (Metadata.ParquetFileMetadata file : metadata.getFiles()) {
fileSet.add(file.getPath());
}
} else {
final Path path = Path.getPathWithoutSchemeAndAuthority(cacheFileRoot);
fileSet.add(path.toString());
}
}
}
if (fileSet.isEmpty()) {
// no files were found, most likely we tried to query some empty sub folders
throw UserException.validationError().message("The table you tried to query is empty").build(logger);
}
List<String> fileNames = Lists.newArrayList(fileSet);
// when creating the file selection, set the selection root without the URI prefix
// The reason is that the file names above have been created in the form
// /a/b/c.parquet and the format of the selection root must match that of the file names
// otherwise downstream operations such as partition pruning can break.
final Path metaRootPath = Path.getPathWithoutSchemeAndAuthority(new Path(selection.getSelectionRoot()));
this.selectionRoot = metaRootPath.toString();
// Use the FileSelection constructor directly here instead of the FileSelection.create() method
// because create() changes the root to include the scheme and authority; In future, if create()
// is the preferred way to instantiate a file selection, we may need to do something different...
// WARNING: file statuses and file names are inconsistent
FileSelection newSelection = new FileSelection(selection.getStatuses(fs), fileNames, metaRootPath.toString(), cacheFileRoot, selection.wasAllPartitionsPruned());
newSelection.setExpandedFully();
newSelection.setMetaContext(metaContext);
return newSelection;
}
19
Source : FooterGatherer.java
with Apache License 2.0
from zpochen
private static void checkMagicBytes(FileStatus status, byte[] data, int offset) throws IOException {
for (int i = 0, v = offset; i < MAGIC_LENGTH; i++, v++) {
if (ParquetFileWriter.MAGIC[i] != data[v]) {
byte[] magic = ArrayUtils.subarray(data, offset, offset + MAGIC_LENGTH);
throw new IOException(status.getPath() + " is not a Parquet file. expected magic number at tail " + Arrays.toString(ParquetFileWriter.MAGIC) + " but found " + Arrays.toString(magic));
}
}
}
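checkMagicBytes validates Parquet's 4-byte "PAR1" marker at the tail of the file. A hedged sketch of how the tail bytes it inspects might be fetched (readTailBytes is a hypothetical helper, not part of FooterGatherer; MAGIC_LENGTH is assumed to be ParquetFileWriter.MAGIC.length):
// Assumed imports: org.apache.hadoop.fs.FileSystem, org.apache.hadoop.fs.FSDataInputStream.
private static byte[] readTailBytes(FileSystem fs, FileStatus status) throws IOException {
    byte[] tail = new byte[MAGIC_LENGTH];
    try (FSDataInputStream in = fs.open(status.getPath())) {
        // The magic occupies the last MAGIC_LENGTH bytes of the file.
        in.readFully(status.getLen() - MAGIC_LENGTH, tail);
    }
    return tail;
}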
19
Source : FileSelection.java
with Apache License 2.0
from zpochen
public List<String> getFiles() {
if (files == null) {
final List<String> newFiles = Lists.newArrayList();
for (final FileStatus status : statuses) {
newFiles.add(status.getPath().toString());
}
files = newFiles;
}
return files;
}
19
Source : FileSelection.java
with Apache License 2.0
from zpochen
private static String commonPath(final List<FileStatus> statuses) {
if (statuses == null || statuses.isEmpty()) {
return "";
}
final List<String> files = Lists.newArrayList();
for (final FileStatus status : statuses) {
files.add(status.getPath().toString());
}
return commonPathForFiles(files);
}
19
Source : FileSelection.java
with Apache License 2.0
from zpochen
public boolean containsDirectories(DrillFileSystem fs) throws IOException {
if (dirStatus == StatusType.NOT_CHECKED) {
dirStatus = StatusType.NO_DIRS;
for (final FileStatus status : getStatuses(fs)) {
if (status.isDirectory()) {
dirStatus = StatusType.HAS_DIRS;
break;
}
}
}
return dirStatus == StatusType.HAS_DIRS;
}
19
Source : DrillFileSystem.java
with Apache License 2.0
from zpochen
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start, long len) throws IOException {
return underlyingFs.getFileBlockLocations(file, start, len);
}
19
Source : HdfsHelper.java
with Apache License 2.0
from wgzhao
/**
* Lists all files under the given directory whose names begin with fileName.
*
* @param dir the directory to scan
* @param fileName String the file or directory suffix to match; if empty, no pattern matching is performed
* @return Path[]
*/
public Path[] hdfsDirList(String dir, String fileName) {
Path path = new Path(dir);
Path[] files;
try {
FileStatus[] status = fileSystem.listStatus(path);
files = new Path[status.length];
for (int i = 0; i < status.length; i++) {
files[i] = status[i].getPath();
}
} catch (IOException e) {
String message = String.format("获取目录[%s]下文件列表时发生网络IO异常,请检查您的网络是否正常!", dir);
LOG.error(message);
throw DataXException.asDataXException(HdfsWriterErrorCode.CONNECT_HDFS_IO_ERROR, e);
}
return files;
}
19
Source : HdfsHelper.java
with Apache License 2.0
from wgzhao
/**
* Gets the list of files under the given directory.
*
* @param dir the directory to search
* @return the full paths of the files,
* e.g. hdfs://10.101.204.12:9000/user/hive/warehouse/writer.db/text/test.textfile
*/
public String[] hdfsDirList(String dir) {
Path path = new Path(dir);
String[] files;
try {
FileStatus[] status = fileSystem.listStatus(path);
files = new String[status.length];
for (int i = 0; i < status.length; i++) {
files[i] = status[i].getPath().toString();
}
} catch (IOException e) {
String message = String.format("获取目录[%s]文件列表时发生网络IO异常,请检查您的网络是否正常!", dir);
LOG.error(message);
throw DataXException.asDataXException(HdfsWriterErrorCode.CONNECT_HDFS_IO_ERROR, e);
}
return files;
}
19
Source : DFSUtil.java
with Apache License 2.0
from wgzhao
private void addSourceFileIfNotEmpty(FileStatus f) {
if (f.isFile()) {
String filePath = f.getPath().toString();
if (f.getLen() > 0) {
addSourceFileByType(filePath);
} else {
LOG.warn("文件[{}]长度为0,将会跳过不作处理!", filePath);
}
}
}
19
Source : HDFSFileSystem.java
with Apache License 2.0
from WeBankFinTech
@Override
public List<FsPath> list(FsPath path) throws IOException {
FileStatus[] stat = fs.listStatus(new Path(checkHDFSPath(path.getPath())));
List<FsPath> fsPaths = new ArrayList<FsPath>();
for (FileStatus f : stat) {
fsPaths.add(fillStorageFile(new FsPath(f.getPath().toUri().getPath()), f));
}
return fsPaths;
}
19
Source : HDFSFileSystem.java
with Apache License 2.0
from WeBankFinTech
@Override
public FsPathListWithError listPathWithError(FsPath path) throws IOException {
FileStatus[] stat = fs.listStatus(new Path(checkHDFSPath(path.getPath())));
List<FsPath> fsPaths = new ArrayList<FsPath>();
for (FileStatus f : stat) {
fsPaths.add(fillStorageFile(new FsPath(StorageUtils.HDFS_SCHEMA() + f.getPath().toUri().getPath()), f));
}
if (fsPaths.isEmpty()) {
return null;
}
return new FsPathListWithError(fsPaths, "");
}
19
Source : ListHDFS.java
with Apache License 2.0
from wangrenlei
/**
* Determines which of the given FileStatus objects describe files that should be listed.
*
* @param statuses the eligible FileStatus objects that we could potentially list
* @return a Set containing only those FileStatus objects that we want to list
*/
Set<FileStatus> determineListable(final Set<FileStatus> statuses) {
final long minTimestamp = this.latestTimestampListed;
final TreeMap<Long, List<FileStatus>> orderedEntries = new TreeMap<>();
// Build a sorted map to determine the latest possible entries
for (final FileStatus status : statuses) {
if (status.getPath().getName().endsWith("_COPYING_")) {
continue;
}
final long entityTimestamp = status.getModificationTime();
if (entityTimestamp > latestTimestampListed) {
latestTimestampListed = entityTimestamp;
}
// New entries are all those that occur at or after the associated timestamp
final boolean newEntry = entityTimestamp >= minTimestamp && entityTimestamp > latestTimestampEmitted;
if (newEntry) {
List<FileStatus> entitiesForTimestamp = orderedEntries.get(status.getModificationTime());
if (entitiesForTimestamp == null) {
entitiesForTimestamp = new ArrayList<FileStatus>();
orderedEntries.put(status.getModificationTime(), entitiesForTimestamp);
}
entitiesForTimestamp.add(status);
}
}
final Set<FileStatus> toList = new HashSet<>();
if (orderedEntries.size() > 0) {
long latestListingTimestamp = orderedEntries.lastKey();
// If the last listing time is equal to the newest entries previously seen,
// another iteration has occurred without new files and special handling is needed to avoid starvation
if (latestListingTimestamp == minTimestamp) {
// We are done if the latest listing timestamp is equal to the last processed time,
// meaning we handled those items originally passed over
if (latestListingTimestamp == latestTimestampEmitted) {
return Collections.emptySet();
}
} else {
// Otherwise, the newest entries are held back one cycle so that writes occurring exactly while the listing is performed do not cause missed data
orderedEntries.remove(latestListingTimestamp);
}
for (List<FileStatus> timestampEntities : orderedEntries.values()) {
for (FileStatus status : timestampEntities) {
toList.add(status);
}
}
}
return toList;
}
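A worked illustration with hypothetical timestamps: if the previous run recorded latestTimestampListed = 100 and the current listing contains files modified at 100, 150, and 200, the buckets at 100 and 150 are emitted while the newest bucket (200) is held back one cycle, in case writes at that timestamp are still in flight. On the next iteration the 200 bucket equals minTimestamp and, since it has not yet been emitted, is listed then; the latestTimestampEmitted check is what keeps the held-back bucket from being starved or double-listed.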
19
Source : TestTrinoS3FileSystem.java
with Apache License 2.0
from trinodb
@Test
public void testSkipHadoopFolderMarkerObjectsEnabled() throws Exception {
Configuration config = new Configuration(false);
try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) {
MockAmazonS3 s3 = new MockAmazonS3();
s3.setHasHadoopFolderMarkerObjects(true);
fs.initialize(new URI("s3n://test-bucket/"), config);
fs.setS3Client(s3);
FileStatus[] statuses = fs.listStatus(new Path("s3n://test-bucket/test"));
assertEquals(statuses.length, 2);
}
}
19
Source : TestTrinoS3FileSystem.java
with Apache License 2.0
from trinodb
private static void assertSkipGlacierObjects(boolean skipGlacierObjects) throws Exception {
Configuration config = new Configuration(false);
config.set(S3_SKIP_GLACIER_OBJECTS, String.valueOf(skipGlacierObjects));
try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) {
MockAmazonS3 s3 = new MockAmazonS3();
s3.setHasGlacierObjects(true);
fs.initialize(new URI("s3n://test-bucket/"), config);
fs.setS3Client(s3);
FileStatus[] statuses = fs.listStatus(new Path("s3n://test-bucket/test"));
assertEquals(statuses.length, skipGlacierObjects ? 2 : 4);
}
}
19
Source : TestTrinoS3FileSystem.java
with Apache License 2.0
from trinodb
@Test
public void testEmptyDirectory() throws Exception {
try (TrinoS3FileSystem fs = new TrinoS3FileSystem()) {
MockAmazonS3 s3 = new MockAmazonS3() {
@Override
public ObjectMetadata getObjectMetadata(GetObjectMetadataRequest getObjectMetadataRequest) {
if (getObjectMetadataRequest.getKey().equals("empty-dir/")) {
ObjectMetadata objectMetadata = new ObjectMetadata();
objectMetadata.setContentType(S3_DIRECTORY_OBJECT_CONTENT_TYPE);
return objectMetadata;
}
return super.getObjectMetadata(getObjectMetadataRequest);
}
};
fs.initialize(new URI("s3n://test-bucket/"), new Configuration(false));
fs.setS3Client(s3);
FileStatus fileStatus = fs.getFileStatus(new Path("s3n://test-bucket/empty-dir/"));
assertTrue(fileStatus.isDirectory());
fileStatus = fs.getFileStatus(new Path("s3n://test-bucket/empty-dir"));
assertTrue(fileStatus.isDirectory());
}
}
19
Source : SyncPartitionMetadataProcedure.java
with Apache License 2.0
from trinodb
private static boolean isValidPartitionPath(FileStatus file, Column column, boolean caseSensitive) {
String path = file.getPath().getName();
if (!caseSensitive) {
path = path.toLowerCase(ENGLISH);
}
String prefix = column.getName() + '=';
return file.isDirectory() && path.startsWith(prefix);
}
19
Source : FileSegmentPool.java
with Apache License 2.0
from shunfei
private void refreshSegments(boolean force) {
try {
if (force || mustRefresh) {
doRefreshSegments();
return;
}
if (!fileSystem.exists(updateFilePath)) {
return;
}
FileStatus fileStatus = fileSystem.getFileStatus(updateFilePath);
long modifyTime = fileStatus != null ? fileStatus.getModificationTime() : 0;
boolean modifyTimeOk = lastRefreshTime < modifyTime;
if (modifyTimeOk) {
if (doRefreshSegments()) {
lastRefreshTime = modifyTime;
}
}
} catch (Throwable e) {
if (e instanceof ClosedByInterruptException) {
logger.warn("Load segments of table [{}] failed by ClosedByInterruptException.", tableName);
return;
}
String msg = e.getMessage();
if (msg != null && Strings.equals(msg.trim(), "Filesystem closed")) {
logger.warn("Load segments of table [{}] failed by Filesystem closed.", tableName);
return;
}
logger.error("", e);
logger.error("<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<<");
logger.error("Load segments of table [{}] failed, system in inconsistent state", tableName);
logger.error(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
}
}
19
Source : FileSegmentPool.java
with Apache License 2.0
from shunfei
private String getSegmentName(FileStatus fileStatus) {
if (!fileStatus.isFile()) {
return null;
}
Path path = fileStatus.getPath();
if (!SegmentHelper.checkSegmentByPath(path)) {
return null;
}
return StringUtils.removeStart(path.toString(), segmentRootPathStr);
}
19
Source : FileSystemTableModel.java
with GNU General Public License v3.0
from sdadas
private void doFetchRows(int rows, boolean initial) {
if (iterator == null)
return;
int startSize = this.children.size();
int idx = 0;
while (iterator.hasNext() && idx < rows) {
FileStatus file = iterator.next();
children.add(new FileItem(file));
idx++;
}
if (!iterator.hasNext()) {
this.hasMoreRows = false;
}
if (initial) {
fireTableDataChanged();
return;
}
int endSize = this.children.size();
if (endSize != startSize) {
fireTableRowsInserted(startSize, endSize - 1);
}
}
19
Source : FileItem.java
with GNU General Public License v3.0
from sdadas
/**
* @author Sławomir Dadas
*/
public class FileItem {
private final FileStatus status;
private String name;
public static FileItem parent() {
FileItem res = new FileItem();
res.name = "..";
return res;
}
public FileItem(FileStatus status) {
this.status = status;
}
private FileItem() {
this.status = null;
}
public FileStatus getStatus() {
return status;
}
public String getName() {
return status != null ? status.getPath().getName() : name;
}
public FileSize getSize() {
return status != null && status.isFile() ? new FileSize(status.getLen()) : null;
}
public String getOwner() {
return status != null ? status.getOwner() : null;
}
public String getGroup() {
return status != null ? status.getGroup() : null;
}
public Long getBlockSize() {
return status != null ? status.getBlockSize() : null;
}
public FileTimestamp getModificationTime() {
return status != null ? new FileTimestamp(status.getModificationTime()) : null;
}
public FileTimestamp getAccessTime() {
return status != null ? new FileTimestamp(status.getAccessTime()) : null;
}
public String getPermissions() {
return status != null ? status.getPermission().toString() : null;
}
public Icon getIcon() {
return IconFactory.getIcon(getIconName());
}
public boolean isFile() {
return status != null && status.isFile();
}
public Path getPath() {
return status != null ? status.getPath() : null;
}
public boolean isDirectory() {
return status != null && status.isDirectory();
}
private String getIconName() {
if (status == null)
return "folder-open";
if (status.isDirectory()) {
if (StringUtils.endsWith(getName(), ".har")) {
return "har";
} else {
return "folder";
}
} else {
return "file";
}
}
public static class FileTimestamp implements Comparable<FileTimestamp> {
private final long timestamp;
public FileTimestamp(long timestamp) {
this.timestamp = timestamp;
}
public long getTimestamp() {
return timestamp;
}
@Override
public String toString() {
return DateFormatUtils.format(timestamp, "dd-MM-yyyy HH:mm:ss");
}
@Override
public int compareTo(FileTimestamp other) {
return Longs.compare(this.timestamp, other.timestamp);
}
}
public static class FileSize implements Comparable<FileSize> {
private final long size;
public FileSize(long size) {
this.size = size;
}
public long getSize() {
return size;
}
@Override
public String toString() {
return FileSystemUtils.formatByteCount(size);
}
@Override
public int compareTo(FileSize other) {
return Longs.compare(this.size, other.size);
}
}
}
19
Source : CleanupDialog.java
with GNU General Public License v3.0
from sdadas
private void addModelPath(String stringPath, CheckBoxListModel model, boolean checked, boolean ignoreErrors) {
Path path = new Path(stringPath);
try {
if (this.connection.exists(path)) {
FileStatus status = this.connection.status(path);
if (status.isFile()) {
error(ignoreErrors, stringPath + " is not a directory");
} else {
if (!paths.contains(stringPath)) {
model.addCheckBoxElement(path, checked);
paths.add(stringPath);
}
}
} else {
error(ignoreErrors, "Directory does not exist");
}
} catch (FsException ex) {
error(ignoreErrors, ex.getMessage());
}
}
19
Source : Utilities.java
with Apache License 2.0
from Qihoo360
public static List<Path> convertStatusToPath(List<FileStatus> fileStatuses) {
List<Path> paths = new ArrayList<>();
if (fileStatuses != null) {
for (FileStatus fileStatus : fileStatuses) {
paths.add(fileStatus.getPath());
}
}
return paths;
}
19
Source : ParquetFileReader.java
with Apache License 2.0
from provectus
/**
* read all the footers of the files provided
* (not using summary files)
* @param configuration the conf to access the File System
* @param partFiles the files to read
* @param skipRowGroups to skip the rowGroup info
* @return the footers
* @throws IOException if there is an exception while reading footers
* @deprecated will be removed in 2.0.0;
* use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
*/
@Deprecated
public static List<Footer> readAllFootersInParallel(final Configuration configuration, List<FileStatus> partFiles, final boolean skipRowGroups) throws IOException {
List<Callable<Footer>> footers = new ArrayList<Callable<Footer>>();
for (final FileStatus currentFile : partFiles) {
footers.add(new Callable<Footer>() {
@Override
public Footer call() throws Exception {
try {
return new Footer(currentFile.getPath(), readFooter(configuration, currentFile, filter(skipRowGroups)));
} catch (IOException e) {
throw new IOException("Could not read footer for file " + currentFile, e);
}
}
});
}
try {
return runAllInParallel(configuration.getInt(PARQUET_READ_PARALLELISM, 5), footers);
} catch (ExecutionException e) {
throw new IOException("Could not read footer: " + e.getMessage(), e.getCause());
}
}
19
Source : ParquetFileReader.java
with Apache License 2.0
from provectus
/**
* Specifically reads a given summary file
* @param configuration a configuration
* @param summaryStatus file status for a summary file
* @return the metadata translated for each file
* @throws IOException if an exception is thrown while reading the summary file
* @deprecated metadata files are not recommended and will be removed in 2.0.0
*/
@Deprecated
public static List<Footer> readSummaryFile(Configuration configuration, FileStatus summaryStatus) throws IOException {
final Path parent = summaryStatus.getPath().getParent();
ParquetMetadata mergedFooters = readFooter(configuration, summaryStatus, filter(false));
return footersFromSummaryFile(parent, mergedFooters);
}
19
Source : ParquetFileReader.java
with Apache License 2.0
from provectus
/**
* Read the footers of all the files under that path (recursively)
* not using summary files.
*
* @param configuration a configuration
* @param fileStatus a file status to recursively list
* @param skipRowGroups whether to skip reading row group metadata
* @return a list of footers
* @throws IOException if an exception is thrown while reading the footers
* @deprecated will be removed in 2.0.0;
* use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
*/
@Deprecated
public static List<Footer> readAllFootersInParallel(Configuration configuration, FileStatus fileStatus, boolean skipRowGroups) throws IOException {
List<FileStatus> statuses = listFiles(configuration, fileStatus);
return readAllFootersInParallel(configuration, statuses, skipRowGroups);
}
19
Source : ParquetFileReader.java
with Apache License 2.0
from provectus
/**
* Read the footers of all the files under that path (recursively)
* using summary files if possible
* @param configuration the configuration to access the FS
* @param pathStatus the root dir
* @param skipRowGroups whether to skip reading row group metadata
* @return all the footers
* @throws IOException if an exception is thrown while reading the footers
* @deprecated will be removed in 2.0.0;
* use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
*/
@Deprecated
public static List<Footer> readFooters(Configuration configuration, FileStatus pathStatus, boolean skipRowGroups) throws IOException {
List<FileStatus> files = listFiles(configuration, pathStatus);
return readAllFootersInParallelUsingSummaryFiles(configuration, files, skipRowGroups);
}
19
Source : ParquetFileReader.java
with Apache License 2.0
from provectus
/**
* Read the footers of all the files under that path (recursively)
* not using summary files.
* rowGroups are not skipped
* @param configuration the configuration to access the FS
* @param fileStatus the root dir
* @return all the footers
* @throws IOException if an exception is thrown while reading the footers
* @deprecated will be removed in 2.0.0;
* use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
*/
@Deprecated
public static List<Footer> readAllFootersInParallel(Configuration configuration, FileStatus fileStatus) throws IOException {
return readAllFootersInParallel(configuration, fileStatus, false);
}
19
Source : ParquetFileReader.java
with Apache License 2.0
from provectus
/**
* @param configuration a configuration
* @param file the Parquet File
* @return the metadata with row groups.
* @throws IOException if an error occurs while reading the file
* @deprecated will be removed in 2.0.0;
* use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
*/
@Deprecated
public static final ParquetMetadata readFooter(Configuration configuration, FileStatus file) throws IOException {
return readFooter(configuration, file, NO_FILTER);
}
19
Source : ParquetFileReader.java
with Apache License 2.0
from provectus
/**
* this always returns the row groups
* @param configuration a configuration
* @param pathStatus a file status to read footers from
* @return a list of footers
* @throws IOException if an exception is thrown while reading the footers
* @deprecated will be removed in 2.0.0;
* use {@link ParquetFileReader#open(InputFile, ParquetReadOptions)}
*/
@Deprecated
public static List<Footer> readFooters(Configuration configuration, FileStatus pathStatus) throws IOException {
return readFooters(configuration, pathStatus, false);
}
19
Source : ParquetFileReader.java
with Apache License 2.0
from provectus
/**
* for files provided, check if there's a summary file.
* If a summary file is found it is used otherwise the file footer is used.
* @param configuration the hadoop conf to connect to the file system;
* @param partFiles the part files to read
* @param skipRowGroups to skipRowGroups in the footers
* @return the footers for those files using the summary file if possible.
* @throws IOException if there is an exception while reading footers
* @deprecated metadata files are not recommended and will be removed in 2.0.0
*/
@Deprecated
public static List<Footer> readAllFootersInParallelUsingSummaryFiles(final Configuration configuration, final Collection<FileStatus> partFiles, final boolean skipRowGroups) throws IOException {
// figure out list of all parents to part files
Set<Path> parents = new HashSet<Path>();
for (FileStatus part : partFiles) {
parents.add(part.getPath().getParent());
}
// read corresponding summary files if they exist
List<Callable<Map<Path, Footer>>> summaries = new ArrayList<Callable<Map<Path, Footer>>>();
for (final Path path : parents) {
summaries.add(new Callable<Map<Path, Footer>>() {
@Override
public Map<Path, Footer> call() throws Exception {
ParquetMetadata mergedMetadata = readSummaryMetadata(configuration, path, skipRowGroups);
if (mergedMetadata != null) {
final List<Footer> footers;
if (skipRowGroups) {
footers = new ArrayList<Footer>();
for (FileStatus f : partFiles) {
footers.add(new Footer(f.getPath(), mergedMetadata));
}
} else {
footers = footersFromSummaryFile(path, mergedMetadata);
}
Map<Path, Footer> map = new HashMap<Path, Footer>();
for (Footer footer : footers) {
// the folder may have been moved
footer = new Footer(new Path(path, footer.getFile().getName()), footer.getParquetMetadata());
map.put(footer.getFile(), footer);
}
return map;
} else {
return Collections.emptyMap();
}
}
});
}
Map<Path, Footer> cache = new HashMap<Path, Footer>();
try {
List<Map<Path, Footer>> footersFromSummaries = runAllInParallel(configuration.getInt(PARQUET_READ_PARALLELISM, 5), summaries);
for (Map<Path, Footer> footers : footersFromSummaries) {
cache.putAll(footers);
}
} catch (ExecutionException e) {
throw new IOException("Error reading summaries", e);
}
// keep only footers for files actually requested and read file footer if not found in summaries
List<Footer> result = new ArrayList<Footer>(partFiles.size());
List<FileStatus> toRead = new ArrayList<FileStatus>();
for (FileStatus part : partFiles) {
Footer f = cache.get(part.getPath());
if (f != null) {
result.add(f);
} else {
toRead.add(part);
}
}
if (toRead.size() > 0) {
// read the footers of the files that did not have a summary file
LOG.info("reading another {} footers", toRead.size());
result.addAll(readAllFootersInParallel(configuration, toRead, skipRowGroups));
}
return result;
}
19
Source : HDFSStorage.java
with Apache License 2.0
from pravega
/**
* Gets the FileStatus representing the segment.
*
* @param segmentName The name of the Segment to retrieve for.
* @param enforceExistence If true, it will throw a FileNotFoundException if no files are found, otherwise null is returned.
* @return FileStatus of the HDFS file.
* @throws IOException If an exception occurred.
*/
private FileStatus findStatusForSegment(String segmentName, boolean enforceExistence) throws IOException {
FileStatus[] rawFiles = findAllRaw(segmentName);
if (rawFiles == null || rawFiles.length == 0) {
if (enforceExistence) {
throw HDFSExceptionHelpers.segmentNotExistsException(segmentName);
}
return null;
}
val result = Arrays.stream(rawFiles).sorted(this::compareFileStatus).collect(Collectors.toList());
return result.get(result.size() - 1);
}
19
Source : HDFSStorage.java
with Apache License 2.0
from pravega
/**
* Makes the file represented by the given FileStatus read-only.
*
* @param file The FileStatus of the file to set. If this method returns true, this FileStatus will
* also be updated to indicate the file is read-only.
* @return True if the file was not read-only before (and it is now), or false if the file was already read-only.
* @throws IOException If an exception occurred.
*/
private boolean makeReadOnly(FileStatus file) throws IOException {
if (isReadOnly(file)) {
return false;
}
this.fileSystem.setPermission(file.getPath(), READONLY_PERMISSION);
log.debug("MakeReadOnly '{}'.", file.getPath());
return true;
}
19
Source : HDFSStorage.java
with Apache License 2.0
from pravega
// endregion
// Region HDFS helper methods.
/**
* Gets an array (not necessarily ordered) of FileStatus objects currently available for the given Segment.
* These must be in the format specified by NAME_FORMAT (see EXAMPLE_NAME_FORMAT).
*/
private FileStatus[] findAllRaw(String segmentName) throws IOException {
assert segmentName != null && segmentName.length() > 0 : "segmentName must be non-null and non-empty";
String pattern = String.format(NAME_FORMAT, getPathPrefix(segmentName), SUFFIX_GLOB_REGEX);
FileStatus[] files = this.fileSystem.globStatus(new Path(pattern));
if (files.length > 1) {
throw new IllegalArgumentException("More than one file");
}
return files;
}
19
Source : HDFSStorage.java
with Apache License 2.0
from pravega
private long getEpoch(FileStatus status) throws FileNameFormatException {
return getEpochFromPath(status.getPath());
}
19
Source : HDFSStorage.java
with Apache License 2.0
from pravega
private int compareFileStatus(FileStatus f1, FileStatus f2) {
try {
return Long.compare(getEpoch(f1), getEpoch(f2));
} catch (FileNameFormatException e) {
throw new IllegalStateException(e);
}
}
19
Source : HDFSStorage.java
with Apache License 2.0
from pravega
@Override
public SegmentHandle openWrite(String streamSegmentName) throws StreamSegmentException {
ensureInitializedAndNotClosed();
long traceId = LoggerHelpers.traceEnter(log, "openWrite", streamSegmentName);
long fencedCount = 0;
do {
try {
FileStatus fileStatus = findStatusForSegment(streamSegmentName, true);
if (!isSealed(fileStatus.getPath())) {
if (getEpochFromPath(fileStatus.getPath()) > this.epoch) {
throw new StorageNotPrimaryException(streamSegmentName);
}
Path targetPath = getFilePath(streamSegmentName, this.epoch);
if (!targetPath.equals(fileStatus.getPath())) {
try {
this.fileSystem.rename(fileStatus.getPath(), targetPath);
} catch (FileNotFoundException e) {
// This happens when more than one host is trying to fence and only one of the host goes through.
// Retry the rename so that host with the highest epoch gets access.
// In the worst case, the current owner of the segment will win this race after a number of attempts
// equal to the number of Segment Stores in the race. The high bound for this number of attempts
// is the total number of Segment Store instances in the cluster.
// It is safe to retry for MAX_EPOCH times as we are sure that the loop will never go that long.
log.warn("Race in fencing. More than two hosts trying to own the segment. Retrying");
fencedCount++;
continue;
}
}
}
// Ensure that file exists
findStatusForSegment(streamSegmentName, true);
return HDFSSegmentHandle.write(streamSegmentName);
} catch (IOException e) {
throw HDFSExceptionHelpers.convertException(streamSegmentName, e);
}
// Looping for the maximum possible number.
} while (fencedCount <= this.epoch);
LoggerHelpers.traceLeave(log, "openWrite", traceId, epoch);
throw new StorageNotPrimaryException("Not able to fence out other writers.");
}
19
Source : HDFSStorage.java
with Apache License 2.0
from pravega
/**
* Determines whether the given FileStatus indicates the file is read-only.
*
* @param fs The FileStatus to check.
* @return True or false.
*/
private boolean isReadOnly(FileStatus fs) {
return fs.getPermission().getUserAction() == FsAction.READ;
}
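The READONLY_PERMISSION and READWRITE_PERMISSION constants referenced by makeReadOnly() and makeWrite() are not shown in these excerpts. As an assumption (not the project's confirmed constants), definitions along these lines would be consistent with the isReadOnly() check above:
// Assumed imports: org.apache.hadoop.fs.permission.FsPermission, org.apache.hadoop.fs.permission.FsAction.
// Hedged sketch: owner-only permissions, matching isReadOnly() comparing getUserAction() to FsAction.READ.
private static final FsPermission READONLY_PERMISSION =
        new FsPermission(FsAction.READ, FsAction.NONE, FsAction.NONE);
private static final FsPermission READWRITE_PERMISSION =
        new FsPermission(FsAction.READ_WRITE, FsAction.NONE, FsAction.NONE);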
19
Source : HDFSStorage.java
with Apache License 2.0
from pravega
private boolean makeWrite(FileStatus file) throws IOException {
this.fileSystem.setPermission(file.getPath(), READWRITE_PERMISSION);
log.debug("MakeReadOnly '{}'.", file.getPath());
return true;
}
19
Source : HDFSStorage.java
with Apache License 2.0
from pravega
@Override
public boolean exists(String streamSegmentName) {
ensureInitializedAndNotClosed();
long traceId = LoggerHelpers.traceEnter(log, "exists", streamSegmentName);
FileStatus status = null;
try {
status = findStatusForSegment(streamSegmentName, false);
} catch (IOException e) {
// HDFS could not find the file. Returning false.
log.warn("Got exception checking if file exists", e);
}
boolean exists = status != null;
LoggerHelpers.traceLeave(log, "exists", traceId, streamSegmentName, exists);
return exists;
}
19
Source : HDFSStorage.java
with Apache License 2.0
from pravega
@Override
public SegmentHandle create(String streamSegmentName) throws StreamSegmentException {
// Creates a file with the lowest possible epoch (0).
// There is a possible race during create where more than one segmentstore may be trying to create a streamsegment.
// If one create is delayed, it is possible that other segmentstore will be able to create the file with
// epoch (0) and then rename it using its epoch (segment_<epoch>).
//
// To fix this, the create code checks whether a file with higher epoch exists.
// If it does, it tries to remove the created file, and throws SegmentExistsException.
ensureInitializedAndNotClosed();
long traceId = LoggerHelpers.traceEnter(log, "create", streamSegmentName);
// Create the segment using our own epoch.
FileStatus[] status = null;
try {
status = findAllRaw(streamSegmentName);
} catch (IOException e) {
throw HDFSExceptionHelpers.convertException(streamSegmentName, e);
}
if (status != null && status.length > 0) {
// Segment already exists; don't bother with anything else.
throw HDFSExceptionHelpers.convertException(streamSegmentName, HDFSExceptionHelpers.segmentExistsException(streamSegmentName));
}
// Create the file for the segment with epoch 0.
Path fullPath = getFilePath(streamSegmentName, 0);
try {
// Create the file, and then immediately close the returned OutputStream, so that HDFS may properly create the file.
this.fileSystem.create(fullPath, READWRITE_PERMISSION, false, 0, this.config.getReplication(), this.config.getBlockSize(), null).close();
HDFSMetrics.CREATE_COUNT.inc();
log.debug("Created '{}'.", fullPath);
} catch (IOException e) {
throw HDFSExceptionHelpers.convertException(streamSegmentName, e);
}
// If there is a race during creation, delete the file with epoch 0 and throw exception.
// It is safe to delete the file as a file with higher epoch already exists. Any new operations will always
// work the file with higher epoch than 0.
try {
status = findAllRaw(streamSegmentName);
if (status != null && status.length > 1) {
this.fileSystem.delete(fullPath, true);
throw new StreamSegmentExistsException(streamSegmentName);
}
} catch (IOException e) {
log.warn("Exception while deleting a file with epoch 0.", e);
}
LoggerHelpers.traceLeave(log, "create", traceId, streamSegmentName);
// return handle
return HDFSSegmentHandle.write(streamSegmentName);
}
19
Source : HDFSChunkStorage.java
with Apache License 2.0
from pravega
@Override
protected ChunkInfo doGetInfo(String chunkName) throws ChunkStorageException {
ensureInitializedAndNotClosed();
try {
FileStatus status = fileSystem.getFileStatus(getFilePath(chunkName));
return ChunkInfo.builder().name(chunkName).length(status.getLen()).build();
} catch (IOException e) {
throw convertException(chunkName, "doGetInfo", e);
}
}
19
Source : HDFSWalker.java
with BSD 3-Clause "New" or "Revised" License
from osmlab
public static HDFSFile convert(final FileStatus status) {
try {
return new HDFSFile(status.getPath());
} catch (final IOException oops) {
throw new CoreException("Error when converting FileStatus to HDFSFile", oops);
}
}
19
Source : TestPrestoS3FileSystem.java
with Apache License 2.0
from openlookeng
@Test
public void testEmptyDirectory() throws Exception {
try (PrestoS3FileSystem fs = new PrestoS3FileSystem()) {
MockAmazonS3 s3 = new MockAmazonS3() {
@Override
public ObjectMetadata getObjectMetadata(GetObjectMetadataRequest getObjectMetadataRequest) {
if (getObjectMetadataRequest.getKey().equals("empty-dir/")) {
ObjectMetadata objectMetadata = new ObjectMetadata();
objectMetadata.setContentType(S3_DIRECTORY_OBJECT_CONTENT_TYPE);
return objectMetadata;
}
return super.getObjectMetadata(getObjectMetadataRequest);
}
};
fs.initialize(new URI("s3n://test-bucket/"), new Configuration());
fs.setS3Client(s3);
FileStatus fileStatus = fs.getFileStatus(new Path("s3n://test-bucket/empty-dir/"));
assertTrue(fileStatus.isDirectory());
fileStatus = fs.getFileStatus(new Path("s3n://test-bucket/empty-dir"));
assertTrue(fileStatus.isDirectory());
}
}
19
Source : TestPrestoS3FileSystem.java
with Apache License 2.0
from openlookeng
private static void assertSkipGlacierObjects(boolean skipGlacierObjects) throws Exception {
Configuration config = new Configuration();
config.set(S3_SKIP_GLACIER_OBJECTS, String.valueOf(skipGlacierObjects));
try (PrestoS3FileSystem fs = new PrestoS3FileSystem()) {
MockAmazonS3 s3 = new MockAmazonS3();
s3.setHasGlacierObjects(true);
fs.initialize(new URI("s3n://test-bucket/"), config);
fs.setS3Client(s3);
FileStatus[] statuses = fs.listStatus(new Path("s3n://test-bucket/test"));
assertEquals(statuses.length, skipGlacierObjects ? 1 : 2);
}
}
19
Source : HCFSFuseFileSystem.java
with Apache License 2.0
from opendataio
/**
* Reads the contents of a directory.
*
* @param path The FS path of the directory
* @param buff The FUSE buffer to fill
* @param filter FUSE filter
* @param offset Ignored in fuse
* @param fi FileInfo data structure kept by FUSE
* @return 0 on success, a negative value on error
*/
@Override
public int readdir(String path, Pointer buff, FuseFillDir filter, @off_t long offset, FuseFileInfo fi) {
final Path turi = mPathResolverCache.getUnchecked(path);
LOG.trace("readdir({}) [target: {}]", path, turi);
try {
final FileStatus[] ls = mFileSystem.listStatus(turi);
// standard . and .. entries
filter.apply(buff, ".", null, 0);
filter.apply(buff, "..", null, 0);
for (FileStatus file : ls) {
filter.apply(buff, file.getPath().getName(), null, 0);
}
} catch (FileNotFoundException | InvalidPathException e) {
LOG.debug("Failed to read directory {}, path does not exist or is invalid", path);
return -ErrorCodes.ENOENT();
} catch (Throwable t) {
LOG.error("Failed to read directory {}", path, t);
return -1;
}
return 0;
}
19
Source : FileSystemRMStateStore.java
with Apache License 2.0
from NJUJYB
private void loadRMDTSecretManagerState(RMState rmState) throws Exception {
checkAndResumeUpdateOperation(rmDTSecretManagerRoot);
FileStatus[] childNodes = fs.listStatus(rmDTSecretManagerRoot);
for (FileStatus childNodeStatus : childNodes) {
assert childNodeStatus.isFile();
String childNodeName = childNodeStatus.getPath().getName();
if (checkAndRemovePartialRecord(childNodeStatus.getPath())) {
continue;
}
if (childNodeName.startsWith(DELEGATION_TOKEN_SEQUENCE_NUMBER_PREFIX)) {
rmState.rmSecretManagerState.dtSequenceNumber = Integer.parseInt(childNodeName.split("_")[1]);
continue;
}
Path childNodePath = getNodePath(rmDTSecretManagerRoot, childNodeName);
byte[] childData = readFile(childNodePath, childNodeStatus.getLen());
ByteArrayInputStream is = new ByteArrayInputStream(childData);
DataInputStream fsIn = new DataInputStream(is);
if (childNodeName.startsWith(DELEGATION_KEY_PREFIX)) {
DelegationKey key = new DelegationKey();
key.readFields(fsIn);
rmState.rmSecretManagerState.masterKeyState.add(key);
if (LOG.isDebugEnabled()) {
LOG.debug("Loaded delegation key: keyId=" + key.getKeyId() + ", expirationDate=" + key.getExpiryDate());
}
} else if (childNodeName.startsWith(DELEGATION_TOKEN_PREFIX)) {
RMDelegationTokenIdentifierData identifierData = new RMDelegationTokenIdentifierData();
identifierData.readFields(fsIn);
RMDelegationTokenIdentifier identifier = identifierData.getTokenIdentifier();
long renewDate = identifierData.getRenewDate();
rmState.rmSecretManagerState.delegationTokenState.put(identifier, renewDate);
if (LOG.isDebugEnabled()) {
LOG.debug("Loaded RMDelegationTokenIdentifier: " + identifier + " renewDate=" + renewDate);
}
} else {
LOG.warn("Unknown file for recovering RMDelegationTokenSecretManager");
}
fsIn.close();
}
}