1、文件元数据 FileStatus
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import java.io.IOException;
import java.net.URI;
/**
 * Demo: prints the overall file-system usage reported by HDFS and the
 * metadata ({@link FileStatus}) of one file.
 */
public class ShowFileStatus {
    /**
     * Connects to the file system addressed by the hard-coded URI, prints the
     * values of {@link FsStatus} (capacity, used, remaining) and then the
     * attributes of the file's {@link FileStatus}.
     *
     * <p>Any {@link IOException} raised while talking to the file system is
     * printed to standard error; it is not rethrown.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        URI uri = URI.create("hdfs://192.168.1.100:9000/home/ossuser/1.txt");
        Configuration conf = new Configuration();
        // try-with-resources releases the FileSystem handle even if a call below fails.
        try (FileSystem fs = FileSystem.get(uri, conf)) {
            Path path = new Path(uri);

            // File-system level usage figures.
            FsStatus status = fs.getStatus(path);
            System.out.println("fsStatus.getCapacity() = " + status.getCapacity());
            System.out.println("fsStatus.getUsed() = " + status.getUsed());
            System.out.println("fsStatus.getRemaining() = " + status.getRemaining());
            System.out.println("----------------------------------");

            // Per-file metadata.
            FileStatus fileStatus = fs.getFileStatus(path);
            System.out.println("fileStatus.getOwner() = " + fileStatus.getOwner());
            System.out.println("fileStatus.getGroup() = " + fileStatus.getGroup());
            System.out.println("fileStatus.getAccessTime() = " + fileStatus.getAccessTime());
            System.out.println("fileStatus.getBlockSize() = " + fileStatus.getBlockSize());
            System.out.println("fileStatus.getLen() = " + fileStatus.getLen());
            System.out.println("fileStatus.getModificationTime() = " + fileStatus.getModificationTime());
            System.out.println("fileStatus.getReplication() = " + fileStatus.getReplication());
            FsPermission fsPermission = fileStatus.getPermission();
            System.out.println("fileStatus.getPermission() = " + fsPermission);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
输出如下:
fsStatus.getCapacity() = 38046990336
fsStatus.getUsed() = 187760640
fsStatus.getRemaining() = 14045487104
----------------------------------
fileStatus.getOwner() = ossuser
fileStatus.getGroup() = supergroup
fileStatus.getAccessTime() = 1555746057611
fileStatus.getBlockSize() = 134217728
fileStatus.getLen() = 1935838
fileStatus.getModificationTime() = 1555746058582
fileStatus.getReplication() = 3
fileStatus.getPermission() = rw-r--r--
FileStatus 封装了目录与文件的元数据信息,包括所有者、块大小、文件长度、修改时间、副本数、权限等。
2、列出文件
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import java.io.IOException;
import java.net.URI;
/**
 * Demo: lists the entries of one HDFS directory, keeping only those that are
 * themselves directories, by passing a {@link PathFilter} to
 * {@code FileSystem.listStatus}.
 */
public class ShowFileStatus {
    /**
     * Lists the hard-coded directory and prints the path of every entry
     * accepted by the directory-only filter.
     *
     * <p>Any {@link IOException} raised by the listing itself is printed to
     * standard error; it is not rethrown.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        URI uri = URI.create("hdfs://192.168.1.100:9000/home/ossuser/");
        Configuration conf = new Configuration();
        // try-with-resources releases the FileSystem handle even if the listing fails.
        try (FileSystem fs = FileSystem.get(uri, conf)) {
            Path dir = new Path(uri);
            FileStatus[] fileStatuses = fs.listStatus(dir, new PathFilter() {
                @Override
                public boolean accept(Path candidate) {
                    try {
                        // Looks up the status of each candidate to test whether it is a directory.
                        return fs.getFileStatus(candidate).isDirectory();
                    } catch (IOException ignored) {
                        // Best effort: an entry whose status cannot be read is left out of the result.
                        return false;
                    }
                }
            });
            for (FileStatus fileStatus : fileStatuses) {
                System.out.println(fileStatus.getPath());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
输出结果
hdfs://192.168.1.100:9000/home/ossuser/aa
输出多个路径的文件列表信息:
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.net.URI;
/**
 * Demo: lists several HDFS directories in one {@code listStatus} call and
 * converts the resulting {@link FileStatus} array to paths with
 * {@link FileUtil#stat2Paths(FileStatus[])}.
 */
public class FileStat2Paths {
    /**
     * Lists the two hard-coded directories and prints every returned path,
     * one per line.
     *
     * <p>Any {@link IOException} raised while talking to the file system is
     * printed to standard error; it is not rethrown.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Path[] paths = new Path[]{
            new Path("hdfs://192.168.1.100:9000/"),
            new Path("hdfs://192.168.1.100:9000/home/ossuser"),
        };
        Configuration configuration = new Configuration();
        // The first path supplies the URI used to obtain the FileSystem;
        // try-with-resources releases the handle even if the listing fails.
        try (FileSystem fs = FileSystem.get(URI.create(paths[0].toString()), configuration)) {
            FileStatus[] status = fs.listStatus(paths);
            Path[] listedPaths = FileUtil.stat2Paths(status);
            for (Path listedPath : listedPaths) {
                System.out.println(listedPath);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
输出如下:
hdfs://192.168.1.100:9000/2019
hdfs://192.168.1.100:9000/home
hdfs://192.168.1.100:9000/home/ossuser/1.txt
hdfs://192.168.1.100:9000/home/ossuser/aa
hdfs://192.168.1.100:9000/home/ossuser/log1.txt
3、文件模式
在一个表达式中使用通配符(globbing)匹配多个文件,FileSystem为匹配通配符提供了两个方法:
public FileStatus[] globStatus(Path pathPattern) throws IOException;
public FileStatus[] globStatus(Path pathPattern, PathFilter filter) throws IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import java.io.IOException;
import java.net.URI;
/**
 * Demo: matches several HDFS paths with a single glob pattern through
 * {@code FileSystem.globStatus} and prints each match.
 */
public class GlobStatus {
    /**
     * Expands the hard-coded glob pattern against the file system at the
     * hard-coded URI and prints the path of every match, one per line.
     *
     * <p>Any {@link IOException} raised while talking to the file system is
     * printed to standard error; it is not rethrown.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        URI uri = URI.create("hdfs://192.168.1.100:9000/");
        Configuration configuration = new Configuration();
        // try-with-resources releases the FileSystem handle even if the glob call fails.
        try (FileSystem fs = FileSystem.get(uri, configuration)) {
            // NOTE(review): the third alternative is " ossuser" with a leading space, so an
            // entry named "ossuser" presumably does not match - confirm whether that is intended.
            FileStatus[] status = fs.globStatus(new Path("/*/{01,02, ossuser}"));
            for (FileStatus fileStatus : status) {
                System.out.println(fileStatus.getPath());
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }
}
输出如下:
hdfs://192.168.1.100:9000/2019/01
hdfs://192.168.1.100:9000/2019/02
通配符只能作用于文件路径与名称,不能作用于文件属性;如需按属性过滤,可结合 PathFilter 接口的实现类,实现更加灵活的文件或目录过滤。