今天跟大家聊聊如何使用Hadoop Java API进行HDFS的基本文件操作。很多人对此还不太熟悉,小编为大家总结了以下内容,希望大家读完这篇文章后能有所收获。
注意:jar版本,务必和远程的hadoop版本一致。
maven配置文件:
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build file for the Hadoop Java API demo.
     IMPORTANT: hadoop.version must exactly match the remote cluster's Hadoop version;
     a mismatch causes runtime NoSuchMethodError in the HDFS client (see the
     "common exceptions" section of the article). -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>Hadoop</groupId>
    <artifactId>demo</artifactId>
    <version>1.0-SNAPSHOT</version>
    <properties>
        <!-- Single source of truth for all Hadoop artifact versions below;
             keep in sync with the remote cluster. -->
        <hadoop.version>2.7.1</hadoop.version>
    </properties>
    <dependencies>
        <!--hadoop-->
        <!-- http://mvnrepository.com/artifact/commons-io/commons-io -->
        <dependency>
            <groupId>commons-io</groupId>
            <artifactId>commons-io</artifactId>
            <version>2.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-mapreduce-client-core</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-common</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-api</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-client</artifactId>
            <version>${hadoop.version}</version>
        </dependency>
        <!-- Test framework used by HadoopTest -->
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
    </dependencies>
</project>
测试用例:
package com.demo; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.IOUtils; import org.junit.Before; import org.junit.Test; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.net.URI; import java.net.URISyntaxException; public class HadoopTest { FileSystem fileSystem = null; @Before public void init() throws URISyntaxException, IOException, InterruptedException { String p = "hdfs://yarn001:9000"; fileSystem = FileSystem.get(new URI(p), new Configuration(), "root"); } /** * 测试文件下载 * @throws URISyntaxException * @throws IOException */ @Test public void downloadTest() throws URISyntaxException, IOException { Path path = new Path("/hadoop-2.7.1.tar.gz"); InputStream open = fileSystem.open(path); FileOutputStream fileOutputStream = new FileOutputStream("d://hadoop"); IOUtils.copyBytes(open, fileOutputStream, 4096, true); } /** * 测试文件上传1 * @throws IOException */ @Test public void uploadFileTest1() throws IOException { InputStream fileInputStream = new FileInputStream("d://SpringBoot.mobi"); Path path = new Path("/SpringBoot"); FSDataOutputStream fsDataOutputStream = fileSystem.create(path); IOUtils.copyBytes(fileInputStream,fsDataOutputStream,4096); } /** * 测试文件上传2 */ @Test public void uploadFileTest2() throws IOException { Path localPath = new Path("d://test.xls"); Path remoterPath = new Path("/testXLS"); fileSystem.copyFromLocalFile(localPath,remoterPath); } /** * 测试删除文件 */ @Test public void delFileTest() throws IOException { Path path = new Path("/testXLS"); /** * 删除空目录 */ boolean delete = fileSystem.delete(path, false); /** * 删除非空目录 * 递归删除 */ /* boolean delete1 = fileSystem.delete(path, true); */ System.out.println(delete?"删除成功":"删除失败"); } /** * 创建目录测试 * @throws IOException */ @Test public void createFolder() throws IOException { Path path = 
new Path("/testPath2"); boolean mkdirs = fileSystem.mkdirs(path); System.out.println(mkdirs?"success":"fail"); } }
常见异常:
Exception in thread "main" java.lang.NoSuchMethodError: org.apache.hadoop.tracing.SpanReceiverHost.get(Lorg/apache/hadoop/conf/Configuration;Ljava/lang/String;)Lorg/apache/hadoop/tracing/SpanReceiverHost; at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:634) at org.apache.hadoop.hdfs.DFSClient.<init>(DFSClient.java:619)
异常处理:
配置maven的pom.xml文件,使本地依赖库的版本与远程Hadoop集群(HDFS)的版本保持一致即可。
看完上述内容,你们对如何进行Hadoop Java API简单使用有进一步的了解吗?如果还想了解更多知识或者相关内容,请关注亿速云行业资讯频道,感谢大家的支持。
免责声明:本站发布的内容(图片、视频和文字)以原创、转载和分享为主,文章观点不代表本网站立场,如果涉及侵权请联系站长邮箱:is@yisu.com进行举报,并提供相关证据,一经查实,将立刻删除涉嫌侵权内容。