Hadoop compression and decompression
The code first:
package com.huawei.hdfs.compress;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.*;
import org.apache.hadoop.util.ReflectionUtils;
import org.junit.Test;

import java.io.FileInputStream;
import java.io.FileOutputStream;

public class TestCompress {

    @Test
    public void deflateCompress() throws Exception {
        Class<?> codecClass = DeflateCodec.class;
        // instantiate the codec via reflection
        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, new Configuration());
        // create the file output stream (DeflateCodec's default extension is .deflate, not .gz)
        FileOutputStream fos = new FileOutputStream("/Users/simmucheng/tmp/words.deflate");
        // wrap it in a compression stream
        CompressionOutputStream zipOut = codec.createOutputStream(fos);
        IOUtils.copyBytes(new FileInputStream("/Users/simmucheng/tmp/words"), zipOut, 1024);
        // close() flushes the compressor and closes the underlying stream
        zipOut.close();
    }

    public static void main(String[] args) throws Exception {
        Class<?>[] zipClasses = {
                DeflateCodec.class,
                GzipCodec.class,
                BZip2Codec.class
        };
        // args[0]: source file, args[1]: output prefix
        for (Class<?> c : zipClasses) {
            manyCompress(c, args[0], args[1]);
        }
        // decompress the files produced above, not the original source
        for (Class<?> c : zipClasses) {
            manyDecompress(c, args[1], args[1]);
        }
    }

    // compress srcPath into destPrefix + the codec's default extension
    public static void manyCompress(Class<?> codecClass, String srcPath, String destPrefix) throws Exception {
        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, new Configuration());
        FileOutputStream fos = new FileOutputStream(destPrefix + codec.getDefaultExtension());
        CompressionOutputStream zipOut = codec.createOutputStream(fos);
        IOUtils.copyBytes(new FileInputStream(srcPath), zipOut, 1024);
        zipOut.close();
        fos.close();
    }

    // decompress srcPrefix + the codec's default extension into destPrefix + extension + ".txt",
    // so the three codecs do not overwrite each other's output
    public static void manyDecompress(Class<?> codecClass, String srcPrefix, String destPrefix) throws Exception {
        // instantiate the codec via reflection
        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, new Configuration());
        String ext = codec.getDefaultExtension();
        FileInputStream fis = new FileInputStream(srcPrefix + ext);
        CompressionInputStream zipIn = codec.createInputStream(fis);
        IOUtils.copyBytes(zipIn, new FileOutputStream(destPrefix + ext + ".txt"), 1024);
        zipIn.close();
        fis.close();
    }
}
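After packaging, the main entry point can be run on a cluster node with hadoop jar; args[0] is the file to compress and args[1] the output prefix. The paths below are illustrative:

hadoop jar hdfs-1.0-SNAPSHOT.jar com.huawei.hdfs.compress.TestCompress /Users/simmucheng/tmp/words /Users/simmucheng/tmp/words-out

This produces words-out.deflate, words-out.gz and words-out.bz2, then decompresses each back to a .txt copy.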
pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <groupId>com.huawei</groupId>
    <artifactId>hdfs</artifactId>
    <version>1.0-SNAPSHOT</version>
    <packaging>jar</packaging>
    <build>
        <plugins>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-antrun-plugin</artifactId>
                <version>1.7</version>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <goals>
                            <goal>run</goal>
                        </goals>
                        <configuration>
                            <tasks>
                                <echo>----- copy the jar to the target directory -----</echo>
                                <copy
                                        file="/Users/simmucheng/IDEA/idea-hadoop-4/hdfs/target/hdfs-1.0-SNAPSHOT.jar"
                                        toDir="/Users/simmucheng/tmp">
                                </copy>
                            </tasks>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
        </plugins>
    </build>
    <dependencies>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-hdfs</artifactId>
            <version>2.7.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-client</artifactId>
            <version>2.7.4</version>
        </dependency>
        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.12</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-common</artifactId>
            <version>2.7.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-client</artifactId>
            <version>2.7.4</version>
        </dependency>
        <dependency>
            <groupId>org.apache.hadoop</groupId>
            <artifactId>hadoop-yarn-server-resourcemanager</artifactId>
            <version>2.7.4</version>
        </dependency>
        <dependency>
            <groupId>org.anarres.lzo</groupId>
            <artifactId>lzo-hadoop</artifactId>
            <version>1.0.0</version>
        </dependency>
    </dependencies>
</project>
This project can run on a Hadoop cluster, but Hadoop does not even ship a codec for the LZO algorithm, so the relevant jars have to be added to the cluster. Since this is a Maven project, the dependency jars live in the local repository that IntelliJ IDEA uses, so run the following from a terminal on the Mac:
mvn -DoutputDirectory=/Users/simmucheng/hadoop_tmp_jar -DgroupId=com.huawei -DartifactId=hdfs -Dversion=1.0-SNAPSHOT dependency:copy-dependencies
This copies the project's downloaded dependency jars into the given directory, from which they can be collected and moved onto the Hadoop cluster.
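With the lzo-hadoop jars on the classpath, the LZO codec plugs into exactly the same ReflectionUtils pattern as the built-in codecs. A minimal sketch, assuming the org.anarres.lzo:lzo-hadoop artifact exposes the codec as org.anarres.lzo.hadoop.codec.LzopCodec (the class name is an assumption; adjust it to match your version of the jar):

package com.huawei.hdfs.compress;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionOutputStream;
import org.apache.hadoop.util.ReflectionUtils;

import java.io.FileInputStream;
import java.io.FileOutputStream;

public class TestLzoCompress {
    public static void main(String[] args) throws Exception {
        // assumption: this codec class is provided by org.anarres.lzo:lzo-hadoop;
        // loading it by name keeps the example independent of compile-time jars
        Class<?> codecClass = Class.forName("org.anarres.lzo.hadoop.codec.LzopCodec");
        CompressionCodec codec = (CompressionCodec) ReflectionUtils.newInstance(codecClass, new Configuration());
        // same pattern as manyCompress above: args[0] is the source, args[1] the output prefix
        FileOutputStream fos = new FileOutputStream(args[1] + codec.getDefaultExtension());
        CompressionOutputStream zipOut = codec.createOutputStream(fos);
        IOUtils.copyBytes(new FileInputStream(args[0]), zipOut, 1024);
        zipOut.close();
    }
}

One common way to make the collected jars visible to the hadoop command is to drop them into $HADOOP_HOME/share/hadoop/common/lib/ on each node, or to add their directory to HADOOP_CLASSPATH.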