hadoop入门教程-程序小实例,hadoop入门教程实例
无论是在微信还是QQ,我们经常看到好友推荐这样的功能,其实这个功能是在大数据的基础上实现的,下面来看具体的代码实现:
在src下添加三个类:JobRun.java:
package com.lftgb.mr;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
/**
 * Driver that configures and submits the "qq" MapReduce job built from
 * {@code Test2Mapper} and {@code Test2Reduce}.
 *
 * <p>The cluster endpoints, the job jar location and the input/output paths are
 * hard-coded below, so this class only works against that specific environment.
 */
public class JobRun {
    /**
     * Builds the job, submits it and blocks until it finishes.
     *
     * <p>The JVM exits with status 0 when the job reports success, and with status 1
     * when the job reports failure or an exception prevents it from running.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("mapred.job.tracker", "192.168.152.128:9001");
        conf.set("fs.default.name", "hdfs://192.168.152.128:9000");
        conf.set("mapred.jar", "C:\\Users\\志鹏\\Desktop\\hadoop程序\\qq.jar");
        try {
            Job job = new Job(conf);
            job.setJobName("qq");
            job.setJarByClass(JobRun.class);
            job.setMapperClass(Test2Mapper.class);
            job.setReducerClass(Test2Reduce.class);

            // Test2Mapper is declared as Mapper<LongWritable, Text, Text, Text>, so the
            // intermediate value class must be Text; declaring IntWritable here makes
            // the map phase fail with a type mismatch.
            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            // Test2Reduce is declared as Reducer<Text, Text, Text, Text>.
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(Text.class);

            // Directory (or file) holding the job's input data.
            FileInputFormat.addInputPath(job, new Path("/usr/input/qq/"));
            // Directory that receives the job's output.
            FileOutputFormat.setOutputPath(job, new Path("/usr/output/qq"));

            System.exit(job.waitForCompletion(true) ? 0 : 1);
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            // Restore the interrupt flag before reporting the failure.
            Thread.currentThread().interrupt();
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        // Only reached when an exception was caught above: report failure to the caller
        // instead of returning normally with status 0.
        System.exit(1);
    }
}
Test2Mapper.java:
package com.lftgb.mr;
import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class Test2Mapper extends Mapper<LongWritable, Text, Text, Text> {
protected void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
String line = value.toString();
String[] ss = line.split("\t");
context.write(new Text(ss[0]),new Text(ss[1]));
context.write(new Text(ss[1]),new Text(ss[0]));
}
}
Test2Reduce.java:
package com.lftgb.mr;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class Test2Reduce extends Reducer<Text, Text, Text, Text> {
protected void reduce(Text key, Iterable<Text> i,
Context arg2) throws IOException,
InterruptedException {
Set<String> set= new HashSet<String>();
for(Text t:i){
set.add(t.toString());
}
if(set.size()>1){
for (Iterator j = set.iterator(); j.hasNext();) {
String name = (String) j.next();
for (Iterator k = set.iterator(); k.hasNext();) {
String other = (String) k.next();
if(name.equals(other)){
arg2.write(new Text(name),new Text(other));
}
}
}
}
}
}
在 Eclipse 下与 Hadoop 结合,可以更有效地实现大数据处理的功能,更深入的研究请期待小编的下一篇博客!
本站文章为和通数据库网友分享或者投稿,欢迎任何形式的转载,但请务必注明出处.
同时文章内容如有侵犯您的权益,请联系QQ:970679559,我们会尽快处理。