频道栏目
首页 > 网络 > 云计算 > 正文

流量汇总mapreduce

2017-09-12 10:33:53      个评论    来源:null  
收藏   我要投稿

FlowCountMapper.java

package os.os.flowcount;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;


/**
 * Map stage of the per-phone traffic summary job.
 *
 * <p>Type arguments of {@link Mapper}:
 * <ul>
 *   <li>KEYIN ({@link LongWritable}): starting offset of a line in the log file</li>
 *   <li>VALUEIN ({@link Text}): the content of one log line</li>
 *   <li>KEYOUT ({@link Text}): the phone number emitted as the map output key</li>
 *   <li>VALUEOUT ({@link FlowBean}): the traffic information for that line</li>
 * </ul>
 */
public class FlowCountMapper extends Mapper<LongWritable, Text, Text, FlowBean> {

    /** Counter group used to report input lines that were skipped. */
    private static final String COUNTER_GROUP = "FlowCount";

    /**
     * Fewest fields a usable line can have: the phone number sits at index 1 and the
     * upstream flow at {@code length - 3}, which must lie after the phone number.
     */
    private static final int MIN_FIELDS = 5;

    /**
     * Parses one tab-separated log line and emits {@code (phone, FlowBean(upFlow, downFlow))}.
     *
     * <p>Lines with too few fields or with non-numeric flow fields are skipped and
     * counted instead of failing the whole task.
     *
     * @param key     offset of the line (unused)
     * @param value   the log line
     * @param context Hadoop context used to emit the output pair and update counters
     * @throws IOException          if writing the output pair fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {

        // Take the log line as a String.
        String line = value.toString();

        // Split the line into its fields on the tab separator.
        String[] fields = StringUtils.split(line, "\t");

        // Blank or truncated lines cannot be indexed safely; skip them.
        if (fields == null || fields.length < MIN_FIELDS) {
            context.getCounter(COUNTER_GROUP, "MALFORMED_LINES").increment(1);
            return;
        }

        // Extract the fields we need: flows are addressed from the end of the line.
        String phone = fields[1];
        long upFlow;
        long downFlow;
        try {
            upFlow = Long.parseLong(fields[fields.length - 3]);
            downFlow = Long.parseLong(fields[fields.length - 2]);
        } catch (NumberFormatException e) {
            context.getCounter(COUNTER_GROUP, "NON_NUMERIC_FLOW").increment(1);
            return;
        }

        // Emit the key/value pair.
        context.write(new Text(phone), new FlowBean(upFlow, downFlow));
    }
}

FlowCountReduce.java

package os.os.flowcount;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

public class FlowCountReduce extends Reducer{

    /*
     * key是一个手机号
     * values是这个手机号对应的所有kv
     * @see org.apache.hadoop.mapreduce.Reducer#reduce(KEYIN, java.lang.Iterable, org.apache.hadoop.mapreduce.Reducer.Context)
     */
    protected void reduce(Text key, java.lang.Iterable values, Context context) throws java.io.IOException ,InterruptedException {

        long upflowSum = 0;//上行流量的和
        long downflowSum = 0; //下行流量和

        for(FlowBean value: values) {
            upflowSum += value.getUpflow();
            downflowSum += value.getDownflow();
        }

        FlowBean bean = new FlowBean(upflowSum,downflowSum);

        context.write(key, bean);
    }

}

FlowBean.java

package os.os.flowcount;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

import org.apache.hadoop.io.Writable;

/**
 * Traffic record holding upstream, downstream and total flow.
 *
 * <p>A custom data type passed around a Hadoop cluster has to take part in Hadoop's
 * serialization framework, which is done by implementing {@link Writable}.
 *
 * <p>Only {@code upflow} and {@code downflow} are written to the stream;
 * {@code sumflow} is recomputed as their sum when the bean is read back.
 */
public class FlowBean implements Writable {

    private long upflow;    // upstream flow
    private long downflow;  // downstream flow
    private long sumflow;   // total flow

    /** No-argument constructor, required because instances are created reflectively. */
    public FlowBean() {

    }

    public long getUpflow() {
        return upflow;
    }

    public void setUpflow(long upflow) {
        this.upflow = upflow;
    }

    public long getDownflow() {
        return downflow;
    }

    public void setDownflow(long downflow) {
        this.downflow = downflow;
    }

    public long getSumflow() {
        return sumflow;
    }

    public void setSumflow(long sumflow) {
        this.sumflow = sumflow;
    }

    /**
     * Creates a record whose total flow is {@code upflow + downflow}.
     *
     * @param upflow   upstream flow
     * @param downflow downstream flow
     */
    public FlowBean(long upflow, long downflow) {
        this.upflow = upflow;
        this.downflow = downflow;
        this.sumflow = upflow + downflow;
    }

    /**
     * Serialization: writes the fields to be transferred to the byte stream.
     *
     * @see org.apache.hadoop.io.Writable#write(java.io.DataOutput)
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeLong(upflow);
        out.writeLong(downflow);
    }

    /**
     * Deserialization: restores the fields from the byte stream, in the order
     * {@link #write(DataOutput)} wrote them.
     *
     * @see org.apache.hadoop.io.Writable#readFields(java.io.DataInput)
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        upflow = in.readLong();
        downflow = in.readLong();
        // sumflow is not part of the stream; rebuild it so a deserialized bean does
        // not report a total of 0 (or a stale total when the instance is reused).
        sumflow = upflow + downflow;
    }

    @Override
    public String toString() {
        return "FlowBean [upflow=" + upflow + ", downflow=" + downflow + ", sumflow=" + sumflow + "]";
    }

}

FlowCountJob.java

package os.os.flowcount;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;



/**
 * Driver that configures and submits the traffic summary MapReduce job.
 */
public class FlowCountJob {

    /**
     * Configures the job, runs it, and exits with a status reflecting the outcome.
     *
     * @param args {@code args[0]} is the input path, {@code args[1]} the output path
     * @throws IOException            if the job cannot be created or submitted
     * @throws ClassNotFoundException if a configured job class cannot be loaded
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {

        if (args.length < 2) {
            System.err.println("Usage: FlowCountJob <input path> <output path>");
            System.exit(2);
        }

        Job job = Job.getInstance(new Configuration());

        // Lets Hadoop locate the jar that contains the job classes.
        job.setJarByClass(FlowCountJob.class);

        job.setMapperClass(FlowCountMapper.class);
        job.setReducerClass(FlowCountReduce.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(FlowBean.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FlowBean.class);

        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Propagate job failure to the caller through the process exit status.
        boolean succeeded = job.waitForCompletion(true);
        System.exit(succeeded ? 0 : 1);
    }
}

日志格式(各字段以制表符 \t 分隔;第2个字段为手机号,倒数第3、第2个字段分别为上行流量和下行流量)
1363157985066 13726230503 00-FD-07-A4-72-B8:CMCC 120.196.100.82 i02.c.aliimg.com 24 27 2481 24681 200

上一篇:hadoop提供的分布式文件系统
下一篇:Elasticsearch获取ES查询的所有结果,并批量导出Excel2
相关文章
图文推荐

关于我们 | 联系我们 | 广告服务 | 投资合作 | 版权申明 | 在线帮助 | 网站地图 | 作品发布 | Vip技术培训 | 举报中心

版权所有: 红黑联盟--致力于做实用的IT技术学习网站