Java代码统计网站中不同省份用户的访问数

时间:2021-05-20

一、需求

针对log日志中给定的信息,统计网站中不同省份用户的访问数

二、编程代码

package org.apache.hadoop.studyhdfs.mapreduce;

import java.io.IOException;

import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

/**
 * MapReduce job that counts page views per province from tab-separated
 * web-access log lines. The url is expected in column 2 (index 1) and the
 * province id in column 24 (index 23) — assumed from the indices used below;
 * confirm against the actual log schema.
 */
public class ProvinceCountMapReduce extends Configured implements Tool {

    /** Mapper: emits (provinceId, 1) for every well-formed log line. */
    public static class WordCountMapper
            extends Mapper<LongWritable, Text, IntWritable, IntWritable> {

        private final IntWritable mapOutputKey = new IntWritable();
        private final IntWritable mapOutputValue = new IntWritable(1);

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String[] strs = value.toString().split("\t");

            // BUG FIX: validate the field count BEFORE indexing strs[1] and
            // strs[23]; the original indexed first, so any malformed/short
            // line crashed the task with ArrayIndexOutOfBoundsException.
            if (strs.length < 30) {
                return;
            }
            String url = strs[1];
            String provinceIdValue = strs[23];

            // Filter: skip records with a blank url or blank province id.
            if (StringUtils.isBlank(provinceIdValue) || StringUtils.isBlank(url)) {
                return;
            }

            int provinceId;
            try {
                // parseInt avoids the needless boxing of Integer.valueOf.
                provinceId = Integer.parseInt(provinceIdValue);
            } catch (NumberFormatException e) {
                // Non-numeric province id: silently skip the record, as the
                // original did. (The original's Integer.MAX_VALUE sentinel
                // check was redundant once the catch returns.)
                return;
            }

            mapOutputKey.set(provinceId);
            context.write(mapOutputKey, mapOutputValue);
        }
    }

    /** Reducer: sums the per-province 1s into a total visit count. */
    public static class WordCountReduce
            extends Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {

        private final IntWritable outputValue = new IntWritable();

        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            outputValue.set(sum);
            context.write(key, outputValue);
        }
    }

    /**
     * Configures and submits the job, blocking until completion.
     *
     * @param args args[0] = HDFS input path, args[1] = HDFS output path
     *             (must not already exist)
     * @return 0 on success, 1 on failure
     * @throws Exception on job setup or submission errors
     */
    public int run(String[] args) throws Exception {
        // Configuration injected by ToolRunner (includes -D overrides).
        Configuration conf = super.getConf();

        Job job = Job.getInstance(conf, this.getClass().getSimpleName());
        job.setJarByClass(ProvinceCountMapReduce.class);

        // Input
        FileInputFormat.addInputPath(job, new Path(args[0]));

        // Mapper
        job.setMapperClass(WordCountMapper.class);
        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Reducer
        job.setReducerClass(WordCountReduce.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);

        // Output
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        // Submit and wait, printing progress to the console.
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // BUG FIX: the original unconditionally overwrote args with the demo
        // paths, so command-line arguments were always ignored. Fall back to
        // the demo paths only when the caller supplies none.
        if (args.length < 2) {
            args = new String[] {
                "hdfs://Hadoop-senior02.beifeng.com:8020/input/2015082818",
                "hdfs://Hadoop-senior02.beifeng.com:8020/output15/"
            };
        }
        Configuration conf = new Configuration();
        // Compress intermediate map output to cut shuffle traffic.
        conf.set("mapreduce.map.output.compress", "true");
        int status = ToolRunner.run(conf, new ProvinceCountMapReduce(), args);
        System.exit(status);
    }
}

三、运行结果

1)查看结果的命令:bin/hdfs dfs -text /output15/par*

2)运行结果:

1 3527
2 1672
3 511
4 325
5 776
6 661
7 95
8 80
9 183
10 93
11 135
12 289
13 264
14 374
15 163
16 419
17 306
18 272
19 226
20 2861
21 124
22 38
23 96
24 100
25 20
26 157
27 49
28 21
29 85
30 42
32 173

以上所述是小编给大家介绍的Java代码统计网站中不同省份用户的访问数的相关介绍,希望对大家有所帮助,在此小编也非常感谢大家对网站的支持!

声明:本页内容来源网络,仅供用户参考;我单位不保证亦不表示资料全面及准确无误,也不保证亦不表示这些资料为最新信息,如因任何原因,本网内容或者用户因倚赖本网内容造成任何损失或损害,我单位将不会负任何法律责任。如涉及版权问题,请提交至online#300.cn邮箱联系删除。

相关文章