计算某小学学生的期末考试成绩的平均分
文章目录
- 一、准备数据
- 1.学生的三科成绩
- 二、编写程序
- 1.完整代码
- 2.启动Hadoop
- 3.上传3个txt文件
- 4.在eclipse中运行
- 5.查看最终结果
- 结束
语文成绩:chinese.txt如下:
Stout 91 Wyatt 91 Becker 88 Huber 77 Cok 79 Rocha 64 Cohen 87 Peterson 78 Brooks 96 Clayton 74 Adams 62 Hendricks 77 Dyer 94 Savage 77 Callahan 91 Dalton 90 Hickman 84 Dunn 85 Pierce 54 Dorsey 52 Daniel 76 Martin 98 Larsen 88 White 76 Emerson 86 Fox 50 Ashley 93 Wilder 85 Clay 67 Garrett 57 Hicks 55 Holcomb 74 Welch 89 Ferguson 93 Richard 57 Boyd 72 Johnson 61 Mccarty 61 Monroe 96 Contreras 92 Buckner 62 Wright 90 Walton 85 Houston 59 Powell 76 George 91 Dyer 53 Holt 69 Tran 57
数学成绩:math.txt如下:
Stout 51 Wyatt 95 Becker 68 Huber 70 Cok 78 Rocha 99 Cohen 91 Peterson 98 Brooks 81 Clayton 97 Adams 87 Hendricks 83 Dyer 97 Savage 89 Callahan 73 Dalton 95 Hickman 55 Dunn 78 Pierce 79 Dorsey 90 Daniel 60 Martin 65 Larsen 58 White 75 Emerson 77 Fox 55 Ashley 53 Wilder 87 Clay 82 Garrett 59 Hicks 99 Holcomb 56 Welch 64 Ferguson 54 Richard 74 Boyd 82 Johnson 72 Mccarty 99 Monroe 61 Contreras 64 Buckner 53 Wright 99 Walton 99 Houston 57 Powell 96 George 74 Dyer 100 Holt 54 Tran 93
英语成绩:english.txt如下:
Stout 63 Wyatt 85 Becker 89 Huber 56 Cok 86 Rocha 58 Cohen 63 Peterson 53 Brooks 64 Clayton 93 Adams 80 Hendricks 58 Dyer 75 Savage 98 Callahan 82 Dalton 81 Hickman 87 Dunn 98 Pierce 51 Dorsey 64 Daniel 78 Martin 57 Larsen 84 White 73 Emerson 80 Fox 86 Ashley 73 Wilder 54 Clay 100 Garrett 54 Hicks 60 Holcomb 67 Welch 58 Ferguson 82 Richard 70 Boyd 75 Johnson 93 Mccarty 92 Monroe 91 Contreras 97 Buckner 68 Wright 86 Walton 59 Houston 89 Powell 58 George 78 Dyer 61 Holt 86 Tran 84

二、编写程序
我是用的eclipse,先前已经和虚拟机连接过了
1.完整代码

package test;
import java.io.IOException;
import java.util.Iterator;
import java.util.StringTokenizer;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class AverageScore{
public static class Map extends
Mapper {
// 实现map函数
public void map(LongWritable key, Text value, Context context)
throws IOException, InterruptedException {
// 将输入的纯文本文件的数据转化成String
String line = value.toString();
// 将输入的数据首先按行进行分割
StringTokenizer tokenizerArticle = new StringTokenizer(line, "n");
// 分别对每一行进行处理
while (tokenizerArticle.hasMoreElements()) {
// 每行按空格划分
StringTokenizer tokenizerLine = new StringTokenizer(tokenizerArticle.nextToken());
String strName = tokenizerLine.nextToken();// 学生姓名部分
String strScore = tokenizerLine.nextToken();// 成绩部分
Text name = new Text(strName);
int scoreInt = Integer.parseInt(strScore);
// 输出姓名和成绩
context.write(name, new IntWritable(scoreInt));
}
}
public static class Reduce extends
Reducer {
// 实现reduce函数
public void reduce(Text key, Iterable values,
Context context) throws IOException, InterruptedException {
int sum = 0;
int count = 0;
Iterator iterator = values.iterator();
while (iterator.hasNext()) {
sum += iterator.next().get();// 计算总分
count++;// 统计总的科目数
}
int average = (int) sum / count;// 计算平均成绩
context.write(key, new IntWritable(average));
}
public static void main(String[] args) throws Exception {
Configuration conf = new Configuration();
// 一个hdfs文件系统中的 输入目录 及 输出目录
String[] ioArgs = new String[] { "hdfs://master:8020/user/averagescore","hdfs://master:8020/user/root/avgscore" };
String[] otherArgs = new GenericOptionsParser(conf, ioArgs).getRemainingArgs();
if (otherArgs.length != 2) {
System.err.println("Usage: Score Average ");
System.exit(2);
}
Job job = new Job(conf, "Score Average");
job.setJarByClass(AverageScore.class);
// 设置Map、Combine和Reduce处理类
job.setMapperClass(Map.class);
job.setCombinerClass(Reduce.class);
job.setReducerClass(Reduce.class);
// 设置输出类型
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// 将输入的数据集分割成小数据块splites,提供一个RecordReder的实现
job.setInputFormatClass(TextInputFormat.class);
// 提供一个RecordWriter的实现,负责数据输出
job.setOutputFormatClass(TextOutputFormat.class);
// 设置输入和输出目录
FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
}
}
2.启动Hadoop
我这里是用的启动Hadoop的脚本
我这里习惯是把文件从windows里拖入虚拟机的Downloads中
这主要是我个人的一个流程,有什么问题的话欢迎大家积极讨论



