Install Java
Remove the preinstalled Java
rpm -qa|grep java
yum -y remove xxx
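If rpm lists several packages, they can be removed in one pass; a sketch (check the match list first, since grep java can also catch packages you want to keep):
rpm -qa | grep java | xargs yum -y remove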
Extract the JDK to /opt and configure the environment variables
vi /etc/profile
Set JAVA_HOME
export JAVA_HOME=/opt/jdk1.8.0_161
Set JRE_HOME
export JRE_HOME=/opt/jdk1.8.0_161/jre
Set PATH
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
Reload the environment
source /etc/profile
Verify
java -version
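If the profile took effect, the report should match the JDK you unpacked, along the lines of:
java version "1.8.0_161"
Java(TM) SE Runtime Environment (build ...)
...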
Install Hadoop
Preparation
Extract the archive
tar -zxvf hadoop-2.7.5.tar.gz
Enter the directory
cd hadoop-2.7.5
Configure the environment
vi etc/hadoop/hadoop-env.sh
Append this line at the end
export JAVA_HOME=/opt/jdk1.8.0_161
Test; running the command below with no arguments should print usage/help information
bin/hadoop
Pseudo-distributed setup
Configure Hadoop for startup
vi etc/hadoop/core-site.xml
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://localhost:9000</value>
  </property>
</configuration>
vi etc/hadoop/hdfs-site.xml
<configuration>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
Passwordless SSH login
Generate a key pair
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
Append the public key to authorized_keys
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
Restrict the file permissions
chmod 0600 ~/.ssh/authorized_keys
Test; this should log you in without a password prompt
ssh localhost
Start HDFS
Format the filesystem
bin/hdfs namenode -format
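The format step prints a lot of logging; scan it for a success line like the one below (the storage directory depends on your configuration; /tmp/hadoop-<user>/dfs/name is the default):
INFO common.Storage: Storage directory /tmp/hadoop-root/dfs/name has been successfully formatted.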
Start the HDFS daemons
sbin/start-dfs.sh
Verify
Open http://localhost:50070/ in a browser
or check from the command line
jps
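In a pseudo-distributed setup, jps should list the three HDFS daemons plus Jps itself; the pids below are illustrative:
4083 NameNode
4217 DataNode
4395 SecondaryNameNode
4500 Jps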
Start YARN
Configure mapred-site.xml
Copy the template and edit it
cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
vi etc/hadoop/mapred-site.xml
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
</configuration>
Configure yarn-site.xml
vi etc/hadoop/yarn-site.xml
<configuration>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
Start the YARN daemons
sbin/start-yarn.sh
Verify
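The ResourceManager serves a web UI on port 8088 by default, and jps should now also list the YARN daemons:
Browse http://localhost:8088/
jps
# in addition to the HDFS processes, expect ResourceManager and NodeManager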
Run a job over a remote connection
Save the input files to HDFS
Create a directory
bin/hdfs dfs -mkdir /sort
Upload the sort directory from the current working directory
The sort directory contains two files
file1
5
6
4
7
8
9
22
524
8783
876
546512
546541
57
755
4781
file2
89
5
412
589
84
4841
11
5532
11
881
12
111
2222
45
21
123
5
8238
55953
bin/hdfs dfs -put sort /sort/sort_input
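To confirm the upload before running the job, list the directory and spot-check a file:
bin/hdfs dfs -ls /sort/sort_input
bin/hdfs dfs -cat /sort/sort_input/file1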
Prepare the program
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Partitioner;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class Sort {

    public static class Map extends
            Mapper<Object, Text, IntWritable, IntWritable> {
        private static IntWritable data = new IntWritable();

        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {
            // Each input line holds one integer; emit it as the key so the
            // framework sorts the records by value.
            String line = value.toString();
            data.set(Integer.parseInt(line));
            context.write(data, new IntWritable(1));
        }
    }

    public static class Reduce extends
            Reducer<IntWritable, IntWritable, IntWritable, IntWritable> {
        private static IntWritable linenum = new IntWritable(1);

        @Override
        public void reduce(IntWritable key, Iterable<IntWritable> values,
                Context context) throws IOException, InterruptedException {
            // Keys arrive in sorted order; write one output line per
            // occurrence, numbering the lines as we go.
            for (IntWritable val : values) {
                context.write(linenum, key);
                linenum = new IntWritable(linenum.get() + 1);
            }
        }
    }

    public static class Partition extends Partitioner<IntWritable, IntWritable> {
        /**
         * Assign each number to a partition by range, so that partition i
         * holds smaller numbers than partition i+1 and the concatenated
         * reducer outputs are globally sorted.
         * @param key the number being sorted
         * @param value the count (unused)
         * @param numPartitions the number of partitions
         * @return the partition id
         */
        @Override
        public int getPartition(IntWritable key, IntWritable value,
                int numPartitions) {
            // Upper bound on the input values; it must be at least as large
            // as the biggest key (the sample data tops out at 546541).
            int maxNumber = 546541;
            int bound = maxNumber / numPartitions + 1;
            int keyNumber = key.get();
            for (int i = 0; i < numPartitions; i++) {
                if (keyNumber < bound * (i + 1)) {
                    return i;
                }
            }
            // Anything above maxNumber lands in the last partition.
            return numPartitions - 1;
        }
    }

    public static void main(String[] args) throws Exception {
        String inputPath = "hdfs://127.0.0.1:9000/sort/sort_input";
        String outputPath = "hdfs://127.0.0.1:9000/sort/sort_output";
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "Sort");
        job.setJarByClass(Sort.class);
        job.setMapperClass(Map.class);
        job.setPartitionerClass(Partition.class);
        job.setReducerClass(Reduce.class);
        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(IntWritable.class);
        FileInputFormat.addInputPath(job, new Path(inputPath));
        FileOutputFormat.setOutputPath(job, new Path(outputPath));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
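One way to compile and run the job from the hadoop-2.7.5 directory (a sketch: the file, class, and jar names Sort.java, Sort, and sort.jar are taken from the code above; bin/hadoop classpath supplies the Hadoop jars):
mkdir -p classes
javac -cp "$(bin/hadoop classpath)" -d classes Sort.java
jar cf sort.jar -C classes .
bin/hadoop jar sort.jar Sort
When it finishes, read back the sorted result (part-r-00000 is the output of the single reducer):
bin/hdfs dfs -cat /sort/sort_output/part-r-00000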
Pitfalls
The cluster fails to start
Does not contain a valid host:port authority: _ 9000
My /etc/hosts entry was
127.0.0.1 hadoop_node1
vim /etc/hosts
Fix: remove the underscore (Hadoop cannot parse a host:port authority whose hostname contains an underscore)
127.0.0.1 hadoopnode1
Local Hadoop development: connection refused on port 9000
http://blog.csdn.net/yjc_1111/article/details/53817750
vi copy command
Task: copy the contents of lines 9 through 15 so that the copy starts at line 16
:9,15 copy 15
or
:9,15 co 15
Along the same lines:
:9,15 move 16 or :9,15 m 16 moves lines 9 through 15 to after line 16
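The same edit can be scripted non-interactively with ex (vi's line-editor mode); a sketch assuming a file named notes.txt:
ex -s notes.txt <<'EOF'
9,15 copy 15
wq
EOF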