> Linux集群 > Hadoop >

Hadoop 1.0.3 在CentOS 6.2上安装过程

//安装SSH 

[root@localhost /]# yum install openssh-server openssh-clients

 

//生成密钥 

 
 
[root@localhost /]# ssh-keygen 
(可以一路回车)
生成下面两个文件:
/root/.ssh/id_rsa
/root/.ssh/id_rsa.pub

 

[root@localhost .ssh]# cd /root/.ssh/ 

 

//实际情况是把公钥复制到另外一台机器上,并且写入到另外一台机器上的authorized_keys文件中 

 
[root@localhost .ssh]# cat ./id_rsa.pub >> ./authorized_keys

 

 
 
[root@localhost .ssh]# cd /home

(另外一个更简单的复制方法是使用 ssh-copy-id -i ~/.ssh/id_rsa.pub root@192.168.1.201)

 

//配置JDK环境变量 

 
[root@localhost opt]# vi /etc/profile

 

 
export JAVA_HOME=/opt/jdk1.6.0_31
export PATH=$JAVA_HOME/bin:$PATH:.

//使配置生效

 
[root@localhost opt]# source /etc/profile

 

//安装Hadoop 1.0.3 

 
 
[root@localhost opt]# rpm -i hadoop-1.0.3-1.x86_64.rpm

 

//查看安装后的Hadoop版本号信息

 
[root@localhost opt]# hadoop version

 

(如果报错,请检查 hadoop-env.sh 中的java路径配置是否正确)

 
 
修改hadoop配置文件(/etc/hadoop)

[root@localhost hadoop]# cd /etc/hadoop  

 
[root@localhost hadoop]# vi hadoop-env.sh

 

 
export JAVA_HOME=/opt/jdk1.6.0_31

 

 

 
[root@localhost hadoop]# vi core-site.xml

 

 
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://192.168.1.101:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/hadoop</value>
</property>
</configuration>

 

 
 
[root@localhost hadoop]# vi hdfs-site.xml

 

 
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>

 

 

 
[root@localhost hadoop]# vi mapred-site.xml

 

 
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>192.168.1.101:9001</value>
</property>
</configuration>

 

//格式化文件系统 

 
 
[root@localhost opt]# hadoop namenode -format

 

 

//启动Hadoop相关的所有服务 (/usr/sbin)
[root@localhost sbin]# start-all.sh
或 
[root@localhost opt]# /usr/sbin/start-all.sh
 

 

(如果没有执行权限,需要将/usr/sbin目录下的相关sh文件设置执行权限)
 
说明:
 
start-all.sh
stop-all.sh
start-dfs.sh
stop-dfs.sh
start-mapred.sh
stop-mapred.sh
slaves.sh
 

 

 //jps查看已经启动的服务进程信息

 
 
[root@localhost hadoop]# jps

 

 
5131 NameNode
5242 DataNode
5361 SecondaryNameNode
5583 TaskTracker
5463 JobTracker
6714 Jps
 

 

防火墙需要开放的端口:
9000

9001  

50010

 

 

 
(访问 http://192.168.1.101:50070  http://192.168.1.101:50030)
[root@localhost hadoop]# hadoop dfsadmin -report

 

 

 
为运行例子 wordcount 作准备
[root@localhost opt]# hadoop fs -mkdir input

 

 
[root@localhost opt]# echo "Hello World Bye World" > file01
[root@localhost opt]# echo "Hello Hadoop Goodbye Hadoop" > file02

 

 
[root@localhost opt]# hadoop fs -copyFromLocal ./file0* input

 

 

 
运行例子 wordcount
[root@localhost opt]# hadoop jar /usr/share/hadoop/hadoop-examples-1.0.3.jar wordcount input output

 

 
12/08/11 12:00:30 INFO input.FileInputFormat: Total input paths to process : 2
12/08/11 12:00:30 INFO util.NativeCodeLoader: Loaded the native-hadoop library
12/08/11 12:00:30 WARN snappy.LoadSnappy: Snappy native library not loaded
12/08/11 12:00:31 INFO mapred.JobClient: Running job: job_201208111137_0001
12/08/11 12:00:32 INFO mapred.JobClient:  map 0% reduce 0%
12/08/11 12:01:05 INFO mapred.JobClient:  map 100% reduce 0%
12/08/11 12:01:20 INFO mapred.JobClient:  map 100% reduce 100%
12/08/11 12:01:25 INFO mapred.JobClient: Job complete: job_201208111137_0001
12/08/11 12:01:25 INFO mapred.JobClient: Counters: 29
12/08/11 12:01:25 INFO mapred.JobClient:   Job Counters 
12/08/11 12:01:25 INFO mapred.JobClient:     Launched reduce tasks=1
12/08/11 12:01:25 INFO mapred.JobClient:     SLOTS_MILLIS_MAPS=49499
12/08/11 12:01:25 INFO mapred.JobClient:     Total time spent by all reduces waiting after reserving slots (ms)=0
12/08/11 12:01:25 INFO mapred.JobClient:     Total time spent by all maps waiting after reserving slots (ms)=0
12/08/11 12:01:25 INFO mapred.JobClient:     Launched map tasks=2
12/08/11 12:01:25 INFO mapred.JobClient:     Data-local map tasks=2
12/08/11 12:01:25 INFO mapred.JobClient:     SLOTS_MILLIS_REDUCES=12839
12/08/11 12:01:25 INFO mapred.JobClient:   File Output Format Counters 
12/08/11 12:01:25 INFO mapred.JobClient:     Bytes Written=41
12/08/11 12:01:25 INFO mapred.JobClient:   FileSystemCounters
12/08/11 12:01:25 INFO mapred.JobClient:     FILE_BYTES_READ=79
12/08/11 12:01:25 INFO mapred.JobClient:     HDFS_BYTES_READ=276
12/08/11 12:01:25 INFO mapred.JobClient:     FILE_BYTES_WRITTEN=64705
12/08/11 12:01:25 INFO mapred.JobClient:     HDFS_BYTES_WRITTEN=41
12/08/11 12:01:25 INFO mapred.JobClient:   File Input Format Counters 
12/08/11 12:01:25 INFO mapred.JobClient:     Bytes Read=50
12/08/11 12:01:25 INFO mapred.JobClient:   Map-Reduce Framework
12/08/11 12:01:25 INFO mapred.JobClient:     Map output materialized bytes=85
12/08/11 12:01:25 INFO mapred.JobClient:     Map input records=2
12/08/11 12:01:25 INFO mapred.JobClient:     Reduce shuffle bytes=85
12/08/11 12:01:25 INFO mapred.JobClient:     Spilled Records=12
12/08/11 12:01:25 INFO mapred.JobClient:     Map output bytes=82
12/08/11 12:01:25 INFO mapred.JobClient:     CPU time spent (ms)=4770
12/08/11 12:01:25 INFO mapred.JobClient:     Total committed heap usage (bytes)=246751232
12/08/11 12:01:25 INFO mapred.JobClient:     Combine input records=8
12/08/11 12:01:25 INFO mapred.JobClient:     SPLIT_RAW_BYTES=226
12/08/11 12:01:25 INFO mapred.JobClient:     Reduce input records=6
12/08/11 12:01:25 INFO mapred.JobClient:     Reduce input groups=5
12/08/11 12:01:25 INFO mapred.JobClient:     Combine output records=6
12/08/11 12:01:25 INFO mapred.JobClient:     Physical memory (bytes) snapshot=391634944
12/08/11 12:01:25 INFO mapred.JobClient:     Reduce output records=5
12/08/11 12:01:25 INFO mapred.JobClient:     Virtual memory (bytes) snapshot=3159781376
12/08/11 12:01:25 INFO mapred.JobClient:     Map output records=8

 

 

 
//查看统计结果
[root@localhost opt]# hadoop fs -cat output/part-r-00000

 

 
Bye	1
Goodbye	1
Hadoop	2
Hello	2
World	2

 

 

//--------------------------------------- 

作业日志存放目录:

/var/log/hadoop/root/userlogs/

 

 

//---------------------------------------
安装 hadoop-1.0.3-1 后,存放的目录有:
 
/etc/hadoop
/var/run/hadoop
/var/log/hadoop
/usr/share/hadoop
/usr/share/doc/hadoop
/usr/etc/hadoop
/usr/bin/hadoop(文件)
/usr/include/hadoop
 
 
 
(责任编辑:IT)