A Raspberry Pi Hadoop cluster is built from a number of components and open-source frameworks, which makes it quite flexible and modular. Before diving deeper into Hadoop, it is easier to view it as two main parts: the data storage (HDFS) and the data processing (MapReduce).
# apt-get update
# apt-get upgrade -y
# rpi-update
# vi /etc/dphys-swapfile
CONF_SWAPSIZE=512
# dphys-swapfile setup
# dphys-swapfile swapon
# java -version
# addgroup hadoop
# adduser --ingroup hadoop hduser
# adduser hduser sudo
# su - hduser
# mkdir .ssh
# ssh-keygen -t rsa -C hduser@JSHOHadoop01
# cat .ssh/id_rsa.pub | ssh hduser@JSHOHadoop01 'cat >> .ssh/authorized_keys'
# su - hduser
# ssh JSHOHadoop01
# su - root
# wget http://apache.mirrors.spacedump.net/hadoop/core/hadoop-1.2.1/hadoop-1.2.1.tar.gz
# tar -xvzf hadoop-1.2.1.tar.gz -C /opt/
# cd /opt
# mv hadoop-1.2.1 hadoop
# chown -R hduser:hadoop hadoop
# vi /etc/bash.bashrc
export JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")
export HADOOP_INSTALL=/opt/hadoop
export PATH=$PATH:$HADOOP_INSTALL/bin
# su - hduser
# hadoop version
# vi /opt/hadoop/conf/hadoop-env.sh
export JAVA_HOME=$(readlink -f /usr/bin/java | sed "s:bin/java::")
export HADOOP_HEAPSIZE=250
export HADOOP_DATANODE_OPTS="-Dcom.sun.management.jmxremote $HADOOP_DATANODE_OPTS -client"
Single Node
# vi /opt/hadoop/conf/core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<value>/hdfs/tmp</value>
</property>
<property>
<name>fs.default.name</name>
<value>hdfs://localhost:54310</value>
</property>
</configuration>
# vi /opt/hadoop/conf/mapred-site.xml
<configuration>
<property>
<name>mapred.job.tracker</name>
<value>localhost:54311</value>
</property>
</configuration>
# vi /opt/hadoop/conf/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
# su - root
# mkdir -p /hdfs/tmp
# chown hduser:hadoop /hdfs/tmp
# chmod 750 /hdfs/tmp
# su - hduser
# hadoop namenode -format
# su – hduser
# /opt/hadoop/bin/start-dfs.sh
# /opt/hadoop/bin/start-mapred.sh
# jps
# hadoop jar /opt/hadoop/hadoop-examples-1.2.1.jar wordcount /license.txt /license-out.txt
Multiple Node