S-JIS[2012-06-08] �ύX����

Hadoop CDH4

CDH4(Cloudera's Distribution including Apache Hadoop v4)のインストールについて。


CDH4の概要

CDH(Cloudera's Distribution including Apache Hadoop)は、Hadoopのディストリビューション。
CDH4��Hadoop2.0�i��Hadoop0.23�j���x�[�X�ƂȂ��Ă���B

CDH4��Hadoop��MapReduce v1�iMRv1�j��YARN�iMRv2�j�̓��ނ���B
MRv1�͏]���Ɠ�����JobTracker��TaskTracker���g�p���ē��삷��B
YARN�͐V���������ŁAResourceManager�ENodeManager�i��ApplicationMaster/Container�j���g�p���ē��삷��B
�iHadoop0.23�ł̓C���X�g�[���͈��ނŁA�p�����[�^�[mapreduce.framework.name�ɂ���Ăǂ���œ��������ω������Ă������ACDH4�ł̓C���X�g�[�����̂𕪂����͗l�j

MRv1�ł�YARN�ł�HDFS�iNameNode��DataNode�j���g���͓̂����B
�܂��A�N���C�A���g�p���C�u�����[�����ʁB

�������ǂ���̊‹��œ������������ɂ���āAMRv1��YARN�̂ǂ��炩���C���X�g�[������B


CentOSへのインストール

CentOSへは、yumコマンドを使用してインストールすることが出来る。

  1. JDK1.6���C���X�g�[���������B
  2. yum���|�W�g���[��Cloudera�̃T�C�g��lj�����B
    �ǂ�OS�̏ꍇ�ɂǂ�repo�t�@�C�����_�E�����[�h����΂悢���́ACDH Version and Packaging Information�ɍڂ��Ă���B
    CentOS5 http://archive.cloudera.com/cdh4/redhat/5/x86_64/cdh/cloudera-cdh4.repo
    CentOS6(32bit) http://archive.cloudera.com/cdh4/redhat/6/i386/cdh/cloudera-cdh4.repo
    CentOS6(64bit) http://archive.cloudera.com/cdh4/redhat/6/x86_64/cdh/cloudera-cdh4.repo
    # cd /etc/yum.repos.d/
    # wget http://archive.cloudera.com/cdh4/redhat/5/x86_64/cdh/cloudera-cdh4.repo
    # yum update yum
    # yum search hadoop

MRv1のインストール

MRv1��CDH4���g���ɂ́A�ȉ��̂��̂��C���X�g�[������B
�i�J���‹��i�P�Ɗ‹��i�X�^���h�A���[���‹��j�j�����ꍇ�́A�S�������}�V���ɃC���X�g�[������΂悢�B������SecondaryNameNode�͕s�v�j

�R���|�[�l���g �R�}���h ���l
JobTracker
yum install hadoop-0.20-mapreduce-jobtracker
JobTrackerを稼動させるマシンにインストールする。
NameNode
yum install hadoop-hdfs-namenode
NameNode���ғ�������}�V���ɃC���X�g�[������B
Secondary NameNode
yum install hadoop-hdfs-secondarynamenode
Secondary NameNode���ғ�������}�V���ɃC���X�g�[������B
TaskTracker
DataNode
yum install hadoop-0.20-mapreduce-tasktracker hadoop-hdfs-datanode
�e�X���[�u�m�[�h�ɃC���X�g�[������B
Client
yum install hadoop-client
Hadoop�̃N���C�A���g�}�V���ɃC���X�g�[������B

YARNのインストール

YARN��CDH4���g���ɂ́A�ȉ��̂��̂��C���X�g�[������B
�i�J���‹��i�P�Ɗ‹��i�X�^���h�A���[���‹��j�j�����ꍇ�́A�S�������}�V���ɃC���X�g�[������΂悢�B������SecondaryNameNode�͕s�v�j

�R���|�[�l���g �R�}���h ���l
Resource Manager
yum install hadoop-yarn-resourcemanager
Resource Manager�i�]����JobTracker�����j���ғ�������}�V���ɃC���X�g�[������B
NameNode
yum install hadoop-hdfs-namenode
NameNode���ғ�������}�V���ɃC���X�g�[������B
Secondary NameNode
yum install hadoop-hdfs-secondarynamenode
Secondary NameNode���ғ�������}�V���ɃC���X�g�[������B
NodeManager
DataNode
yum install hadoop-yarn-nodemanager hadoop-hdfs-datanode hadoop-mapreduce
�e�X���[�u�m�[�h�ɃC���X�g�[������B
�i�]����TaskTracker�����j
HistoryServer
YarnProxyServer
yum install hadoop-mapreduce-historyserver hadoop-yarn-proxyserver
�ǂ����̃T�[�o�[�i1��̂݁j�ɃC���X�g�[������B
Client
yum install hadoop-client
Hadoop�̃N���C�A���g�}�V���ɃC���X�g�[������B

�‹��ݒ�

�]����Hadoop�ł͊‹��ϐ�HADOOP_HOME��Hadoop���C���X�g�[�����Ă���ꏊ�������Ă����B
CDH4�ł́AHADOOP_HOME�͎g���Ȃ��Ȃ�A�����ƐF�X�Ȋ‹��ϐ��ɕ������Ă���B

�‹��ϐ� �f�t�H���g�l�i�w���Ă���ꏊ�j ���l
HADOOP_CONF_DIR /etc/hadoop/conf �ݒ�t�@�C��
HADOOP_COMMON_HOME /usr/lib/hadoop Hadoop����
HADOOP_HDFS_HOME /usr/lib/hadoop-hdfs HDFS�֘A
HADOOP_MAPRED_HOME /usr/lib/hadoop-mapreduce MapReduce�֘A
YARN_HOME /usr/lib/hadoop-yarn �@
JSVC_HOME /usr/libexec/bigtop-utils �@

�����̊‹��ϐ���/etc/default/hadoop�Ƃ����t�@�C���i�V�F���X�N���v�g�j�Őݒ�ł���悤�ɂȂ��Ă���̂ŁA.bash_profile�ӂ�Ŏ��s����悤�ɂ��Ă����΂悢�B

$HOME/.bash_profile��/etc/bashrc���ւ̒lj��F

. /etc/default/hadoop

$HADOOP_CONF_DIR�̎w���ꏊ�i/etc/hadoop/conf�j��CDH3�̎��Ɠ��l���V���{���b�N�����N�ƂȂ��Ă���Aalternatives�R�}���h�Ő؂�ւ�����B

# alternatives --display hadoop-conf
hadoop-conf -�X�e�[�^�X�͎����ł��B
�����N�͌��� /etc/hadoop/conf.empty ���w���Ă��܂��B
/etc/hadoop/conf.empty - �D�捀�� 10
���݂́u�œK�v�o�[�W������ /etc/hadoop/conf.empty �ł��B

�܂��Ahdfs�Emapred�Eyarn�Ƃ���UNIX���[�U�[�������B


����m�F

CDH4�ł��]���Ɠ����T���v�����p�ӂ���Ă���̂ŁA����m�F�Ɏg����B

$ hadoop version
Hadoop 2.0.0-cdh4.0.0
$ cd /tmp
$ hadoop jar $HADOOP_MAPRED_HOME/hadoop-mapreduce-examples.jar pi 4 1000

hadoop�R�}���h��/usr/bin�̉��ɍ���Ă���̂ŁA��΃p�X���w�肵�Ȃ��Ă����s�ł���B


�\�[�X�̃_�E�����[�h

CDH4�ł�yum�Ń\�[�X���_�E�����[�h�ł��Ȃ��悤�Ȃ̂ŁA�ʓr�A�[�J�C�u���_�E�����[�h���Ă���B

  1. Cloudera�̃g�b�v�y�[�W�̏㕔�̃��j���[�o�[���ۂ��Ƃ��납��Downloads���N���b�N���ADownloads�y�[�W���J���B
  2. CDH4�́uDownloads and Documentation�v���N���b�N���ACDH Downloads�y�[�W���J���B
  3. Tarball�́uDownload�v���N���b�N���ACDH4 Downloadable Tarballs�y�[�W���J���B
  4. �F�X�ȃR���|�[�l���g�̈ꗗ������̂ŁAhadoop-2.0.0��Download��̃A�C�R�����N���b�N����ƃA�[�J�C�u�ihadoop-2.0.0-cdh4.0.0.tar.gz�j���_�E�����[�h�����B
  5. �K���ȏꏊ�ɉ𓀂���ƁAhadoop-2.0.0-cdh4.0.0/src�Ƀ\�[�X������B�iEclipse�̃\�[�X�̓Y�t�ł͂��̃f�B���N�g���[���w�肷��΂悢�j

HBase��Hive�EPig�ESqoop�����_�E�����[�h�ł���悤���B


WordCount�T���v��

MapReduce�v���O�������̂͏]���̂��̂Ƃقڕς��Ȃ��B
�]���iHadoop0.21�̐VAPI�j��WordCount�Ƃ̈Ⴂ�́AJob�̃C���X�^���X�����̕��@��getInstance()���g���悤�ɂȂ��������B

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
/**
 * Word-count MapReduce job written against the new
 * {@code org.apache.hadoop.mapreduce} API.
 *
 * <p>Each input line is split on whitespace; the job emits one
 * {@code (word, count)} pair per distinct word.
 *
 * <p>Usage: {@code WordCount [generic options] <input path> <output path>}
 */
public class WordCount extends Configured implements Tool {
	/** Exit status returned by {@link #run(String[])} when the path arguments are missing. */
	private static final int EXIT_USAGE = 2;

	/**
	 * Command-line entry point. Delegates to {@link ToolRunner} and exits the
	 * JVM with the status returned by {@link #run(String[])}.
	 *
	 * @param args command-line arguments
	 * @throws Exception whatever {@link ToolRunner#run} propagates
	 */
	public static void main(String[] args) throws Exception {
		int r = ToolRunner.run(new WordCount(), args);
		System.exit(r);
	}

	/** Mapper: emits {@code (token, 1)} for every whitespace-separated token of a line. */
	public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {
		private final static IntWritable one = new IntWritable(1);
		// Reused for every token so that no Text object is allocated per record.
		private Text word = new Text();

		/**
		 * @param key     input key supplied by the input format (not used here)
		 * @param value   one line of input text
		 * @param context sink for the emitted {@code (word, 1)} pairs
		 */
		@Override
		protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
			String line = value.toString();
			StringTokenizer tokenizer = new StringTokenizer(line);
			while (tokenizer.hasMoreTokens()) {
				word.set(tokenizer.nextToken());
				context.write(word, one);
			}
		}
	}

	/**
	 * Reducer: sums the counts received for one word. Because it only adds
	 * integers it is also registered as the combiner in {@link WordCount#run}.
	 */
	public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {
		// Reused output holder; avoids allocating a new IntWritable for every key.
		private final IntWritable result = new IntWritable();

		/**
		 * @param key     the word
		 * @param values  partial counts for that word
		 * @param context sink for the emitted {@code (word, total)} pair
		 */
		@Override
		protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
			int sum = 0;
			for (IntWritable value : values) {
				sum += value.get();
			}
			result.set(sum);
			context.write(key, result);
		}
	}

	/**
	 * Configures and runs the job, blocking until it completes.
	 *
	 * @param args {@code args[0]} is the input path, {@code args[1]} the output
	 *             path; any further arguments are ignored
	 * @return 0 if the job succeeded, 1 if it failed, {@value #EXIT_USAGE} if the
	 *         two path arguments were not supplied
	 * @throws Exception if job setup or submission fails
	 */
	@Override
	public int run(String[] args) throws Exception {
		// Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
		if (args == null || args.length < 2) {
			System.err.println("Usage: WordCount [generic options] <input path> <output path>");
			ToolRunner.printGenericCommandUsage(System.err);
			return EXIT_USAGE;
		}

		// Job.getInstance() is the factory used here instead of the Job constructor.
		Job job = Job.getInstance(getConf(), "wordcount");
		job.setJarByClass(getClass());

		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);

		job.setMapperClass(Map.class);
		job.setCombinerClass(Reduce.class);
		job.setReducerClass(Reduce.class);

		job.setInputFormatClass(TextInputFormat.class);
		job.setOutputFormatClass(TextOutputFormat.class);

		FileInputFormat.setInputPaths(job, new Path(args[0]));
		FileOutputFormat.setOutputPath(job, new Path(args[1]));

		boolean success = job.waitForCompletion(true);
		return success ? 0 : 1;
	}
}

�R���p�C������ׂɂ͈ȉ���jar�t�@�C����CLASSPATH�iEclipse�̃r���h�p�X�j�ɉ�����B

jar�t�@�C�� ���l
$HADOOP_COMMON_HOME/hadoop-common.jar hadoop.conf��hadoop.io�Ehadoop.util���̋��ʃN���X
$HADOOP_MAPRED_HOME/hadoop-mapreduce-client-core.jar hadoop.mapreduce�n�iJob�EMapper�EReducer��InputFormat�EOutputFormat�j�N���X

�[�����U�‹��iCentOS�j

CentOS�ŋ[�����U�‹����\�z���Ă݂�B
�i�[�����U�‹���1��̃}�V����őS�Ẵf�[�����𓮂������[�h�j

yum�R�}���h�ŋ[�����U�‹��̐ݒ�t�@�C�����C���X�g�[�����邱�Ƃ��o����B�irepo�t�@�C����P�Ɗ‹����W���[���̓C���X�g�[���ς݂ł���O��j
������MRv1��YARN�ł̓C���X�g�[������A�[�J�C�u���قȂ�B
�����𓯎��ɃC���X�g�[������i����������j���Ƃ͏o���Ȃ��̂Œ��ӁB

�Q�l�F Installing CDH4 on a Single Linux Node in Pseudo-distributed Mode


MRv1�̋[�����U���[�h

  1. �[�����U���[�h�̐ݒ���C���X�g�[������B
    # yum install hadoop-0.20-conf-pseudo
    # alternatives --display hadoop-conf
    hadoop-conf -�X�e�[�^�X�͎����ł��B
    �����N�͌��� /etc/hadoop/conf.pseudo.mr1 ���w���Ă��܂��B
    /etc/hadoop/conf.empty - �D�捀�� 10
    /etc/hadoop/conf.pseudo.mr1 - �D�捀�� 30
    ���݂́u�œK�v�o�[�W������ /etc/hadoop/conf.pseudo.mr1 �ł��B
  2. HDFS���t�H�[�}�b�g����B�ihdfs���[�U�[�Ŏ��s����j
    # sudo -u hdfs hdfs namenode -format
  3. HDFS�f�[�������J�n����B
    �iNameNode�ESecondaryNameNode�EDataNode�j
    # for service in /etc/init.d/hadoop-hdfs-*
      do
        $service start
      done
  4. /tmp�f�B���N�g���[���쐬����B�i���������Ă����Ȃ��ƁA��Ńg���u�����o��”\��������炵���j
    # sudo -u hdfs hadoop fs -mkdir /tmp
    # sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
    $ hadoop fs -ls /
  5. MapReduce�p�̃f�B���N�g���[���쐬����B
    # su - hdfs
    $ hadoop fs -mkdir /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
    $ hadoop fs -chmod 1777 /var/lib/hadoop-hdfs/cache/mapred/mapred/staging
    $ hadoop fs -chown -R mapred /var/lib/hadoop-hdfs/cache/mapred
    $ exit
    $ hadoop fs -ls -R /
  6. MapReduce�f�[�������J�n����B
    �iJobTracker�ETaskTracker�j
    # for service in /etc/init.d/hadoop-0.20-mapreduce-*
      do
        $service start
      done
  7. ���[�U�[�p�f�B���N�g���[����������B
    # sudo -u hdfs hadoop fs -mkdir /user/hishidama
    # sudo -u hdfs hadoop fs -chown hishidama /user/hishidama

擬似分散モードが正常に稼動していれば、http://localhost:50030でジョブ一覧、http://localhost:50070でHDFSの状態が見られる。


�T���v�������s���Ă݂�B

$ hadoop jar $HADOOP_MAPRED_HOME/hadoop-mapreduce-examples.jar pi 4 1000
$ vi example.txt
$ hadoop fs -put example.txt
$ hadoop jar $HADOOP_MAPRED_HOME/hadoop-mapreduce-examples.jar wordcount example.txt output
$ hadoop fs -cat output/part-r-00000

����������A�����̊‹��ł́Ahdfs���[�U�[���Ƃ����Ǝ��s�ł������A�����[�U�[���ƃG���[�ɂȂ����B

INFO mapreduce.Cluster: Failed to use org.apache.hadoop.mapred.LocalClientProtocolProvider due to error: Invalid "mapreduce.jobtracker.address" configuration value for LocalJobRunner : "localhost:8021"
ERROR security.UserGroupInformation: PriviledgedActionException as:hishidama (auth:SIMPLE) cause:java.io.IOException: Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.
java.io.IOException: Cannot initialize Cluster. Please check your configuration for mapreduce.framework.name and the correspond server addresses.
	at org.apache.hadoop.mapreduce.Cluster.initialize(Cluster.java:121)
	�`

�F�X�������Ƃ���A���[�U�[�Ƃ͊֌W�Ȃ��AHADOOP_MAPRED_HOME�̐ݒ�L���ɂ���ăG���[�ɂȂ�����Ȃ�Ȃ������肷�鎖�����������B
HADOOP_MAPRED_HOME���󂾂Ɓi�ݒ肳��Ă��Ȃ��Ɓj����ɓ��삷��B�s�v�c�s�v�c�B
hdfs���[�U�[��/etc/default/hadoop�����s����悤�ɂ��ĂȂ������̂ŁA�‹��ϐ����ݒ肳��Ă��Ȃ������̂œ������̂��낤�B�ijar�t�@�C���͊‹��ϐ����g�킸�Ƀt���p�X�Ŏw�肵�Ă����j

$ export HADOOP_MAPRED_HOME=
$ hadoop jar /usr/lib/hadoop-mapreduce/hadoop-mapreduce-examples.jar pi 4 1000

���Ԃ�AHADOOP_MAPRED_HOME�̉��ɂ��鉽�炩�̐ݒ�t�@�C���̉e�����󂯂ă��[�J�����[�h�i�[�����U���[�h�łȂ��j�Ɣ��f����ALocalJobRunner���g�����Ƃ��ăG���[�ɂȂ����̂��낤�B

mapreduce.framework.name��local����LocalJobRunner���I������邪�ALocalJobRunner��mapreduce.jobtracker.address���ulocal�v�łȂ��ƃG���[�ɂȂ�i����͋[�����U���[�h�Ȃ̂ŁA�ulocalhost:8021�v�ɂȂ��Ă���j�B
�Ƃ����`�F�b�N��LocalClientProtocolProvider#create()������Ă���B

�����AHADOOP_MAPRED_HOME�̉��ɂ͐ݒ�t�@�C���ixml�t�@�C���j����������Ȃ��B�s�v�c�s�v�c�B


YARN�̋[�����U���[�h

  1. �[�����U���[�h�̐ݒ���C���X�g�[������B
    # yum install hadoop-conf-pseudo
    # alternatives --display hadoop-conf
    hadoop-conf -�X�e�[�^�X�͎����ł��B
    �����N�͌��� /etc/hadoop/conf.pseudo ���w���Ă��܂��B
    /etc/hadoop/conf.empty - �D�捀�� 10
    /etc/hadoop/conf.pseudo - �D�捀�� 30
    ���݂́u�œK�v�o�[�W������ /etc/hadoop/conf.pseudo �ł��B
  2. HDFS���t�H�[�}�b�g����B�ihdfs���[�U�[�Ŏ��s����j
    # sudo -u hdfs hdfs namenode -format
  3. HDFS�f�[�������J�n����B
    �iNameNode�ESecondaryNameNode�EDataNode�j
    # for service in /etc/init.d/hadoop-hdfs-*
      do
        $service start
      done
  4. /tmp�f�B���N�g���[���쐬����B�i���������Ă����Ȃ��ƁA��Ńg���u�����o��”\��������炵���j
    # sudo -u hdfs hadoop fs -mkdir /tmp
    # sudo -u hdfs hadoop fs -chmod -R 1777 /tmp
    $ hadoop fs -ls /
  5. YARN�p�̃f�B���N�g���[���쐬����B
    # su - hdfs
    $ hadoop fs -mkdir /var/log/hadoop-yarn
    $ hadoop fs -chown yarn:mapred /var/log/hadoop-yarn
    $ hadoop fs -mkdir /tmp/hadoop-yarn/staging
    $ hadoop fs -chmod -R 1777 /tmp/hadoop-yarn/staging
    $ hadoop fs -mkdir /tmp/hadoop-yarn/staging/history/done_intermediate
    $ hadoop fs -chmod -R 1777 /tmp/hadoop-yarn/staging/history/done_intermediate
    $ hadoop fs -chown -R mapred:mapred /tmp/hadoop-yarn/staging
    $ exit
    $ hadoop fs -ls -R /
  6. ���[�U�[�p�f�B���N�g���[����������B
    # sudo -u hdfs hadoop fs -mkdir /user/hishidama
    # sudo -u hdfs hadoop fs -chown hishidama:hishidama /user/hishidama
  7. YARN�f�[�������J�n����B
    # /etc/init.d/hadoop-yarn-resourcemanager start
    # /etc/init.d/hadoop-yarn-nodemanager start
    # /etc/init.d/hadoop-mapreduce-historyserver start

擬似分散モードが正常に稼動していれば、http://localhost:50070でHDFSの状態、http://localhost:19888でジョブの履歴(JobHistory)が見られる。
JobHistory�͗����Ȃ̂ŁA���s���̃W���u�͈ꗗ�ɏo�Ă��Ȃ��B


�T���v�������s���Ă݂�B

$ hadoop jar $HADOOP_MAPRED_HOME/hadoop-mapreduce-examples.jar pi 4 1000
$ vi example.txt
$ hadoop fs -put example.txt
$ hadoop jar $HADOOP_MAPRED_HOME/hadoop-mapreduce-examples.jar wordcount example.txt output
$ hadoop fs -cat output/part-r-00000

YARN����MRv1�ƈ���ē��ɖ��Ȃ����s�ł���B


Hadoop�ڎ��֖߂� / �Z�p�����֖߂�
���[���̑��M��F�Ђ�����

�@

�@

�@

�@

�@

�@

�@

�@

�@

�@

�@

�@

�@

�@

�@

�@