Skip to content
Published on

Building a Secured (Kerberized) YARN Cluster

Authors
  • Name
    Twitter

Overview

This document records how to apply Kerberos to YARN.

Configurations

yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->

    <!-- Auxiliary shuffle service that MapReduce jobs need on every NodeManager. -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <property>
        <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
        <value>org.apache.hadoop.mapred.ShuffleHandler</value>
    </property>

    <!-- ResourceManager HA: two RMs (rm1/rm2) under a shared cluster id. -->
    <property>
        <name>yarn.resourcemanager.ha.enabled</name>
        <value>true</value>
    </property>
    <property>
        <name>yarn.resourcemanager.cluster-id</name>
        <value>test-cluster</value>
    </property>
    <property>
        <name>yarn.resourcemanager.ha.rm-ids</name>
        <value>rm1,rm2</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm1</name>
        <value>hadoop1.mysite.com</value>
    </property>
    <property>
        <name>yarn.resourcemanager.hostname.rm2</name>
        <value>hadoop2.mysite.com</value>
    </property>

    <!-- Per-RM service addresses (resource-tracker, scheduler, client, web UI). -->
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
        <value>hadoop1.mysite.com:8025</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm1</name>
        <value>hadoop1.mysite.com:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm1</name>
        <value>hadoop1.mysite.com:8050</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm1</name>
        <value>hadoop1.mysite.com:8055</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
        <value>hadoop2.mysite.com:8025</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address.rm2</name>
        <value>hadoop2.mysite.com:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.address.rm2</name>
        <value>hadoop2.mysite.com:8050</value>
    </property>
    <property>
        <name>yarn.resourcemanager.webapp.address.rm2</name>
        <value>hadoop2.mysite.com:8055</value>
    </property>

    <!-- ZooKeeper quorum used by the HA ResourceManagers. -->
    <property>
        <name>hadoop.zk.address</name>
        <value>hadoop1.mysite.com:2181,hadoop2.mysite.com:2181,hadoop3.mysite.com:2181</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
    </property>

<!-- ResourceManager Security Settings -->
    <!-- NOTE(review): the keytab file is the HDFS keytab while the principal is
         yarn/_HOST — verify this keytab actually contains the yarn principal. -->
    <property>
        <name>yarn.resourcemanager.keytab</name>
        <value>/etc/hdfs/conf/hdfs.keytab</value>
    </property>
    <property>
        <name>yarn.resourcemanager.principal</name>
        <value>yarn/_HOST@CHAOS.ORDER.COM</value>
    </property>

<!-- NodeManager Security Settings -->
    <property>
        <name>yarn.nodemanager.keytab</name>
        <value>/etc/hdfs/conf/hdfs.keytab</value>
    </property>
    <property>
        <name>yarn.nodemanager.principal</name>
        <value>yarn/_HOST@CHAOS.ORDER.COM</value>
    </property>
    <!-- A Kerberized cluster must use the LinuxContainerExecutor so containers
         run as the submitting user; DefaultContainerExecutor runs all containers
         as the NodeManager user and is not suitable for secure mode. -->
    <property>
        <name>yarn.nodemanager.container-executor.class</name>
        <value>org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor</value>
    </property>
    <property>
        <name>yarn.nodemanager.linux-container-executor.group</name>
        <value>hadoop</value>
    </property>
</configuration>
mapred-site.xml

<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>

    <!-- JobHistory Server Kerberos credentials. The keytab property must be a
         keytab file path (the original had the bare value "hdfs"); the path
         below matches the keytab used everywhere else in this document. -->
    <property>
        <name>mapreduce.jobhistory.keytab</name>
        <value>/etc/hdfs/conf/hdfs.keytab</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.principal</name>
        <value>mapred/_HOST@CHAOS.ORDER.COM</value>
    </property>

    <!-- NOTE(review): mapreduce.tasktracker.http.threads is an MRv1
         (TaskTracker) property and has no effect under YARN — confirm whether
         it can be removed. -->
    <property>
        <name>mapreduce.tasktracker.http.threads</name>
        <value>400</value>
    </property>

    <!-- SPNEGO credentials for the JobHistory web UI. -->
    <property>
        <name>mapreduce.jobhistory.webapp.spnego-principal</name>
        <value>HTTP/_HOST@CHAOS.ORDER.COM</value>
    </property>
    <property>
        <name>mapreduce.jobhistory.webapp.spnego-keytab-file</name>
        <value>/etc/hdfs/conf/hdfs.keytab</value>
    </property>

    <!-- NOTE(review): mapred.child.java.opts is deprecated in favor of
         mapreduce.map.java.opts / mapreduce.reduce.java.opts. -->
    <property>
        <name>mapred.child.java.opts</name>
        <value>-Xmx2048m</value>
    </property>
</configuration>
container-executor.cfg
yarn.nodemanager.linux-container-executor.group=hadoop
banned.users=zookeeper
min.user.id=1000
allowed.system.users=hbase

By configuring the files as above, you can use YARN components securely. However, ACL management becomes somewhat more difficult. To allow a new user to run MapReduce jobs, you need to create an account using useradd on all Hadoop Linux servers, and create a directory with the user's name under the HDFS user directory with the appropriate user permissions.