forked from mesos/spark-ec2
-
Notifications
You must be signed in to change notification settings - Fork 0
/
setup-slave.sh
executable file
·135 lines (113 loc) · 4.95 KB
/
setup-slave.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
#!/bin/bash
# Node setup script for a spark-ec2 slave.

# Disable Transparent Huge Pages (THP).
# THP can result in system thrashing (high sys usage) due to frequent memory
# defragmentation, so most systems recommend turning it off.
if [[ -e /sys/kernel/mm/transparent_hugepage/enabled ]]; then
  echo never > /sys/kernel/mm/transparent_hugepage/enabled
fi

# Make sure we are in the spark-ec2 directory
pushd /root/spark-ec2 > /dev/null

source ec2-variables.sh

# Set hostname based on EC2 private DNS name, so that it is set correctly
# even if the instance is restarted with a different private DNS name
PRIVATE_DNS=$(wget -q -O - http://169.254.169.254/latest/meta-data/local-hostname)
if [[ -n "$PRIVATE_DNS" ]]; then
  hostname "$PRIVATE_DNS"
  echo "$PRIVATE_DNS" > /etc/hostname
  HOSTNAME=$PRIVATE_DNS  # Fix the bash built-in hostname variable too
else
  # The metadata lookup returned nothing. Writing that to /etc/hostname would
  # blank the persistent hostname, so keep the current one instead.
  echo "WARNING: could not read local-hostname from EC2 metadata;" \
    "leaving hostname unchanged" >&2
fi

echo "checking/fixing resolution of hostname"
bash /root/spark-ec2/resolve-hostname.sh
# R3, I2 and HI1 instances are treated as having no pre-formatted ext3 disks:
# their instance-store devices are formatted and mounted here.
instance_type=$(curl http://169.254.169.254/latest/meta-data/instance-type 2> /dev/null)
echo "Setting up slave on $(hostname)... of type $instance_type"

case "$instance_type" in
  r3* | i2* | hi1*)
    # Format & mount using ext4, which has the best performance among ext3,
    # ext4, and xfs based on our shuffle heavy benchmark
    EXT4_MOUNT_OPTS="defaults,noatime,nodiratime"

    # Start from a clean slate: drop every existing /mnt* entry, then
    # recreate /mnt for the first disk.
    rm -rf /mnt*
    mkdir /mnt
    # To turn TRIM support on, uncomment the following line.
    #echo '/dev/sdb /mnt ext4 defaults,noatime,nodiratime,discard 0 0' >> /etc/fstab
    mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 /dev/sdb
    mount -o "$EXT4_MOUNT_OPTS" /dev/sdb /mnt

    # Only two instance types get a second disk mounted at /mnt2, and the
    # device name differs between them.
    case "$instance_type" in
      r3.8xlarge) second_disk=/dev/sdc ;;
      hi1.4xlarge) second_disk=/dev/sdf ;;
      *) second_disk="" ;;
    esac

    if [[ -n "$second_disk" ]]; then
      mkdir /mnt2
      # To turn TRIM support on, uncomment the line matching the instance type.
      #echo '/dev/sdc /mnt2 ext4 defaults,noatime,nodiratime,discard 0 0' >> /etc/fstab  # r3.8xlarge
      #echo '/dev/sdf /mnt2 ext4 defaults,noatime,nodiratime,discard 0 0' >> /etc/fstab  # hi1.4xlarge
      mkfs.ext4 -E lazy_itable_init=0,lazy_journal_init=0 "$second_disk"
      mount -o "$EXT4_MOUNT_OPTS" "$second_disk" /mnt2
    fi
    ;;
esac
# Mount options used when mounting EBS volumes in setup_ebs_volume
# (we use xfs for EBS volumes to format them faster).
XFS_MOUNT_OPTS="defaults,noatime,nodiratime,allocsize=8m"

#######################################
# Format (when needed) and mount a single EBS volume.
# Does nothing when the device node does not exist.
# Globals:
#   XFS_MOUNT_OPTS (read) - options passed to mount -o
# Arguments:
#   $1 - block device path, e.g. /dev/sds
#   $2 - directory to mount the device on, e.g. /vol0
# Outputs:
#   Whatever blkid, yum, mkfs.xfs and mount print.
#######################################
function setup_ebs_volume {
  local device=$1
  local mount_point=$2

  # Device not attached: nothing to set up.
  [[ -e "$device" ]] || return 0

  # blkid fails when it finds no filesystem signature on the device.
  if ! blkid "$device"; then
    # Unformatted volume: create an XFS filesystem, then mount it.
    mkdir -p "$mount_point"
    yum install -q -y xfsprogs
    if mkfs.xfs -q "$device"; then
      mount -o "$XFS_MOUNT_OPTS" "$device" "$mount_point"
      chmod -R a+w "$mount_point"
    else
      # mkfs.xfs is not installed on this machine or has failed;
      # delete the mount point so that the user doesn't think we
      # successfully mounted the EBS volume
      rmdir "$mount_point"
    fi
  else
    # EBS volume is already formatted. Mount it if it's not mounted yet.
    # The mount point must be expanded (double quotes) and is matched as a
    # whole space-delimited field, so /vol does not match /vol0.
    if ! grep -qsF -- " ${mount_point} " /proc/mounts; then
      mkdir -p "$mount_point"
      mount -o "$XFS_MOUNT_OPTS" "$device" "$mount_point"
      chmod -R a+w "$mount_point"
    fi
  fi
}
# Walk the EBS device names /dev/sds .. /dev/sdz in order and hand each one to
# setup_ebs_volume together with its mount point /vol0 .. /vol7.
ebs_vol_index=0
for ebs_dev_letter in s t u v w x y z; do
  setup_ebs_volume "/dev/sd${ebs_dev_letter}" "/vol${ebs_vol_index}"
  ebs_vol_index=$((ebs_vol_index + 1))
done
unset ebs_vol_index ebs_dev_letter

# Backward compatibility: the old spark-ec2 script supports attaching only one
# EBS volume, at /dev/sdv, so expose /vol3 under the name /vol as well.
if [[ -e /vol3 ]] && [[ ! -e /vol ]]; then
  ln -s /vol3 /vol
fi
# Make data dirs writable by non-root users, such as CDH's hadoop user
chmod -R a+w /mnt*

# Remove ~/.ssh/known_hosts because it gets polluted as you start/stop many
# clusters (new machines tend to come up under old hostnames)
rm -f /root/.ssh/known_hosts

# Create swap space on /mnt
# SWAP_MB is deliberately unquoted so that an empty value passes no argument.
# NOTE(review): presumably defined by ec2-variables.sh - confirm.
/root/spark-ec2/create-swap.sh $SWAP_MB

# Allow memory to be over committed. Helps in pyspark where we fork
echo 1 > /proc/sys/vm/overcommit_memory

# Add github to known hosts to get git@github.com clone to work
# TODO(shivaram): Avoid duplicate entries ?
cat /root/spark-ec2/github.hostkey >> /root/.ssh/known_hosts

# Create /usr/bin/realpath which is used by R to find Java installations
# NOTE: /usr/bin/realpath is missing in CentOS AMIs. See
# http://superuser.com/questions/771104/usr-bin-realpath-not-found-in-centos-6-5
# Only install the shim when no executable is already there, so that a real
# realpath binary is never overwritten by this readlink wrapper.
if [[ ! -x /usr/bin/realpath ]]; then
  printf '%s\n' '#!/bin/bash' 'readlink -e "$@"' > /usr/bin/realpath
  chmod a+x /usr/bin/realpath
fi

popd > /dev/null

# Ensure that numpy is installed properly on every machine.
pip-2.7 install -U --force-reinstall numpy requests BeautifulSoup4

# Schedule an automatic shutdown of this machine 8 hours from now.
echo "shutdown -h now" | at now + 8 hours