个人笔记,问题较多
符号说明 | [] | 表示其中内容可以没有 |
su [root] | 获取root权限 | |
vi /etc/sudoers | 1.按 i 或 Insert 键进入插入模式 2.在 root ALL=(ALL) ALL 行后面添加: usr ALL=(ALL) [NOPASSWD:] ALL 3.按 Esc, 输入 :wq! 保存. (此文件默认只读, 所以需要加 ! 强制写入) | |
exit | 退出root权限 | |
| 将需要安装的软件拷贝到桌面 | |
sudo tar -zxvf jdk… | 解压 | |
sudo mv jdk… /usr/local/java | 将解压后的软件移动到相应路径, 对hadoop, scala, spark执行同样的操作 | |
bash Ana… .sh -b | 安装Anaconda, -b表示系统直接使用默认设置安装 | |
sudo gedit ~/.bashrc | 配置环境变量 #Hadoop Variables export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.141-3.b16.el6_9.x86_64 export HADOOP_HOME=/usr/local/hadoop export PATH=$PATH:$HADOOP_HOME/bin export PATH=$PATH:$HADOOP_HOME/sbin export HADOOP_MAPRED_HOME=$HADOOP_HOME export HADOOP_COMMON_HOME=$HADOOP_HOME export HADOOP_HDFS_HOME=$HADOOP_HOME export YARN_HOME=$HADOOP_HOME export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib" export JAVA_LIBRARY_PATH=$HADOOP_HOME/lib/native:$JAVA_LIBRARY_PATH #Hadoop Variables
export PATH=${JAVA_HOME}/bin:${PATH} export HADOOP_CLASSPATH=${JAVA_HOME}/lib/tools.jar
export SCALA_HOME=/usr/local/scala export PATH=$PATH:$SCALA_HOME/bin
export SPARK_HOME=/usr/local/spark export PATH=$PATH:$SPARK_HOME/bin
export PATH=/home/hdusr/anaconda2/bin:$PATH #此行需修改
export ANACONDA_PATH=/home/hdusr/anaconda2 #此行需修改 export PYSPARK_DRIVER_PYTHON=$ANACONDA_PATH/bin/ipython export PYSPARK_PYTHON=$ANACONDA_PATH/bin/python | |
source ~/.bashrc | 重新载入配置文件 | |
sudo yum install openssh | 安装ssh | |
ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa | '' 是两个单引号(表示空密码) 产生SSH Key 进行后续身份验证 | |
cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys | 将产生的Key放置到授权文件中 | |
chmod 700 ~/.ssh | $ chmod 644 ~/.ssh/authorized_keys | |
只在master执行 | $ cat ~/.ssh/id_dsa.pub | ssh hdusr@data1 'cat - >> ~/.ssh/authorized_keys' | |
sudo tail /var/log/secure -n 20 | 查看日志 | |
sudo gedit /etc/sysconfig/network | 修改主机名 HOSTNAME=新主机名 | |
service iptables stop | 关闭防火墙 | |
sudo chkconfig iptables off | 永久关闭防火墙 | |
Hadoop设置 | $sudo gedit /usr/local/hadoop/etc/hadoop/hadoop-env.sh export JAVA_HOME=${JAVA_HOME}修改为 export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.141-3.b16.el6_9.x86_64 | |
$sudo gedit /usr/local/hadoop/etc/hadoop/core-site.xml <configuration> <property> <name>fs.default.name</name> <value>hdfs://master:9000</value> </property> </configuration> | ||
$sudo gedit /usr/local/hadoop/etc/hadoop/yarn-site.xml <configuration> <!-- Site specific YARN configuration properties --> <property> <name>yarn.nodemanager.aux-services</name> <value>mapreduce_shuffle</value> </property> <property> <name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name> <value>org.apache.hadoop.mapred.ShuffleHandler</value> </property> <property> <name>yarn.resourcemanager.resource-tracker.address</name> <value>master:8025</value> </property> <property> <name>yarn.resourcemanager.scheduler.address</name> <value>master:8030</value> </property> <property> <name>yarn.resourcemanager.address</name> <value>master:8050</value> </property> </configuration> | ||
$sudo cp /usr/local/hadoop/etc/hadoop/mapred-site.xml.template /usr/local/hadoop/etc/hadoop/mapred-site.xml $sudo gedit /usr/local/hadoop/etc/hadoop/mapred-site.xml <configuration> <property> <name>mapred.job.tracker</name> <value>master:54311</value> </property> </configuration> | ||
$sudo gedit /usr/local/hadoop/etc/hadoop/hdfs-site.xml <configuration> <property> <name>dfs.replication</name> <value>3</value> </property> <property> <name>dfs.namenode.name.dir</name> <value> file:/usr/local/hadoop/hadoop_data/hdfs/namenode</value> </property> </configuration> | ||
$sudo mkdir -p /usr/local/hadoop/hadoop_data/hdfs/namenode 建立NameNode资料存储目录(master) | ||
$sudo mkdir -p /usr/local/hadoop/hadoop_data/hdfs/datanode 建立DataNode资料存储目录 | ||
$sudo chown hdusr:hdusr -R /usr/local/hadoop/ 将hadoop目录拥有者改为hdusr | ||
$hadoop namenode -format 将HDFS进行格式化(此命令会删除HDFS中的所有资料) | ||
$ip addr 复制mac地址 "dhcp" $sudo gedit /etc/sysconfig/network-scripts/ifcfg-eth0 DEVICE="eth0" BOOTPROTO="static" HWADDR="00:0C:29:5C:30:F1" IPV6INIT="yes" NM_CONTROLLED="yes" ONBOOT="yes" TYPE="Ethernet" UUID="e779e28b-2f28-44ee-a194-f2ec781860fb" IPADDR=192.168.179.140 NETMASK=255.255.255.0 GATEWAY=192.168.179.2 $ifconfig 查看本机ip确定修改成功 | ||
$sudo gedit /etc/hosts 192.168.179.140 master 192.168.179.141 data1 192.168.179.142 data2 192.168.179.143 data3 | ||
| ||
$cd /usr/local/spark/conf/ $cp log4j.properties.template log4j.properties $sudo gedit log4j.properties INFO改为WARN 表示在启动pyspark时只显示警告信息 | ||
sudo chown hdusr:hdusr /usr/local/spark/ 将spark目录拥有者改为hdusr | ||
| ||
复制虚拟机到data1修改完再复制data1到data2和data3,最后一步省略 | ||
$sudo gedit /etc/passwd (可不执行,更改开机显示的用户名) 最后一行hdusr:x:500:500:用户名:/home/hdusr:/bin/bash | ||
$sudo gedit /etc/sysconfig/network (此处修改需重启才能继续, 可最后再修改) 修改主机名 HOSTNAME=新主机名 | ||
$ip addr $sudo gedit /etc/sysconfig/network-scripts/ifcfg-eth0 修改第一行, mac和ip $ifconfig | ||
$sudo gedit /usr/local/hadoop/etc/hadoop/hdfs-site.xml name改为data <property> <name>dfs.datanode.data.dir</name> <value> file:/usr/local/hadoop/hadoop_data/hdfs/datanode</value> </property> | ||
| ||
$ssh data1 $sudo rm -rf /usr/local/hadoop/hadoop_data/hdfs/ $mkdir -p /usr/local/hadoop/hadoop_data/hdfs/datanode | ||
|