I. Preparation
1. Servers
Three CentOS 7+ servers:
10.1.80.128 # master node
10.1.80.129
10.1.80.130
Edit /etc/hosts on all three machines:
vim /etc/hosts
#add
10.1.80.128 master
10.1.80.129 slave1
10.1.80.130 slave2
Disable the firewall on all three machines:
systemctl status firewalld.service
systemctl stop firewalld.service
systemctl disable firewalld.service
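To confirm name resolution and connectivity between the nodes, a quick check from each machine (hostnames as configured above):
ping -c 1 master
ping -c 1 slave1
ping -c 1 slave2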
2. Passwordless SSH between the three servers
#log in as root and edit the sshd configuration
vim /etc/ssh/sshd_config
#enable RSA authentication (deprecated in newer OpenSSH and can usually be omitted)
RSAAuthentication yes
#enable public/private key authentication
PubkeyAuthentication yes
#path of the authorized keys file
AuthorizedKeysFile .ssh/authorized_keys
#restart sshd so the changes take effect
systemctl restart sshd
#generate id_rsa and id_rsa.pub under /root/.ssh (just press Enter at the prompts)
ssh-keygen -t rsa
#send the master's id_rsa.pub to slave1 and slave2
#(repeat on every node, including back to master, so each machine can reach the others without a password)
ssh-copy-id -i /root/.ssh/id_rsa.pub slave1
ssh-copy-id -i /root/.ssh/id_rsa.pub slave2
#verify
#log in to slave1
ssh slave1
#check that the login succeeded
ifconfig
#exit the ssh session and return to the local machine
exit
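Once the keys are in place, all three logins can be checked from the master in one go (a small sketch using the hostnames above; each line should print a hostname without asking for a password):
for h in master slave1 slave2; do ssh $h hostname; done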
3. Install the JDK
Download: https://www.oracle.com/java/technologies/javase-downloads.html
# move the JDK archive to the target directory
mv jdk-8u201-linux-x64.tar.gz /usr/local
cd /usr/local
# extract
tar -zxvf jdk-8u201-linux-x64.tar.gz
# rename the directory to java
mv jdk1.8.0_201 java
# add the Java environment variables to /etc/profile
vim /etc/profile
export JAVA_HOME=/usr/local/java
export JRE_HOME=/usr/local/java/jre
export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JRE_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin
#apply the environment variables
source /etc/profile
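Check that the JDK is picked up from the updated PATH:
java -version
javac -version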
II. Hadoop
1. Download:
https://mirror.bit.edu.cn/apache/hadoop/common/hadoop-3.2.1/
2. Installation
Install:
#move to /usr/local
mv hadoop-3.2.1.tar.gz /usr/local/
#change to /usr/local
cd /usr/local
#extract
tar -zxvf hadoop-3.2.1.tar.gz
#rename
mv hadoop-3.2.1 hadoop
#remove the archive
rm -rf hadoop-3.2.1.tar.gz
Configure environment variables:
vim /etc/profile
#add
#hadoop
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
#apply
source /etc/profile
#if successful, the version can be shown
hadoop version
3. Edit the Hadoop configuration files
#create the data directory /usr/local/hadoop/data
cd /usr/local/hadoop
mkdir data
Configure hadoop-env.sh and yarn-env.sh:
cd /usr/local/hadoop/etc/hadoop
# add JAVA_HOME and the root user variables to hadoop-env.sh
vim hadoop-env.sh
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export JAVA_HOME=/usr/local/java
# add JAVA_HOME to yarn-env.sh
vim yarn-env.sh
export JAVA_HOME=/usr/local/java
Configure core-site.xml:
vim core-site.xml
#set the HDFS endpoint the cluster exposes
#and Hadoop's common (tmp) directory
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/data</value>
</property>
</configuration>
Configure hdfs-site.xml:
vim hdfs-site.xml
#put the HDFS directories under /usr/local/hadoop/data
#set the replication factor to 2 (one copy on each of the two datanodes)
<property>
<name>dfs.name.dir</name>
<value>/usr/local/hadoop/data/namenode</value>
</property>
<property>
<name>dfs.data.dir</name>
<value>/usr/local/hadoop/data/datanode</value>
</property>
<property>
<name>dfs.tmp.dir</name>
<value>/usr/local/hadoop/data/tmp</value>
</property>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
Configure mapred-site.xml:
vim mapred-site.xml
#specify which resource scheduler MapReduce jobs run on.
#if this is not set to yarn, MapReduce jobs run only locally instead of on the cluster.
#newer versions no longer use the jobtracker/tasktracker settings
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
Configure yarn-site.xml:
vim yarn-site.xml
#specify the ResourceManager host of the YARN cluster
#configure the NodeManager auxiliary service so that map output is handed to reduce via shuffle
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
Configure the workers file with the slave nodes (the file is named workers in Hadoop 3.x; in 2.x it was slaves). After this, the configuration has to be copied to all nodes, see the sketch below.
vim workers
slave1
slave2
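The Hadoop directory and the /etc/profile changes must exist on all three nodes before starting. A minimal distribution sketch from the master, assuming identical paths on every node:
scp -r /usr/local/hadoop root@slave1:/usr/local/
scp -r /usr/local/hadoop root@slave2:/usr/local/
scp /etc/profile root@slave1:/etc/profile
scp /etc/profile root@slave2:/etc/profile
# then run "source /etc/profile" on each slave (copy the JDK the same way if it is not installed there yet)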
4. Start and verify
#format the HDFS filesystem (needed only once)
hadoop namenode -format
#start-all.sh lives in the sbin directory (already on the PATH via /etc/profile)
start-all.sh
If start-all.sh aborts with errors like the following, the run-as users are missing:
ERROR: Attempting to operate on yarn nodemanager as root
ERROR: but there is no YARN_NODEMANAGER_USER defined. Aborting operation.
Stopping resourcemanager
ERROR: Attempting to operate on yarn resourcemanager as root
ERROR: but there is no YARN_RESOURCEMANAGER_USER defined. Aborting operation.
#fix: in /usr/local/hadoop/sbin
#add at the top of start-dfs.sh and stop-dfs.sh
HDFS_DATANODE_USER=root
HADOOP_SECURE_DN_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
#start-yarn.sh,stop-yarn.sh文件顶部
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
#formatting more than once leaves the nodes with inconsistent clusterIDs
#change the clusterID on the slave nodes so it matches the master's
#on the master it is in the namenode directory
vim /usr/local/hadoop/data/namenode/current/VERSION
#on the slaves it is in the datanode directory
vim /usr/local/hadoop/data/datanode/current/VERSION
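Alternatively, on a cluster that holds no data worth keeping, the data directories can be wiped and HDFS re-formatted (a sketch; this deletes everything stored in HDFS):
stop-all.sh
rm -rf /usr/local/hadoop/data/*   # on the master and on every slave
hadoop namenode -format
start-all.sh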
Verify the cluster is running
#method 1
jps
On the master:
18800 SecondaryNameNode
19043 ResourceManager
19395 Jps
18533 NameNode
On the slaves:
5177 NodeManager
5067 DataNode
5307 Jps
#method 2
hdfs dfsadmin -report
Output similar to:
Live datanodes (2):
Name: 10.1.80.129:9866 (slave1)
Hostname: slave1
Decommission Status : Normal
Configured Capacity: 18238930944 (16.99 GB)
DFS Used: 8192 (8 KB)
Non DFS Used: 2830036992 (2.64 GB)
DFS Remaining: 15408885760 (14.35 GB)
DFS Used%: 0.00%
DFS Remaining%: 84.48%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Thu Mar 26 16:26:06 CST 2020
Last Block Report: Thu Mar 26 16:20:51 CST 2020
Num of Blocks: 0
Name: 10.1.80.130:9866 (slave2)
Hostname: slave2
Decommission Status : Normal
Configured Capacity: 18238930944 (16.99 GB)
DFS Used: 8192 (8 KB)
Non DFS Used: 2835111936 (2.64 GB)
DFS Remaining: 15403810816 (14.35 GB)
DFS Used%: 0.00%
DFS Remaining%: 84.46%
Configured Cache Capacity: 0 (0 B)
Cache Used: 0 (0 B)
Cache Remaining: 0 (0 B)
Cache Used%: 100.00%
Cache Remaining%: 0.00%
Xceivers: 1
Last contact: Thu Mar 26 16:26:06 CST 2020
Last Block Report: Thu Mar 26 16:20:51 CST 2020
Num of Blocks: 0
View the YARN cluster in a browser:
http://10.1.80.128:8088/cluster
View HDFS in a browser:
http://10.1.80.128:9870/dfshealth.html#tab-overview
III. MySQL
1. Installation
#extract, rename, and move to /usr/local/
tar -zxvf mysql-5.7.29-linux-glibc2.12-x86_64.tar.gz
mv mysql-5.7.29-linux-glibc2.12-x86_64 mysql
mv mysql /usr/local/
#add a dedicated group and user for mysql, which improves security
groupadd mysql
useradd -r -g mysql mysql
chown -R mysql:mysql /usr/local/mysql
2. Configuration
MySQL service configuration:
#set up the mysql service script
cd /usr/local/mysql
mkdir data
cp /usr/local/mysql/support-files/mysql.server /etc/init.d/mysql
vim /etc/init.d/mysql
#set the following
basedir=/usr/local/mysql
datadir=/usr/local/mysql/data
cd /etc
vim my.cnf
#replace the contents with the following
[client]
port = 3306
default-character-set=utf8
[mysqld]
# general options
basedir = /usr/local/mysql
datadir = /usr/local/mysql/data
port = 3306
character-set-server=utf8
default_storage_engine = InnoDB
sql_mode=STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_AUTO_CREATE_USER,NO_ENGINE_SUBSTITUTION
symbolic-links=0
#[mysqld_safe]
#log-error=/var/log/mariadb/mariadb.log
#pid-file=/var/run/mariadb/mariadb.pid
#
# include all files from the config directory
#
!includedir /etc/my.cnf.d
Initialize the database:
cd /usr/local/mysql/bin/
./mysqld --initialize --user=mysql --basedir=/usr/local/mysql --datadir=/usr/local/mysql/data
#output like the following; the temporary root password is at the end
2020-03-30T05:45:00.308460Z 0 [Warning] TIMESTAMP with implicit DEFAULT value is deprecated. Please use --explicit_defaults_for_timestamp server option (see documentation for more details).
2020-03-30T05:45:02.054695Z 0 [Warning] InnoDB: New log files created, LSN=45790
2020-03-30T05:45:02.136140Z 0 [Warning] InnoDB: Creating foreign key constraint system tables.
2020-03-30T05:45:02.312420Z 0 [Warning] No existing UUID has been found, so we assume that this is the first time that this server has been started. Generating a new UUID: 9995ae68-7249-11ea-a395-000c29384786.
2020-03-30T05:45:02.314254Z 0 [Warning] Gtid table is not ready to be used. Table 'mysql.gtid_executed' cannot be opened.
2020-03-30T05:45:03.040905Z 0 [Warning] CA certificate ca.pem is self signed.
2020-03-30T05:45:03.456573Z 1 [Note] A temporary password is generated for root@localhost: Heifb*Qeo2#e
#start the service
service mysql start
Log in and enable remote access:
cd /usr/local/mysql/bin/
./mysql -uroot -p
#after logging in with the temporary password, change it
set password=password('123456');
#allow remote access
GRANT ALL PRIVILEGES ON *.* TO 'root'@'%' IDENTIFIED BY '123456';
flush privileges;
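A quick check that remote access works, run from one of the other nodes (assuming a mysql client is available there):
mysql -h 10.1.80.128 -uroot -p -e "select version();"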
Start on boot:
chkconfig --list
chkconfig --add mysql
chkconfig mysql on
Environment variables:
vim /etc/profile
# mysql
export MYSQL_HOME=/usr/local/mysql
export PATH=$PATH:$MYSQL_HOME/bin
source /etc/profile
IV. Hive
1. Download
https://mirrors.tuna.tsinghua.edu.cn/apache/hive/hive-3.1.2/
Note the versions: Hive 2.x matches Hadoop 2.x, Hive 3.x matches Hadoop 3.x.
2. Installation and environment variables
Install:
#move to /usr/local/
mv apache-hive-3.1.2-bin.tar.gz /usr/local/
#change to /usr/local/ and extract
cd /usr/local/
tar -zxvf apache-hive-3.1.2-bin.tar.gz
#rename and remove the archive
mv apache-hive-3.1.2-bin hive
rm -rf apache-hive-3.1.2-bin.tar.gz
Environment variables:
vim /etc/profile
#hive
export HIVE_HOME=/usr/local/hive
export HIVE_CONF_DIR=${HIVE_HOME}/conf
export CLASSPATH=$CLASSPATH:.:${HIVE_HOME}/lib
export PATH=$PATH:${HIVE_HOME}/bin
source /etc/profile
hive --version
3. Hive configuration
Create the HDFS directories:
hadoop fs -mkdir -p /user/hive/warehouse # create directory
hadoop fs -mkdir -p /tmp/hive # create directory
hadoop fs -chmod -R 777 /user/hive/warehouse # grant permissions
hadoop fs -chmod -R 777 /tmp/hive # grant permissions
hadoop fs -ls /
Create the temp directory:
#in the /usr/local/hive directory
mkdir temp
chmod -R 777 temp
Configure hive-site.xml:
cd /usr/local/hive/conf
#copy the template (an empty file containing just these properties also works)
cp hive-default.xml.template hive-site.xml
#set the following properties
vim hive-site.xml
#hive directories
<property>
<name>hive.exec.local.scratchdir</name>
<value>/usr/local/hive/root</value>
<description>Local scratch space for Hive jobs</description>
</property>
<property>
<name>hive.downloaded.resources.dir</name>
<value>/usr/local/hive/${hive.session.id}_resources</value>
<description>Temporary local directory for added resources in the remote file system.</description>
</property>
<property>
<name>hive.server2.logging.operation.log.location</name>
<value>/usr/local/hive/root/operation_logs</value>
<description>Top level directory where operation logs are stored if logging functionality is enabled</description>
</property>
<property>
<name>hive.querylog.location</name>
<value>/usr/local/hive/root</value>
<description>Location of Hive run time structured log file</description>
</property>
#metastore database connection
<property>
<name>javax.jdo.option.ConnectionURL</name>
<value>jdbc:mysql://master:3306/hive?createDatabaseIfNotExist=true</value>
<description>
JDBC connect string for a JDBC metastore.
To use SSL to encrypt/authenticate the connection, provide database-specific SSL flag in the connection URL.
For example, jdbc:postgresql://myhost/db?ssl=true for postgres database.
</description>
</property>
<property>
<name>javax.jdo.option.ConnectionDriverName</name>
<value>com.mysql.jdbc.Driver</value>
<description>Driver class name for a JDBC metastore</description>
</property>
<property>
<name>javax.jdo.option.ConnectionUserName</name>
<value>root</value>
<description>Username to use against metastore database</description>
</property>
<property>
<name>javax.jdo.option.ConnectionPassword</name>
<value>123456</value>
<description>password to use against metastore database</description>
</property>
<property>
<name>hive.metastore.schema.verification</name>
<value>false</value>
<description>
Enforce metastore schema version consistency.
True: Verify that version information stored in is compatible with one from Hive jars. Also disable automatic
schema migration attempt. Users are required to manually migrate schema after Hive upgrade which ensures
proper metastore schema migration. (Default)
False: Warn if the version information stored in metastore doesn't match with one from in Hive jars.
</description>
</property>
Configure hive-log4j2.properties:
cp hive-log4j2.properties.template hive-log4j2.properties
vim hive-log4j2.properties
# change
property.hive.log.dir = /usr/local/hive/temp/root
Configure hive-env.sh:
cp hive-env.sh.template hive-env.sh
vim hive-env.sh
#the first four entries can be omitted (they are already set in /etc/profile)
export JAVA_HOME=/usr/local/java
export HADOOP_HOME=/usr/local/hadoop
export HIVE_HOME=/usr/local/hive
export HIVE_CONF_DIR=${HIVE_HOME}/conf
export HIVE_AUX_JARS_PATH=${HIVE_HOME}/lib
4. JDBC connector for the metastore
Download: https://dev.mysql.com/downloads/connector/j/5.1.html
tar -zxvf mysql-connector-java-5.1.48.tar.gz
cd mysql-connector-java-5.1.48
#copy the jar into /usr/local/hive/lib
cp mysql-connector-java-5.1.48.jar /usr/local/hive/lib
cp mysql-connector-java-5.1.48-bin.jar /usr/local/hive/lib
5. Initialize and start Hive
cd /usr/local/hive/bin
schematool -dbType mysql -initSchema
#with Hive 3.1.2 on Hadoop 3.2.1 the first attempt fails with a guava version conflict:
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/usr/local/hive/lib/log4j-slf4j-impl-2.10.0.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/usr/local/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.apache.logging.slf4j.Log4jLoggerFactory]
Exception in thread "main" java.lang.NoSuchMethodError: com.google.common.base.Preconditions.checkArgument(ZLjava/lang/String;Ljava/lang/Object;)V
at org.apache.hadoop.conf.Configuration.set(Configuration.java:1357)
at org.apache.hadoop.conf.Configuration.set(Configuration.java:1338)
at org.apache.hadoop.mapred.JobConf.setJar(JobConf.java:536)
at org.apache.hadoop.mapred.JobConf.setJarByClass(JobConf.java:554)
at org.apache.hadoop.mapred.JobConf.<init>(JobConf.java:448)
at org.apache.hadoop.hive.conf.HiveConf.initialize(HiveConf.java:5141)
at org.apache.hadoop.hive.conf.HiveConf.<init>(HiveConf.java:5104)
at org.apache.hive.beeline.HiveSchemaTool.<init>(HiveSchemaTool.java:96)
at org.apache.hive.beeline.HiveSchemaTool.main(HiveSchemaTool.java:1473)
at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62)
at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
at java.lang.reflect.Method.invoke(Method.java:498)
at org.apache.hadoop.util.RunJar.run(RunJar.java:323)
at org.apache.hadoop.util.RunJar.main(RunJar.java:236)
#fix: copy guava-27.0-jre.jar from /usr/local/hadoop/share/hadoop/common/lib to /usr/local/hive/lib
#(remove Hive's bundled older guava first, e.g. guava-19.0.jar)
rm -f /usr/local/hive/lib/guava-19.0.jar
cp /usr/local/hadoop/share/hadoop/common/lib/guava-27.0-jre.jar /usr/local/hive/lib/
#run the initialization again
schematool -dbType mysql -initSchema
On success the output ends with:
Initialization script completed
Mon Mar 30 14:32:36 CST 2020 WARN: Establishing SSL connection without server's identity verification is not recommended. According to MySQL 5.5.45+, 5.6.26+ and 5.7.6+ requirements SSL connection must be established by default if explicit option isn't set. For compliance with existing applications not using SSL the verifyServerCertificate property is set to 'false'. You need either to explicitly disable SSL by setting useSSL=false, or set useSSL=true and provide truststore for server certificate verification.
schemaTool completed
6. Use the Hive CLI and some common commands
#enter the CLI
hive
#list functions
show functions;
#show information about a function
desc function sum;
#create a database
create database learn;
#switch to the database
use learn;
#create a table
create table student(id int,name string) row format delimited fields terminated by '\t';
#create /home/admin/student.txt with tab-separated data:
001 zhangsan
002 lisi
003 wangwu
004 zhaoliu
005 chenqi
#load the data (note the table name is qualified with the database: learn.student)
load data local inpath '/home/admin/student.txt' into table learn.student;
#query
select * from student;
#view the table metadata in MySQL
SELECT * FROM hive.TBLS;
The data can also be browsed through the HDFS web UI.
V. Scala + ZooKeeper + Kafka cluster
Downloads (note that the Kafka and Scala versions must match):
scala:https://www.scala-lang.org/download/2.12.11.html
zookeeper: http://mirror.bit.edu.cn/apache/zookeeper/
kafka: http://kafka.apache.org/downloads
1. Scala
tar -zxvf scala-2.12.11.tgz
mv scala-2.12.11 scala
mv scala /usr/local/
vim /etc/profile
#add
#scala
export SCALA_HOME=/usr/local/scala
export PATH=$PATH:$SCALA_HOME/bin
source /etc/profile
scala -version
2. ZooKeeper
Install:
tar -zxvf zookeeper-3.4.14.tar.gz
mv zookeeper-3.4.14 zookeeper
mv zookeeper /usr/local/
cd /usr/local/zookeeper
mkdir data
mkdir logs
mkdir log
cd /usr/local/zookeeper/conf/
#configure
cp zoo_sample.cfg zoo.cfg
vim zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/usr/local/zookeeper/data
dataLogDir=/usr/local/zookeeper/logs
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
server.0=master:2888:3888
server.1=slave1:2888:3888
server.2=slave2:2888:3888
#create the myid file: master gets 0, slave1 gets 1, slave2 gets 2, matching the server.N entries above
cd /usr/local/zookeeper/data
echo 1 >myid
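The myid value differs per node, and the /usr/local/zookeeper directory itself must also be present on slave1 and slave2; per-node commands matching the server.N ids above:
# on master: echo 0 > /usr/local/zookeeper/data/myid
# on slave1: echo 1 > /usr/local/zookeeper/data/myid
# on slave2: echo 2 > /usr/local/zookeeper/data/myid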
Start on boot:
cd /etc/rc.d/init.d
touch zookeeper
vim zookeeper
#contents:
#!/bin/bash
#chkconfig: 2345 10 90
#description: service zookeeper
export JAVA_HOME=/usr/local/java
export ZOO_LOG_DIR=/usr/local/zookeeper/log
ZOOKEEPER_HOME=/usr/local/zookeeper
su root ${ZOOKEEPER_HOME}/bin/zkServer.sh "$1"
chkconfig --add zookeeper
chkconfig --list
Start:
cd /usr/local/zookeeper/bin/
./zkServer.sh start
./zkServer.sh status
jps
3. Kafka
#install
tar -zxvf kafka_2.12-2.2.0.tgz
mv kafka_2.12-2.2.0 kafka
mv kafka /usr/local
cd /usr/local/kafka/
mkdir log_data
#configure
vim /usr/local/kafka/config/server.properties
#only these four entries need to change
#broker.id: easiest to match the ZooKeeper myid numbering
#listeners: listen on the local host
#zookeeper.connect: list all three nodes
#log.dirs: the directory must be created beforehand
broker.id=0
listeners=PLAINTEXT://master:9092
zookeeper.connect=master:2181,slave1:2181,slave2:2181
log.dirs=/usr/local/kafka/log_data
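The values above are for the master; on the other two brokers only broker.id and listeners differ (matching the hostnames and myid numbering above):
# slave1: broker.id=1  listeners=PLAINTEXT://slave1:9092
# slave2: broker.id=2  listeners=PLAINTEXT://slave2:9092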
Start each broker in turn:
/usr/local/kafka/bin/kafka-server-start.sh /usr/local/kafka/config/server.properties
# start in the background
/usr/local/kafka/bin/kafka-server-start.sh -daemon /usr/local/kafka/config/server.properties
Test (run from /usr/local/kafka/bin):
Create a topic via ZooKeeper:
./kafka-topics.sh --create --zookeeper 10.1.80.128:2181 --replication-factor 3 --partitions 1 --topic luojialei-topic
Describe the topic:
./kafka-topics.sh --describe --zookeeper 10.1.80.128:2181 --topic luojialei-topic
One producer:
./kafka-console-producer.sh --broker-list 10.1.80.128:9092,10.1.80.129:9092,10.1.80.130:9092 --topic luojialei-topic
Three consumers:
./kafka-console-consumer.sh --bootstrap-server 10.1.80.128:9092,10.1.80.129:9092,10.1.80.130:9092 --from-beginning --topic luojialei-topic
Data produced by any producer above can be consumed by any of the consumers.
Install monitoring. KafkaOffsetMonitor was chosen because it runs as a single jar, which makes it easy to deploy; it only does monitoring, so it is also safe to use.
Download: https://github.com/quantifind/KafkaOffsetMonitor
#installing it on any one node is enough
cd /usr/local/kafka
#put the jar into the following directory
mkdir kafkaoffsetmonitor
Start script:
vim start.sh
java -cp KafkaOffsetMonitor-assembly-0.2.0.jar \
com.quantifind.kafka.offsetapp.OffsetGetterWeb \
--zk master:2181,slave1:2181,slave2:2181 \
--port 8788 \
--refresh 10.seconds \
--retain 2.days &
chmod +x start.sh
./start.sh
Web UI: http://10.1.80.128:8788/
VI. Spark
1. Download (e.g. from https://spark.apache.org/downloads.html)
2. Installation
tar -zxvf spark-3.0.0-preview2-bin-hadoop3.2.tgz
mv spark-3.0.0-preview2-bin-hadoop3.2 spark
mv spark /usr/local
vim /etc/profile
#add
#spark
export HADOOP_HOME=/usr/local/hadoop # can be omitted if already set for Hadoop
export SPARK_HOME=/usr/local/spark
export PATH="${HADOOP_HOME}/bin:${SCALA_HOME}/bin:${SPARK_HOME}/bin:$PATH"
source /etc/profile
#check the version
spark-shell
Welcome to
____ __
/ __/__ ___ _____/ /__
_\ \/ _ \/ _ `/ __/ '_/
/___/ .__/\_,_/_/ /_/\_\ version 3.0.0-preview2
/_/
Using Scala version 2.12.10 (Java HotSpot(TM) 64-Bit Server VM, Java 1.8.0_171)
3. Configuration
cd /usr/local/spark/conf
cp spark-env.sh.template spark-env.sh
vim spark-env.sh
#set the following
export JAVA_HOME=/usr/local/java
export SCALA_HOME=/usr/local/scala
export HADOOP_HOME=/usr/local/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export SPARK_MASTER_HOST=master
export SPARK_LOCAL_DIRS=/usr/local/spark
#driver memory
export SPARK_DRIVER_MEMORY=2g
#number of worker CPU cores
export SPARK_WORKER_CORES=2
cp slaves.template slaves
vim slaves
#contents:
master
slave1
slave2
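Spark (and Scala, if not already present) must exist on all three nodes under the same paths; a minimal distribution sketch from the master:
scp -r /usr/local/scala /usr/local/spark root@slave1:/usr/local/
scp -r /usr/local/scala /usr/local/spark root@slave2:/usr/local/
scp /etc/profile root@slave1:/etc/profile
scp /etc/profile root@slave2:/etc/profile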
4. Start
After the above has been configured on all three servers:
cd /usr/local/spark/sbin
./start-all.sh
./stop-all.sh
#check with jps: the master node should show a Master process and the slaves a Worker process (since master is also listed in slaves, it runs a Worker as well)
jps
Web UI: http://10.1.80.128:8080/
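A simple smoke test against the standalone cluster using the bundled SparkPi example (the exact examples jar name depends on the Spark build, hence the wildcard):
spark-submit --class org.apache.spark.examples.SparkPi \
  --master spark://master:7077 \
  /usr/local/spark/examples/jars/spark-examples_*.jar 100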
5. Basic spark-shell usage
#create a collection and look at the first element
val data= sc.parallelize(Array(1,2,3))
data.first()
#read a local file and look at the first line
val textFile = sc.textFile("file:/home/admin/text.txt")
textFile.first()
#a file in the current local directory
val distFile1 = sc.textFile("data.txt")
#a file in HDFS
val distFile2 = sc.textFile("hdfs://ip:port/user/names.txt")
#a local file with an explicit path
val distFile3 = sc.textFile("file:/input/data.txt")
#read multiple files (comma-separated paths)
val distFile4 = sc.textFile("/input/data1.txt,/input/data2.txt")