hostnamectl set-hostname master
hostnamectl set-hostname slave1
hostnamectl set-hostname slave2
172.18.14.238 master
172.18.14.242 slave1
172.18.14.241 slave2
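To confirm the name resolution works, ping each hostname once from every node (a quick sanity check):
ping -c 1 master
ping -c 1 slave1
ping -c 1 slave2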
*/30 10-17 * * * /usr/sbin/ntpdate master
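The cron entry above syncs time from master every 30 minutes between 10:00 and 17:00. To install it on each slave and force an immediate one-off sync (assuming ntpdate is installed):
crontab -e
/usr/sbin/ntpdate master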
On the master node:
ssh-keygen
ssh-copy-id -i ~/.ssh/id_rsa.pub master
ssh-copy-id -i ~/.ssh/id_rsa.pub slave1
ssh-copy-id -i ~/.ssh/id_rsa.pub slave2
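Verify that passwordless login now works from master (each command should print the remote hostname with no password prompt):
ssh slave1 hostname
ssh slave2 hostname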
cd /usr/
mkdir java
cd package277
tar -zxvf jdk-8u221-linux-x64.tar.gz -C /usr/java
cd /usr/java/jdk1.8.0_221/
pwd
vim /etc/profile
export JAVA_HOME=/usr/java/jdk1.8.0_221
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=$JAVA_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin
source /etc/profile
java -version
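The first line of the output should look roughly like this (the exact build string may differ):
java version "1.8.0_221"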
-------------------------------
#hadoop
export HADOOP_HOME=/usr/hadoop/hadoop-2.7.7
export CLASSPATH=$CLASSPATH:$HADOOP_HOME/lib
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
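Reload the profile and confirm Hadoop is on the PATH:
source /etc/profile
hadoop version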
echo master >> master
hostnamectl set-hostname hadoop000 && bash
vim /etc/hosts
Add the following line:
172.18.32.104 hadoop000
Note: every node must write this same hostname mapping into its /etc/hosts, with the IP being the one assigned to that node.
ssh-keygen
Press Enter three times to accept the defaults.
cd /root/.ssh
ssh-copy-id -i id_rsa.pub hadoop000
Type yes; the first connection asks for the password.
ssh hadoop000
ssh localhost
Format the HDFS filesystem (only on first setup; reformatting destroys existing metadata):
hdfs namenode -format
Start the Hadoop cluster:
start-all.sh
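Verify the daemons with jps; on a combined master/worker node you should see NameNode, DataNode, SecondaryNameNode, ResourceManager and NodeManager (plus Jps itself):
jps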
Start the MySQL service:
systemctl start mysqld
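Confirm MySQL is running before initializing the metastore schema:
systemctl status mysqld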
schematool -dbType mysql -initSchema
hive --service metastore
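The metastore runs in the foreground and occupies the window; a common alternative is to background it (a hedged suggestion, log path is arbitrary):
nohup hive --service metastore > /tmp/metastore.log 2>&1 &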
2. First open a second master session window (the metastore holds the current one); to exit Hive, run quit;
hive
Create the hive database:
create database hive;
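The DDL and queries below reference a comm database and sometimes use bare table names, so create it and switch to it first (an assumption inferred from the comm. prefixes in the statements that follow):
create database if not exists comm;
use comm;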
1.
CREATE TABLE comm.dim_date (
date_id string,
week_id string,
week_day string,
day string,
month string,
quarter string,
year string,
is_workday string,
holiday_id string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/behavior/dim/dim_date'
TBLPROPERTIES ('skip.header.line.count'='1');
load data local inpath '/root/bigdata/data/dim_date_2023.txt' into table comm.dim_date;
load data local inpath '/root/bigdata/data/dim_date_2023.txt' overwrite into table comm.dim_date;
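Spot-check the load (the first row returned should be data, since the header line is skipped):
select * from comm.dim_date limit 5;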
2.
create table comm.dim_area
(
city string,
province string,
area string
)
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
LOCATION '/behavior/dim/dim_area';
load data local inpath '/root/bigdata/data/dim_area.txt' overwrite into table comm.dim_area;
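Likewise confirm the area dimension loaded:
select count(*) from comm.dim_area;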
3.
create table ads_user_pro
as select province,count(1) cnt
from comm.dws_behavior_log
group by province;
insert overwrite local directory '/root/bigdata/result/ads_user_pro'
row format delimited fields terminated by ','
select * from comm.ads_user_pro;
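Hive writes the export as plain files in that directory; a quick look (000000_0 is Hive's conventional output file name, assuming a single reducer):
cat /root/bigdata/result/ads_user_pro/000000_0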
4.
create table ads_user_region
as select b.dt,a.area,count(1) cnt
from comm.dim_area a,
comm.dws_behavior_log b
where a.province = b.province
group by b.dt,a.area;
insert overwrite local directory '/root/bigdata/result/ads_user_region'
row format delimited fields terminated by ','
select * from comm.ads_user_region;
5.
create table ads_user_hour
as select substring(from_utc_timestamp(ts,'Asia/Shanghai'),12,2) ts_hour,count(1) cnt
from dws_behavior_log
group by substring(from_utc_timestamp(ts,'Asia/Shanghai'),12,2);
insert overwrite local directory '/root/bigdata/result/ads_user_hour'
row format delimited fields terminated by ','
select * from comm.ads_user_hour;
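An equivalent and arguably clearer form uses Hive's hour() function instead of substring (a sketch, assuming ts converts cleanly to a timestamp):
select hour(from_utc_timestamp(ts,'Asia/Shanghai')) ts_hour,count(1) cnt
from dws_behavior_log
group by hour(from_utc_timestamp(ts,'Asia/Shanghai'));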
6.
....
7.
create table ads_visit_mode
as select url,device_type,count(1) cnt
from dws_behavior_log
group by url,device_type;
insert overwrite local directory '/root/bigdata/result/ads_visit_mode'
row format delimited fields terminated by ','
select * from comm.ads_visit_mode;
8.
create table ads_online_type
as select url,type,count(1) cnt
from dws_behavior_log
group by url,type;
insert overwrite local directory '/root/bigdata/result/ads_online_type'
row format delimited fields terminated by ','
select * from comm.ads_online_type;
9.
create table ads_user_domain
as select split(url,'\\.')[1] url_domain,count(1) cnt
from dws_behavior_log
group by split(url,'\\.')[1];
insert overwrite local directory '/root/bigdata/result/ads_user_domain'
row format delimited fields terminated by ','
select * from comm.ads_user_domain ;
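Note that split() takes a regex, which is why the dot is escaped above. A more direct way to pull the host part of a URL is Hive's parse_url (a hedged alternative, assuming url includes the scheme):
select parse_url(url,'HOST') url_domain,count(1) cnt
from dws_behavior_log
group by parse_url(url,'HOST');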