美文网首页
day04 简单的数据导入和sql查询

day04 简单的数据导入和sql查询

作者: LittleBear_6c91 | 来源:发表于2019-04-28 09:28 被阅读0次

导入和插入数据代码

-- mysql --local-infile=1 -u root -p

create database movie_db default character set='utf8';

use movie_db;

create table ratings (
    user_id int null,
    movie_id int null,
    ratings int null,
    rating_time int null
);

-- -- 指定导入文件的位置 
-- LOAD DATA local INFILE '/Users/carmack/datasets/movielens/ratings.dat' 
-- -- 导入到哪张表
-- INTO TABLE ratings 
-- -- 字段分隔符用什么
-- FIELDS TERMINATED BY '::'  
-- -- 双引号是括住字段
-- ENCLOSED BY '"' 
-- -- 你用什么符号来区别换行
-- LINES TERMINATED BY '\n';

create table movies (
    movie_id int null,
    title varchar(256) null,
    genres varchar(512) null
);

-- LOAD DATA local INFILE '/Users/carmack/datasets/movielens/movies.dat' 
-- INTO TABLE movies FIELDS TERMINATED BY '::'  ENCLOSED BY '"' LINES TERMINATED BY '\n';

create table users (
    user_id int null,
    gender varchar(2) null,
    age int null,
    occupation int null,
    zip_code varchar(128) null
);

-- LOAD DATA local INFILE '/Users/carmack/datasets/movielens/users.dat' 
-- INTO TABLE users FIELDS TERMINATED BY '::'  ENCLOSED BY '"' LINES TERMINATED BY '\n';

    -- *  1:  "Under 18"   < 18
    -- * 18:  "18-24"   18 <= < 25
    -- * 25:  "25-34"   25 <= < 35
    -- * 35:  "35-44"   35 <= < 45
    -- * 45:  "45-49"   45 <= <50
    -- * 50:  "50-55"   50 <= < 56
    -- * 56:  "56+"     56 <=

create table age_dict (
    age_id int null,
    age_min int null,
    age_max int null
);

insert into age_dict (age_id, age_min, age_max) values(1, 0, 18);
insert into age_dict (age_id, age_min, age_max) values(18, 18, 25);
insert into age_dict (age_id, age_min, age_max) values(25, 25, 35);
insert into age_dict (age_id, age_min, age_max) values(35, 35, 45);
insert into age_dict (age_id, age_min, age_max) values(45, 45, 50);
insert into age_dict (age_id, age_min, age_max) values(50, 50, 56);
insert into age_dict (age_id, age_min, age_max) values(56, 56, 300);


-- - Occupation is chosen from the following choices:

--     *  0:  "other" or not specified
--     *  1:  "academic/educator"
--     *  2:  "artist"
--     *  3:  "clerical/admin"
--     *  4:  "college/grad student"
--     *  5:  "customer service"
--     *  6:  "doctor/health care"
--     *  7:  "executive/managerial"
--     *  8:  "farmer"
--     *  9:  "homemaker"
--     * 10:  "K-12 student"
--     * 11:  "lawyer"
--     * 12:  "programmer"
--     * 13:  "retired"
--     * 14:  "sales/marketing"
--     * 15:  "scientist"
--     * 16:  "self-employed"
--     * 17:  "technician/engineer"
--     * 18:  "tradesman/craftsman"
--     * 19:  "unemployed"
--     * 20:  "writer"

create table occup_dict (
    occup_id int null,
    occup_title varchar(256) null
);

insert into occup_dict (occup_id, occup_title) values(0, 'other or not specified');
insert into occup_dict (occup_id, occup_title) values(1, 'academic/educator');
insert into occup_dict (occup_id, occup_title) values(2, 'artist');
insert into occup_dict (occup_id, occup_title) values(3, 'clerical/admin');
insert into occup_dict (occup_id, occup_title) values(4, 'college/grad student');
insert into occup_dict (occup_id, occup_title) values(5, 'customer service');
insert into occup_dict (occup_id, occup_title) values(6, 'doctor/health care');
insert into occup_dict (occup_id, occup_title) values(7, 'executive/managerial');
insert into occup_dict (occup_id, occup_title) values(8, 'farmer');
insert into occup_dict (occup_id, occup_title) values(9, 'homemaker');
insert into occup_dict (occup_id, occup_title) values(10, 'K-12 student');
insert into occup_dict (occup_id, occup_title) values(11, 'lawyer');
insert into occup_dict (occup_id, occup_title) values(12, 'programmer');
insert into occup_dict (occup_id, occup_title) values(13, 'retired');
insert into occup_dict (occup_id, occup_title) values(14, 'sales/marketing');
insert into occup_dict (occup_id, occup_title) values(15, 'scientist');
insert into occup_dict (occup_id, occup_title) values(16, 'self-employed');
insert into occup_dict (occup_id, occup_title) values(17, 'technician/engineer');
insert into occup_dict (occup_id, occup_title) values(18, 'tradesman/craftsman');
insert into occup_dict (occup_id, occup_title) values(19, 'unemployed');
insert into occup_dict (occup_id, occup_title) values(20, 'writer');


#在ipython中插入数据代码(先定义再执行后面倒入代码)
--db= sqla.create_engine('mysql+pymysql://root:Vff123456@127.0.0.1/movie_db?charset=utf8')
--
--mnames = ['movie_id', 'title', 'genres']
--movies = pd.read_table(r'datasets/movielens/movies.dat', sep='::', header=None, names=mnames,engine='python')#前面加r转义不行就加r
--
--mnames = ['user_id', 'gender', 'age', 'occupation', 'zip_code']
--users = pd.read_table('datasets/movielens/users.dat', sep='::', header=None, names=mnames,engine='python')
--
--mnames = ['user_id', 'movie_id', 'ratings', 'rating_time']
--ratings = pd.read_table('datasets/movielens/ratings.dat', sep='::', header=None, names=mnames,engine='python')
--
--movies.to_sql('movies', db, index=False, if_exists='append')
--users.to_sql('users', db, index=False, if_exists='append')
--ratings.to_sql('ratings', db, index=False, if_exists='append')

查询代码

找出用户的性别、年龄、电影名、打分

select gender, age_dict.age_min, age_dict.age_max, title, ratings
from users, movies, ratings, age_dict
where users.user_id = ratings.user_id and 
ratings.movie_id = movies.movie_id and 
age_dict.age_id = users.age;

select a.gender, d.age_min, d.age_max, c.title, b.ratings
from users a inner join ratings b
on a.user_id = b.user_id
inner join movies c
on b.movie_id = c.movie_id
inner join age_dict d
on a.age = d.age_id;

select case when a.gender = 'F' then '女' else '男' end as 性别, 
avg(b.ratings) as 平均打分
from users a inner join ratings b
on a.user_id = b.user_id
group by a.gender;

select a.title, avg(b.ratings)
from movies a inner join ratings b
on a.movie_id = b.movie_id
group by a.title;

select a.gender, c.title, avg(b.ratings)
from users a inner join ratings b
on a.user_id = b.user_id
inner join movies c
on c.movie_id = b.movie_id
group by a.gender, c.title
order by c.title 
limit 3;

查询不同年龄段的电影平均评分,不分性别

set @rownum=0;

create table temp_result as
select @rownum:=@rownum+1 as rownum, CONCAT(c.age_min,'~',c.age_max) as 年龄段, d.title, avg(b.ratings) as 平均评分
from users a inner join ratings b 
on a.user_id = b.user_id
inner join age_dict c
on a.age = c.age_id
inner join movies d
on d.movie_id = b.movie_id
group by c.age_min, c.age_max, d.title;



不同职业的电影平均得分,要分性别
select c.occup_title, a.gender, avg(b.ratings)
from users a inner join ratings b
on a.user_id = b.user_id
inner join occup_dict c
on a.occupation = c.occup_id
group by c.occup_title, a.gender;

各个年龄段最喜欢的电影
select d.age_min, d.age_max, c.title, b.ratings
from users a inner join ratings b
on a.user_id = b.user_id
inner join movies c
on c.movie_id = b.movie_id
inner join age_dict d
on d.age_id = a.age
group by d.age_min, d.age_max
order by b.ratings desc
limit 0,1;

select a.gender, c.title, avg(b.ratings)
from users a inner join ratings b
on a.user_id = b.user_id
inner join movies c
on c.movie_id = b.movie_id
group by a.gender, c.title
order by c.title 

select * from temp_result
where (年龄段, 平均评分, rownum) in 
(
    select a.年龄段, a.平均评分, max(a.rownum) from 
    (
        select rownum, 年龄段, 平均评分, title 
            from temp_result 
            where (年龄段, 平均评分) in (
            select 年龄段, max(平均评分)
            from temp_result
            group by 年龄段
        )
    ) a 
    group by a.年龄段, a.平均评分
) 

create view temp_view2 as
select gender, age_dict.age_min, age_dict.age_max, title, ratings
from users, movies, ratings, age_dict
where users.user_id = ratings.user_id and 
ratings.movie_id = movies.movie_id and 
age_dict.age_id = users.age;

相关文章

网友评论

      本文标题:day04 简单的数据导入和sql查询

      本文链接:https://www.haomeiwen.com/subject/bxbrnqtx.html