一、数模设计
1、原始表,各对应一个excel
CREATE TABLE `current_school_info` (
`school_name` varchar(100) NOT NULL COMMENT '学校名称',
`current_num` int(11) DEFAULT NULL COMMENT '当前人数',
`total_num` int(11) DEFAULT NULL COMMENT '总人数',
PRIMARY KEY (`school_name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE `grow_school` (
`area` varchar(45) DEFAULT NULL,
`school_name` varchar(100) DEFAULT NULL,
`school_type` varchar(16) DEFAULT NULL,
`grow_class` varchar(16) DEFAULT NULL,
`grow_person` varchar(16) DEFAULT NULL,
`plan_time` varchar(45) DEFAULT NULL,
`remark` varchar(200) DEFAULT NULL,
`id` int(11) NOT NULL AUTO_INCREMENT,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=256 DEFAULT CHARSET=utf8;
CREATE TABLE `school_network` (
`school_name` varchar(100) NOT NULL,
`emp_no` varchar(45) DEFAULT NULL,
`network_type` varchar(45) DEFAULT NULL,
`download_speed1` varchar(45) DEFAULT NULL,
`download_speed2` varchar(45) DEFAULT NULL,
`download_speed3` varchar(45) DEFAULT NULL,
`upload_speed1` varchar(45) DEFAULT NULL,
`upload_speed2` varchar(45) DEFAULT NULL,
`upload_speed3` varchar(45) DEFAULT NULL,
`remark` varchar(1000) DEFAULT NULL,
PRIMARY KEY (`school_name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
汇总学校的增长情况
CREATE TABLE `school_grow_info` (
`school_name` varchar(100) NOT NULL,
`grow_person` varchar(16) DEFAULT NULL,
`school_type` varchar(45) DEFAULT NULL,
PRIMARY KEY (`school_name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
为了之间过渡,需要创建几张临时表
CREATE TABLE `tmp_new_school` (
`school_name` varchar(100) NOT NULL COMMENT '学校名称',
`current_num` int(11) DEFAULT '0' COMMENT '当前人数',
`grow_num` int(11) DEFAULT NULL COMMENT '增加人数',
`last_total_num` int(11) DEFAULT '0' COMMENT '上学期总人数',
`next_total_num` int(11) DEFAULT NULL COMMENT '下学期总人数',
PRIMARY KEY (`school_name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
CREATE TABLE `tmp_old_school` (
`school_name` varchar(100) NOT NULL COMMENT '学校名称,系统填写的。',
`yy_school_name` varchar(45) NOT NULL COMMENT '运营填写的学校名称',
`current_num` int(11) NOT NULL COMMENT '当前人数',
`grow_num` int(11) DEFAULT '0' COMMENT '增加人数',
`last_total_num` int(11) NOT NULL COMMENT '上学期总人数',
`next_total_num` int(11) NOT NULL COMMENT '下学期总人数',
PRIMARY KEY (`school_name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='老学校,有两类匹配情况:模糊匹配;运营填写的学校名称yy_school_name被包含于系统填写的学校名称school_name内。';
CREATE TABLE `tmp_school` (
`id` int(11) NOT NULL AUTO_INCREMENT,
`school_name` varchar(100) DEFAULT NULL,
`yy_school_name` varchar(100) DEFAULT NULL,
`current_num` int(11) DEFAULT NULL,
`grow_num` int(11) DEFAULT NULL,
`last_total_num` int(11) DEFAULT NULL,
`next_total_num` varchar(45) DEFAULT NULL,
PRIMARY KEY (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=64 DEFAULT CHARSET=utf8;
CREATE TABLE `tmp_system_unilateral` (
`school_name` varchar(100) NOT NULL COMMENT '学校名称,系统填写的。',
`current_num` int(11) NOT NULL COMMENT '当前人数',
`total_num` int(11) NOT NULL COMMENT '总人数',
PRIMARY KEY (`school_name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='系统里存在,但是在运营列表里不存在的学校';
CREATE TABLE `tmp_yy_unilateral` (
`school_name` varchar(100) NOT NULL COMMENT '学校名称,由运营填写。',
`grow_num` int(11) NOT NULL COMMENT '当前人数',
`total_num` int(11) NOT NULL COMMENT '总人数',
PRIMARY KEY (`school_name`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='运营里存在,但是在学校列表tmp_system_unilateral里不存在的学校。';
二、汇总
1、求出运营给出的汇总情况
INSERT INTO sys.school_grow_info (school_name, grow_person, school_type)
SELECT school_name, SUM(grow_person), school_type
FROM sys.grow_school
GROUP BY school_name, school_type;
2、由于学校名称可能不一致,我们需要模糊匹配。所以在对比的时候,会出现两边各不一致的情况。
1)首先分新老学校,用一张表保存新学校的情况。
DELETE FROM sys.tmp_new_school;
INSERT INTO sys.tmp_new_school (school_name, current_num, grow_num, last_total_num, next_total_num)
SELECT b.school_name AS school_name, 0 AS current_num, b.grow_person + 0 AS grow_num, 0 AS last_total_num
, b.grow_person + 0 AS next_total_num
FROM sys.school_grow_info b
WHERE b.school_type LIKE '%新%';
2)运营填写的学校名称,包含在系统的学校名称里。这种认为是最接近一致性的情况。防止出现一对多,我们需要去重。
DELETE FROM sys.tmp_old_school;
-- 排除掉一对多的情况
INSERT INTO sys.tmp_old_school (school_name, yy_school_name, current_num, grow_num, last_total_num
, next_total_num)
SELECT a.school_name AS school_name, b.school_name AS yy_school_name, a.current_num AS current_num, b.grow_person AS grow_num, a.total_num AS last_total_num
, a.current_num + b.grow_person AS next_total_num
FROM sys.current_school_info a
INNER JOIN sys.school_grow_info b
ON (b.school_type LIKE '%老%'
AND a.school_name LIKE CONCAT('%', b.school_name, '%')
AND b.school_name IN (
SELECT b.school_name
FROM sys.current_school_info a
INNER JOIN sys.school_grow_info b
ON b.school_type LIKE '%老%'
AND a.school_name LIKE CONCAT('%', b.school_name, '%')
GROUP BY b.school_name
HAVING COUNT(b.school_name) = 1
));
3)当上面模糊匹配不奏效的情况下,我们需要采取逐个字进行匹配。
TRUNCATE TABLE sys.tmp_school;
INSERT INTO `sys`.`tmp_school` (`school_name`, `yy_school_name`, `current_num`, `grow_num`, `last_total_num`
, `next_total_num`)
SELECT a.school_name AS school_name, b.school_name AS yy_school_name, a.current_num AS current_num, b.grow_person AS grow_num, a.total_num AS last_total_num
, a.current_num + b.grow_person AS next_total_num
FROM sys.current_school_info a
INNER JOIN sys.school_grow_info b
ON (b.school_type LIKE '%老%'
AND a.school_name NOT IN (
SELECT school_name
FROM sys.tmp_old_school
)
AND CASE LENGTH(b.school_name)
WHEN LENGTH('一二三') THEN (a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 1, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 2, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 3, 1), '%'))
WHEN LENGTH('一二三四') THEN (a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 1, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 2, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 3, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 4, 1), '%'))
WHEN LENGTH('一二三四五') THEN (a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 1, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 2, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 3, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 4, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 5, 1), '%'))
WHEN LENGTH('一二三四五六') THEN (a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 1, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 2, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 3, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 4, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 5, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 6, 1), '%'))
WHEN LENGTH('一二三四五六七') THEN (a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 1, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 2, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 3, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 4, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 5, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 6, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 7, 1), '%'))
WHEN LENGTH('一二三四五六七八') THEN (a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 1, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 2, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 3, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 4, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 5, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 6, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 7, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 8, 1), '%'))
WHEN
LENGTH('一二三四五六七八九')
THEN
a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 1, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 2, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 3, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 4, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 5, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 6, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 7, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 8, 1), '%')
AND a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 9, 1), '%')
ELSE a.school_name LIKE CONCAT('%', SUBSTR(b.school_name, 1, 2), '%')
END);
注意:上述过程,也可能会出现一对多的情况,tmp_school并不保证school_name的唯一性。
我们需要进行人工过滤,保证下文的保存动作,school_name映射的唯一性。
-- 先找出多的情况
SELECT school_name
FROM sys.tmp_school
GROUP BY school_name
HAVING COUNT(school_name) > 1;
-- 人工删掉
SELECT *
FROM sys.tmp_old_school;
接下来,我们就进行保存动作。
INSERT INTO sys.tmp_old_school (school_name, yy_school_name, current_num, grow_num, last_total_num
, next_total_num)
SELECT a.school_name AS school_name, a.yy_school_name AS yy_school_name, a.current_num AS current_num, a.grow_num AS grow_num, a.last_total_num AS last_total_num
, a.next_total_num AS next_total_num
FROM `sys`.tmp_school a;
4)单边账的情况,这有两种可能。到时候需要人工接入处理。
DELETE FROM `sys`.`tmp_system_unilateral`;
INSERT INTO `sys`.`tmp_system_unilateral` (`school_name`, `current_num`, `total_num`)
SELECT t.school_name, t.current_num, t.total_num
FROM `sys`.current_school_info t
WHERE t.school_name NOT IN (
SELECT school_name
FROM tmp_old_school
);
DELETE FROM `sys`.`tmp_yy_unilateral`;
INSERT INTO `sys`.`tmp_yy_unilateral` (`school_name`, `grow_num`, `total_num`)
SELECT t.school_name, t.grow_person, t.grow_person
FROM `sys`.school_grow_info t
WHERE t.school_name NOT IN (
SELECT school_name
FROM (
SELECT school_name
FROM tmp_new_school
UNION ALL
SELECT yy_school_name
FROM tmp_old_school
UNION ALL
SELECT school_name
FROM tmp_system_unilateral
) b
);
三、查询
SELECT *
FROM (
SELECT '' AS school_name, a.school_name AS yy_school_name, a.current_num AS current_num, a.grow_num AS grow_num, a.last_total_num AS last_total_num
, a.next_total_num AS next_total_num, '新学校' AS select_type
FROM sys.tmp_new_school a
UNION ALL
SELECT a.school_name AS school_name, a.yy_school_name AS yy_school_name, a.current_num AS current_num, a.grow_num AS grow_num, a.last_total_num AS last_total_num
, a.next_total_num AS next_total_num, '老学校' AS select_type
FROM sys.tmp_old_school a
UNION ALL
SELECT a.school_name AS school_name, '' AS yy_school_name, a.current_num AS current_num, 0 AS grow_num, a.total_num AS last_total_num
, 0 AS next_total_num, '老学校-未匹配上运营' AS select_type
FROM sys.tmp_system_unilateral a
UNION ALL
SELECT '' AS school_name, a.school_name AS yy_school_name, 0 AS current_num, a.grow_num AS grow_num, 0 AS last_total_num
, a.total_num AS next_total_num, '老学校-未匹配上学校' AS select_type
FROM sys.tmp_yy_unilateral a
) c
ORDER BY c.select_type ASC, c.next_total_num DESC, c.last_total_num DESC;
整个过程,最麻烦的就是对sys
.tmp_school
的处理,也是核心部分。
网友评论