选读SQL经典实例笔记21_字符串处理.png
1. SQL 并不专门用于处理复杂的字符串
1.1. 需要有逐字遍历字符串的能力。但是,使用SQL 进行这样的操作并不容易
1.2. SQL 没有Loop循环功能
1.2.1. Oracle的MODEL子句除外
2. 遍历字符串
2.1. 把EMP表的ENAME等于KING的字符串拆开来显示为4行,每行一个字符
2.2. sql
select substr(e.ename,iter.pos,1) as C
from (select ename from emp where ename = 'KING') e,
(select id as pos from t10) iter
where iter.pos <= length(e.ename)
C
-
K
I
N
G
2.3. T10表,该表有10行记录(它只有一列,列名为ID,它的值分别是从1到10
3. 嵌入引号
3.1. sql
QMARKS
--------------
g'day mate
beavers' teeth
'
3.2. sql
select 'g''day mate' qmarks from t1 union all
select 'beavers'' teeth' from t1 union all
select '''' from t1
4. 统计字符出现的次数
4.1. 10,CLARK,MANAGER
4.1.1. 该字符串里有多少个逗号
4.2. sql
select (length('10,CLARK,MANAGER')-
length(replace('10,CLARK,MANAGER',',','')))/length(',')
as cnt
from t1
4.3. 获取不含逗号的字符串长度
4.4. 逗号的删除则借助了REPLACE函数
5. 删除不想要的字符
5.1. sql
ENAME SAL
---------- ----------
SMITH 800
ALLEN 1600
WARD 1250
JONES 2975
MARTIN 1250
BLAKE 2850
CLARK 2450
SCOTT 3000
KING 5000
TURNER 1500
ADAMS 1100
JAMES 950
FORD 3000
MILLER 1300
5.2. sql
ENAME STRIPPED1 SAL STRIPPED2
--------- ---------- ---------- ---------
SMITH SMTH 800 8
ALLEN LLN 1600 16
WARD WRD 1250 125
JONES JNS 2975 2975
MARTIN MRTN 1250 125
BLAKE BLK 2850 285
CLARK CLRK 2450 245
SCOTT SCTT 3000 3
KING KNG 5000 5
TURNER TRNR 1500 15
ADAMS DMS 1100 11
JAMES JMS 950 95
FORD FRD 3000 3
MILLER MLLR 1300 13
5.3. DB2
5.3.1. sql
select ename,
replace(translate(ename,'aaaaa','AEIOU'),'a','') stripped1,
sal,
replace(cast(sal as char(4)),'0','') stripped2
from emp
5.4. Oracle
5.5. PostgreSQL
5.6. 使用内置函数TRANSLATE和REPLACE删除不想要的字符和字符串
5.6.1. sql
select ename,
replace(translate(ename,'AEIOU','aaaaa'),'a')
as stripped1,
sal,
replace(sal,0,'') as stripped2
from emp
5.7. MySQL
5.8. SQL Server
5.9. 多次调用REPLACE 函数
5.9.1. sql
select ename,
replace(
replace(
replace(
replace(
replace(ename,'A',''),'E',''),'I',''),'O',''),'U','')
as stripped1,
sal,
replace(sal,0,'') stripped2
from emp
6. 分离数字和字符数据
6.1. sql
DATA
---------------
SMITH800
ALLEN1600
WARD1250
JONES2975
MARTIN1250
BLAKE2850
CLARK2450
SCOTT3000
KING5000
TURNER1500
ADAMS1100
JAMES950
FORD3000
MILLER1300
6.2. DB2
6.2.1. sql
select replace(
translate(data,'0000000000','0123456789'),'0','') ename,
cast(
replace(
translate(lower(data),repeat('z',26),
'abcdefghijklmnopqrstuvwxyz'),'z','') as integer) sal
from (
select ename||cast(sal as char(4)) data
from emp
) x
6.3. Oracle
6.3.1. sql
select replace(
translate(data,'0123456789','0000000000'),'0') ename,
to_number(
replace(
translate(lower(data),
'abcdefghijklmnopqrstuvwxyz',
rpad('z',26,'z')),'z')) sal
from (
select ename||sal data
from emp
)
6.4. PostgreSQL
6.4.1. sql
select replace(
translate(data,'0123456789','0000000000'),'0','') as ename,
cast(
replace(
translate(lower(data),
'abcdefghijklmnopqrstuvwxyz',
rpad('z',26,'z')),'z','') as integer) as sal
from (
select ename||sal as data
from emp
) x
7. 按照子字符串排序
7.1. sql
ENAME
----------
SMITH
ALLEN
WARD
JONES
MARTIN
BLAKE
CLARK
SCOTT
KING
TURNER
ADAMS
JAMES
FORD
MILLER
7.2. DB2
7.3. Oracle
7.4. PostgreSQL
7.5. MySQL
7.6. 使用内置函数LENGTH和SUBSTR
7.6.1. sql
select ename
from emp
order by substr(ename,length(ename)-1,2)
7.7. SQL Server
7.7.1. sql
select ename
from emp
order by substring(ename,len(ename)-1,2)
8. 根据字符串里的数字排序
8.1. sql
DATA
-----------------------------
CLARK 7782 ACCOUNTING
KING 7839 ACCOUNTING
MILLER 7934 ACCOUNTING
SMITH 7369 RESEARCH
JONES 7566 RESEARCH
SCOTT 7788 RESEARCH
ADAMS 7876 RESEARCH
FORD 7902 RESEARCH
ALLEN 7499 SALES
WARD 7521 SALES
MARTIN 7654 SALES
BLAKE 7698 SALES
TURNER 7844 SALES
JAMES 7900 SALES
8.2. DB2
8.2.1. sql
select data
from V
order by
cast(
replace(
translate(data,repeat('#',length(data)),
replace(
translate(data,'##########','0123456789'),
'#','')),'#','') as integer)
8.3. Oracle
8.3.1. sql
select data
from V
order by
to_number(
replace(
translate(data,
replace(
translate(data,'0123456789','##########'),
'#'),rpad('#',20,'#')),'#'))
8.4. PostgreSQL
8.4.1. sql
select data
from V
order by
cast(
replace(
translate(data,
replace(
translate(data,'0123456789','##########'),
'#',''),rpad('#',20,'#')),'#','') as integer)
9. 识别字符串里的数字字符
9.1. sql
MIXED
--------------
CL10AR
KI10NG
MI10LL
7369
7566
7788
7876
7902
ALLEN
WARD
MARTIN
BLAKE
TURNER
JAMES
9.2. DB2
9.2.1. sql
select mixed old,
cast(
case
when
replace(
translate(mixed,'9999999999','0123456789'),'9','') = ''
then
mixed
else replace(
translate(mixed,
repeat('#',length(mixed)),
replace(
translate(mixed,'9999999999','0123456789'),'9','')),
'#','')
end as integer ) mixed
from V
where posstr(translate(mixed,'9999999999','0123456789'),'9') > 0
9.3. Oracle
9.3.1. sql
select to_number (
case
when
replace(translate(mixed,'0123456789','9999999999'),'9')
is not null
then
replace(
translate(mixed,
replace(
translate(mixed,'0123456789','9999999999'),'9'),
rpad('#',length(mixed),'#')),'#')
else
mixed
end
) mixed
from V
where instr(translate(mixed,'0123456789','9999999999'),'9') > 0
9.4. PostgreSQL
9.4.1. sql
select cast(
case
when
replace(translate(mixed,'0123456789','9999999999'),'9','')
is not null
then
replace(
translate(mixed,
replace(
translate(mixed,'0123456789','9999999999'),'9',''),
rpad('#',length(mixed),'#')),'#','')
else
mixed
end as integer ) as mixed
from V
where strpos(translate(mixed,'0123456789','9999999999'),'9') > 0
9.5. MySQL
9.5.1. sql
select cast(group_concat(c order by pos separator '') as unsigned)
as MIXED1
from (
select v.mixed, iter.pos, substr(v.mixed,iter.pos,1) as c
from V,
( select id pos from t10 ) iter
where iter.pos <= length(v.mixed)
and ascii(substr(v.mixed,iter.pos,1)) between 48 and 57
) y
group by mixed
order by 1
10. 提取第n个分隔子字符串
10.1. sql
create view V as
select 'mo,larry,curly' as name
from t1
union all
select 'tina,gina,jaunita,regina,leena' as name
from t1
10.2. sql
SUB
-----
larry
gina
10.3. DB2
10.3.1. sql
select substr(c,2,locate(',',c,2)-2)
from (
select pos, name, substr(name, pos) c,
row_number() over(partition by name
order by length(substr(name,pos)) desc) rn
from (
select ',' ||csv.name|| ',' as name,
cast(iter.pos as integer) as pos
from V csv,
(select row_number() over() pos from t100 ) iter
where iter.pos <= length(csv.name)+2
) x
where length(substr(name,pos)) > 1
and substr(substr(name,pos),1,1) = ','
) y
where rn = 2
10.4. Oracle
10.4.1. sql
select sub
from (
select iter.pos,
src.name,
substr( src.name,
instr( src.name,',',1,iter.pos )+1,
instr( src.name,',',1,iter.pos+1 ) -
instr( src.name,',',1,iter.pos )-1) sub
from (select ','||name||',' as name from V) src,
(select rownum pos from emp) iter
where iter.pos < length(src.name)-length(replace(src.name,','))
)
where pos = 2
10.5. PostgreSQL
10.5.1. sql
select name
from (
select iter.pos, split_part(src.name,',',iter.pos) as name
from (select id as pos from t10) iter,
(select cast(name as text) as name from v) src
where iter.pos <=
length(src.name)-length(replace(src.name,',',''))+1
) x
where pos = 2
10.6. MySQL
10.6.1. sql
select name
from (
select iter.pos,
substring_index(
substring_index(src.name,',',iter.pos),',',-1) name
from V src,
(select id pos from t10) iter,
where iter.pos <=
length(src.name)-length(replace(src.name,',',''))
) x
where pos = 2
10.7. SQL Server
10.7.1. sql
select substring(c,2,charindex(',',c,2)-2)
from (
select pos, name, substring(name, pos, len(name)) as c,
row_number() over(
partition by name
order by len(substring(name,pos,len(name))) desc) rn
from (
select ',' + csv.name + ',' as name,
iter.pos
from V csv,
(select id as pos from t100 ) iter
where iter.pos <= len(csv.name)+2
) x
where len(substring(name,pos,len(name))) > 1
and substring(substring(name,pos,len(name)),1,1) = ','
) y
where rn = 2
11. 解析IP地址
11.1. 111.22.3.4
11.2. sql
A B C D
----- ----- ----- ---
111 22 3 4
11.3. DB2
11.3.1. sql
with x (pos,ip) as (
values (1,'.92.111.0.222')
union all
select pos+1,ip from x where pos+1 <= 20
)
select max(case when rn=1 then e end) a,
max(case when rn=2 then e end) b,
max(case when rn=3 then e end) c,
max(case when rn=4 then e end) d
from (
select pos,c,d,
case when posstr(d,'.') > 0 then substr(d,1,posstr(d,'.')-1)
else d
end as e,
row_number() over(order by pos desc) rn
from (
select pos, ip,right(ip,pos) as c, substr(right(ip,pos),2) as d
from x
where pos <= length(ip)
and substr(right(ip,pos),1,1) = '.'
) x
) y
11.4. Oracle
11.4.1. sql
select ip,
substr(ip, 1, instr(ip,'.')-1 ) a,
substr(ip, instr(ip,'.')+1,
instr(ip,'.',1,2)-instr(ip,'.')-1 ) b,
substr(ip, instr(ip,'.',1,2)+1,
instr(ip,'.',1,3)-instr(ip,'.',1,2)-1 ) c,
substr(ip, instr(ip,'.',1,3)+1 ) d
from (select '92.111.0.2' as ip from t1)
11.5. PostgreSQL
11.5.1. sql
select split_part(y.ip,'.',1) as a,
split_part(y.ip,'.',2) as b,
split_part(y.ip,'.',3) as c,
split_part(y.ip,'.',4) as d
from (select cast('92.111.0.2' as text) as ip from t1) as y
11.6. MySQL
11.6.1. sql
select substring_index(substring_index(y.ip,'.',1),'.',-1) a,
substring_index(substring_index(y.ip,'.',2),'.',-1) b,
substring_index(substring_index(y.ip,'.',3),'.',-1) c,
substring_index(substring_index(y.ip,'.',4),'.',-1) d
from (select '92.111.0.2' as ip from t1) y
11.7. SQL Server
11.7.1. sql
with x (pos,ip) as (
select 1 as pos,'.92.111.0.222' as ip from t1
union all
select pos+1,ip from x where pos+1 <= 20
)
select max(case when rn=1 then e end) a,
max(case when rn=2 then e end) b,
max(case when rn=3 then e end) c,
max(case when rn=4 then e end) d
from (
select pos,c,d,
case when charindex('.',d) > 0
then substring(d,1,charindex('.',d)-1)
else d
end as e,
row_number() over(order by pos desc) rn
from (
select pos, ip,right(ip,pos) as c,
substring(right(ip,pos),2,len(ip)) as d
from x
where pos <= len(ip)
and substring(right(ip,pos),1,1) = '.'
) x
) y
网友评论