# 如何识别PDB文件中的特定氨基酸残基
1. PDB文件格式
ATOM 1604 C ILE A 242 20.705 -26.318 26.653 1.00 14.92 C
ATOM 1605 O ILE A 242 20.152 -26.942 27.574 1.00 14.92 O
ATOM 1606 CB ILE A 242 19.560 -24.197 25.856 1.00 14.92 C
ATOM 1607 CG1 ILE A 242 19.299 -22.702 26.128 1.00 24.76 C
ATOM 1608 CG2 ILE A 242 18.303 -25.064 25.876 1.00 24.76 C
ATOM 1609 CD1 ILE A 242 18.131 -22.226 25.235 1.00 24.76 C
ATOM 1610 N ALA A 243 21.439 -26.905 25.725 1.00 17.24 N
ATOM 1611 CA ALA A 243 21.400 -28.376 25.564 1.00 17.24 C
ATOM 1612 C ALA A 243 22.118 -29.051 26.721 1.00 17.24 C
ATOM 1613 O ALA A 243 21.805 -30.211 27.060 1.00 17.24 O
ATOM 1614 CB ALA A 243 22.110 -28.691 24.252 1.00 17.24 C
ATOM 1615 N SER A 244 22.931 -28.273 27.410 1.00 21.20 N
ATOM 1616 CA SER A 244 23.624 -28.860 28.537 1.00 21.20 C
ATOM 1617 C SER A 244 23.069 -28.457 29.888 1.00 21.20 C
ATOM 1618 O SER A 244 23.749 -28.808 30.883 1.00 21.20 O
ATOM 1619 CB SER A 244 25.120 -28.570 28.588 1.00 21.20 C
ATOM 1620 OG SER A 244 25.568 -27.755 27.532 1.00 37.03 O
ATOM 1621 N ASN A 245 22.014 -27.634 29.956 1.00 23.26 N
ATOM 1622 CA ASN A 245 21.491 -27.139 31.253 1.00 23.26 C
ATOM 1623 C ASN A 245 20.002 -27.388 31.492 1.00 23.26 C
ATOM 1624 O ASN A 245 19.486 -26.996 32.563 1.00 23.26 O
ATOM 1625 CB ASN A 245 21.630 -25.625 31.399 1.00 23.26 C
ATOM 1626 CG ASN A 245 23.052 -25.304 31.799 1.00 20.44 C
ATOM 1627 OD1 ASN A 245 23.456 -25.717 32.901 1.00 20.44 O
ATOM 1628 ND2 ASN A 245 23.863 -25.089 30.809 1.00 20.44 N
ATOM 1629 OXT ASN A 245 19.369 -28.189 30.785 1.00 27.82 O
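PDB文件说明:每条 ATOM 记录都是固定列宽的文本行(上面的示例中连续空格已被压缩,原始文件中各字段是按列对齐的)。主要字段所在的列为:1-6 记录类型(ATOM),7-11 原子序号,13-16 原子名称,18-20 残基名称,22 链标识符,23-26 残基序号,31-38、39-46、47-54 分别为 x、y、z 坐标,55-60 占有率,61-66 温度因子,77-78 元素符号。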

2. Python 3 版本
import struct
# Fixed-width layout of one PDB record for struct.unpack: each "Ns" item is an
# N-byte field, and the widths add up to 81 bytes (presumably the 80 columns
# of a PDB line plus its newline). Field indices read by get_ATOM_line:
# 3 = atom name, 5 = residue type, 7 = chain, 8 = residue number,
# 11/12/13 = x/y/z coordinates.
pdb_format = '6s5s1s4s1s3s1s1s4s1s3s8s8s8s6s6s10s2s3s'
def main(pdb_file, out_file, residues):
    """Copy the ATOM records of selected residues from a PDB file to a new file.

    Only lines starting with ``ATOM`` are examined. A line is copied when its
    residue type and residue number both match one of the requested pairs;
    the chain identifier is not taken into account.

    Args:
        pdb_file: Path of the PDB file to read.
        out_file: Path of the file to write; it is created or overwritten.
        residues: Iterable of ``(residue_type, residue_number)`` pairs of
            ``str``, for example ``[('ASP', '102'), ('HIS', '57')]``.
    """
    # Build the lookup once instead of rescanning the list for every ATOM
    # line; this also guarantees a matching line is written only once even
    # if the same pair is listed twice.
    wanted = {(aa, num) for aa, num in residues}

    # "with" closes both files even when parsing raises part-way through.
    with open(pdb_file) as pdb, open(out_file, 'w') as out:
        for line in pdb:
            if line[0:4] != 'ATOM':
                continue
            res_data = get_ATOM_line(line)
            # struct "s" fields come back as bytes in Python 3, so they must
            # be decoded before being compared with the str pairs.
            res_type = res_data[1].decode()
            res_num = res_data[2].decode()
            if (res_type, res_num) in wanted:
                out.write(line)
def get_ATOM_line(line):
    """Parse one PDB ATOM record into a tuple of selected fields.

    The record is split into fixed-width fields with ``struct`` according to
    the module-level ``pdb_format``.

    Args:
        line: One ATOM line as ``str``. Lines with a trailing newline, with
            trailing blanks removed, or with a CRLF ending are all accepted.

    Returns:
        ``(chain, res_type, res_num, atom, x, y, z)``. The first four items
        are whitespace-stripped ``bytes`` (struct unpacks ``s`` fields to
        bytes in Python 3, so callers decode them before comparing with
        ``str``); ``x``, ``y`` and ``z`` are floats.

    Raises:
        ValueError: If a coordinate field is blank or not a number, e.g.
            because the line is too short to contain coordinates.
    """
    # struct.unpack only accepts a buffer of exactly calcsize(pdb_format)
    # bytes. Real files often contain lines with trailing blanks trimmed, no
    # final newline, or a "\r\n" ending, so normalise the length first:
    # drop trailing whitespace, then pad with spaces / cut to the exact size.
    record_size = struct.calcsize(pdb_format)
    raw = line.rstrip().encode('utf-8').ljust(record_size)[:record_size]

    # struct turns the byte string into a tuple of fixed-width byte fields.
    tmp = struct.unpack(pdb_format, raw)
    atom = tmp[3].strip()
    res_type = tmp[5].strip()
    chain = tmp[7].strip()
    res_num = tmp[8].strip()
    x = float(tmp[11].strip())
    y = float(tmp[12].strip())
    z = float(tmp[13].strip())
    return chain, res_type, res_num, atom, x, y, z
# Residues to extract, given as (residue type, residue number) string pairs.
residues = [
    ('ASP', '102'),
    ('HIS', '57'),
    ('SER', '195'),
]
# Copy the matching ATOM records from the input structure to trypsin_triad.pdb.
main("C:\\shiyan\\honghua\\1tld.pdb", "trypsin_triad.pdb", residues)
3. Python 2 版本
import struct
# Fixed-width layout of one PDB record for struct.unpack: each "Ns" item is an
# N-byte field, and the widths add up to 81 bytes (presumably the 80 columns
# of a PDB line plus its newline). Field indices read by get_atom_line:
# 3 = atom name, 5 = residue type, 7 = chain, 8 = residue number,
# 11/12/13 = x/y/z coordinates.
pdb_format = '6s5s1s4s1s3s1s1s4s1s3s8s8s8s6s6s10s2s3s'
def get_atom_line(line):
    '''Return an ATOM line parsed to a tuple.

    The record is split into fixed-width fields with struct according to
    the module-level pdb_format. This variant targets Python 2, where a
    line read from a text file is already a byte string and can be handed
    to struct directly.

    line -- one ATOM line; a trailing newline, trimmed trailing blanks or a
            CRLF ending are all accepted.

    Returns (chain, res_type, res_num, atom, x, y, z): the first four items
    are whitespace-stripped strings, x, y and z are floats.

    Raises ValueError if a coordinate field is blank or not a number, e.g.
    because the line is too short to contain coordinates.
    '''
    # struct.unpack only accepts a buffer of exactly calcsize(pdb_format)
    # bytes. Real files often contain lines with trailing blanks trimmed, no
    # final newline, or a "\r\n" ending, so normalise the length first:
    # drop trailing whitespace, then pad with spaces / cut to the exact size.
    record_size = struct.calcsize(pdb_format)
    record = line.rstrip().ljust(record_size)[:record_size]

    tmp = struct.unpack(pdb_format, record)
    atom = tmp[3].strip()
    res_type = tmp[5].strip()
    res_num = tmp[8].strip()
    chain = tmp[7].strip()
    x = float(tmp[11].strip())
    y = float(tmp[12].strip())
    z = float(tmp[13].strip())
    return chain, res_type, res_num, atom, x, y, z
def main(pdb_file, outfile, residues):
    '''Write the ATOM records of selected residues from a PDB file to a file.

    Only lines starting with "ATOM" are examined. A line is copied when its
    residue type and residue number both match one of the requested pairs;
    the chain identifier is not taken into account.

    pdb_file -- path of the PDB file to read.
    outfile  -- path of the file to write; it is created or overwritten.
    residues -- iterable of (residue_type, residue_number) string pairs,
                e.g. [('ASP', '102'), ('HIS', '57')].
    '''
    # Build the lookup once instead of rescanning the list for every ATOM
    # line; this also guarantees a matching line is written only once even
    # if the same pair is listed twice.
    wanted = set((aa, num) for aa, num in residues)

    # "with" closes both files even when parsing raises part-way through.
    with open(pdb_file) as pdb:
        with open(outfile, "w") as out:
            for line in pdb:
                if not line.startswith('ATOM'):
                    continue
                res_data = get_atom_line(line)
                if (res_data[1], res_data[2]) in wanted:
                    out.write(line)
# Residues to extract, given as (residue type, residue number) string pairs.
residues = [
    ('ASP', '102'),
    ('HIS', '57'),
    ('SER', '195'),
]
# Copy the matching ATOM records from 1TLD.pdb to trypsin_triad.pdb.
main("1TLD.pdb", "trypsin_triad.pdb", residues)
网友评论