DeepPrime
- code : https://github.com/hkimlab/DeepPrime
- web tools : http://deepcrispr.info/DeepPrime/page/help_src
- paper : https://doi.org/10.1016/j.cell.2023.03.034
Installation
## Create and activate virtual environment
conda create -n dprime python=3.8
conda activate dprime
## Install Required Python Packages
pip install tensorflow==2.8.0 #Use pip linked to the above python installation
pip install torch==1.10.0+cu113 torchvision==0.11.1+cu113 torchaudio===0.10.0+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
pip install biopython==1.78
pip install pandas regex silence-tensorflow
## Install ViennaRNA
pip install ViennaRNA
## Download Source Code
wget https://github.com/hkimlab/DeepPrime/archive/main.zip
unzip main.zip
cd DeepPrime-main
python DeepPrime.py -h
Test
python DeepPrime.py -f ./example_input/dp_code_test.csv
Usage
python DeepPrime.py
[-h]
[-f INPUT_FILE]
[-n NAME]
[-p {PE2,PE2max,PE2max-e,PE4max,PE4max-e,NRCH_PE2,NRCH_PE2max,NRCH_PE4max}]
[--cell_type {HEK293T,A549,DLD1,HCT116,HeLa,MDA-MB-231,NIH3T3}]
[--pbs_min PBS_MIN]
[--pbs_max PBS_MAX]
[--jobs JOBS]
[--progress]
optional arguments:
-h, --help show this help message and exit
-f INPUT_FILE, --input_file INPUT_FILE
Input file containing target sequence and edit type
-n NAME, --name NAME Sample name for your input (default='Sample')
-p {PE2,PE2max,PE2max-e,PE4max,PE4max-e,NRCH_PE2,NRCH_PE2max,NRCH_PE4max,PE-off}, --pe_type {PE2,PE2max,PE2max-e,PE4max,PE4max-e,NRCH_PE2,NRCH_PE2max,NRCH_PE4max,PE-off}
PE type parameter (default=PE2max)
--cell_type {HEK293T,A549,DLD1,HCT116,HeLa,MDA-MB-231,NIH3T3}
Cell type parameter. (default=HEK293T)
--pbs_min PBS_MIN PBS minimum length parameter (default=1)
--pbs_max PBS_MAX PBS maximum length parameter (default=17)
--jobs JOBS Number of cores for computing (default=1)
--progress Show processing message
Input
ID,RefSeq,Edited Seq,EditType
BRCA1e17_pos34_tat_CAT,AATCCTTTGAGTGTTTTTCATTCTGCAGATGCTGAGTTTGTGTGTGAACGGACACTGAAATATTTTCTAGGAATTGCGGGAGGAAAATGGGTAGTTAGCTATTTCTGTAAGTATAATACTA,AATCCTTTGAGTGTTTTTCATTCTGCAGATGCTGAGTTTGTGTGTGAACGGACACTGAAACATTTTCTAGGAATTGCGGGAGGAAAATGGGTAGTTAGCTATTTCTGTAAGTATAATACTA,sub1
BRCA1e17_pos34_tat_CCA,AATCCTTTGAGTGTTTTTCATTCTGCAGATGCTGAGTTTGTGTGTGAACGGACACTGAAATATTTTCTAGGAATTGCGGGAGGAAAATGGGTAGTTAGCTATTTCTGTAAGTATAATACTA,AATCCTTTGAGTGTTTTTCATTCTGCAGATGCTGAGTTTGTGTGTGAACGGACACTGAAACCATTTCTAGGAATTGCGGGAGGAAAATGGGTAGTTAGCTATTTCTGTAAGTATAATACTA,sub3
BRCA1e17_pos34
Output
- results/ :每个 input ID 对应的所有 pegRNA 的 csv 文件
- Statistics.csv
- Top4_pegRNAs.csv :各输出四个最优
Scripts
for ct in {"HEK293T","A549"}
do
echo $ct
python DeepPrime.py \
-f input.csv \
-p PE4max-e \
--pbs_min 8 \
--pbs_max 15 \
--cell_type ${ct} \
-n ${ct} \
--jobs 8 >> log_${ct}
done
training data:PE 系统和对应的细胞系
Error & Correction 记录
File "/home/vg3/yijia/DeepPrime-main/src/dspcas9.py", line 145, in calculate_DeepSpCas9_score
sess.run(tf.comapt.v1.global_variables_initializer())
AttributeError: module 'tensorflow' has no attribute 'comapt'
Correct src/dspcas9.py
- tensorflow 版本问题(报错的直接原因:`tf.comapt` 为拼写错误,应为 `tf.compat`)
- /home/vg3/yijia/DeepPrime-main/src/dspcas9.py
- 修改后的 src/dspcas9.py:
import os, sys
import numpy as np
from src.utils import preprocess_seq
from silence_tensorflow import silence_tensorflow
silence_tensorflow()
import tensorflow as tf
class Deep_SpCas9(object):
    """TF1-style graph with three parallel convolution branches and a dense head.

    The graph takes a ``[None, 1, 30, 4]`` float input. Each branch applies a
    ``1 x k`` VALID convolution, bias, ReLU, dropout (rate 0.3) and average
    pooling with stride 2 along the width axis. The flattened branch outputs
    are concatenated and passed through two ReLU dense layers (each followed
    by dropout) and a single-unit linear output layer.

    Attributes created on the instance:
        inputs:      float32 placeholder, shape ``[None, 1, 30, 4]``.
        targets:     float32 placeholder, shape ``[None, 1]``.
        is_training: bool placeholder.
        outputs:     output tensor of the final linear layer.
        obj_loss:    mean squared error between ``targets`` and ``outputs``.
        optimizer:   Adam minimisation op for ``obj_loss``.

    Args:
        filter_size: indexable with the kernel widths of the three branches.
        filter_num:  indexable with the filter counts of the three branches.
        node_1:      width of the first dense layer.
        node_2:      width of the second dense layer.
        l_rate:      learning rate handed to the Adam optimizer.

    NOTE(review): the nesting of the ``with`` scopes below was reconstructed
    from a paste that had lost its indentation -- confirm against the upstream
    file. ``is_training`` is not passed to the Dropout layers in this block --
    confirm whether that is intended.
    """

    def __init__(self, filter_size, filter_num, node_1=80, node_2=60, l_rate=0.005):
        seq_len = 30

        self.inputs = tf.compat.v1.placeholder(tf.float32, [None, 1, seq_len, 4])
        self.targets = tf.compat.v1.placeholder(tf.float32, [None, 1])
        self.is_training = tf.compat.v1.placeholder(tf.bool)

        def conv_branch(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name):
            # Kernel layout expected by tf.nn.conv2d: [height, width, in_channels, out_channels].
            kernel_shape = [filter_shape[0], filter_shape[1], num_input_channels, num_filters]

            # Randomly initialised kernel and bias; their names carry the branch name.
            kernel = tf.compat.v1.Variable(
                tf.compat.v1.truncated_normal(kernel_shape, stddev=0.03), name=name + '_W')
            bias = tf.compat.v1.Variable(
                tf.compat.v1.truncated_normal([num_filters]), name=name + '_b')

            # Convolution without padding, then bias.
            branch = tf.nn.conv2d(input_data, kernel, [1, 1, 1, 1], padding='VALID')
            branch = branch + bias

            # ReLU followed by dropout.
            branch = tf.keras.layers.Dropout(rate=0.3)(tf.nn.relu(branch))

            # Average pooling (not max pooling), stride 2 along the width axis.
            return tf.nn.avg_pool(
                branch,
                ksize=[1, pool_shape[0], pool_shape[1], 1],
                strides=[1, 1, 2, 1],
                padding='SAME',
            )
        # def end: conv_branch

        # Build the three branches in order, all reading the same input.
        pooled = [
            conv_branch(self.inputs, 4, filter_num[idx], [1, filter_size[idx]], [1, 2], name=branch_name)
            for idx, branch_name in enumerate(('conv1', 'conv2', 'conv3'))
        ]

        with tf.compat.v1.variable_scope('Fully_Connected_Layer1'):
            # Per-branch flattened size: pooled width times the number of filters.
            flat_sizes = [
                (int((seq_len - filter_size[idx]) / 2) + 1) * filter_num[idx]
                for idx in range(3)
            ]
            flattened = [
                tf.reshape(branch, [-1, size]) for branch, size in zip(pooled, flat_sizes)
            ]
            merged = tf.concat(flattened, 1, name='concat')

            fc1_weights = tf.compat.v1.get_variable("W_fcl1", shape=[sum(flat_sizes), node_1])
            fc1_bias = tf.compat.v1.get_variable("B_fcl1", shape=[node_1])
            fc1 = tf.nn.relu(tf.nn.bias_add(tf.matmul(merged, fc1_weights), fc1_bias))
            fc1_dropped = tf.keras.layers.Dropout(rate=0.3)(fc1)

        with tf.compat.v1.variable_scope('Fully_Connected_Layer2'):
            fc2_weights = tf.compat.v1.get_variable("W_fcl2", shape=[node_1, node_2])
            fc2_bias = tf.compat.v1.get_variable("B_fcl2", shape=[node_2])
            fc2 = tf.nn.relu(tf.nn.bias_add(tf.matmul(fc1_dropped, fc2_weights), fc2_bias))
            fc2_dropped = tf.keras.layers.Dropout(rate=0.3)(fc2)

        with tf.compat.v1.variable_scope('Output_Layer'):
            out_weights = tf.compat.v1.get_variable("W_out", shape=[node_2, 1])
            out_bias = tf.compat.v1.get_variable("B_out", shape=[1])
            self.outputs = tf.nn.bias_add(tf.matmul(fc2_dropped, out_weights), out_bias)

        # Mean squared error loss and its Adam minimisation op.
        self.obj_loss = tf.reduce_mean(tf.square(self.targets - self.outputs))
        self.optimizer = tf.compat.v1.train.AdamOptimizer(l_rate).minimize(self.obj_loss)
    # def end: def __init__
# class end: Deep_SpCas9
def Model_Finaltest(sess, TEST_X, model):
    """Run ``model`` over ``TEST_X`` in batches and return the scores as a flat list.

    Args:
        sess:   object with a ``run(fetches, feed_dict=...)`` method; called once
                per batch with ``[model.outputs]`` as fetches.
        TEST_X: array whose first axis indexes the samples.
        model:  object exposing ``inputs``, ``is_training`` and ``outputs``.
                ``is_training`` is always fed ``False``.

    Returns:
        A list of Python floats, one per row of ``TEST_X`` (empty when
        ``TEST_X`` has no rows).
    """
    test_batch = 500
    n_samples = TEST_X.shape[0]

    # One output column per sample; filled batch by batch.
    TEST_Z = np.zeros((n_samples, 1), dtype=float)

    for start in range(0, n_samples, test_batch):
        stop = start + test_batch
        feed = {model.inputs: TEST_X[start:stop], model.is_training: False}
        TEST_Z[start:stop] = sess.run([model.outputs], feed_dict=feed)[0]

    # Flatten the (n, 1) column in one pass instead of summing nested lists.
    return TEST_Z.ravel().tolist()
# def end: Model_Finaltest
def _parse_model_name_field(field):
    """Convert one '-'-separated field of the model name to bool, int, float or str."""
    if field == 'True':
        return True
    if field == 'False':
        return False
    for cast in (int, float):
        try:
            return cast(field)
        except ValueError:
            # Not parseable with this type; try the next one.
            continue
    return field


def calculate_DeepSpCas9_score(sBase_DIR, list_target30):
    """Restore the pre-trained DeepSpCas9 checkpoint and score the given targets.

    Args:
        sBase_DIR:     base directory; the checkpoint is read from
                       ``<sBase_DIR>/models/DeepSpCas9``.
        list_target30: targets handed to ``preprocess_seq(list_target30, 30)``
                       (presumably 30-nt sequences -- verify against
                       ``preprocess_seq``).

    Returns:
        The list of scores produced by ``Model_Finaltest``.

    Side effects:
        Sets ``CUDA_VISIBLE_DEVICES`` to ``'0'`` and resets the default
        TensorFlow graph.
    """
    # TensorFlow config
    conf = tf.compat.v1.ConfigProto()
    conf.gpu_options.allow_growth = True
    os.environ['CUDA_VISIBLE_DEVICES'] = '0'

    TEST_X = preprocess_seq(list_target30, 30)

    best_model_path = '%s/models/DeepSpCas9' % sBase_DIR
    best_model = 'PreTrain-Final-3-5-7-100-70-40-0.001-550-80-60'

    # The hyper-parameters are encoded in the checkpoint name; the first two
    # fields ('PreTrain', 'Final') are labels and are skipped.
    fulllist = [_parse_model_name_field(field) for field in best_model.split('-')]
    (filter_size_1, filter_size_2, filter_size_3,
     filter_num_1, filter_num_2, filter_num_3,
     l_rate, _load_episode, node_1, node_2) = fulllist[2:]

    filter_size = [filter_size_1, filter_size_2, filter_size_3]
    filter_num = [filter_num_1, filter_num_2, filter_num_3]

    tf.compat.v1.reset_default_graph()

    with tf.compat.v1.Session(config=conf) as sess:
        # Kept from the original: this runs before the model is built, so it
        # does not initialise the model variables; the weights are loaded by
        # saver.restore() below.
        sess.run(tf.compat.v1.global_variables_initializer())

        model = Deep_SpCas9(filter_size, filter_num, node_1, node_2, l_rate)
        saver = tf.compat.v1.train.Saver()
        saver.restore(sess, best_model_path + '/' + best_model)

        list_score = Model_Finaltest(sess, TEST_X, model)

    return list_score
def main():
    """Print a one-line banner identifying this script."""
    banner = 'This is DeepSpCas9 model script'
    print(banner)
# Command-line entry point: with no arguments run main(); otherwise treat the
# first argument as the name of a module-level function and call it with the
# remaining arguments (passed through as strings).
if __name__ == '__main__':
    if len(sys.argv) == 1:
        main()
    else:
        function_name = sys.argv[1]
        function_parameters = sys.argv[2:]

        # Only dispatch to callables, so naming a module or constant (e.g. 'os')
        # ends in the error message below instead of a TypeError.
        target = globals().get(function_name)
        if callable(target):
            target(*function_parameters)
        else:
            sys.exit('ERROR: function_name=%s, parameters=%s' % (function_name, function_parameters))
    # if END: len(sys.argv)
# if END: __name__
网友评论