import numpy as np
import pandas as pd
# Length conversion factor, angstrom -> nanometre; multiplied into the
# distance-like columns (r0, width, sigma) of the parsed tables below.
_A_to_nm = 0.1
# Energy conversion factor, kcal -> kJ; multiplied into the force-constant and
# epsilon columns of the parsed tables below.
_kcal_to_kj = 4.1840
class ParserNinfo:
    """Parse a native-information (ninfo) file into per-term parameter tables.

    Typical use is ``get_ninfo(path)``, which parses the file and then turns
    every term that was found into a ``pandas.DataFrame`` stored on the
    instance.  Terms that do not occur in the file leave no attribute behind,
    so callers should test with ``hasattr``.

    Attributes that may be set after parsing:

    * ``protein_bonds``
    * ``protein_harmonic_angles``
    * ``protein_aicg13_angles``
    * ``protein_native_dihd``
    * ``protein_aicg_dihd``
    * ``protein_native_pair`` (raw arrays; never replaced by a table)
    * ``protein_intra_contact`` / ``protein_inter_contact``
    """

    # Record keyword -> (attribute name, int_ini_idx, int_end_idx,
    # float_ini_idx, float_end_idx).  The four numbers are the column windows
    # handed to ``convert_str_to_number_array`` for that record type.
    _RECORD_LAYOUT = {
        'bond': ('protein_bonds', 1, 8, 8, 12),
        'angl': ('protein_harmonic_angles', 1, 10, 10, 14),
        'aicg13': ('protein_aicg13_angles', 1, 10, 10, 15),
        'dihd': ('protein_native_dihd', 1, 12, 12, 17),
        'aicgdih': ('protein_aicg_dihd', 1, 12, 12, 17),
        'contact': ('protein_native_pair', 1, 8, 8, 12),
    }

    # Every attribute a parse may create; wiped before each new parse so that
    # results of a previously parsed file cannot leak into the next one.
    _RESULT_ATTRS = (
        'protein_bonds',
        'protein_harmonic_angles',
        'protein_aicg13_angles',
        'protein_native_dihd',
        'protein_aicg_dihd',
        'protein_native_pair',
        'protein_intra_contact',
        'protein_inter_contact',
    )

    def __init__(self):
        """Initialize the parser with the list of force-field term names."""
        self.forcefield_term = [
            'protein_bonds',
            'protein_harmonic_angles',
            'protein_aicg13_angles',
            'protein_native_dihd',
            'protein_aicg_dihd',
            'protein_native_pair',
        ]

    def convert_str_to_number_array(self, para_array, int_ini_idx, int_end_idx,
                                    float_ini_idx, float_end_idx):
        """Split tokenised records into an integer block and a float block.

        Parameters
        ----------
        para_array : sequence of sequence of str
            One row of string tokens per record; all rows must have the same
            length.
        int_ini_idx, int_end_idx : int
            Half-open column range converted to ``int64``.
        float_ini_idx, float_end_idx : int
            Half-open column range converted to ``float64``.

        Returns
        -------
        list
            ``[idx_array, kpara_array]``.  ``idx_array`` holds the integer
            columns with 1 subtracted from every entry (presumably turning
            1-based file indices into 0-based ones); ``kpara_array`` holds the
            float columns unchanged.
        """
        tokens = np.asarray(para_array)
        idx_array = tokens[:, int_ini_idx:int_end_idx].astype(np.int64) - 1
        kpara_array = tokens[:, float_ini_idx:float_end_idx].astype(np.float64)
        return [idx_array, kpara_array]

    @staticmethod
    def _is_selected(fields):
        """Return True when a tokenised record passes the unit filter.

        Token 2 must equal 1 for every record type.  ``contact`` records must
        additionally carry 1 or 2 in token 3.
        NOTE(review): tokens 2 and 3 are presumably the unit (chain) ids of
        the interaction partners -- confirm against the file format.
        """
        if int(fields[2]) != 1:
            return False
        if fields[0] == 'contact':
            return int(fields[3]) in (1, 2)
        return True

    def parser_ninfo(self, ninfo_file_path):
        """Read a native-information file and store the raw numeric arrays.

        Records are grouped by their leading keyword (``bond``, ``angl``,
        ``aicg13``, ``dihd``, ``aicgdih``, ``contact``) instead of by the
        ``>>>>`` section terminator.  A section without any accepted record
        therefore can no longer overwrite the previous term with an empty
        list, a stray or missing terminator is harmless, and trailing
        whitespace / CRLF line endings do not matter.

        Results of an earlier parse on the same instance are discarded first.

        Parameters
        ----------
        ninfo_file_path : str
            Path of the native information file.
        """
        # Drop anything a previous parse (or table conversion) left behind.
        for attr_name in self._RESULT_ATTRS:
            vars(self).pop(attr_name, None)

        records = {}
        with open(ninfo_file_path, 'r') as read_f:
            for raw_line in read_f:
                fields = raw_line.split()
                # Blank lines, comments and section markers carry no keyword.
                if not fields or fields[0] not in self._RECORD_LAYOUT:
                    continue
                if self._is_selected(fields):
                    records.setdefault(fields[0], []).append(fields)

        for keyword, layout in self._RECORD_LAYOUT.items():
            if keyword not in records:
                continue
            attr_name, int_ini, int_end, float_ini, float_end = layout
            setattr(
                self,
                attr_name,
                self.convert_str_to_number_array(
                    records[keyword], int_ini, int_end, float_ini, float_end
                ),
            )

    def _pending_arrays(self, attr_name):
        """Return the raw ``[idx, para]`` pair stored under *attr_name*.

        Returns ``None`` when the attribute is missing or has already been
        converted to a DataFrame, which makes the conversion methods safe to
        call more than once.
        """
        raw = getattr(self, attr_name, None)
        if raw is None or isinstance(raw, pd.DataFrame):
            return None
        return raw

    @staticmethod
    def _make_table(indices, index_columns, parameters):
        """Join an integer index block and named 1-D parameter columns.

        *parameters* maps column name -> 1-D array; insertion order fixes the
        column order of the resulting DataFrame.
        """
        index_part = pd.DataFrame(indices, columns=index_columns)
        value_part = pd.DataFrame(parameters)
        return pd.concat([index_part, value_part], axis=1)

    def bonds_array_to_pd(self):
        """Replace ``protein_bonds`` by a table with columns a1, a2, r0, k.

        ``r0`` is float column 0 scaled by ``_A_to_nm``; ``k`` is float
        column 3 scaled by ``_kcal_to_kj * 100 * 2``.
        NOTE(review): the ``100`` presumably converts per-angstrom^2 to
        per-nm^2 and the ``2`` matches a 1/2*k*x^2 convention -- confirm.
        """
        raw = self._pending_arrays('protein_bonds')
        if raw is None:
            return
        idx, para = raw
        self.protein_bonds = self._make_table(
            idx[:, 5:7],
            ['a1', 'a2'],
            {
                'r0': para[:, 0] * _A_to_nm,
                'k': para[:, 3] * _kcal_to_kj * 100 * 2,
            },
        )

    def harm_ang_array_to_pd(self):
        """Replace ``protein_harmonic_angles`` by a table a1, a2, a3, natang, k.

        ``natang`` is float column 0 converted from degrees to radians; ``k``
        is float column 3 scaled by ``_kcal_to_kj * 2``.
        """
        raw = self._pending_arrays('protein_harmonic_angles')
        if raw is None:
            return
        idx, para = raw
        self.protein_harmonic_angles = self._make_table(
            idx[:, 6:9],
            ['a1', 'a2', 'a3'],
            {
                'natang': para[:, 0] * np.pi / 180,
                'k': para[:, 3] * _kcal_to_kj * 2,
            },
        )

    def aicg13_ang_array_to_pd(self):
        """Replace ``protein_aicg13_angles`` by a1, a2, a3, epsilon, r0, width.

        ``epsilon`` is float column 3 scaled by ``_kcal_to_kj``; ``r0`` and
        ``width`` are float columns 0 and 4 scaled by ``_A_to_nm``.
        """
        raw = self._pending_arrays('protein_aicg13_angles')
        if raw is None:
            return
        idx, para = raw
        self.protein_aicg13_angles = self._make_table(
            idx[:, 6:9],
            ['a1', 'a2', 'a3'],
            {
                'epsilon': para[:, 3] * _kcal_to_kj,
                'r0': para[:, 0] * _A_to_nm,
                'width': para[:, 4] * _A_to_nm,
            },
        )

    def native_dihd_array_to_pd(self):
        """Replace ``protein_native_dihd`` by a1..a4, natdihd, k_dihd1, k_dihd3.

        ``natdihd`` is float column 0 converted from degrees to radians; the
        two force constants are float columns 3 and 4 scaled by
        ``_kcal_to_kj``.
        """
        raw = self._pending_arrays('protein_native_dihd')
        if raw is None:
            return
        idx, para = raw
        coef_dihd = para[:, 3:5] * _kcal_to_kj
        self.protein_native_dihd = self._make_table(
            idx[:, 7:11],
            ['a1', 'a2', 'a3', 'a4'],
            {
                'natdihd': para[:, 0] * np.pi / 180,
                'k_dihd1': coef_dihd[:, 0],
                'k_dihd3': coef_dihd[:, 1],
            },
        )

    def aicg_dihd_array_to_pd(self):
        """Replace ``protein_aicg_dihd`` by a1..a4, epsilon, natdihd, width.

        ``epsilon`` is float column 3 scaled by ``_kcal_to_kj``; ``natdihd``
        is float column 0 converted from degrees to radians; ``width`` is
        float column 4 taken as-is (no unit conversion is applied).
        """
        raw = self._pending_arrays('protein_aicg_dihd')
        if raw is None:
            return
        idx, para = raw
        self.protein_aicg_dihd = self._make_table(
            idx[:, 7:11],
            ['a1', 'a2', 'a3', 'a4'],
            {
                'epsilon': para[:, 3] * _kcal_to_kj,
                'natdihd': para[:, 0] * np.pi / 180,
                'width': para[:, 4],
            },
        )

    def native_pair_array_to_pd(self):
        """Split ``protein_native_pair`` into intra- and inter-contact tables.

        Rows whose integer column 2 equals 0 go to ``protein_intra_contact``,
        rows where it equals 1 go to ``protein_inter_contact``.  Each table
        has the columns a1, a2, epsilon (float column 3 scaled by
        ``_kcal_to_kj``) and sigma (float column 0 scaled by ``_A_to_nm``).
        A table is only created when it has at least one row, and
        ``protein_native_pair`` itself is left untouched.
        """
        raw = self._pending_arrays('protein_native_pair')
        if raw is None:
            return
        idx, para = raw
        # Integer column 2 is token 3 of the record minus one (see
        # convert_str_to_number_array), i.e. 0 or 1 after the parse filter.
        partner = idx[:, 2]
        targets = (
            (0, 'protein_intra_contact'),
            (1, 'protein_inter_contact'),
        )
        for partner_value, attr_name in targets:
            mask = partner == partner_value
            if not mask.any():
                continue
            table = self._make_table(
                idx[mask][:, 5:7],
                ['a1', 'a2'],
                {
                    'epsilon': para[mask, 3] * _kcal_to_kj,
                    'sigma': para[mask, 0] * _A_to_nm,
                },
            )
            setattr(self, attr_name, table)

    def get_ninfo(self, ninfo_file_path):
        """Parse *ninfo_file_path* and convert every found term to a table."""
        self.parser_ninfo(ninfo_file_path)
        self.bonds_array_to_pd()
        self.harm_ang_array_to_pd()
        self.aicg13_ang_array_to_pd()
        self.native_dihd_array_to_pd()
        self.aicg_dihd_array_to_pd()
        self.native_pair_array_to_pd()