## TEMPY is a Python library designed to help the user manipulate and analyse atomic structures and density maps from 3D EM.
## Copyright (c) 2013 Daven Vasishtan,Irene Farabella, Arun Prasad Pandurangan, Harpal Sahota, Frank Alber and Maya Topf
from EMMap import *
from ProtRep_Biopy import *
#from ProtRep_ire3 import *
from PDBParser import *
from StructureBlurrer import *
#import math
from scipy.spatial import KDTree
from scipy.signal import resample
#import itertools
from MapParser import *
#import numpy as np
#from scipy import *
class ScoringFunctions:
    """
    A collection of scoring functions for assessing the quality of fits
    between atomic structures and EM density maps.

    Reference:
        Vasishtan and Topf (2011) Scoring functions for cryoEM density
        fitting. J Struct Biol 174:333-343.
    """
    def __init__(self):
        # Stateless: every scoring method takes all of its inputs explicitly.
        pass
[docs] def mapComparison(self, map1, map2):
"""
Compare the sampling rate, box size and origin of two maps.
Returns True if they are the same, False otherwise.
Arguments:
*map1, map2*
EMMap instances to compare.
"""
if (map1.apix - map2.apix < 1E-6) and map1.box_size() == map2.box_size() and map1.origin == map2.origin:
return True
else:
return False
def _matchMaps(self, map1, map2):
## NOTE: CAN'T WORK, NEEDS A WAY OF SCORING DIFFERENT MAPS
## he tried to resample 1 map on to the other (and the the 2 on 1)
#m1 = map1.copy()
#m2 = map2.copy()
#if m1.apix == m2.apix:
# ex_origin_shift = (0,0,0)
#elif m1.apix > m2.apix:
# m2 = m2.resample_by_apix(m1.apix)
# #ex_origin_shift = m1.centre()-m2.centre()
# m2 = m2.resize_map(m1.box_size())
#else:
# m1 = m1.resample_by_apix(m2.apix)
# #ex_origin_shift = m2.centre()-m1.centre()
# m1 = m1.resize_map(m2.box_size())
##print m1
##print m2
#if m1.box_size() != m2.box_size():
# m2 = m2.resize_map(m1.box_size())
#origin_shift = [y-x for x,y in zip(m2.origin, m1.origin)]
#m2 = m2.shift_map(origin_shift)#shift_map
#m2.origin = m1.origin[:]
##return m1, m2
return "Warning: can't match the map at the moment, use map with same box size." #comment all out!
[docs] def CCF(self, map1, map2):
"""
Calculate cross-correlation between two Map instances.
Arguments:
*map1, map2*
EMMap instances to compare.
"""
if self.mapComparison(map1, map2):
return (map1.normalise().getMap()*map2.normalise().getMap()).mean()
else:
return "can't Match the map"
#m1,m2 = self.matchMaps(map1, map2)
#return (m1.normalise().getMap()*m2.normalise().getMap()).mean()
[docs] def LSF(self, map1, map2):
"""
Calculate least-squares between two Map instances.
Arguments:
*map1, map2*
EMMap instances to compare.
"""
if self.mapComparison(map1, map2):
return ((map1.getMap()-map2.getMap())**2).mean()
else:
# m1,m2 = self.matchMaps(map1, map2)
# return ((m1.getMap()-m2.getMap())**2).mean()
return "can't Match the map"
[docs] def laplace_CCF(self, map1, map2, prefil=(False, False)):
"""
Calculate Laplacian cross-correlation between two Map instances.
Based on (Chacon and Wriggers, 2002).
Arguments:
*map1, map2*
EMMap instances to compare.
*prefil*
Tuple of boolean values, one for each map respectively.
True if Map instance is already Laplacian-filtered. False otherwise.
"""
if not prefil[0]:
map1 = map1.laplace_filtered()
if not prefil[1]:
map2 = map2.laplace_filtered()
map1 = map1.normalise()
map2 = map2.normalise()
return self.CCF(map1, map2)
[docs] def normal_vector_score(self, map1, map2, min_threshold, max_threshold):
"""
Calculate the Normal Vector Score between two Map instances.
Based on 3SOM algorithm (Ceulemans and Russell, 2004)
Arguments:
*map1, map2*
EMMap instances to compare.
*min_threshold, max_threshold*
need to run get_min_threshold and get_max_threshold
"""
scores = []
if not self.mapComparison(map1, map2):
#map1, map2 = self.matchMaps(map1, map2)
return "can't Match the map"
points = map1.get_pos(min_threshold, max_threshold)
for v in points:
n_vec = map1.get_normal_vector(v[0],v[1],v[2])
o_vec = map2.get_normal_vector(v[0],v[1],v[2])
try:
#print n_vec, o_vec, n_vec.arg(o_vec)
scores.append(abs(n_vec.arg(o_vec)))
except ValueError:
print 'Error: Angle between '+ str(n_vec) +', '+ str(o_vec) +' for point %d, %d, %d cannot be calculated.' %(v.x,v.y,v.z)
# return
if max(scores) == min(scores):
return 0
else:
return -(sum(scores)/len(points))
[docs] def get_partial_DLSF(self, x, map1, map2):
"""
Calculate the DLSF score between two Map instances.
Arguments:
*map1, map2*
EMMap instances to compare.
*x*
number of significant points.
"""
map1_sig_pairs=map1._get_random_significant_pairs(int(x))
otherMap=map2
score = 0.0
for p in map1_sig_pairs:
z1 = p[0]
y1 = p[1]
x1 = p[2]
z2 = p[3]
y2 = p[4]
x2 = p[5]
dens = p[6]
prot_dens = otherMap.fullMap[z1][y1][x1] - otherMap.fullMap[z2][y2][x2]
score += (dens-prot_dens)**2
return score/map1.fullMap.size
[docs] def MI(self, map1, map2, layers=20):
"""
Calculate the Mutual information Score between two Map instances.
Arguments:
*map1, map2*
EMMap instances to compare.
*layers*
value for which to bin the map into a limited number of values. Default is 20 as in Shatsky et al., 2008.
"""
if self.mapComparison(map1, map2):
m1, m2 = map1, map2
else:
return "Can't Match"
#m1,m2 = self.matchMaps(map1, map2)
score = 0
m1_levels = (m1.max()-m1.min())/layers
m2_levels = (m2.max()-m2.min())/layers
for x in range(layers):
for y in range(layers):
m1_level_map = (m1.getMap() >= m1.min()+(x*m1_levels))*(m1.getMap() <= m1.min()+((x+1)*m1_levels))
m2_level_map = (m2.getMap() >= m2.min()+(y*m2_levels))*(m2.getMap() <= m2.min()+((y+1)*m2_levels))
comb_level_map = m1_level_map*m2_level_map
p_m1 = float(m1_level_map.sum())/m1_level_map.size
p_m2 = float(m2_level_map.sum())/m2_level_map.size
p_comb = float(comb_level_map.sum())/comb_level_map.size
if p_comb == 0:
mi_score = 0.0
else:
#print p_comb, p_m1, p_m2, p_comb/(p_m1*p_m2), log(p_comb/(p_m1*p_m2),2)
mi_score = p_comb*math.log(p_comb/(p_m1*p_m2), 2)
score += mi_score
return score
    def _NMI(self, map1, map2, layers=20):
        """
        Normalized Mutual Information from Martin et al. 2005.

        *not implemented yet* -- calling this only prints a notice.

        Arguments:
            *map1, map2*
                EMMap instances to compare (currently unused).
            *layers*
                number of density bins (currently unused).
        """
        # Planned: normalise the MI score following
        # Martin LC, Gloor GB, Dunn SD, Wahl LM. Using information theory to search for co-evolving residues in proteins. Bioinformatics 2005 21(22):4116-4124.
        print "not implemented yet"
def _APC_MI():
"""
Average Product Correction Mutual information from Dunn et al. 2008.
*not implemented yet*
"""
#Dunn SD, Wahl LM, Gloor GB. Mutual information without the influence of phylogeny or entropy dramatically improves residue contact prediction. Bioinformatics 2008 24(3):333-340.
print "not implemented yet"
def _hausdorff_list(self, min_threshold, max_threshold, kdtree, map2):
"""
This is for the chamdef distance def chamfer_distance, min max density value that define the surface of the protein
Arguments:
*kdtree* (there are 2 of them in numpy one Cbased on py-based, the latter is better, ctrl) this have to be one of the input.
kdtree from map1
*min_threshold, max_threshold* need to run get_min_threshold and get_max_threshold for map2
NOTE: if you keep the kdtree as parametre out os less time consuming as building it takes time.
"""
points = map2.get_pos(min_threshold, max_threshold)
return kdtree.query(points)[0] #kdtree give 2 list 0=distance 1=actual points
[docs] def chamfer_distance(self, map1, map2, min_threshold, max_threshold, kdtree=None):
"""
Calculate the Chamfer Distance Score between two Map instances.
NOT RACCOMANDED.
Arguments:
*map1, map2*
EMMap instances to compare.
*min_threshold, max_threshold*
need to run get_min_threshold and get_max_threshold for map2
*kdtree*
If set True It is possible to choose between two option of kdtree in numpy the one that is py-based is a better choice.
"""
if self.mapComparison(map1, map2):
m1, m2 = map1, map2
else:
return "can't match"
#m1,m2 = matchMaps(map1, map2)
if kdtree:
return self.hausdorff_list(min_threshold, max_threshold, kdtree, m2).mean()
else:
kdtree = m1.makeKDTree(min_threshold, max_threshold)
#if you don't assine it wil be build one kdtree
return self.hausdorff_list(min_threshold, max_threshold, kdtree, m2).mean()#mean distance to the nearest neighbour
#MODIFIED by IF and DV
#3-01-2014
[docs] def envelope_score(self,map, cutoff, structure,norm=False):
"""
Calculate the Envelope Score between a target Map and a Structure Instances.
Arguments:
*map*
Target Map Instance.
*cutoff*
Calculated with min_threshold for map.
*structure*
Structure Instance to compare.
"""
binMap = map.make_bin_map(cutoff)
max_score = float(-2*numsum(binMap.fullMap))
min_score = float(numsum(binMap.fullMap)-2*numsum(binMap.fullMap+1))
blurrer = StructureBlurrer()
struct_binMap = blurrer.make_atom_overlay_map1(map, structure)
grid = struct_binMap.get_pos(0.9,1.1)
for x,y,z in grid:
g = binMap[z][y][x]
if g == -1:
binMap[z][y][x] = 2
elif g == 0:
binMap[z][y][x] = -2
#score=binMap.fullMap.sum()
score = float(numsum(binMap.fullMap))
if norm:
norm_score = float((score-min_score)/(max_score-min_score))
return norm_score
else:
return score
#added by IF
# 19-12-2013
#ORIGINAL form PAP
    def SCCF(self,map1,resolution_densMap,sigma_map,struct,ssefile_name):
        """
        Calculate the Segment-based cross-correlation from Pandurangan et al.
        2013, J Struct Biol (2013 Dec 12).
        Based on the Local CCF (Roseman, 2000): a local CCF computed around a
        selection of atoms (e.g. secondary-structure elements).
        Arguments:
            *map1*
                Target Map Instance.
            *resolution_densMap*
                Parameter needed for Structure Blurrer.
                Resolution of the target map.
            *sigma_map*
                Parameter needed for Structure Blurrer.
                The sigma value (multiplied by the resolution) that controls the width of the Gaussian.
                Default value is 0.356.
                Other values used:
                0.187R corresponding with the Gaussian width of the Fourier transform falling to half the maximum at 1/resolution, as used in Situs (Wriggers et al, 1999);
                0.356R corresponding to the Gaussian width at 1/e maximum height equaling the resolution, the default in Chimera (Petterson et al, 2004);
                0.425R the fullwidth half maximum being equal to the resolution, as used by FlexEM (Topf et al, 2008);
                0.5R the distance between the two inflection points being the same length as the resolution, an option in Chimera (Petterson et al, 2004);
                1R where the sigma value simply equal to the resolution, as used by NMFF (Tama et al, 2004).
            *struct*
                Structure Instance to compare.
            *ssefile_name*
                Rigid-body-file-like text file specifying the selection: one
                SSE per line, as space-separated start/end residue pairs.
        Return:
            A string holding one raw score line per SSE, plus a labelled
            'LCCF for the SSE ...' line for each SSE after the first.
        """
        blurrer = StructureBlurrer()
        outline = ""
        # NOTE(review): 'rU' (universal newlines) is Python-2-era; removed in py3.11.
        ssefile = open(ssefile_name, 'rU')
        nsse = 0
        # Read the SSE selections as in a rigid-body file (cf. RB_get()).
        for line in ssefile:
            sselist = []
            nsse += 1
            tokens = line.split(' ')
            # Pair up consecutive tokens as [start, end] residue ranges.
            # (len(tokens)/2 relies on Python-2 integer division.)
            for i in range(len(tokens)/2):
                start = int(tokens[i*2])
                end = int(tokens[i*2+1])
                sselist.append([start,end])
            print sselist
            # Combine the selected segments into one structure and simulate
            # its density map on the grid of the target map.
            sse_struct_list = struct.break_into_segments(sselist)
            sse_struct = struct.combine_SSE_structures(sse_struct_list)
            sim_map = blurrer.gaussian_blur(sse_struct, resolution_densMap, densMap=map1, sigma_coeff=sigma_map, normalise=True)
            # Mask both maps to the region covered by the simulated SSE map:
            # everything at or below the simulated map's minimum density
            # (clamped to be non-negative) is masked out.
            # NOTE: could be factored out into a reusable mask method; also
            # potentially useful for envelope_score.
            minDens = 0.0
            sim_min = sim_map.min()
            if sim_min >= 0.0:
                minDens = sim_min
            # Create the mask array from the simulated map.
            sim_mask_array = sim_map._get_maskArray(minDens)
            # Apply the mask to the experimental and simulated maps.
            mask_emMap = map1._get_maskMap(sim_mask_array)
            mask_simMap = sim_map._get_maskMap(sim_mask_array)
            # Local cross-correlation over the masked voxels (Roseman, 2000).
            sse_lccf = (mask_emMap * mask_simMap).sum()/sqrt(square(mask_emMap).sum()*square(mask_simMap).sum())
            # Accumulate the per-SSE raw score in the output string.
            outline+='%s\n'%sse_lccf
            if nsse>1:
                # Report the score per element when more than one is specified.
                outline+='LCCF for the SSE %3d: %7.3f\n'%(nsse,sse_lccf)
        ssefile.close()
        return outline
### WARNING: everything below this point is commented-out legacy code. ###
### It is retained for reference only and is never executed.           ###
#===============================================================================
# #===============================================================================
# #
# # #apix may be a issue ctrl
# # #purpose unclear; appears to duplicate existing functionality. apix
# # #get_clash_map exists in EMMap and in StructureBlurrer (the latter definitely exists and is used!); check whether this is a duplicate, and if
# # #it is different from the StructureBlurrer one, consider getting rid of this.
# # #==============================================================================
# # # def get_clash_map(self,emmap, apix):
# # # #look note in gaussian_blur function for make_clash_map DAVE NEED TO CTRL IT
# # # template_grid = emmap.make_clash_map(apix)
# # # return template_grid
# # #==============================================================================
# #
# # #=================TO DEL FOR THE MOMENT==============================================================
# # #
# # # #this is PAP
# # # #CTRL how it performs with a hight number of component. May be possible to think at an alternative way that speed up the calculation if slow
# # # def get_overlay_comp_maplist(self, struct,template_grid):
# # # #call blurue obj
# # # blurrer = StructureBlurrer()
# # # overlay_maplist = []
# # # #split structure in its component struct is a list of structure. It is an Assembly Obj
# # # ssplit = struct.structList
# # # #split_into_chains()
# # # for x in ssplit:
# # # overlay_maplist.append(blurrer.make_atom_overlay_map1(template_grid, x))
# # # return overlay_maplist
# # #
# # # ## this is PAP.
# # # ## this function are related to the clash score
# # # ## calculate the fraction of overlap. THIS IS THE "CLASH SCORE"
# # # # it return the fraction of volume that overlap
# # # def get_sm_score(self, struct, ncomp, template_grid, cvol, apix):
# # # overlay_maplist = []
# # # overlay_maplist = self.get_overlay_comp_maplist(struct,template_grid)
# # # nc = range(ncomp)
# # # cpair = list(itertools.combinations(nc,2))
# # # #print cpair
# # # score = 0.0
# # # for i in cpair:
# # # #print 'sum c1, sum c2, c1*c2'
# # # #print overlay_maplist[i[0]].fullMap.sum(), overlay_maplist[i[1]].fullMap.sum(), (overlay_maplist[i[0]].fullMap * overlay_maplist[i[1]].fullMap).sum()
# # # score = score + ((overlay_maplist[i[0]].fullMap * overlay_maplist[i[1]].fullMap).sum()*(apix**3)) / ((cvol[i[0]]+cvol[i[1]]))
# # # return -score
# # #===============================================================================
# #===============================================================================
#
#
# #====================================NOT USED ANYWHERE=======================================
# # def convert_bin_map(self, binMap):
# # #????
# # # WHAT IS THIS??????
# # """For use with the binary score. Takes in a binary map and adds the binary map from this structure such that
# # pixels under an atom with value:
# # - -1 --> 2
# # - 0 --> -2
# # - 2 --> -5
# # The original binary map from a density map will have pixels with value -1 above a specific threshold, and 0 below.
# # binMap = binary map to be modified."""
# # TotalScore = 0
# # grid = self.get_grid_positions(binMap)
# # protGrid = grid[0]
# # box_edge_pen = grid[1]
# # for point in protGrid:
# # g = binMap.fullMap[point[2]][point[1]][point[0]]
# # if g == -1:
# # binMap.fullMap[point[2]][point[1]][point[0]] = 3
# # elif g == 0:
# # binMap.fullMap[point[2]][point[1]][point[0]] = -2
# # elif g == -5:
# # pass
# # elif g == 2:
# # binMap.fullMap[point[2]][point[1]][point[0]] = -5
# # for x in range(binMap.x_size):
# # for y in range(binMap.y_size):
# # for z in range(binMap.z_size):
# # if binMap[z][y][x] == 3:
# # binMap[z][y][x] = 2
# # return (binMap, box_edge_pen)
# #===========================================================================
#
# #===============================================================================
# # WHAT IS THIS??????
# # def clash_bin_map(self, densMap):
# # ### ???? what is this and why is here if is map related????
# # clashMap = densMap.make_clash_map()
# # protGrid, box_edge_pen = self.getGridPositions(clashMap)
# # for point in protGrid:
# # clashMap.fullMap[point[2]][point[1]][point[0]] = 1
# # return clashMap.fullMap, box_edge_pen
# #===============================================================================
#===============================================================================