#!/usr/bin/env python
"""
emdb_xml_translate.py
Convert EMDB XML files from one schema version to another.
TODO:
Version history:
0.1, 2014-09-23, Ardan Patwardhan: Only conversion between v1.9 and v2.0 is supported
0.2, 2015-11-06, Ardan Patwardhan: 1) Uses regenerated emdb_da.py and emdb_19.py
2) 2.0 -> 1.9: Refactored and simplified supplement section, fitting section that were giving errors
3) 2.0 -> 1.9: image_recording is now a list
4) 2.0 -> 1.9: processing_site is now handled correctly
5) Refactored code to a class structure, and introduced Constants class
6) Fixed code for tomography microscopy tilt
7) 2.0 -> 1.9: helical reconstructions will always be transferred as single particle to avoid loss of data
8) 2.0 -> 1.9: code for alignment has been commented out - the schema needs to support astigmatism and electron beam tilt for backward compatibility
9) 2.0 -> 1.9: Sample component ID - this is now a simple counter to avoid issues with identical IDs for supramolecule and macromolecule components
10) 2.0 -> 1.9: Assume full overlap for related EMDB entries if no relationship is specified
0.3, 2015-11-11, Ardan Patwardhan: 1) 1.9 -> 2.0: Updated image recording section to handle list
2) Error with slices fixed (tested on emd-1011.xml)
3) 1.9 -> 2.0: alignment code commented out - support needs to be added to 2.0 schema
4) 1.9 -> 2.0: updated handling of helical parameters (tested on emd-1052.xml)
5) 1.9 -> 2.0: updated set_two_d_crystal_parameters to set_crystal_parameters
6) 1.9 -> 2.0: nucleic acid DNA/RNA is now transferred as other_macromolecule. Support needs to be added to other_macromolecule for structure,
synthetic_flag. Example: emd-1128.xml
7) 1.9 -> 2.0: improved handling of cases where there are multiple imaging and image acquisition elements (example: emd-1534.xml)
8) Use extension types for microscopy and defined constants for using them
9) 2.0 -> 1.9: Num sample components - handles situations where sample component is or is not defined
0.4, 2015-11-12, Ardan Patwardhan: 1) 2.0 -> 1.9: overallBValue is float - fixed
2) 1.9 <-> 2.0: improved handling of cases with multiple detectors and microscopes (emd-2845.xml)
3) 1.9 and 2.0 have different formats for EMDB accession code - this is now handled correctly
0.5, 2015-11-26, Ardan Patwardhan 1) re-enabled support for astigmatism and beam tilt following schema changes
2) added support for DNA/RNA hybrid macromolecules
3) 2.0: capitalized some enumerations to conform to new schema, cleaned up other enumerations
4) added support for external references for supramolecules
5) 1.9 <-> 2.0: fixed support for eulerAngleDetails (from 1.9)
6) 1.9 <-> 2.0: added support for legacy item replaceExistingEntry (from 1.9; e.g., emd-1010.xml)
7) 2.0 -> 1.9: reinstated support for helical reconstructions that was commented out in v0.2
8) 2.0 -> 1.9: fixed translation to crystalGrowDetails
9) 2.0 -> 1.9: fixed bug with reconstruction>algorithm - 'helical:' is no longer written out
10) Figuresets and masksets will now only be written out if they have content
11) 1.9 -> 2.0: if the ncbi for strain is not specified it will now be ignored (previously it became UNKNOWN by default)
12) 1.9 -> 2.0: best effort is made to map PDB chains in cases where no PDB code has been specified.
13) Num sample components is now propagated back and forth without taking into account the actual number
14) 1.9 <-> 2.0: implemented support for 3D crystal symmetry and added crystal parameters and crystal symmetry support
to tomogram, subtomogram and helical categories
15) 1.9 <-> 2.0: added support for helical symmetry to all methods
16) 1.9 <-> 2.0: improved handling of eulerAngleDetails
17) Support added for the following but commented out for now: When converting from 2.0 -> 1.9,
the PDB ID is prefixed to Chain ID (for chains) because the relationships are decoupled in v1.9
18) Fixed issues with transfer of number of class averages, updated EMDB DA schema so that 2D and 3D classification use same type
0.6, 2015-12-14, Ardan Patwardhan 1) updated schema and updated emdb_da.py; changes match 0.9 of cifEMDBTranslator
2) changes helical and singleParticle tags for reconstruction to avoid dangling text in reconstruction when going 2.0->1.9
3) 2.0 -> 1.9: fitting PDBs - reverted to outputting chains as PDBID_CHAINID to make them unique
4) 1.9 <-> 2.0: transfer supersededByList
5) 1.9 <-> 2.0: use version history to store the 1.9 status @prior attribute info
6) 0 contour levels were not being written out because of a generateDS bug - this has been fixed in generateDS
7) 2.0 -> 1.9: ctf correction info was only being written out for the first reconstruction - this has now been corrected
Copyright [2014-2016] EMBL - European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the
"License"); you may not use this file except in
compliance with the License. You may obtain a copy of
the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
"""
__author__ = 'Ardan Patwardhan'
__email__ = 'ardan@ebi.ac.uk'
__date__ = '2014-09-23'
import sys
import logging
import traceback
import datetime
import string
import re
from dateutil import parser as dtp
from optparse import OptionParser
import emdb_da
import emdb_19
from emdb_settings import emdb_settings
[docs]class EMDBXMLTranslator:
"""
Class for translating EMDB files 2.0 <-> 1.9
"""
class Constants:
    """
    There are many constants in use for the translation. They have been collected here for ease of use.
    The compiled patterns that embed the brace-delimited marker tags escape those tags with re.escape
    so that the braces are always matched literally.
    """
    # Global constants
    EM_SAMPLE_ID = 1000  # Used to identify sample supramolecule
    EM_DATE_FORMAT = '%d-%b-%Y'
    EM_UNIDENTIFIED_TAXID = 32644
    # Optional 'EMD-' prefix (case-insensitive) followed by four or more digits
    EMDB_PAT = re.compile(r'(?i)(EMD-){0,1}(\d{4,})')
    EMDB_PREFIX = 'EMD-'
    EMDB_DUMMY_CODE = 'EMD-0000'
    # Groups: 4-character code starting with a digit, optional single separator, alphanumeric chain
    PDB_CHAIN_PAT = re.compile(r'(\d[\dA-Za-z]{3})([-_:; ]?)([A-Za-z0-9]+)')
    EUL_ANG_START_TAG = '{eulerAngleDetails}'
    EUL_ANG_END_TAG = '{/eulerAngleDetails}'
    # Groups: text before the start tag, text between the tags, text after the end tag
    EUL_ANG_PAT = re.compile(r'(.*)%s(.*)%s(.*)' % (re.escape(EUL_ANG_START_TAG), re.escape(EUL_ANG_END_TAG)))
    HEL_TAG = '{helical/}'
    SP_TAG = '{singleParticle/}'
    # Groups: text before the tag, tag name (helical or singleParticle), text after the tag
    HEL_SP_PAT = re.compile(r'(.*)\{(helical|singleParticle)/\}(.*)')
    # EM methods
    EMM_EC = 'electronCrystallography'
    EMM_HEL = 'helical'
    EMM_SP = 'singleParticle'
    EMM_STOM = 'subtomogramAveraging'
    EMM_TOM = 'tomography'
    # Units
    U_ANG = u'\u212B'
    U_ANGA = 'A'
    U_DEG = 'deg'
    U_DEGF = 'degrees'
    U_EL_A2 = 'e/A**2'
    U_FIB_DOSE_RATE = 'ions/nm^2/s'
    U_KDA_NM = 'kDa/nm'
    U_KEL = 'K'
    U_KVOLT = 'kV'
    U_MCRN = 'microns'
    U_MDA = 'MDa'
    U_MG_ML = 'mg/mL'
    U_MM = 'mm'
    U_NM = 'nm'
    U_PAMP = 'pA'
    U_SEC = 's'
    # Status
    STS_REL = 'REL'
    STS_HPUB = 'HPUB'
    STS_HOLD1 = 'HOLD1'
    STS_OBS = 'OBS'
    # Extension types
    EXT_BASE_MICROSCOPY_TYPE = 'base_microscopy_type'
    EXT_TOMOGRAPHY_MICROSCOPY_TYPE = 'tomography_microscopy_type'
    EXT_CRYSTALLOGRAPHY_MICROSCOPY_TYPE = 'crystallography_microscopy_type'
    EXT_BASE_PREPARATION_TYPE = 'base_preparation_type'
    EXT_TOMOGRAPHY_PREPARATION_TYPE = 'tomography_preparation_type'
    EXT_CRYSTALLOGRAPHY_PREPARATION_TYPE = 'crystallography_preparation_type'
def __init__(self):
    """
    Start at the default warning level and configure logging from emdb_settings.
    """
    # 0 = min, 3 = max
    self.warningLevel = 1
    logOptions = {
        'level': emdb_settings.log_level,
        'format': emdb_settings.log_format,
    }
    logging.basicConfig(**logOptions)
def setWarningLevel(self, level):
    """
    Set the level of logging warnings, clamped to the supported range.
    0 = min, 3 = max, 1 = default
    Parameters
    @param level: requested warning level; values <= 0 are stored as 0 and values >= 3 as 3
    """
    lowest, highest = 0, 3
    clamped = level
    if level <= lowest:
        clamped = lowest
    elif level >= highest:
        clamped = highest
    self.warningLevel = clamped
def warn(self, level, msg):
    """
    Log a warning message, filtered by the current warningLevel
    Parameters:
    @param level: level of this message; it is logged only when level <= self.warningLevel
    @param msg: warning message
    """
    shouldLog = level <= self.warningLevel
    if shouldLog:
        logging.warning(msg)
def checkSet(self, getX, setX, transform=None):
    """
    Pass the value returned by getX to setX, unless that value is None.
    Failures in the transform or in the setter are reported through self.warn (level 3)
    together with the traceback; they are not re-raised.
    Parameters:
    @param getX: getter function that must return value
    @param setX: setter function
    @param transform: Apply transform(x) before calling setter function; if it raises, the setter is not called
    """
    value = getX()
    if value is None:
        return
    if transform is not None:
        raw = value
        try:
            value = transform(raw)
        except Exception:
            self.warn(3, "function checkSet: Transform function did not work: %s(%s)" % (transform, raw))
            self.warn(3, traceback.format_exc())
            return
    try:
        setX(value)
    except Exception:
        self.warn(3, "function checkSet: Setter function did not work: %s(%s)" % (setX, value))
        self.warn(3, traceback.format_exc())
def setValueAndUnits(self, getter, setter, constructor, units=None, transform=None):
    """
    There are several elements that take a value and have an units attribute.
    This function makes it easier to copy over these elements.
    Nothing happens when the getter returns None. Failures in the transform or in the
    setter are reported through self.warn (level 3) with the traceback and are not re-raised.
    Parameters:
    @param getter: Getter function returning an object with get_valueOf_() and get_units(), or None
    @param setter: Setter function that receives the newly constructed (and optionally transformed) object
    @param constructor: Constructor for object that takes (units, valueOf_) as keyword params
    @param units: If this is not None then set units based on this otherwise transfer the units of the source
    @param transform: Applied to the constructed object before calling the setter; if it raises, the setter is not called
    """
    source = getter()
    if source is None:
        return
    unitsOut = source.get_units() if units is None else units
    converted = constructor(valueOf_=source.get_valueOf_(), units=unitsOut)
    if transform is not None:
        try:
            converted = transform(converted)
        except Exception:
            self.warn(3, "function setValueAndUnits: Transform function did not work: %s(%s)" % (transform, converted))
            self.warn(3, traceback.format_exc())
            # There is no valid result to pass on, so stop here (same policy as checkSet).
            # Falling through used to reach the setter with an unbound variable.
            return
    try:
        setter(converted)
    except Exception:
        self.warn(3, "function setValueAndUnits: Setter function did not work: %s(%s)" % (setter, converted))
        self.warn(3, traceback.format_exc())
[docs] def translate_1_9_to_2_0(self, inputFile, outputFile):
"""
Convert input file from 1.9 to 2.0 schema
Parameters:
@param inputFile: Name of input file
@param outputFile: Name of output file
"""
c = self.Constants
def makeSoftwareList(softIn):
    """
    Wrap a 1.9 software string in a 2.0 software list holding a single software entry.
    Convenience function for translating from 1.9 to 2.0
    Parameters:
    @param softIn: software represented as string
    @return: software list as software_list_type (2.0), or None when softIn is None
    """
    if softIn is None:
        return None
    wrapped = emdb_da.software_list_type()
    entry = emdb_da.software_type()
    entry.set_name(softIn)
    wrapped.add_software(entry)
    return wrapped
def addExternalReferences(refIn, refOut):
    """
    Copy every external reference (type and value) of a journal or non-journal citation
    Parameters:
    @param refIn: Input citation with reference list
    @param refOut: Output citation to which the copied references are added
    """
    for sourceRef in refIn.get_externalReference():
        copied = emdb_da.external_referencesType()
        copied.set_type(sourceRef.get_type())
        copied.set_valueOf_(sourceRef.get_valueOf_())
        refOut.add_external_references(copied)
def copyAuthors(get_authors, add_author, simple=False):
    """
    Copy authors from 1.9 -> 2.0
    The 1.9 author string is split on ', ' and each piece is passed on unchanged.
    A missing (None) or empty author string yields no authors in either mode.
    Parameters
    @param get_authors: getter function for getting authors from jrnl/nonjrnl object of 1.9
    @param add_author: adding (setter) function for adding an author to the list of jrnl/nonjrnl object authors
    @param simple: boolean - True means that the authors in 2.0 are simple strings, otherwise they are
                   author_order_type objects carrying an order number that starts at 1
    """
    order = 1
    for name in (get_authors() or '').split(', '):
        if simple:
            # Skip blank pieces so that an absent author string does not produce an empty
            # author entry; this mirrors the hasContent_() check of the ordered branch.
            if name:
                add_author(name)
            continue
        author = emdb_da.author_order_type()
        author.set_valueOf_(name)
        author.set_order(order)
        if author.hasContent_():
            add_author(author)
            # Only authors that are actually added consume an order number
            order += 1
def copyCitation(refIn, refOut):
    """
    Copy over citation from 1.9 to 2.0
    The reference is handled as a journal article when refIn.get_journalArticle() is truthy,
    otherwise as a non-journal article.
    Parameters:
    @param refIn: Input reference in 1.9 schema
    @param refOut: Output citation in 2.0 schema; receives the new citation through set_citation_type
    """
    journalIn = refIn.get_journalArticle()
    if not journalIn:
        otherIn = refIn.get_nonJournalArticle()
        otherOut = emdb_da.non_journal_citation()
        otherOut.original_tagname_ = 'non_journal_citation'
        refOut.set_citation_type(otherOut)
        otherOut.set_published(refIn.get_published())
        copyAuthors(otherIn.get_authors, otherOut.add_author)
        copyAuthors(otherIn.get_editor, otherOut.add_editor)
        # Optional fields: (1.9 getter name, 2.0 setter name), copied only when present
        optionalFields = (
            ('get_chapterTitle', 'set_book_chapter_title'),
            ('get_book', 'set_book_title'),
            ('get_thesisTitle', 'set_thesis_title'),
            ('get_publisher', 'set_publisher'),
            ('get_publisherLocation', 'set_publication_location'),
            ('get_volume', 'set_volume'),
            ('get_firstPage', 'set_first_page'),
            ('get_lastPage', 'set_last_page'),
            ('get_year', 'set_year'),
        )
        for getterName, setterName in optionalFields:
            self.checkSet(getattr(otherIn, getterName), getattr(otherOut, setterName))
        addExternalReferences(otherIn, otherOut)
        return
    journalOut = emdb_da.journal_citation()
    journalOut.original_tagname_ = 'journal_citation'
    refOut.set_citation_type(journalOut)
    copyAuthors(journalIn.get_authors, journalOut.add_author)
    journalOut.set_title(journalIn.get_articleTitle())
    journalOut.set_journal(journalIn.get_journal())
    journalOut.set_published(refIn.get_published())
    # This is a fix because of bad data - emd-1648.xml has an empty volume tag!
    volume = journalIn.get_volume()
    if volume is not None and len(volume) > 0:
        journalOut.set_volume(volume)
    for getterName, setterName in (('get_firstPage', 'set_first_page'),
                                   ('get_lastPage', 'set_last_page'),
                                   ('get_year', 'set_year')):
        self.checkSet(getattr(journalIn, getterName), getattr(journalOut, setterName))
    addExternalReferences(journalIn, journalOut)
def copyNaturalSource(pIn, setSourceFunc):
    """
    Copy natural source from 1.9 to 2.0
    A new natural_source_type is always created and handed to setSourceFunc; species, synonym,
    strain and natural-source details are then filled in where the input provides them.
    Parameters:
    @param pIn: Instance of protein/cellular-component/virus/nucleic-acid/ligand/label/ribosome-eukaryote/ribosome-prokaryote
    @param setSourceFunc: Method belonging to molecule/supramolecule that sets its natural source, e.g. setSourceFunc = mol.set_natural_source for protein
    """
    ns = emdb_da.natural_source_type()
    setSourceFunc(ns)
    speciesIn = pIn.get_sciSpeciesName()
    if speciesIn is not None:
        org = emdb_da.organism_type()
        ns.set_organism(org)
        self.checkSet(speciesIn.get_valueOf_, org.set_valueOf_)
        self.checkSet(speciesIn.get_ncbiTaxId, org.set_ncbi)
    self.checkSet(pIn.get_synSpeciesName, ns.set_synonym_organism)
    # Not every input type is guaranteed to offer a strain accessor, so look it up defensively.
    # A plain attribute test would raise AttributeError instead of falling back to None.
    getStrain = getattr(pIn, 'get_sciSpeciesStrain', None)
    strainIn = getStrain() if getStrain is not None else None
    if strainIn is not None:
        strainOrg = emdb_da.organism_type()
        ns.set_strain(strainOrg)
        self.checkSet(strainIn.get_valueOf_, strainOrg.set_valueOf_)
        self.checkSet(strainIn.get_ncbiTaxId, strainOrg.set_ncbi)
    # Natural-source details are optional: skip them when the accessor or the element is absent
    getNatSource = getattr(pIn, 'get_natSource', None)
    nsIn = getNatSource() if getNatSource is not None else None
    if nsIn is None:
        return
    try:
        self.checkSet(nsIn.get_cell, ns.set_cell)
        self.checkSet(nsIn.get_organelle, ns.set_organelle)
        self.checkSet(nsIn.get_organOrTissue, ns.set_tissue)
        self.checkSet(nsIn.get_cellLocation, ns.set_cellular_location)
    except AttributeError:
        # Best effort: the natural source object does not expose the expected getters
        self.warn(3, "function copyNaturalSource: could not copy natural source details")
        self.warn(3, traceback.format_exc())
def copyRecombinantSource(engIn, setSourceFunc):
    """
    Copy engineered source from 1.9 to recombinant source in 2.0. Does nothing when engIn is None.
    Parameters:
    @param engIn: Engineered source object (input)
    @param setSourceFunc: Method belonging to molecule/supramolecule object that sets its expression system, e.g. setSourceFunc = mol.set_recombinant_expression for protein
    """
    if engIn is None:
        return
    recombinant = emdb_da.recombinant_source_type()
    setSourceFunc(recombinant)
    hostIn = engIn.get_expSystem()
    if hostIn is not None:
        hostOut = emdb_da.organism_type()
        recombinant.set_organism(hostOut)
        self.checkSet(hostIn.get_valueOf_, hostOut.set_valueOf_)
        self.checkSet(hostIn.get_ncbiTaxId, hostOut.set_ncbi)
    self.checkSet(engIn.get_expSystemStrain, recombinant.set_strain)
    self.checkSet(engIn.get_expSystemCell, recombinant.set_cell)
    self.checkSet(engIn.get_vector, recombinant.set_plasmid)
def setSciName(cIn, cOut):
    """
    Build a 2.0 name from the scientific name of a v1.9 component, attaching the synonym when there is one
    Parameters:
    @param cIn: v1.9 component with getter functions for name
    @param cOut: v2.0 component with setter functions for name
    """
    scientific = cIn.get_sciName()
    synonym = cIn.get_synName()
    nameOut = emdb_da.sci_name_type()
    nameOut.set_valueOf_(scientific)
    if synonym is not None:
        nameOut.set_synonym(synonym)
    cOut.set_name(nameOut)
def setMolWeight(setterFunc, wtTheoIn=None, wtExpIn=None, wtMethIn=None):
    """
    Set molecular weight if at least one of its parts is provided; otherwise do nothing
    Parameters:
    @param setterFunc: Function to set molecular weight: f(wt)
    @param wtTheoIn: Theoretical molecular weight
    @param wtExpIn: Experimental molecular weight
    @param wtMethIn: Method used for calculating experimental weight
    """
    if wtTheoIn is None and wtExpIn is None and wtMethIn is None:
        return
    weight = emdb_da.molecular_weight_type()
    setterFunc(weight)
    if wtExpIn is not None:
        weight.set_experimental(
            emdb_da.experimentalType(valueOf_=wtExpIn.get_valueOf_(), units=wtExpIn.get_units()))
    if wtTheoIn is not None:
        weight.set_theoretical(
            emdb_da.theoreticalType(valueOf_=wtTheoIn.get_valueOf_(), units=wtTheoIn.get_units()))
    if wtMethIn is not None:
        weight.set_method(wtMethIn)
def addMolReferences(adderFunc, xRefsIn):
    """
    Add external references (UniProt, GO and InterPro, in that order) to a molecule
    Nothing is added when xRefsIn is None or is a plain list.
    Parameters:
    @param adderFunc: Adder function to add external references, e.g.seq.add_external_references(x)
    @param xRefsIn: v1.9 externalReferences object
    """
    if xRefsIn is None or type(xRefsIn) is list:
        return
    # (2.0 reference type, name of the 1.9 getter returning the reference values)
    sources = (
        ('UNIPROTKB', 'get_refUniProt'),
        ('GO', 'get_refGo'),
        ('INTERPRO', 'get_refInterpro'),
    )
    for refType, getterName in sources:
        for refValue in getattr(xRefsIn, getterName)():
            adderFunc(emdb_da.external_referencesType(valueOf_=refValue, type_=refType))
def copyCTFAndEulerAngles(r, imProcOut):
    """
    Copy CTF and Euler angle info from 1.9 to 2.0 elements
    When the 2.0 image processing object has no final angle assignment, the Euler angle
    details are appended to its details text, wrapped in the eulerAngleDetails marker tags.
    Parameters:
    @param r: reconstruction object from 1.9
    @param imProcOut: image_processing object from 2.0
    """
    ctfIn = r.get_ctfCorrection()
    if ctfIn is not None:
        ctf = emdb_da.ctf_correction_type()
        ctf.set_details(ctfIn)
        imProcOut.set_ctf_correction(ctf)
    # Euler angles do not exist for all methods in 2.0
    angIn = r.get_eulerAnglesDetails()
    if angIn is None:
        return
    try:
        ang = emdb_da.angle_assignment_type()
        ang.set_details(angIn)
        imProcOut.set_final_angle_assignment(ang)
    except AttributeError:
        # Fallback: keep the information in the free-text details.
        # The details may not have been set yet, so start from an empty string in that case.
        details = imProcOut.get_details() or ''
        details += "%s%s%s" % (c.EUL_ANG_START_TAG, angIn, c.EUL_ANG_END_TAG)
        imProcOut.set_details(details)
        self.warn(1, "eulerAngleDetails added to image processing details: %s" % angIn)
def copyMap(mapIn, mapOut, specPrepIn=None):
    """
    Copy map from 1.9 to 2.0
    Copies file attributes, geometry (dimensions, origin, pixel spacing, spacing, axis order, cell),
    symmetry, statistics, details and - when the input has one - the contour level.
    Parameters:
    @param mapIn: input 1.9 map
    @param mapOut: output 2.0 map
    @param specPrepIn: specimen preparation from 1.9; when given, its helical parameters (if any) are added to the map symmetry
    """
    # Set file and related attributes
    fileIn = mapIn.get_file()
    mapOut.set_file(fileIn.get_valueOf_())
    mapOut.set_format(fileIn.get_format())
    mapOut.set_size_kbytes(fileIn.get_sizeKb())
    mapOut.set_data_type(mapIn.get_dataType())
    dimIn = mapIn.get_dimensions()
    dimOut = emdb_da.integer_vector_map_type(row=dimIn.get_numRows(),
                                             col=dimIn.get_numColumns(),
                                             sec=dimIn.get_numSections())
    mapOut.set_dimensions(dimOut)
    origIn = mapIn.get_origin()
    origOut = emdb_da.originType(col=origIn.get_originCol(),
                                 row=origIn.get_originRow(),
                                 sec=origIn.get_originSec())
    mapOut.set_origin(origOut)
    pixIn = mapIn.get_pixelSpacing()
    pixOut = emdb_da.pixel_spacingType(
        x=emdb_da.pixel_spacing_type(valueOf_=pixIn.get_pixelX().get_valueOf_()),
        y=emdb_da.pixel_spacing_type(valueOf_=pixIn.get_pixelY().get_valueOf_()),
        z=emdb_da.pixel_spacing_type(valueOf_=pixIn.get_pixelZ().get_valueOf_()))
    mapOut.set_pixel_spacing(pixOut)
    spcIn = mapIn.get_spacing()
    # NOTE(review): positional arguments - relies on the parameter order of spacingType; confirm against emdb_da
    spcOut = emdb_da.spacingType(spcIn.get_spacingRow(), spcIn.get_spacingCol(), spcIn.get_spacingSec())
    mapOut.set_spacing(spcOut)
    axisIn = mapIn.get_axisOrder()
    axisOut = emdb_da.axis_orderType(fast=axisIn.get_axisOrderFast(),
                                     medium=axisIn.get_axisOrderMedium(),
                                     slow=axisIn.get_axisOrderSlow())
    mapOut.set_axis_order(axisOut)
    cellIn = mapIn.get_cell()
    cellOut = emdb_da.cellType(
        a=emdb_da.cell_type(valueOf_=cellIn.get_cellA().get_valueOf_()),
        b=emdb_da.cell_type(valueOf_=cellIn.get_cellB().get_valueOf_()),
        c=emdb_da.cell_type(valueOf_=cellIn.get_cellC().get_valueOf_()),
        alpha=emdb_da.cell_angle_type(valueOf_=cellIn.get_cellAlpha().get_valueOf_()),
        beta=emdb_da.cell_angle_type(valueOf_=cellIn.get_cellBeta().get_valueOf_()),
        gamma=emdb_da.cell_angle_type(valueOf_=cellIn.get_cellGamma().get_valueOf_()))
    mapOut.set_cell(cellOut)
    sym = emdb_da.applied_symmetry_type()
    mapOut.set_symmetry(sym)
    sym.set_space_group(mapIn.get_spaceGroupNumber())
    self.checkSet(mapIn.get_details, mapOut.set_details)
    # helical symmetry parameters
    if specPrepIn is not None:
        helIn = specPrepIn.get_helicalParameters()
        if helIn is not None:
            hel = emdb_da.helical_parameters_type()
            self.checkSet(helIn.get_deltaPhi, hel.set_delta_phi)
            self.checkSet(helIn.get_deltaZ, hel.set_delta_z)
            self.checkSet(helIn.get_hand, hel.set_hand)
            self.checkSet(helIn.get_axialSymmetry, hel.set_axial_symmetry)
            sym.set_helical_parameters(hel)
    # NOTE(review): the 1.9 statistics object is passed through as-is - confirm this is intended
    mapOut.set_statistics(mapIn.get_statistics())
    self.checkSet(mapIn.get_annotationDetails, mapOut.set_annotation_details)
    # masks do not have contour level, so the accessor may be missing
    getContour = getattr(mapIn, 'get_contourLevel', None)
    cntrIn = getContour() if getContour is not None else None
    if cntrIn is not None:
        cntr = emdb_da.contourType()
        cntrList = emdb_da.contour_listType()
        mapOut.set_contour_list(cntrList)
        cntrList.add_contour(cntr)
        cntr.set_primary(True)
        try:
            cntr.set_level(float(cntrIn.get_valueOf_()))
            cntr.set_source(cntrIn.get_source().upper())
        except (AttributeError, TypeError, ValueError):
            # Best effort: a non-numeric level or a missing source leaves the contour partially filled
            self.warn(3, "function copyMap: could not fully copy contour level")
            self.warn(3, traceback.format_exc())
xmlIn = emdb_19.parse(inputFile, silence=True)
xmlOut = emdb_da.entry_type()
# Write attributes
xmlOut.set_version('2.0')
xmlOut.set_emdb_id(self.formatEMDBCode(xmlIn.get_accessCode()))
## Admin element
admin = emdb_da.admin_type()
xmlOut.set_admin(admin)
depIn = xmlIn.get_deposition()
admin.set_title(depIn.get_title())
self.checkSet(depIn.get_replaceExistingEntry, admin.set_replace_existing_entry)
# key_dates
keyDates = emdb_da.key_datesType()
admin.set_key_dates(keyDates)
keyDates.set_deposition(depIn.get_depositionDate())
keyDates.set_update(xmlIn.get_admin().get_lastUpdate())
keyDates.set_header_release(depIn.get_headerReleaseDate())
self.checkSet(depIn.get_mapReleaseDate, keyDates.set_map_release)
self.checkSet(depIn.get_obsoletedDate, keyDates.set_obsolete)
# current_status
currentStatus = emdb_da.version_type()
code = emdb_da.code_type()
code.set_valueOf_(depIn.get_status().get_valueOf_())
currentStatus.set_code(code)
currentStatus.set_processing_site(depIn.get_processingSite())
# status @prior goes to version history
statusPriorIn = depIn.get_status().get_prior()
if statusPriorIn is not None:
priorStatus = emdb_da.statusType()
priorStatus.set_code(emdb_da.code_type(valueOf_= statusPriorIn))
priorStatus.set_id(1)
"""
if statusPriorIn == c.STS_REL:
self.checkSet(depIn.get_mapReleaseDate, priorStatus.set_date)
elif statusPriorIn in [c.STS_HOLD1, c.STS_HPUB]:
self.checkSet(depIn.get_headerReleaseDate, priorStatus.set_date)
"""
statusHistoryList = emdb_da.version_list_type()
statusHistoryList.add_status(priorStatus)
admin.set_status_history_list(statusHistoryList)
# obsolete list
obsListIn = depIn.get_obsoleteList()
if obsListIn is not None:
# refers to code of currentStatus
code.set_supersedes(True)
obsList = emdb_da.obsolete_listType()
obsEntriesIn = obsListIn.get_entry()
for obsIn in obsEntriesIn:
obs = emdb_da.supersedes_type()
obs.set_entry(obsIn)
obsList.add_entry(obs)
if obsList.hasContent_():
admin.set_obsolete_list(obsList)
# superseded list
supersededListIn = depIn.get_supersededByList()
if supersededListIn is not None:
# refers to code of currentStatus
code.set_superseded(True)
supersedeList = emdb_da.superseded_by_listType()
supersedeEntriesIn = supersededListIn.get_entry()
for supersedeIn in supersedeEntriesIn:
supersede = emdb_da.supersedes_type()
supersede.set_entry(supersedeIn)
supersedeList.add_entry(supersede)
if supersedeList.hasContent_():
admin.set_superseded_by_list(supersedeList)
if currentStatus.hasContent_():
admin.set_current_status(currentStatus)
# sites
sites = emdb_da.sitesType()
admin.set_sites(sites)
sites.set_deposition(depIn.get_depositionSite())
sites.set_last_processing(depIn.get_processingSite())
# keywords
self.checkSet(depIn.get_keywords, admin.set_keywords)
# authors
authorList = emdb_da.authors_listType()
copyAuthors(depIn.get_authors, authorList.add_author, simple=True)
if authorList.hasContent_():
admin.set_authors_list(authorList)
## crossreferences element
cref = emdb_da.crossreferences_type()
xmlOut.set_crossreferences(cref)
citeList = emdb_da.citation_listType()
cref.set_citation_list(citeList)
# primary_citation
refOut = emdb_da.primary_citationType()
citeList.set_primary_citation(refOut)
refIn = depIn.get_primaryReference()
copyCitation(refIn, refOut)
# secondary_citation
for refIn in depIn.get_secondaryReference():
refOut = emdb_da.secondary_citationType()
citeList.add_secondary_citation(refOut)
copyCitation(refIn, refOut)
# in frame EMDB
emdbIn = depIn.get_inFrameEMDBId()
if emdbIn is not None:
emdbListIn = emdbIn.strip(' ').split(',')
if len(emdbListIn) > 0:
emdbList = emdb_da.emdb_cross_reference_list_type()
cref.set_emdb_list(emdbList)
x = 1
for e in emdbListIn:
emdbElem = emdb_da.emdb_cross_reference_type()
emdbElem.set_emdb_id(e)
emdbElem.set_relationship(emdb_da.relationshipType('FULLOVERLAP'))
emdbElem.set_id(x)
emdbList.add_emdb_reference(emdbElem)
x += 1
# fitted PDBs
pdbListIn = depIn.get_fittedPDBEntryIdList()
if pdbListIn:
pdbsIn = pdbListIn.get_fittedPDBEntryId()
if pdbsIn is not None and len(pdbsIn) > 0:
pdbList = emdb_da.pdb_cross_reference_list_type()
cref.set_pdb_list(pdbList)
x = 1
for p in pdbsIn:
pdbElem = emdb_da.pdb_cross_reference_type()
pdbElem.set_pdb_id(p)
pdbElem.set_relationship(emdb_da.relationshipType('FULLOVERLAP'))
pdbElem.set_id(x)
pdbList.add_pdb_reference(pdbElem)
x += 1
## sample
sampleIn = xmlIn.get_sample()
if sampleIn is not None:
sample = emdb_da.sample_type()
xmlOut.set_sample(sample)
self.checkSet(sampleIn.get_name, sample.set_name)
supMolList = emdb_da.supramolecule_listType()
sample.set_supramolecule_list(supMolList)
molList = emdb_da.macromolecule_list_type()
sample.set_macromolecule_list(molList)
# Create 'sample' supramolecule to store 1.9 sample info
smol = emdb_da.sample()
supMolList.add_supramolecule(smol)
smol.set_extensiontype_('sample')
smol.set_id(c.EM_SAMPLE_ID)
self.checkSet(sampleIn.get_compDegree, smol.set_oligomeric_state)
self.checkSet(sampleIn.get_numComponents, smol.set_number_unique_components)
self.checkSet(sampleIn.get_details, smol.set_details)
setMolWeight(smol.set_molecular_weight, sampleIn.get_molWtTheo(), sampleIn.get_molWtExp(), sampleIn.get_molWtMethod())
compIn = sampleIn.get_sampleComponentList()
compListIn = compIn.get_sampleComponent()
for cIn in compListIn:
cType = cIn.get_entry()
cId = cIn.get_componentID()
if cType == 'protein':
mol = emdb_da.protein_or_peptide()
#mol.original_tagname_ = 'protein_or_peptide'
mol.set_extensiontype_('protein_or_peptide')
mol.set_id(cId)
setSciName(cIn, mol)
setMolWeight(mol.set_molecular_weight, cIn.get_molWtTheo(), cIn.get_molWtExp())
self.checkSet(cIn.get_details, mol.set_details)
pIn = cIn.get_protein()
copyNaturalSource(pIn, mol.set_natural_source)
copyRecombinantSource(pIn.get_engSource(), mol.set_recombinant_expression)
self.checkSet(pIn.get_recombinantExpFlag, mol.set_recombinant_exp_flag)
self.checkSet(pIn.get_oligomericDetails, mol.set_oligomeric_state)
self.checkSet(pIn.get_numCopies, mol.set_number_of_copies)
# external references
seq = emdb_da.sequenceType()
mol.set_sequence(seq)
addMolReferences(seq.add_external_references, pIn.get_externalReferences())
molList.add_macromolecule(mol)
elif cType == 'ligand':
mol = emdb_da.ligand()
#mol.original_tagname_ = 'ligand'
mol.set_extensiontype_('ligand')
mol.set_id(cId)
lIn = cIn.get_ligand()
setSciName(cIn, mol)
setMolWeight(mol.set_molecular_weight, cIn.get_molWtTheo(), cIn.get_molWtExp())
self.checkSet(cIn.get_details, mol.set_details)
copyNaturalSource(lIn, mol.set_natural_source)
copyRecombinantSource(lIn.get_engSource(), mol.set_recombinant_expression)
self.checkSet(lIn.get_recombinantExpFlag, mol.set_recombinant_exp_flag)
self.checkSet(lIn.get_oligomericDetails, mol.set_oligomeric_state)
self.checkSet(lIn.get_numCopies, mol.set_number_of_copies)
# engineered source not implemented in 2.0
#copyRecombinantSource(lIn.get_engSource(), mol.set_recombinant_expression)
# external references
addMolReferences(mol.add_external_references, lIn.get_externalReferences())
molList.add_macromolecule(mol)
elif cType == 'label':
mol = emdb_da.em_label()
#mol.original_tagname_ = 'em_label'
mol.set_extensiontype_('em_label')
mol.set_id(cId)
lIn = cIn.get_label()
setSciName(cIn, mol)
setMolWeight(mol.set_molecular_weight, cIn.get_molWtTheo(), cIn.get_molWtExp())
self.checkSet(cIn.get_details, mol.set_details)
self.checkSet(lIn.get_formula, mol.set_formula)
self.checkSet(lIn.get_oligomericDetails, mol.set_oligomeric_state)
self.checkSet(lIn.get_numCopies, mol.set_number_of_copies)
molList.add_macromolecule(mol)
elif cType == 'nucleic-acid':
naIn = cIn.get_nucleic_acid()
naClassIn = naIn.get_class()
if naClassIn == 'DNA':
mol = emdb_da.dna()
mol.set_extensiontype_('dna')
mol.set_id(cId)
mol.set_classification('DNA')
setSciName(cIn, mol)
setMolWeight(mol.set_molecular_weight, cIn.get_molWtTheo(), cIn.get_molWtExp())
copyNaturalSource(naIn, mol.set_natural_source)
self.checkSet(naIn.get_structure, mol.set_structure)
self.checkSet(naIn.get_syntheticFlag, mol.set_synthetic_flag)
self.checkSet(cIn.get_details, mol.set_details)
seq = emdb_da.sequenceType()
mol.set_sequence(seq)
seqIn = naIn.get_sequence()
if seqIn is not None:
seq.set_string(seqIn)
molList.add_macromolecule(mol)
elif naClassIn == 'RNA' or naClassIn == 'T-RNA':
mol = emdb_da.rna()
mol.set_extensiontype_('rna')
mol.set_id(cId)
if naClassIn == 'T-RNA':
naClass = 'TRANSFER'
else:
naClass = 'OTHER'
mol.set_classification(naClass)
setSciName(cIn, mol)
setMolWeight(mol.set_molecular_weight, cIn.get_molWtTheo(), cIn.get_molWtExp())
copyNaturalSource(naIn, mol.set_natural_source)
self.checkSet(naIn.get_structure, mol.set_structure)
self.checkSet(naIn.get_syntheticFlag, mol.set_synthetic_flag)
self.checkSet(cIn.get_details, mol.set_details)
seq = emdb_da.sequenceType()
seqIn = naIn.get_sequence()
if seqIn is not None:
seq.set_string(seqIn)
if seq.hasContent_():
mol.set_sequence(seq)
if mol.hasContent_():
molList.add_macromolecule(mol)
elif naClassIn in ['DNA/RNA', 'OTHER']:
mol = emdb_da.other_macromolecule()
mol.set_extensiontype_('other_macromolecule')
mol.set_id(cId)
if naClassIn == 'OTHER':
mol.set_classification('OTHER_NA')
else:
mol.set_classification(naClassIn)
setSciName(cIn, mol)
setMolWeight(mol.set_molecular_weight, cIn.get_molWtTheo(), cIn.get_molWtExp())
self.checkSet(naIn.get_structure, mol.set_structure)
self.checkSet(naIn.get_syntheticFlag, mol.set_synthetic_flag)
copyNaturalSource(naIn, mol.set_natural_source)
self.checkSet(cIn.get_details, mol.set_details)
seq = emdb_da.sequenceType()
seqIn = naIn.get_sequence()
if seqIn is not None:
seq.set_string(seqIn)
if seq.hasContent_():
mol.set_sequence(seq)
if mol.hasContent_():
molList.add_macromolecule(mol)
else:
# Other types not yet handled
pass
elif cType == 'virus':
smol = emdb_da.virus()
#smol.original_tagname_ = 'virus'
smol.set_extensiontype_('virus')
vIn = cIn.get_virus()
smol.set_id(cId)
setSciName(cIn, smol)
setMolWeight(smol.set_molecular_weight, cIn.get_molWtTheo(), cIn.get_molWtExp())
smol.set_virus_empty(vIn.get_empty())
smol.set_virus_enveloped(vIn.get_enveloped())
smol.set_virus_isolate(vIn.get_isolate())
smol.set_virus_type(vIn.get_class())
x = vIn.get_sciSpeciesName()
smol.set_sci_species_name(emdb_da.virus_species_name_type(valueOf_=x.get_valueOf_(), ncbi=x.get_ncbiTaxId()))
self.checkSet(vIn.get_synSpeciesName, smol.set_syn_species_name)
self.checkSet(vIn.get_sciSpeciesSerotype, smol.set_sci_species_serotype)
self.checkSet(vIn.get_sciSpeciesSerocomplex, smol.set_sci_species_serocomplex)
self.checkSet(vIn.get_sciSpeciesSubspecies, smol.set_sci_species_subspecies)
self.checkSet(vIn.get_sciSpeciesStrain, smol.set_sci_species_strain)
self.checkSet(cIn.get_details, smol.set_details)
addMolReferences(smol.add_external_references, vIn.get_externalReferences())
nsIn = vIn.get_natSource()
if nsIn is not None:
ns = emdb_da.natural_hostType()
#ns = emdb_da.natural_source_type()
hsIn = nsIn[0].get_hostSpecies()
if hsIn is not None:
org = emdb_da.organism_type()
ns.set_organism(org)
org.set_valueOf_(hsIn.get_valueOf_())
org.set_ncbi(hsIn.get_ncbiTaxId())
x = nsIn[0].get_hostSpeciesStrain()
if x is not None:
strain = emdb_da.organism_type()
ns.set_strain(strain)
strain.set_valueOf_(x)
strain.set_ncbi(c.EM_UNIDENTIFIED_TAXID)
self.checkSet(nsIn[0].get_hostCategory, ns.set_synonym_organism) # placeholder
smol.add_natural_host(ns)
esIn = vIn.get_engSource()
if esIn is not None:
e = esIn[0]
es = emdb_da.recombinant_source_type()
smol.set_host_system(es)
expSysIn = e.get_expSystem()
if expSysIn is not None:
org = emdb_da.organism_type()
es.set_organism(org)
self.checkSet(expSysIn.get_valueOf_, org.set_valueOf_)
self.checkSet(expSysIn.get_ncbiTaxId, org.set_ncbi)
self.checkSet(e.get_expSystemStrain, es.set_strain)
self.checkSet(e.get_expSystemCell, es.set_cell)
self.checkSet(e.get_vector, es.set_plasmid)
shellListIn = vIn.get_shell()
for shellIn in shellListIn:
shell = emdb_da.virus_shellType()
self.checkSet(shellIn.get_nameElement, shell.set_name)
self.checkSet(shellIn.get_diameter, shell.set_diameter)
self.checkSet(shellIn.get_tNumber, shell.set_triangulation, int)
self.checkSet(shellIn.get_id, shell.set_id)
smol.add_virus_shell(shell)
supMolList.add_supramolecule(smol)
elif cType == 'cellular-component':
ccomp = emdb_da.organelle_or_cellular_component()
ccomp.set_extensiontype_('organelle_or_cellular_component')
ccomp.set_id(cId)
setSciName(cIn, ccomp)
setMolWeight(ccomp.set_molecular_weight, cIn.get_molWtTheo(), cIn.get_molWtExp())
self.checkSet(cIn.get_details, ccomp.set_details)
orgIn = cIn.get_cellular_component()
copyNaturalSource(orgIn, ccomp.set_natural_source)
copyRecombinantSource(orgIn.get_engSource(), ccomp.set_recombinant_expression)
self.checkSet(orgIn.get_recombinantExpFlag, ccomp.set_recombinant_exp_flag)
self.checkSet(orgIn.get_oligomericDetails, ccomp.set_oligomeric_state)
self.checkSet(orgIn.get_numCopies, ccomp.set_number_of_copies)
addMolReferences(ccomp.add_external_references, orgIn.get_externalReferences())
supMolList.add_supramolecule(ccomp)
elif cType == 'ribosome-eukaryote':
rib = emdb_da.complex()
rib.set_extensiontype_('complex')
rib.set_id(cId)
setSciName(cIn, rib)
setMolWeight(rib.set_molecular_weight, cIn.get_molWtTheo(), cIn.get_molWtExp())
rIn = cIn.get_ribosome_eukaryote()
#rib.set_name(c.get_sciName())
cat = emdb_da.categoryType(valueOf_= 'ribosome-eukaryote')
rib.set_category(cat)
rib.set_ribosome_details(rIn.get_eukaryote())
copyNaturalSource(rIn, rib.add_natural_source)
copyRecombinantSource(rIn.get_engSource(), rib.add_recombinant_expression)
self.checkSet(rIn.get_recombinantExpFlag, rib.set_recombinant_exp_flag)
self.checkSet(rIn.get_oligomericDetails, rib.set_oligomeric_state)
self.checkSet(rIn.get_numCopies, rib.set_number_of_copies)
self.checkSet(cIn.get_details, rib.set_details)
addMolReferences(rib.add_external_references, rIn.get_externalReferences())
supMolList.add_supramolecule(rib)
elif cType == 'ribosome-prokaryote':
rib = emdb_da.complex()
rib.set_extensiontype_('complex')
rib.set_id(cId)
setSciName(cIn, rib)
setMolWeight(rib.set_molecular_weight, cIn.get_molWtTheo(), cIn.get_molWtExp())
rIn = cIn.get_ribosome_prokaryote()
#rib.set_name(c.get_sciName())
cat = emdb_da.categoryType(valueOf_= 'ribosome-prokaryote')
rib.set_category(cat)
rib.set_ribosome_details(rIn.get_prokaryote())
copyNaturalSource(rIn, rib.add_natural_source)
copyRecombinantSource(rIn.get_engSource(), rib.add_recombinant_expression)
self.checkSet(rIn.get_recombinantExpFlag, rib.set_recombinant_exp_flag)
self.checkSet(rIn.get_oligomericDetails, rib.set_oligomeric_state)
self.checkSet(rIn.get_numCopies, rib.set_number_of_copies)
self.checkSet(cIn.get_details, rib.set_details)
addMolReferences(rib.add_external_references, rIn.get_externalReferences())
supMolList.add_supramolecule(rib)
## structure_determination element
#### !!!! NOT COMPLETE
sdList = emdb_da.structure_determination_listType()
xmlOut.set_structure_determination_list(sdList)
sd = emdb_da.structure_determination_type()
sdList.add_structure_determination(sd)
processIn = xmlIn.get_processing()
if processIn is not None:
emMethod = processIn.get_method()
else:
# assume single particle
emMethod = c.EMM_SP
if emMethod in [c.EMM_SP, c.EMM_STOM, c.EMM_TOM]:
sd.set_method(emMethod)
elif emMethod == 'twoDCrystal':
sd.set_method(c.EMM_EC)
elif emMethod == c.EMM_HEL:
sd.set_method(c.EMM_HEL)
expIn = xmlIn.get_experiment()
specPrepIn = expIn.get_specimenPreparation()
vitrIn = expIn.get_vitrification()
nSp = max(1, len(vitrIn))
self.checkSet(specPrepIn.get_specimenState, sd.set_aggregation_state)
# specimen_preparation_list
specPrepList = emdb_da.specimen_preparation_listType()
sd.set_specimen_preparation_list(specPrepList)
j = 1
for i in range(0,nSp):
if emMethod in [c.EMM_SP, c.EMM_HEL ]:
sp = emdb_da.base_preparation_type()
sp.set_extensiontype_(c.EXT_BASE_PREPARATION_TYPE)
elif emMethod in [c.EMM_STOM, c.EMM_TOM]:
sp = emdb_da.tomography_preparation_type()
sp.set_extensiontype_(c.EXT_TOMOGRAPHY_PREPARATION_TYPE)
elif emMethod == 'twoDCrystal':
sp = emdb_da.crystallography_preparation_type()
sp.set_extensiontype_(c.EXT_CRYSTALLOGRAPHY_PREPARATION_TYPE)
xForm = emdb_da.crystal_formationType()
sp.set_crystal_formation(xForm)
self.checkSet(specPrepIn.get_crystalGrowDetails, xForm.set_details)
specPrepList.add_specimen_preparation(sp)
sp.set_id(j)
j += 1
concIn = specPrepIn.get_specimenConc()
if concIn is not None:
conc = emdb_da.concentrationType()
conc.set_units('mg/mL')
conc.set_valueOf_(concIn.get_valueOf_())
sp.set_concentration(conc)
bufIn = specPrepIn.get_buffer()
if bufIn is not None:
buf = emdb_da.buffer_type()
self.checkSet(bufIn.get_ph, buf.set_ph)
self.checkSet(bufIn.get_details, buf.set_details)
sp.set_buffer(buf)
stainIn = specPrepIn.get_staining()
if stainIn is not None:
stain = emdb_da.stainingType()
stain.set_details(stainIn)
# Assume negative staining
stain.set_type('negative')
sp.set_staining(stain)
gridIn = specPrepIn.get_specimenSupportDetails()
if gridIn is not None:
grid = emdb_da.grid_type()
grid.set_details(gridIn)
sp.set_grid(grid)
if vitrIn is not None:
v = vitrIn[i]
vitr = emdb_da.vitrification_type()
vitr.set_cryogen_name(v.get_cryogenName())
self.checkSet(v.get_instrument, vitr.set_instrument)
self.checkSet(v.get_method, vitr.set_method)
self.checkSet(v.get_details, vitr.set_details)
self.checkSet(v.get_timeResolvedState, vitr.set_timed_resolved_state)
x = v.get_humidity()
if x is not None:
vitr.set_chamber_humidity(emdb_da.chamber_humidityType(valueOf_ = float(x)))
x = v.get_temperature()
if x is not None:
vitr.set_chamber_temperature(emdb_da.chamber_temperatureType(valueOf_ = x.get_valueOf_()))
sp.set_vitrification(vitr)
# If tomography, sub-tomogram averaging, or helical the user may have employed array prep in which case crystalGrowDetails is set
# To handle this we create an extra spec prep of crystallography type
if emMethod in [c.EMM_STOM, c.EMM_TOM, c.EMM_HEL]:
crystGrowIn = specPrepIn.get_crystalGrowDetails()
if crystGrowIn is not None:
sp = emdb_da.crystallography_preparation_type()
sp.set_extensiontype_(c.EXT_CRYSTALLOGRAPHY_PREPARATION_TYPE)
xForm = emdb_da.crystal_formationType()
xForm.set_details(crystGrowIn)
sp.set_crystal_formation(xForm)
sp.set_id(j)
specPrepList.add_specimen_preparation(sp)
# microscopy
microscopyList = emdb_da.microscopy_listType()
sd.set_microscopy_list(microscopyList)
imagingListIn = expIn.get_imaging()
numImagingIn = len(imagingListIn)
imAcIn = expIn.get_imageAcquisition() # list of detectors
numDet = len(imAcIn)
if numDet == 0:
self.warn(1, "No image acquisition elements found!")
axis1 = None # forward reference that will be used in tomography processing
i = 1
for img in imagingListIn:
if emMethod in [c.EMM_SP, c.EMM_HEL]:
mic = emdb_da.base_microscopy_type()
mic.set_extensiontype_(c.EXT_BASE_MICROSCOPY_TYPE)
elif emMethod in [c.EMM_TOM, c.EMM_STOM]:
mic = emdb_da.tomography_microscopy_type()
mic.set_extensiontype_(c.EXT_TOMOGRAPHY_MICROSCOPY_TYPE)
elif emMethod == 'twoDCrystal':
mic = emdb_da.crystallography_microscopy_type()
mic.set_extensiontype_(c.EXT_CRYSTALLOGRAPHY_MICROSCOPY_TYPE)
micTypeIn = mic.get_extensiontype_()
microscopyList.add_microscopy(mic)
mic.set_id(i)
mic.set_electron_source(img.get_electronSource())
mic.set_imaging_mode(img.get_imagingMode())
mic.set_illumination_mode(img.get_illuminationMode())
mic.set_microscope(img.get_microscope())
self.checkSet(img.get_specimenHolderModel, mic.set_specimen_holder_model)
self.checkSet(img.get_specimenHolder, mic.set_specimen_holder)
self.checkSet(img.get_details, mic.set_details)
x = img.get_nominalDefocusMin()
if x is not None:
mic.set_nominal_defocus_min(emdb_da.nominal_defocus_minType(valueOf_=x.valueOf_))
x = img.get_nominalDefocusMax()
if x is not None:
mic.set_nominal_defocus_max(emdb_da.nominal_defocus_maxType(valueOf_=x.valueOf_))
x = img.get_nominalCs()
if x is not None:
mic.set_nominal_cs(emdb_da.nominal_csType(valueOf_=x.valueOf_))
x = img.get_acceleratingVoltage()
if x is not None:
mic.set_acceleration_voltage(emdb_da.acceleration_voltageType(valueOf_=x.valueOf_))
tiltMinIn = img.get_tiltAngleMin()
tiltMaxIn = img.get_tiltAngleMax()
if tiltMinIn is not None or tiltMaxIn is not None:
if micTypeIn == c.EXT_TOMOGRAPHY_MICROSCOPY_TYPE:
tiltSeries = emdb_da.tilt_series_type()
mic.add_tilt_series(tiltSeries)
axis1 = emdb_da.axis_type()
if tiltMinIn is not None:
tiltMin = emdb_da.min_angleType(valueOf_=tiltMinIn.get_valueOf_(), units=tiltMinIn.get_units())
axis1.set_min_angle(tiltMin)
if tiltMaxIn is not None:
tiltMax = emdb_da.max_angleType(valueOf_=tiltMaxIn.get_valueOf_(), units=tiltMaxIn.get_units())
axis1.set_max_angle(tiltMax)
tiltSeries.set_axis1(axis1)
else:
if tiltMinIn is not None:
mic.set_tilt_angle_min(tiltMinIn.get_valueOf_())
if tiltMaxIn is not None:
mic.set_tilt_angle_max(tiltMaxIn.get_valueOf_())
self.checkSet(img.get_nominalMagnification, mic.set_nominal_magnification)
self.checkSet(img.get_calibratedMagnification, mic.set_calibrated_magnification)
x = img.get_date()
if x is not None:
try:
d = dtp.parse(x)
mic.set_date(d)
except:
self.warn(1, "Unrecognized date format: %s" % x)
# alignment
astIn = img.get_astigmatism()
tiltIn = img.get_electronBeamTiltParams()
if astIn is not None or tiltIn is not None:
align = emdb_da.alignment_procedureType()
mic.set_alignment_procedure(align)
leg = emdb_da.legacyType()
align.set_legacy(leg)
if astIn is not None:
leg.set_astigmatism(astIn)
if tiltIn is not None:
leg.set_electron_beam_tilt_params(tiltIn)
# temperature
tempAvIn = img.get_temperature()
tempMaxIn = img.get_temperatureMax()
tempMinIn = img.get_temperatureMin()
if tempAvIn is not None or tempMaxIn is not None or tempMinIn is not None:
temp = emdb_da.temperatureType()
mic.set_temperature(temp)
if tempAvIn is not None:
temp.set_temperature_average(emdb_da.temperature_averageType(valueOf_=tempAvIn.get_valueOf_(), units=tempAvIn.get_units()))
if tempMaxIn is not None:
temp.set_temperature_max(emdb_da.temperature_maxType(valueOf_=tempMaxIn.get_valueOf_(), units=tempMaxIn.get_units()))
if tempMinIn is not None:
temp.set_temperature_min(emdb_da.temperature_minType(valueOf_=tempMinIn.get_valueOf_(), units=tempMinIn.get_units()))
egfIn = img.get_energyFilter()
egf = None
if egfIn is not None:
spop = emdb_da.specialist_optics_type()
mic.set_specialist_optics(spop)
egf = emdb_da.energy_filterType()
spop.set_energy_filter(egf)
egf.set_name(egfIn)
eWinIn = img.get_energyWindow()
if eWinIn is not None:
if egf is None:
spop = emdb_da.specialist_optics_type()
mic.set_specialist_optics(spop)
egf = emdb_da.energy_filterType()
spop.set_energy_filter(egf)
eValIn = eWinIn.get_valueOf_()
eUnitsIn = eWinIn.get_units()
x = eValIn.split('-')
if len(x) == 2:
egf.set_lower_energy_threshold(emdb_da.lower_energy_thresholdType(valueOf_=float(x[0]),units=eUnitsIn))
egf.set_upper_energy_threshold(emdb_da.upper_energy_thresholdType(valueOf_=float(x[1]),units=eUnitsIn))
else:
egf.set_upper_energy_threshold(emdb_da.upper_energy_thresholdType(valueOf_=-1.0,units=eUnitsIn))
# image_recording
# if number of detectors > number of microscopes
# mic: 1 2 3 4
# det: 1 2 3 4,5,6
# else
# mic: 1 2 3 4
# det: 1 2 2 2
if numDet > 0:
if numDet >= numImagingIn:
minIdx = i - 1
if i == numImagingIn:
maxIdx = numDet
else:
maxIdx = minIdx + 1
else:
if i < numDet:
minIdx = i - 1
else:
minIdx = numDet - 1
maxIdx = minIdx + 1
imRecList = emdb_da.image_recording_listType()
for imAc in imAcIn[minIdx:maxIdx]:
imRec = emdb_da.image_recordingType()
dig = emdb_da.digitization_detailsType()
imRec.set_digitization_details(dig)
x = imAc.get_samplingSize()
if x is not None:
dig.set_sampling_interval(emdb_da.sampling_intervalType(valueOf_ = x.get_valueOf_(), units='um'))
self.checkSet(imAc.get_numDigitalImages, imRec.set_number_real_images)
self.checkSet(imAc.get_details, imRec.set_details)
self.checkSet(imAc.get_odRange, imRec.set_od_range)
self.checkSet(imAc.get_quantBitNumber, imRec.set_bits_per_pixel)
self.checkSet(img.get_detectorDistance, imRec.set_detector_distance)
doseIn = img.get_electronDose()
if doseIn is not None:
imRec.set_average_electron_dose_per_image(emdb_da.average_electron_dose_per_imageType(valueOf_=doseIn.get_valueOf_(), units=doseIn.get_units()))
fod = emdb_da.film_or_detector_modelType()
x = img.get_detector()
if x is not None:
fod.set_valueOf_(x)
x = imAc.get_scanner()
if x is not None:
fod.set_category('film')
dig.set_scanner(x)
else:
# For now classify all as CCD - this may need remediation
fod.set_category('CCD')
if fod.hasContent_():
imRec.set_film_or_detector_model(fod)
x = imAc.get_URLRawData()
if x is not None:
urlRefList = emdb_da.auxiliary_link_listType()
cref.set_auxiliary_link_list(urlRefList)
urlRef = emdb_da.auxiliary_link_type()
urlRefList.add_auxiliary_link(urlRef)
urlRef.set_link(x)
if imRec.hasContent_():
imRecList.add_image_recording(imRec)
if imRecList.hasContent_():
mic.set_image_recording_list(imRecList)
i += 1
# image_processing
# In 1.9 reconstruction is a list and independent of method
# In 2.0 each reconstruction is mapped to an additional image_processing element
if processIn is not None:
def setCrystalParameters(specPrepIn, setter):
"""
Set crystal parameters element in v2.0 based on 2D/3D crystal parameter info in v1.9
Parameters:
@param specPrepIn: Object wrapping specimen preparation element in v1.9
@param setter: Setter function for setting crystal parameters object in v 2.0
"""
# Try the 2D crystal parameters first; the 3D ones are only consulted when the 2D ones are missing
crystParIn = specPrepIn.get_twoDCrystalParameters()
twoDCryst = True
if crystParIn is None:
crystParIn = specPrepIn.get_threeDCrystalParameters()
if crystParIn is None:
# Neither 2D nor 3D crystal parameters are present - nothing to transfer
return
twoDCryst = False
crystPar = emdb_da.crystal_parameters_type()
# 2D parameters supply a plane group, 3D parameters a space group
if twoDCryst:
self.checkSet(crystParIn.get_planeGroup, crystPar.set_plane_group)
else:
self.checkSet(crystParIn.get_spaceGroup, crystPar.set_space_group)
# Unit cell lengths/angles are handed to self.setValueAndUnits together with the
# v2.0 wrapper class to use (cell_type for lengths, cell_angle_type for angles)
unitCell = emdb_da.unit_cell_type()
crystPar.set_unit_cell(unitCell)
self.setValueAndUnits(crystParIn.get_aLength, unitCell.set_a, emdb_da.cell_type)
self.setValueAndUnits(crystParIn.get_bLength, unitCell.set_b, emdb_da.cell_type)
self.setValueAndUnits(crystParIn.get_cLength, unitCell.set_c, emdb_da.cell_type)
self.setValueAndUnits(crystParIn.get_alpha, unitCell.set_alpha, emdb_da.cell_angle_type)
self.setValueAndUnits(crystParIn.get_beta, unitCell.set_beta, emdb_da.cell_angle_type)
self.setValueAndUnits(crystParIn.get_gamma, unitCell.set_gamma, emdb_da.cell_angle_type)
# Only pass the result to the caller-supplied setter when hasContent_() reports content
if crystPar.hasContent_():
setter(crystPar)
def setHelicalSymmetry(specPrepIn, rec):
    """
    Transfer v1.9 helical parameters onto the applied symmetry of a v2.0 reconstruction.
    Parameters:
    @param specPrepIn: Object wrapping specimen preparation element in v1.9
    @param rec: Reconstruction object (v2.0) providing get_applied_symmetry/set_applied_symmetry
    """
    helixIn = specPrepIn.get_helicalParameters()
    if helixIn is None:
        # No helical parameters in the 1.9 entry - leave the reconstruction untouched
        return
    helixOut = emdb_da.helical_parameters_type()
    # Re-use a symmetry object already attached to the reconstruction (e.g. one carrying
    # a point group), otherwise start a fresh one
    appliedSymm = rec.get_applied_symmetry()
    if appliedSymm is None:
        appliedSymm = emdb_da.applied_symmetry_type()
    appliedSymm.set_helical_parameters(helixOut)
    self.setValueAndUnits(helixIn.get_deltaPhi, helixOut.set_delta_phi, emdb_da.delta_phiType)
    self.setValueAndUnits(helixIn.get_deltaZ, helixOut.set_delta_z, emdb_da.delta_zType)
    self.checkSet(helixIn.get_hand, helixOut.set_hand)
    self.checkSet(helixIn.get_axialSymmetry, helixOut.set_axial_symmetry)
    if appliedSymm.hasContent_():
        rec.set_applied_symmetry(appliedSymm)
def addReconstruction(reconstructionType):
"""
Local helper function to copy over 1.9 reconstruction element
Parameters:
@param reconstructionType: reconstruction type for method. The assumption is that this routine processes common items only.
@return:
"""
# NOTE: imProc (the v2.0 image_processing object) and r (the v1.9 reconstruction) are not
# parameters - they are read from the enclosing scope, so the caller must have assigned
# both before invoking this helper.
rec = reconstructionType()
imProc.set_final_reconstruction(rec)
self.checkSet(r.get_algorithm, rec.set_algorithm)
resolutionIn = r.get_resolutionByAuthor()
# Truthiness test: a missing (None), empty or zero resolution is not transferred
if resolutionIn:
res = emdb_da.resolutionType()
res.set_valueOf_(float(resolutionIn))
rec.set_resolution(res)
self.checkSet(r.get_resolutionMethod, rec.set_resolution_method)
self.checkSet(r.get_details, rec.set_details)
# makeSoftwareList is defined outside this helper; a None result means no software list is set
softList = makeSoftwareList(r.get_software())
if softList is not None:
rec.set_software_list(softList)
# The new reconstruction is returned so the caller can add method-specific fields
return rec
reconIn = processIn.get_reconstruction()
i = 1
for r in reconIn:
if emMethod == c.EMM_SP:
imProc = emdb_da.singleparticle_processing_type()
sd.add_image_processing(imProc)
imProc.set_extensiontype_('singleparticle_processing_type')
imProc.set_id(i)
spProc = processIn.get_singleParticle()
self.checkSet(spProc.get_details, imProc.set_details)
rec = addReconstruction(emdb_da.reconstruction_type)
self.checkSet(spProc.get_numProjections, rec.set_number_images_used)
#self.checkSet(spProc.get_numClassAverages, rec.set_number_classes_used)
copyCTFAndEulerAngles(r, imProc)
numClsIn = spProc.get_numClassAverages()
if numClsIn is not None:
finalCls = emdb_da.classification_type()
finalCls.set_number_classes(numClsIn)
if finalCls.hasContent_():
imProc.set_final_two_d_classification(finalCls)
#partPick = emdb_da.particle_selection_type()
#partPick.set_number_particles_selected(10)
#imProc.add_particle_selection(partPick)
symmIn = processIn.get_singleParticle().get_appliedSymmetry()
if symmIn:
symm = emdb_da.applied_symmetry_type()
rec.set_applied_symmetry(symm)
symm.set_point_group(symmIn)
setHelicalSymmetry(specPrepIn, rec)
elif emMethod == c.EMM_HEL:
imProc = emdb_da.helical_processing_type()
imProc.set_extensiontype_('helical_processing_type')
sd.add_image_processing(imProc)
imProc.set_id(i)
# Some helical entries have single particle processing
hProc = processIn.get_helical()
rec = addReconstruction(emdb_da.reconstruction_type)
alg = rec.get_algorithm()
if hProc is None:
hProc = processIn.get_singleParticle()
self.checkSet(hProc.get_numProjections, rec.set_number_images_used)
self.checkSet(hProc.get_numClassAverages, rec.set_number_classes_used)
rec.set_algorithm(c.SP_TAG + (alg or ''))
else:
rec.set_algorithm(c.HEL_TAG + (alg or ''))
self.checkSet(hProc.get_details, imProc.set_details)
copyCTFAndEulerAngles(r, imProc)
setCrystalParameters(specPrepIn, imProc.set_crystal_parameters)
setHelicalSymmetry(specPrepIn, rec)
elif emMethod == c.EMM_TOM:
imProc = emdb_da.tomography_processing_type()
imProc.set_extensiontype_('tomography_processing_type')
sd.add_image_processing(imProc)
imProc.set_id(i)
tProc = processIn.get_tomography()
self.checkSet(tProc.get_details, imProc.set_details)
tiltIncIn = tProc.get_tiltAngleIncrement()
if tiltIncIn is not None:
# check that there is an axis1 element defined - we assume only one microscopy element with one axis element
tiltSeries = mic.get_tilt_series()
if len(tiltSeries) > 0:
axis1 = tiltSeries[0].get_axis1()
else:
ts = emdb_da.tilt_series_type()
mic.add_tilt_series(ts)
axis1 = emdb_da.axis_type()
ts.set_axis1(axis1)
tiltInc = emdb_da.angle_incrementType(valueOf_=tiltIncIn, units='degrees')
axis1.set_angle_increment(tiltInc)
rec = addReconstruction(emdb_da.reconstruction_type)
self.checkSet(tProc.get_numSections, rec.set_number_images_used)
copyCTFAndEulerAngles(r, imProc)
setCrystalParameters(specPrepIn, imProc.set_crystal_parameters)
symmIn = tProc.get_appliedSymmetry()
if symmIn:
symm = emdb_da.applied_symmetry_type()
rec.set_applied_symmetry(symm)
symm.set_point_group(symmIn)
setHelicalSymmetry(specPrepIn, rec)
elif emMethod == c.EMM_STOM:
imProc = emdb_da.subtomogram_averaging_processing_type()
imProc.set_extensiontype_('subtomogram_averaging_processing_type')
sd.add_image_processing(imProc)
imProc.set_id(i)
stProc = processIn.get_subtomogramAveraging()
self.checkSet(stProc.get_details, imProc.set_details)
rec = addReconstruction(emdb_da.subtomogram_reconstruction_type)
self.checkSet(stProc.get_numSubtomograms, rec.set_number_subtomograms_used)
#self.checkSet(stProc.get_numClassAverages, rec.set_number_classes_used)
copyCTFAndEulerAngles(r, imProc)
setCrystalParameters(specPrepIn, imProc.set_crystal_parameters)
numClsIn = stProc.get_numClassAverages()
if numClsIn is not None:
finalCls = emdb_da.classification_type()
finalCls.set_number_classes(numClsIn)
if finalCls.hasContent_():
imProc.set_final_three_d_classification(finalCls)
symmIn = stProc.get_appliedSymmetry()
if symmIn:
symm = emdb_da.applied_symmetry_type()
rec.set_applied_symmetry(symm)
symm.set_point_group(symmIn)
setHelicalSymmetry(specPrepIn, rec)
elif emMethod == 'twoDCrystal':
imProc = emdb_da.crystallography_processing_type()
imProc.set_extensiontype_('crystallography_processing_type')
sd.add_image_processing(imProc)
imProc.set_id(i)
xProc = processIn.get_twoDCrystal()
self.checkSet(xProc.get_details, imProc.set_details)
rec = addReconstruction(emdb_da.reconstruction_type)
copyCTFAndEulerAngles(r, imProc)
setCrystalParameters(specPrepIn, imProc.set_crystal_parameters)
setHelicalSymmetry(specPrepIn, rec)
i += 1
# map
mapOut = emdb_da.map_type()
xmlOut.set_map(mapOut)
mapIn = xmlIn.get_map()
copyMap(mapIn, mapOut, specPrepIn)
# interpretation
intrp = emdb_da.interpretation_type()
xmlOut.set_interpretation(intrp)
# modelling_list
fittingListIn = expIn.get_fitting()
if fittingListIn is not None and len(fittingListIn) > 0:
modellingList = emdb_da.modelling_listType()
intrp.set_modelling_list(modellingList)
for fit in fittingListIn:
modelling = emdb_da.modelling_type()
modellingList.add_modelling(modelling)
pdbListIn = fit.get_pdbEntryIdList()
if pdbListIn is not None:
pdbIn = pdbListIn.get_pdbEntryId()
chainsIn = pdbListIn.get_pdbChainId()
if len(pdbIn) > 0:
for p in pdbIn:
pdbModel = emdb_da.initial_modelType()
pdbModel.set_access_code(p)
modelling.add_initial_model(pdbModel)
# Map all chains on a best effort basis - if it matches the pattern PDBID_CHAIN - check if PDBID matches
for chIn in chainsIn:
chain = emdb_da.chainType()
m = re.match(c.PDB_CHAIN_PAT, chIn)
if m is not None:
matchGroups = m.groups()
pdbCode = matchGroups[0]
ch = matchGroups[2]
if pdbCode == p:
chain.set_id(ch)
else:
chain.set_id(chIn)
if chain.hasContent_():
pdbModel.add_chain(chain)
elif len(chainsIn) > 0:
# Pathological case when chains are specified but no PDB entry
# in this case use the first element as the PDB entry - try and parse the first chain element to see if the PDB ID is embedded
pdbModel = emdb_da.initial_modelType()
self.warn(1, "Chain IDs specified but no PDB ID! Will try and parse PDB ID from first chain ID!")
c0 = chainsIn[0]
m = re.match(c.PDB_CHAIN_PAT, c0)
if m is not None:
matchGroups = m.groups()
pdbCode = matchGroups[0]
c0 = matchGroups[2]
else:
pdbCode = c0
pdbModel.set_access_code(pdbCode)
modelling.add_initial_model(pdbModel)
chain = emdb_da.chainType()
chain.set_id(c0)
pdbModel.add_chain(chain)
# Rest of chains
for c in chainsIn[1:]:
chain = emdb_da.chainType()
chain.set_id(c)
pdbModel.add_chain(chain)
self.checkSet(fit.get_details, modelling.set_details)
self.checkSet(fit.get_refSpace, modelling.set_refinement_space)
self.checkSet(fit.get_overallBValue, modelling.set_overall_bvalue)
self.checkSet(fit.get_targetCriteria, modelling.set_target_criteria)
self.checkSet(fit.get_refProtocol, modelling.set_refinement_protocol)
softList = makeSoftwareList(fit.get_software())
if softList is not None:
modelling.set_software_list(softList)
# supplements
suppIn = xmlIn.get_supplement()
if suppIn is not None:
# figure_list
figListIn = suppIn.get_figureSet()
if figListIn is not None:
figsIn = figListIn.get_figure()
figList = emdb_da.figure_listType()
for f in figsIn:
fig = emdb_da.figure_type()
fig.set_file(f.get_file())
fig.set_details(f.get_details())
figList.add_figure(fig)
if figList.hasContent_():
intrp.set_figure_list(figList)
# masks
maskSetIn = suppIn.get_maskSet()
if maskSetIn is not None:
masksIn = maskSetIn.get_mask()
segList = emdb_da.segmentation_listType()
for m in masksIn:
seg = emdb_da.segmentationType()
segList.add_segmentation(seg)
sMap = emdb_da.map_type()
copyMap(m, sMap)
seg.set_mask_details(sMap)
if segList.hasContent_():
intrp.set_segmentation_list(segList)
# slices
sliceSetIn = suppIn.get_sliceSet()
if sliceSetIn is not None:
slicesIn = sliceSetIn.get_slice()
slcList = emdb_da.slices_listType()
for s in slicesIn:
sMap = emdb_da.map_type()
copyMap(s, sMap)
if sMap.hasContent_():
slcList.add_slice(sMap)
if slcList.hasContent_():
intrp.set_slices_list(slcList)
## validation
fscSetIn = suppIn.get_fscSet()
if fscSetIn is not None:
validList = emdb_da.validationType()
fscListIn = fscSetIn.get_fsc()
if fscListIn is not None:
for fscIn in fscListIn:
fsc = emdb_da.fsc_curve()
fsc.original_tagname_ = 'fsc_curve'
fsc.set_file(fscIn.get_file())
self.checkSet(fscIn.get_details, fsc.set_details)
validList.add_validation_type(fsc)
if validList.hasContent_():
xmlOut.set_validation(validList)
# Write XML to file
f = open(outputFile, 'w') if outputFile else sys.stdout
f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
xmlOut.export(f, 0, name_='emd')
if f is not sys.stdout:
f.close()
def translate_1_9_to_1_9(self, inputFile, outputFile):
    """
    Convert input file from 1.9 to 1.9 schema. This gets tags into the same ordering as other >1.9 converters.
    Parameters:
    @param inputFile: Name of input file
    @param outputFile: Name of output file; if empty or None the XML is written to stdout
    """
    xmlOut = emdb_19.parse(inputFile, silence=True)
    # Write XML to file
    f = open(outputFile, 'w') if outputFile else sys.stdout
    try:
        f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
        xmlOut.export(f, 0, name_='emdEntry')
    finally:
        # Release the file handle even if the export fails; sys.stdout is never closed
        if f is not sys.stdout:
            f.close()
[docs] def translate_2_0_to_1_9(self, inputFile, outputFile):
"""
Convert input file from 2.0 to 1.9 schema
Parameters:
@param inputFile: Name of input file
@param outputFile: Name of output file
"""
c = self.Constants
def addExternalReferences(refIn, refOut):
    """
    Transfer every external reference of a 2.0 citation to a 1.9 citation.
    Parameters:
    @param refIn: Input (2.0) journal or non-journal citation providing get_external_references()
    @param refOut: Output (1.9) citation; one externalReference is added per input reference
    """
    for refItem in refIn.get_external_references():
        converted = emdb_19.externalRefType()
        # Type and value are carried over unchanged
        converted.set_type(refItem.get_type())
        converted.set_valueOf_(refItem.get_valueOf_())
        refOut.add_externalReference(converted)
def getAuthors(authListIn, simple=False):
    """
    Build the single author string used by 1.9 from a 2.0 author list.
    The names are copied as-is (no reformatting) and joined in input order.
    Parameters:
    @param authListIn: list of 2.0 authors; None is treated as an empty list
    @param simple: boolean - True means that the authors in 2.0 are simple strings, otherwise they are
                   journal author objects whose text is read with get_valueOf_()
    @return: Author names joined with ', '; empty string if there are no authors
    """
    if simple:
        authList = list(authListIn or [])
    else:
        authList = [authIn.get_valueOf_() for authIn in authListIn or []]
    # Joining an empty list already yields '', so no special case is needed
    return ', '.join(authList)
def copyCitation(citeIn, citeOut):
    """
    Translate a 2.0 citation into its 1.9 counterpart.
    A citation whose original tag name is 'journal_citation' becomes a journalArticle;
    anything else becomes a nonJournalArticle.
    Parameters:
    @param citeIn: Input citation in 2.0 schema
    @param citeOut: Output citation in 1.9 schema
    """
    src = citeIn.get_citation_type()
    if src.original_tagname_ == 'journal_citation':
        art = emdb_19.jrnlArtType()
        citeOut.set_journalArticle(art)
        citeOut.set_published(src.get_published())
        # Mandatory journal fields are copied unconditionally
        art.set_articleTitle(src.get_title())
        art.set_journal(src.get_journal())
        art.set_authors(getAuthors(src.get_author()))
        optionalFields = (
            ('get_volume', 'set_volume'),
            ('get_first_page', 'set_firstPage'),
            ('get_last_page', 'set_lastPage'),
            ('get_year', 'set_year'),
        )
    else:
        art = emdb_19.nonJrnlArtType()
        citeOut.set_nonJournalArticle(art)
        citeOut.set_published(src.get_published())
        art.set_authors(getAuthors(src.get_author()))
        art.set_editor(getAuthors(src.get_editor()))
        optionalFields = (
            ('get_book_chapter_title', 'set_chapterTitle'),
            ('get_book_title', 'set_book'),
            ('get_thesis_title', 'set_thesisTitle'),
            ('get_publisher', 'set_publisher'),
            ('get_publication_location', 'set_publisherLocation'),
            ('get_volume', 'set_volume'),
            ('get_first_page', 'set_firstPage'),
            ('get_last_page', 'set_lastPage'),
            ('get_year', 'set_year'),
        )
    # Optional fields go through self.checkSet, one (2.0 getter, 1.9 setter) pair at a time
    for getterName, setterName in optionalFields:
        self.checkSet(getattr(src, getterName), getattr(art, setterName))
    addExternalReferences(src, art)
def makeSoftwareFromList(softListIn):
    """
    Collapse a 2.0 software list into the single software string used by 1.9.
    Parameters:
    @param softListIn: sequence of 2.0 software objects exposing get_name(), or None
    @return: Software names joined with ', ', or None when the list is None or empty
    """
    if softListIn is None or len(softListIn) == 0:
        return None
    names = []
    for item in softListIn:
        names.append(item.get_name())
    return ', '.join(names)
def copyNaturalSource(molIn, molOut, cell = True, organelle = True, tissue = True, cellular_location = True):
"""
Copy natural source from 2.0 to 1.9
Parameters:
@param molIn: Instance of molecule or supramolecule
@param molOut: Instance of molecule type (e.g. protein) in 1.9
@param cell: Whether to generate cell field
@param organelle: Whether to generate organelle field
@param tissue: Whether to generate tissue field
@param cellular_location: Whether to generate cellular_location field
"""
nsIn = molIn.get_natural_source()
if nsIn is not None:
# get_natural_source() may hand back either a list or a single object;
# for a list only the first entry is used, and an empty list leaves ns1In as None
ns1In = None
if type(nsIn) is list and len(nsIn) > 0:
ns1In = nsIn[0]
elif type(nsIn) is not list:
ns1In = nsIn
if ns1In is not None:
self.checkSet(ns1In.get_synonym_organism, molOut.set_synSpeciesName)
# Organism -> sciSpeciesName (valueOf_ plus ncbi -> ncbiTaxId)
orgIn = ns1In.get_organism()
if orgIn is not None:
species = emdb_19.sciSpeciesType()
molOut.set_sciSpeciesName(species)
self.checkSet(orgIn.get_valueOf_, species.set_valueOf_)
self.checkSet(orgIn.get_ncbi, species.set_ncbiTaxId)
# Strain is written as a sciSpeciesType as well, using the same two fields
strainIn = ns1In.get_strain()
if strainIn is not None:
strain = emdb_19.sciSpeciesType()
molOut.set_sciSpeciesStrain(strain)
self.checkSet(strainIn.get_valueOf_, strain.set_valueOf_)
self.checkSet(strainIn.get_ncbi, strain.set_ncbiTaxId)
# The natSource element is only created when at least one of its sub-fields is requested
if cell or organelle or tissue or cellular_location:
ns = emdb_19.natSrcType()
molOut.set_natSource(ns)
if cell:
self.checkSet(ns1In.get_cell, ns.set_cell)
if organelle:
self.checkSet(ns1In.get_organelle, ns.set_organelle)
if tissue:
self.checkSet(ns1In.get_tissue, ns.set_organOrTissue)
if cellular_location:
self.checkSet(ns1In.get_cellular_location, ns.set_cellLocation)
def copyRecombinantSource(molIn, molOut):
"""
Copy recombinant source from 2.0 to 1.9
Parameters:
@param molIn: Instance of molecule or supramolecule
@param molOut: Instance of molecule type (e.g. protein) in 1.9
"""
# The recombinant expression flag is copied from the 2.0 value via self.checkSet
self.checkSet(molIn.get_recombinant_exp_flag, molOut.set_recombinantExpFlag)
engIn = molIn.get_recombinant_expression()
eng1In = None
if engIn is not None:
# get_recombinant_expression() may return a list (first entry is used) or a single object;
# an empty list leaves eng1In as None
if type(engIn) is list and len(engIn) > 0:
eng1In = engIn[0]
elif type(engIn) is not list:
eng1In = engIn
if eng1In is not None:
es = emdb_19.engSrcType()
molOut.set_engSource(es)
# Organism -> expSystem (valueOf_ plus ncbi -> ncbiTaxId)
orgIn = eng1In.get_organism()
if orgIn is not None:
expSys = emdb_19.sciSpeciesType()
es.set_expSystem(expSys)
self.checkSet(orgIn.get_valueOf_, expSys.set_valueOf_)
self.checkSet(orgIn.get_ncbi, expSys.set_ncbiTaxId)
self.checkSet(eng1In.get_strain, es.set_expSystemStrain)
self.checkSet(eng1In.get_cell, es.set_expSystemCell)
# The 2.0 plasmid is stored in the 1.9 vector field
self.checkSet(eng1In.get_plasmid, es.set_vector)
# NOTE(review): the disabled code below would have derived recombinantExpFlag from whether
# es has content; as written the flag is only ever copied from the 2.0 value above.
#if es.hasContent_():
# molOut.set_recombinantExpFlag(True)
#else:
# molOut.set_recombinantExpFlag(False)
#else:
# molOut.set_recombinantExpFlag(False)
def copyCTFAndEulerAngles(imProcIn, r, imProcOut):
    """
    Copy CTF and Euler angle info from 2.0 to 1.9 elements
    The CTF correction details go to r.set_ctfCorrection. The Euler angle details are read from
    the final angle assignment element when imProcIn provides one. If imProcIn has no such
    accessor (AttributeError), the details text of imProcIn is searched with c.EUL_ANG_PAT
    instead: the second match group becomes the Euler angle details and the first and third
    groups, concatenated, become the details of imProcOut.
    Parameters:
    @param imProcIn: image_processing object from 2.0
    @param r: reconstruction object from 1.9
    @param imProcOut: image_processing object from 1.9
    """
    ctfIn = imProcIn.get_ctf_correction()
    if ctfIn is not None:
        ctfDetails = ctfIn.get_details()
        if ctfDetails is not None:
            r.set_ctfCorrection(ctfDetails)
    # Not all elements have a euler angle element - wrap in try block.
    # Only AttributeError (missing accessor) triggers the fallback; anything else,
    # including KeyboardInterrupt/SystemExit, is allowed to propagate.
    try:
        angIn = imProcIn.get_final_angle_assignment()
        if angIn is not None:
            angDetails = angIn.get_details()
            if angDetails is not None:
                r.set_eulerAnglesDetails(angDetails)
    except AttributeError:
        # Check if info has been stored in details section
        details = imProcIn.get_details()
        if details is not None:
            m = re.search(c.EUL_ANG_PAT, details)
            if m is not None:
                matchGroups = m.groups()
                imProcOut.set_details(matchGroups[0] + matchGroups[2])
                r.set_eulerAnglesDetails(matchGroups[1])
def setMolWeight(comp, wtIn, meth=False):
    """
    Transfer experimental and theoretical molecular weight to a v1.9 component, if provided.
    Does nothing when wtIn is None.
    Parameters:
    @param comp: v1.9 component with setter functions for setting mW
    @param wtIn: Molecular weight object from 2.0
    @param meth: Whether set_molWtMethod should be called (true only for sample)
    """
    if wtIn is None:
        return
    expWtIn = wtIn.get_experimental()
    if expWtIn is not None:
        comp.set_molWtExp(emdb_19.mwType(valueOf_=expWtIn.get_valueOf_(), units=expWtIn.get_units()))
    theoWtIn = wtIn.get_theoretical()
    if theoWtIn is not None:
        comp.set_molWtTheo(emdb_19.mwType(valueOf_=theoWtIn.get_valueOf_(), units=theoWtIn.get_units()))
    if meth:
        self.checkSet(wtIn.get_method, comp.set_molWtMethod)
def setSciName(cIn, cOut):
    """
    Transfer the scientific name, and its synonym when present, from v2.0 cIn to v1.9 cOut.
    A warning is issued and nothing is set if cIn has no name object.
    Parameters:
    @param cIn: v2.0 component with getter functions for name
    @param cOut: v1.9 component with setter functions for name
    """
    nameIn = cIn.get_name()
    if nameIn is not None:
        cOut.set_sciName(nameIn.get_valueOf_())
        synonymIn = nameIn.get_synonym()
        if synonymIn is not None:
            cOut.set_synName(synonymIn)
        return
    self.warn(1, "Scientific name of biological component is None!")
def copyExternalReferences(getter, setter):
    """
    Copy external references from 2.0 to 1.9
    References of type UNIPROTKB, GO and INTERPRO are transferred; any other type is ignored.
    The 1.9 container is only created (and handed to setter) if the getter returns a non-empty collection.
    Parameters:
    @param getter: function to get external references from 2.0
    @param setter: function to set external references from 1.9
    """
    refsIn = getter()
    if refsIn is None or len(refsIn) == 0:
        return
    refsOut = emdb_19.externalReferencesType()
    setter(refsOut)
    # Map the 2.0 reference type onto the matching 1.9 add function
    adders = {
        'UNIPROTKB': refsOut.add_refUniProt,
        'GO': refsOut.add_refGo,
        'INTERPRO': refsOut.add_refInterpro,
    }
    for refIn in refsIn:
        add = adders.get(refIn.get_type())
        if add is not None:
            add(refIn.get_valueOf_())
def setCrystalParameters(impIn, specPrep):
    """
    Get v2.0 crystal parameters element and set 2D/3D crystal parameters element in v1.9
    If a plane group is given the 2D element is written; otherwise, if a space group is given,
    the 3D element is written. Nothing is set when neither group is present.
    Parameters
    @param impIn: v2.0 object of image processing extension class which is assumed to have crystal parameters
    @param specPrep: v1.9 specimen preparation object which will have the appropriate 2D/3D crystal parameters set
    """
    xtalIn = impIn.get_crystal_parameters()
    if xtalIn is None:
        return
    planeGroup = xtalIn.get_plane_group()
    isTwoD = planeGroup is not None
    if isTwoD:
        xtalOut = emdb_19.twoDxtalParamType()
        xtalOut.set_planeGroup(planeGroup)
    else:
        spaceGroup = xtalIn.get_space_group()
        if spaceGroup is None:
            return
        xtalOut = emdb_19.threeDxtalParamType()
        xtalOut.set_spaceGroup(spaceGroup)
    # Unit cell: three edge lengths followed by three angles
    cellIn = xtalIn.get_unit_cell()
    lengthPairs = (
        (cellIn.get_a, xtalOut.set_aLength),
        (cellIn.get_b, xtalOut.set_bLength),
        (cellIn.get_c, xtalOut.set_cLength),
    )
    for getLength, setLength in lengthPairs:
        self.setValueAndUnits(getLength, setLength, emdb_19.lengthType, c.U_ANGA)
    anglePairs = (
        (cellIn.get_alpha, xtalOut.set_alpha),
        (cellIn.get_beta, xtalOut.set_beta),
        (cellIn.get_gamma, xtalOut.set_gamma),
    )
    for getAngle, setAngle in anglePairs:
        self.setValueAndUnits(getAngle, setAngle, emdb_19.anglType, c.U_DEGF)
    if isTwoD:
        specPrep.set_twoDCrystalParameters(xtalOut)
    else:
        specPrep.set_threeDCrystalParameters(xtalOut)
def setHelicalSymmetry(recIn, specPrep):
    """
    Transfer helical parameters from the applied symmetry of a v2.0 reconstruction
    to a new helical parameters element on a v1.9 specimen preparation object.
    Nothing is set if the reconstruction has no applied symmetry or no helical parameters.
    Parameters:
    @param recIn: v2.0 reconstruction object
    @param specPrep: v1.9 specimen preparation object
    """
    appliedSymm = recIn.get_applied_symmetry()
    if appliedSymm is None:
        return
    helixIn = appliedSymm.get_helical_parameters()
    if helixIn is None:
        return
    helixOut = emdb_19.helixParamType()
    specPrep.set_helicalParameters(helixOut)
    self.setValueAndUnits(helixIn.get_delta_phi, helixOut.set_deltaPhi, emdb_19.anglType)
    self.setValueAndUnits(helixIn.get_delta_z, helixOut.set_deltaZ, emdb_19.lengthType)
    self.checkSet(helixIn.get_hand, helixOut.set_hand)
    self.checkSet(helixIn.get_axial_symmetry, helixOut.set_axialSymmetry)
def copyMap(mapIn, mapOut):
    """
    Copy map from 2.0 to 1.9
    Transfers file info, data type, dimensions, origin, limit (computed as origin + dimension - 1),
    spacing, cell, axis order, pixel spacing, statistics, annotation details, the primary contour
    level, the space group number and details.
    Parameters:
    @param mapIn: input 2.0 map
    @param mapOut: output 1.9 map
    """
    # Set file and related attributes
    fileOut = emdb_19.mapFileType()
    fileOut.set_valueOf_(mapIn.get_file())
    fileOut.set_format(mapIn.get_format())
    fileOut.set_sizeKb(mapIn.get_size_kbytes())
    fileOut.set_type("map")
    mapOut.set_file(fileOut)

    mapOut.set_dataType(mapIn.get_data_type())

    # Dimensions
    dimIn = mapIn.get_dimensions()
    numRows = dimIn.get_row()
    numColumns = dimIn.get_col()
    numSections = dimIn.get_sec()
    mapOut.set_dimensions(emdb_19.dimensionType(numRows=numRows, numColumns=numColumns, numSections=numSections))

    # Origin (converted to float)
    origIn = mapIn.get_origin()
    originRow = float(origIn.get_row())
    originCol = float(origIn.get_col())
    originSec = float(origIn.get_sec())
    mapOut.set_origin(emdb_19.originType(originRow=originRow, originCol=originCol, originSec=originSec))

    # Limit is not read from the input; it is the index of the last row/column/section
    limitRow = originRow + numRows - 1
    limitCol = originCol + numColumns - 1
    limitSec = originSec + numSections - 1
    mapOut.set_limit(emdb_19.limitType(limitRow=limitRow, limitCol=limitCol, limitSec=limitSec))

    # Spacing
    spcIn = mapIn.get_spacing()
    mapOut.set_spacing(emdb_19.spacingType(spcIn.get_x(), spcIn.get_y(), spcIn.get_z()))

    # Cell: units are written as fixed strings, only the values are taken from the input
    cellIn = mapIn.get_cell()
    cell = emdb_19.cellType(
        cellA=emdb_19.cType(valueOf_=cellIn.get_a().get_valueOf_(), units='A'),
        cellB=emdb_19.cType(valueOf_=cellIn.get_b().get_valueOf_(), units='A'),
        cellC=emdb_19.cType(valueOf_=cellIn.get_c().get_valueOf_(), units='A'),
        cellAlpha=emdb_19.cAngleType(valueOf_=cellIn.get_alpha().get_valueOf_(), units='degrees'),
        cellBeta=emdb_19.cAngleType(valueOf_=cellIn.get_beta().get_valueOf_(), units='degrees'),
        cellGamma=emdb_19.cAngleType(valueOf_=cellIn.get_gamma().get_valueOf_(), units='degrees'))
    mapOut.set_cell(cell)

    # Axis order
    axIn = mapIn.get_axis_order()
    mapOut.set_axisOrder(emdb_19.axisOrderType(axisOrderFast=axIn.get_fast(),
                                               axisOrderMedium=axIn.get_medium(),
                                               axisOrderSlow=axIn.get_slow()))

    # Pixel spacing: as for the cell, units are fixed strings
    pixIn = mapIn.get_pixel_spacing()
    pix = emdb_19.pixelSpacingType(
        emdb_19.pixType(valueOf_=pixIn.get_x().get_valueOf_(), units='A'),
        emdb_19.pixType(valueOf_=pixIn.get_y().get_valueOf_(), units='A'),
        emdb_19.pixType(valueOf_=pixIn.get_z().get_valueOf_(), units='A'))
    mapOut.set_pixelSpacing(pix)

    mapOut.set_statistics(mapIn.get_statistics())
    self.checkSet(mapIn.get_annotation_details, mapOut.set_annotationDetails)

    # Contour level: only contours flagged as primary are transferred
    cntrListIn = mapIn.get_contour_list()
    if cntrListIn is not None:
        # Method-based lowercasing; equivalent to the deprecated string.lower,
        # which no longer exists in Python 3
        toLower = lambda text: text.lower()
        for cntrIn in cntrListIn.get_contour():
            if cntrIn.get_primary() == True:
                cntr = emdb_19.contourLevelType()
                mapOut.set_contourLevel(cntr)
                cntr.set_valueOf_(float(cntrIn.get_level()))
                self.checkSet(cntrIn.get_source, cntr.set_source, toLower)

    # Space group number
    symmIn = mapIn.get_symmetry()
    if symmIn is not None:
        self.checkSet(symmIn.get_space_group, mapOut.set_spaceGroupNumber)

    self.checkSet(mapIn.get_details, mapOut.set_details)
xmlIn = emdb_da.parse(inputFile, silence=True)
xmlOut = emdb_19.entryType()
# Write attributes
xmlOut.set_version('1.9.6')
xmlOut.set_accessCode(self.formatEMDBCode(xmlIn.get_emdb_id(), True))
## deposition
admIn = xmlIn.get_admin()
datesIn = admIn.get_key_dates()
statusIn = admIn.get_current_status()
sitesIn = admIn.get_sites()
xRefIn = xmlIn.get_crossreferences()
citeListIn = xRefIn.get_citation_list()
auxListIn = xRefIn.get_auxiliary_link_list()
sdIn = xmlIn.get_structure_determination_list().get_structure_determination()[0] # Assume that this element exists!
mapIn = xmlIn.get_map()
intrpIn = xmlIn.get_interpretation()
validListIn = xmlIn.get_validation()
emMethod = sdIn.get_method()
dep = emdb_19.depType()
xmlOut.set_deposition(dep)
admOut = emdb_19.adminType()
xmlOut.set_admin(admOut)
statusCodeIn = statusIn.get_code().get_valueOf_()
priorStatusIn = None
dep.set_status(emdb_19.statusType(valueOf_=statusCodeIn))
if statusCodeIn == c.STS_OBS:
# check status_history_list for prior status
statusHistoryListIn = admIn.get_status_history_list()
if statusHistoryListIn is not None:
statusHistoryIn = statusHistoryListIn.get_status()
if len(statusHistoryIn) > 0:
priorIn = statusHistoryIn[0]
priorCodeIn = priorIn.get_code()
if priorCodeIn is not None:
priorStatusIn = priorCodeIn.get_valueOf_()
if priorStatusIn is None:
dep.set_status(emdb_19.statusType(valueOf_=statusCodeIn))
else:
dep.set_status(emdb_19.statusType(valueOf_=statusCodeIn, prior = priorStatusIn))
dep.set_depositionSite(sitesIn.get_deposition())
self.checkSet(sitesIn.get_last_processing, dep.set_processingSite)
dep.set_depositionDate(datesIn.get_deposition())
dep.set_headerReleaseDate(datesIn.get_header_release())
admOut.set_lastUpdate(datesIn.get_update())
self.checkSet(datesIn.get_map_release, dep.set_mapReleaseDate)
self.checkSet(admIn.get_replace_existing_entry, dep.set_replaceExistingEntry)
# obsolete list
obsListIn = admIn.get_obsolete_list()
if obsListIn is not None:
obsList = emdb_19.emdbListType()
for obsIn in obsListIn.get_entry():
obsList.add_entry(obsIn.get_entry())
if obsList.hasContent_():
dep.set_obsoleteList(obsList)
dep.set_replaceExistingEntry(True)
# supersede list
supersedeListIn = admIn.get_superseded_by_list()
if supersedeListIn is not None:
supersedeList = emdb_19.emdbListType()
for supersedeIn in supersedeListIn.get_entry():
supersedeList.add_entry(supersedeIn.get_entry())
if supersedeList.hasContent_():
dep.set_supersededByList(supersedeList)
# keywords
self.checkSet(admIn.get_keywords, dep.set_keywords)
dep.set_title(admIn.get_title())
authListIn = admIn.get_authors_list()
dep.set_authors(getAuthors(authListIn.get_author(), simple=True))
# in-frame EMDB
emdbListIn = xRefIn.get_emdb_list()
if emdbListIn is not None:
infrIn = emdbListIn.get_emdb_reference()
infrList = []
for i in infrIn:
relIn = i.get_relationship()
emdbIdIn = i.get_emdb_id()
if relIn is None: # Assume full overlap
infrList.append(emdbIdIn)
elif relIn.get_in_frame() == 'FULLOVERLAP':
infrList.append(emdbIdIn)
if len(infrList) > 0:
infrText = ', '.join(infrList)
dep.set_inFrameEMDBId(infrText)
# fitted PDB
pdbListIn = xRefIn.get_pdb_list()
if pdbListIn is not None:
fitList = emdb_19.pdbidListType()
fitIn = pdbListIn.get_pdb_reference()
for f in fitIn:
fitList.add_fittedPDBEntryId(f.get_pdb_id())
dep.set_fittedPDBEntryIdList(fitList)
# Primary citation
citeIn = citeListIn.get_primary_citation()
cite = emdb_19.prRefType()
dep.set_primaryReference(cite)
copyCitation(citeIn, cite)
# Secondary citations
for citeIn in citeListIn.get_secondary_citation():
cite = emdb_19.prRefType()
dep.add_secondaryReference(cite)
copyCitation(citeIn, cite)
## map
if mapIn is not None:
mapOut = emdb_19.mapType()
xmlOut.set_map(mapOut)
copyMap(mapIn, mapOut)
## supplement
supp = None
if intrpIn is not None:
supp = emdb_19.supplType()
# sliceSet
slcListIn = intrpIn.get_slices_list()
if slcListIn is not None:
slcsIn = slcListIn.get_slice()
slcSet = emdb_19.slcSetType()
for s in slcsIn:
slc = emdb_19.slcType()
copyMap(s, slc)
if slc.hasContent_():
slcSet.add_slice(slc)
if slcSet.hasContent_():
supp.set_sliceSet(slcSet)
# maskSet
segListIn = intrpIn.get_segmentation_list()
if segListIn is not None:
segsIn = segListIn.get_segmentation()
maskSet = emdb_19.mskSetType()
for s in segsIn:
mSegIn = s.get_mask_details()
if mSegIn is not None:
mask = emdb_19.mskType()
copyMap(mSegIn, mask)
maskSet.add_mask(mask)
if maskSet.hasContent_():
supp.set_maskSet(maskSet)
# figureSet
figListIn = intrpIn.get_figure_list()
# write this out even if it is empty - this is to minimize unnecessary elements showing in the diff during round-trip conversion
figSet = emdb_19.figSetType()
supp.set_figureSet(figSet)
if figListIn is not None:
figsIn = figListIn.get_figure()
for f in figsIn:
fig = emdb_19.figType(f.get_file(), f.get_details())
figSet.add_figure(fig)
if supp.hasContent_():
xmlOut.set_supplement(supp)
## validation
fscSet = emdb_19.fscSetType()
if validListIn is not None:
valsIn = validListIn.get_validation_type()
for valIn in valsIn:
if valIn.original_tagname_=='fsc_curve':
fsc = emdb_19.fscType()
fsc.set_file(valIn.get_file())
self.checkSet(valIn.get_details, fsc.set_details)
fscSet.add_fsc(fsc)
if fscSet.hasContent_():
supp.set_fscSet(fscSet)
## sample
sampleIn = xmlIn.get_sample()
if sampleIn is not None:
sample = emdb_19.samplType()
xmlOut.set_sample(sample)
self.checkSet(sampleIn.get_name, sample.set_name)
supMolListIn = sampleIn.get_supramolecule_list()
if supMolListIn is not None:
supMolsIn = supMolListIn.get_supramolecule()
nSupMolsIn = len(supMolsIn)
else:
nSupMolsIn = 0
molListIn = sampleIn.get_macromolecule_list()
if molListIn is not None:
molsIn = molListIn.get_macromolecule()
nMolsIn = len(molsIn)
else:
nMolsIn = 0
numCompIn = nSupMolsIn + nMolsIn
numCompSet = False
if numCompIn > 0:
compList = emdb_19.smplCompListType()
compID = 1
for smolIn in supMolsIn:
smolTypeIn = smolIn.get_extensiontype_()
comp = emdb_19.smplCompType()
comp.set_componentID(compID)
compID += 1
if smolTypeIn == 'sample':
self.checkSet(smolIn.get_oligomeric_state, sample.set_compDegree)
self.checkSet(smolIn.get_details, sample.set_details)
setMolWeight(sample, smolIn.get_molecular_weight(), meth=True)
self.checkSet(smolIn.get_number_unique_components, sample.set_numComponents)
numCompSet = True
numCompIn -= 1
compID -= 1
elif smolTypeIn == 'virus':
vir = emdb_19.virusType()
comp.set_entry('virus')
comp.set_virus(vir)
setSciName(smolIn, comp)
setMolWeight(comp, smolIn.get_molecular_weight(), meth=False)
self.checkSet(smolIn.get_details, comp.set_details)
vir.set_empty(smolIn.get_virus_empty())
vir.set_enveloped(smolIn.get_virus_enveloped())
vir.set_isolate(smolIn.get_virus_isolate())
vir.set_class(smolIn.get_virus_type())
x = smolIn.get_sci_species_name()
if x is not None:
vir.set_sciSpeciesName(emdb_19.sciSpeciesType(valueOf_=x.get_valueOf_(), ncbiTaxId=x.get_ncbi()))
self.checkSet(smolIn.get_syn_species_name, vir.set_synSpeciesName)
self.checkSet(smolIn.get_sci_species_serotype, vir.set_sciSpeciesSerotype)
self.checkSet(smolIn.get_sci_species_serocomplex, vir.set_sciSpeciesSerocomplex)
self.checkSet(smolIn.get_sci_species_subspecies, vir.set_sciSpeciesSubspecies)
self.checkSet(smolIn.get_sci_species_strain, vir.set_sciSpeciesStrain)
copyExternalReferences(smolIn.get_external_references, vir.set_externalReferences)
nsIn = smolIn.get_natural_host()
if nsIn is not None and len(nsIn) > 0:
nIn = nsIn[0]
ns = emdb_19.natSrcVirusType()
vir.add_natSource(ns)
orgIn = nIn.get_organism()
if orgIn is not None:
hs = emdb_19.sciSpeciesType()
ns.set_hostSpecies(hs)
hs.set_valueOf_(orgIn.get_valueOf_())
hs.set_ncbiTaxId(orgIn.get_ncbi())
x = nIn.get_strain()
if x is not None:
self.checkSet(x.get_valueOf_, ns.set_hostSpeciesStrain)
#self.checkSet(nIn.get_strain, ns.set_hostSpeciesStrain)
self.checkSet(nIn.get_synonym_organism, ns.set_hostCategory)
hsIn = smolIn.get_host_system()
if hsIn is not None:
es = emdb_19.engSrcType()
vir.add_engSource(es)
orgIn = hsIn.get_organism()
if orgIn is not None:
species = emdb_19.sciSpeciesType()
es.set_expSystem(species)
self.checkSet(orgIn.get_valueOf_, species.set_valueOf_)
self.checkSet(orgIn.get_ncbi, species.set_ncbiTaxId)
self.checkSet(hsIn.get_strain, es.set_expSystemStrain)
self.checkSet(hsIn.get_cell, es.set_expSystemCell)
self.checkSet(hsIn.get_plasmid, es.set_vector)
shellListIn = smolIn.get_virus_shell()
for shellIn in shellListIn:
shell = emdb_19.shellType()
self.checkSet(shellIn.get_name, shell.set_nameElement)
self.checkSet(shellIn.get_diameter, shell.set_diameter)
self.checkSet(shellIn.get_triangulation, shell.set_tNumber)
self.checkSet(shellIn.get_id, shell.set_id)
vir.add_shell(shell)
compList.add_sampleComponent(comp)
elif smolTypeIn == 'organelle_or_cellular_component':
cc = emdb_19.cellCompType()
comp.set_entry('cellular-component')
comp.set_cellular_component(cc)
setSciName(smolIn, comp)
setMolWeight(comp, smolIn.get_molecular_weight(), meth=False)
copyNaturalSource(smolIn, cc)
copyRecombinantSource(smolIn, cc)
self.checkSet(smolIn.get_oligomeric_state, cc.set_oligomericDetails)
self.checkSet(smolIn.get_number_of_copies, cc.set_numCopies)
self.checkSet(smolIn.get_details, comp.set_details)
copyExternalReferences(smolIn.get_external_references, cc.set_externalReferences)
compList.add_sampleComponent(comp)
elif smolTypeIn == 'complex':
categIn = smolIn.get_category()
if categIn is not None:
catIn = smolIn.get_category().get_valueOf_()
else:
catIn = ''
if catIn == 'ribosome-eukaryote':
rib = emdb_19.riboTypeEu()
comp.set_entry('ribosome-eukaryote')
comp.set_ribosome_eukaryote(rib)
rib.set_eukaryote(smolIn.get_ribosome_details())
setSciName(smolIn, comp)
setMolWeight(comp, smolIn.get_molecular_weight(), meth=False)
copyNaturalSource(smolIn, rib)
copyRecombinantSource(smolIn, rib)
self.checkSet(smolIn.get_details, comp.set_details)
copyExternalReferences(smolIn.get_external_references, rib.set_externalReferences)
elif catIn == 'ribosome-prokaryote':
rib = emdb_19.riboTypePro()
comp.set_entry('ribosome-prokaryote')
comp.set_ribosome_prokaryote(rib)
rib.set_prokaryote(smolIn.get_ribosome_details())
setSciName(smolIn, comp)
setMolWeight(comp, smolIn.get_molecular_weight(), meth=False)
copyNaturalSource(smolIn, rib)
copyRecombinantSource(smolIn, rib)
self.checkSet(smolIn.get_details, comp.set_details)
copyExternalReferences(smolIn.get_external_references, rib.set_externalReferences)
else:
cplx = emdb_19.proteinType()
comp.set_entry('protein')
comp.set_protein(cplx)
setSciName(smolIn, comp)
setMolWeight(comp, smolIn.get_molecular_weight(), meth=False)
self.checkSet(smolIn.get_oligomeric_state, cplx.set_oligomericDetails)
self.checkSet(smolIn.get_number_of_copies, cplx.set_numCopies)
self.checkSet(smolIn.get_details, cplx.set_details)
copyNaturalSource(smolIn, cplx)
copyRecombinantSource(smolIn, cplx)
copyExternalReferences(smolIn.get_external_references, cplx.set_externalReferences)
compList.add_sampleComponent(comp)
for molIn in molsIn:
molTypeIn = molIn.get_extensiontype_()
comp = emdb_19.smplCompType()
comp.set_componentID(compID)
compID += 1
compList.add_sampleComponent(comp)
setSciName(molIn, comp)
setMolWeight(comp, molIn.get_molecular_weight(), meth=False)
if molTypeIn == 'protein_or_peptide':
prot = emdb_19.proteinType()
comp.set_entry('protein')
comp.set_protein(prot)
self.checkSet(molIn.get_oligomeric_state, prot.set_oligomericDetails)
self.checkSet(molIn.get_number_of_copies, prot.set_numCopies)
self.checkSet(molIn.get_details, comp.set_details)
copyNaturalSource(molIn, prot)
copyRecombinantSource(molIn, prot)
seqIn = molIn.get_sequence()
copyExternalReferences(seqIn.get_external_references, prot.set_externalReferences)
elif molTypeIn == 'ligand':
lig = emdb_19.ligandType()
comp.set_entry('ligand')
comp.set_ligand(lig)
self.checkSet(molIn.get_oligomeric_state, lig.set_oligomericDetails)
self.checkSet(molIn.get_number_of_copies, lig.set_numCopies)
self.checkSet(molIn.get_details, comp.set_details)
copyNaturalSource(molIn, lig)
copyRecombinantSource(molIn, lig)
copyExternalReferences(molIn.get_external_references, lig.set_externalReferences)
elif molTypeIn == 'em_label':
lab = emdb_19.labelType()
comp.set_entry('label')
comp.set_label(lab)
self.checkSet(molIn.get_oligomeric_state, lab.set_oligomericDetails)
self.checkSet(molIn.get_number_of_copies, lab.set_numCopies)
self.checkSet(molIn.get_details, comp.set_details)
self.checkSet(molIn.get_formula, lab.set_formula)
elif molTypeIn in ['dna', 'rna']:
na = emdb_19.nuclAcidType()
comp.set_entry('nucleic-acid')
comp.set_nucleic_acid(na)
self.checkSet(molIn.get_details, comp.set_details)
self.checkSet(molIn.get_structure, na.set_structure)
self.checkSet(molIn.get_synthetic_flag, na.set_syntheticFlag)
copyNaturalSource(molIn, na, cell = False, organelle = False, tissue = False, cellular_location = False)
seqIn = molIn.get_sequence()
if seqIn is not None:
na.set_sequence(seqIn.get_string())
if molTypeIn == 'rna':
naClassIn = molIn.get_classification()
if naClassIn == 'TRANSFER':
na.set_class('T-RNA')
else:
na.set_class('RNA')
elif molTypeIn == 'dna':
na.set_class('DNA')
elif molTypeIn =='other_macromolecule':
molClassIn = molIn.get_classification()
if molClassIn is not None and molClassIn in ['DNA/RNA', 'OTHER_NA']:
na = emdb_19.nuclAcidType()
comp.set_entry('nucleic-acid')
comp.set_nucleic_acid(na)
if molClassIn == 'OTHER_NA':
na.set_class('OTHER')
else:
na.set_class(molClassIn)
self.checkSet(molIn.get_details, comp.set_details)
self.checkSet(molIn.get_structure, na.set_structure)
self.checkSet(molIn.get_synthetic_flag, na.set_syntheticFlag)
copyNaturalSource(molIn, na, cell = False, organelle = False, tissue = False, cellular_location = False)
seqIn = molIn.get_sequence()
if seqIn is not None:
na.set_sequence(seqIn.get_string())
if numCompSet == False:
sample.set_numComponents(numCompIn)
if compList.hasContent_():
sample.set_sampleComponentList(compList)
## experiment
if sdIn is not None:
exp = emdb_19.expType()
xmlOut.set_experiment(exp)
# in 2.0 vitrification and specimen preparation are combined - therefore deal with them at the same time
specPrepListIn = sdIn.get_specimen_preparation_list().get_specimen_preparation()
for spIn in specPrepListIn:
vitrIn = spIn.get_vitrification()
if vitrIn is not None:
vitr = emdb_19.vitrifType()
exp.add_vitrification(vitr)
vitr.set_cryogenName(vitrIn.get_cryogen_name())
self.checkSet(vitrIn.get_instrument, vitr.set_instrument)
self.checkSet(vitrIn.get_method, vitr.set_method)
self.checkSet(vitrIn.get_details, vitr.set_details)
self.checkSet(vitrIn.get_timed_resolved_state, vitr.set_timeResolvedState)
x = vitrIn.get_chamber_humidity()
if x is not None:
vitr.set_humidity(str(x.get_valueOf_()))
x = vitrIn.get_chamber_temperature()
if x is not None:
vitr.set_temperature(emdb_19.tempType(valueOf_ = x.get_valueOf_(), units = 'Kelvin'))
# Only the first element sets the specimen preparation element in 1.9 (as there is only one element and not an array as in 2.0)
# in some cases for 2D arrays, a second element has been defined in the transfer from 1.9 -> 2.0.
# ID 2 will in these cases contain crystal grow details
spPrepType = spIn.get_extensiontype_()
spInId = spIn.get_id()
if spInId == 1:
sp = emdb_19.smplPrepType()
self.checkSet(sdIn.get_aggregation_state, sp.set_specimenState)
if spPrepType == c.EXT_CRYSTALLOGRAPHY_PREPARATION_TYPE:
x = spIn.get_crystal_formation()
if x is not None:
self.checkSet(x.get_details, sp.set_crystalGrowDetails)
concIn = spIn.get_concentration()
if concIn is not None:
conc = emdb_19.samplConcType()
conc.set_units('mg/ml')
conc.set_valueOf_(concIn.get_valueOf_())
sp.set_specimenConc(conc)
bufIn = spIn.get_buffer()
if bufIn is not None:
buf = emdb_19.bufferType()
self.checkSet(bufIn.get_ph, buf.set_ph)
self.checkSet(bufIn.get_details, buf.set_details)
sp.set_buffer(buf)
stainIn = spIn.get_staining()
if stainIn is not None:
self.checkSet(stainIn.get_details, sp.set_staining)
gridIn = spIn.get_grid()
if gridIn is not None:
self.checkSet(gridIn.get_details,sp.set_specimenSupportDetails)
specPrep_1 = sp # Forward reference for x-tal image processing
elif spInId != 1 and vitrIn is None:
if spPrepType == c.EXT_CRYSTALLOGRAPHY_PREPARATION_TYPE:
x = spIn.get_crystal_formation()
if x is not None:
self.checkSet(x.get_details, specPrep_1.set_crystalGrowDetails)
# imaging
micListIn = sdIn.get_microscopy_list().get_microscopy()
for m in micListIn:
micId = m.get_id()
micType = m.get_extensiontype_()
img = emdb_19.imgType()
exp.add_imaging(img)
#if m.original_tagname_ == 'microscopy':
img.set_electronSource(m.get_electron_source())
img.set_imagingMode(m.get_imaging_mode())
img.set_illuminationMode(m.get_illumination_mode())
img.set_microscope(m.get_microscope())
spopIn = m.get_specialist_optics()
if spopIn is not None:
egf = spopIn.get_energy_filter()
if egf is not None:
img.set_energyFilter(egf.get_name())
elowIn = egf.get_lower_energy_threshold()
ehighIn = egf.get_upper_energy_threshold()
eText = None
if elowIn is not None:
if elowIn < 0:
eText = 'none'
else: # assume that both low and high are defined in this case
eMinIn = elowIn.get_valueOf_()
if eMinIn == '':
eMinIn = 0
eText = '%g-%g' % (float(eMinIn), float(ehighIn.get_valueOf_()))
eWin = emdb_19.eWindowType(valueOf_=eText, units='eV')
img.set_energyWindow(eWin)
self.checkSet(m.get_nominal_defocus_min, img.set_nominalDefocusMin)
self.checkSet(m.get_nominal_defocus_max, img.set_nominalDefocusMax)
self.checkSet(m.get_specimen_holder_model, img.set_specimenHolderModel)
self.checkSet(m.get_details, img.set_details)
self.checkSet(m.get_nominal_magnification, img.set_nominalMagnification)
self.checkSet(m.get_calibrated_magnification, img.set_calibratedMagnification)
self.checkSet(m.get_specimen_holder, img.set_specimenHolder)
self.checkSet(m.get_details, img.set_details)
#self.checkSet(m.get_average_temperature, img.set_temperature)
x = m.get_nominal_defocus_min()
if x is not None:
img.set_nominalDefocusMin(emdb_19.defocusType(valueOf_ = x.get_valueOf_(), units='nm'))
x = m.get_nominal_defocus_max()
if x is not None:
img.set_nominalDefocusMax(emdb_19.defocusType(valueOf_ = x.get_valueOf_(), units='nm'))
x = m.get_acceleration_voltage()
if x is not None:
img.set_acceleratingVoltage(emdb_19.accVoltType(valueOf_ = x.get_valueOf_(), units='kV'))
x = m.get_nominal_cs()
if x is not None:
img.set_nominalCs(emdb_19.csType(valueOf_ = x.get_valueOf_(), units='mm'))
x = m.get_date()
if x is not None:
img.set_date(x.strftime(c.EM_DATE_FORMAT).upper())
if micType == c.EXT_TOMOGRAPHY_MICROSCOPY_TYPE:
tiltSeriesListIn =m.get_tilt_series()
tiltIncIn = None
if len(tiltSeriesListIn) > 0:
tsIn = tiltSeriesListIn[0]
axisIn = tsIn.get_axis1()
tiltIncIn = axisIn.get_angle_increment() # Forward reference that will be dealt with in image processing
self.setValueAndUnits(axisIn.get_min_angle, img.set_tiltAngleMin, emdb_19.tiltType, c.U_DEGF)
self.setValueAndUnits(axisIn.get_max_angle, img.set_tiltAngleMax, emdb_19.tiltType, c.U_DEGF)
else:
tiltMinIn = m.get_tilt_angle_min()
tiltMaxIn = m.get_tilt_angle_max()
if tiltMinIn is not None:
img.set_tiltAngleMin(emdb_19.tiltType(valueOf_=tiltMinIn, units='degrees'))
if tiltMaxIn is not None:
img.set_tiltAngleMax(emdb_19.tiltType(valueOf_=tiltMaxIn, units='degrees'))
# alignment
aliIn = m.get_alignment_procedure()
if aliIn is not None:
legAliIn = aliIn.get_legacy()
if legAliIn is not None:
self.checkSet(legAliIn.get_astigmatism, img.set_astigmatism)
self.checkSet(legAliIn.get_electron_beam_tilt_params, img.set_electronBeamTiltParams)
# temperature
tempIn = m.get_temperature()
if tempIn is not None:
tempAvIn = tempIn.get_temperature_average()
tempMaxIn = tempIn.get_temperature_max()
tempMinIn = tempIn.get_temperature_min()
if tempAvIn is not None:
img.set_temperature(emdb_19.tempType(valueOf_=tempAvIn.get_valueOf_(), units=tempAvIn.get_units()))
if tempMaxIn is not None:
img.set_temperatureMax(emdb_19.tempType(valueOf_=tempMaxIn.get_valueOf_(), units=tempMaxIn.get_units()))
if tempMinIn is not None:
img.set_temperatureMin(emdb_19.tempType(valueOf_=tempMinIn.get_valueOf_(), units=tempMinIn.get_units()))
# image acquisition - restriction is that 1.9 can only handle one detector but multiple image acquisitions
imRecListIn = m.get_image_recording_list().get_image_recording()
if imRecListIn is not None:
lenImRecListIn = len(imRecListIn)
if lenImRecListIn > 0:
imRecIn = imRecListIn[0]
if lenImRecListIn > 1:
self.warn(1, 'More that one image recording found (%d in total) for microscopy (%d) - only using detector from the first!' % (lenImRecListIn, micId))
i = 0
for imRecIn in imRecListIn:
imAc = emdb_19.imgScanType()
if i == 0:
self.setValueAndUnits(imRecIn.get_average_electron_dose_per_image, img.set_electronDose, constructor=emdb_19.eDoseType, units=c.U_EL_A2)
self.checkSet(imRecIn.get_detector_distance, img.set_detectorDistance)
fod = imRecIn.get_film_or_detector_model()
if fod is not None:
img.set_detector(fod.get_valueOf_())
digIn = imRecIn.get_digitization_details()
if digIn is not None:
self.checkSet(digIn.get_scanner, imAc.set_scanner)
self.setValueAndUnits(digIn.get_sampling_interval, imAc.set_samplingSize, emdb_19.samplSizeType, c.U_MCRN)
"""
x = digIn.get_sampling_interval()
if x is not None:
imAc.set_samplingSize(emdb_19.samplSizeType(valueOf_ = x.get_valueOf_(), units = 'microns'))
"""
if auxListIn is not None:
x = auxListIn.get_auxiliary_link()
if len(x) > 0:
imAc.set_URLRawData(x[0].get_link())
self.checkSet(imRecIn.get_number_real_images, imAc.set_numDigitalImages)
self.checkSet(imRecIn.get_details, imAc.set_details)
self.checkSet(imRecIn.get_od_range, imAc.set_odRange)
self.checkSet(imRecIn.get_bits_per_pixel, imAc.set_quantBitNumber)
if imAc.hasContent_():
exp.add_imageAcquisition(imAc)
# fitting
if intrpIn is not None:
fitListIn = intrpIn.get_modelling_list()
if fitListIn is not None:
fitIn = fitListIn.get_modelling()
for f in fitIn:
fit = emdb_19.fittingType()
modsIn = f.get_initial_model()
if len(modsIn) > 0:
pdbList = emdb_19.pdbidList2Type()
fit.set_pdbEntryIdList(pdbList)
for modIn in modsIn:
pdbFitCodeIn = modIn.get_access_code()
pdbList.add_pdbEntryId(pdbFitCodeIn)
chainsIn = modIn.get_chain()
for chIn in chainsIn:
chIdIn = chIn.get_id()
# The following code prefixes the chain ID with the PDB code otherwise there is no direct linkage between them
if len(chIdIn)<=2:
pdbList.add_pdbChainId('%s_%s' % (pdbFitCodeIn, chIdIn))
else:
pdbList.add_pdbChainId(chIdIn)
softListIn = f.get_software_list()
if softListIn is not None:
softStr = makeSoftwareFromList(softListIn.get_software())
if softStr is not None:
fit.set_software(softStr)
self.checkSet(f.get_target_criteria, fit.set_targetCriteria)
self.checkSet(f.get_overall_bvalue, fit.set_overallBValue)
self.checkSet(f.get_refinement_protocol, fit.set_refProtocol)
self.checkSet(f.get_refinement_space, fit.set_refSpace)
self.checkSet(f.get_details, fit.set_details)
if fit.hasContent_():
exp.add_fitting(fit)
## processing
impListIn = sdIn.get_image_processing()
if impListIn is not None and len(impListIn) > 0:
proc = emdb_19.processType()
xmlOut.set_processing(proc)
for impIn in impListIn:
# We assume here that reconstruction information exists in all the 2.0 processing types
recIn = impIn.get_final_reconstruction()
rec = emdb_19.reconsType()
proc.add_reconstruction(rec)
softListIn = recIn.get_software_list()
if softListIn is not None:
softStr = makeSoftwareFromList(softListIn.get_software())
if softStr is not None:
rec.set_software(softStr)
resIn = recIn.get_resolution()
if resIn is not None:
rec.set_resolutionByAuthor(resIn.get_valueOf_())
self.checkSet(recIn.get_resolution_method, rec.set_resolutionMethod)
self.checkSet(recIn.get_algorithm, rec.set_algorithm)
self.checkSet(recIn.get_details, rec.set_details)
# Extract method specific info only for the first image processing instance
# There is only one instance to store this info in 1.9!
if impIn.get_id() == 1:
if emMethod in [c.EMM_SP, c.EMM_STOM, c.EMM_TOM, c.EMM_HEL]:
proc.set_method(emMethod)
elif emMethod == c.EMM_EC:
proc.set_method('twoDCrystal')
if emMethod == c.EMM_SP:
procSpec = emdb_19.singPartType()
proc.set_singleParticle(procSpec)
self.checkSet(recIn.get_number_images_used, procSpec.set_numProjections)
spClsIn = impIn.get_final_two_d_classification()
if spClsIn is not None:
self.checkSet(spClsIn.get_number_classes, procSpec.set_numClassAverages)
self.checkSet(impIn.get_details, procSpec.set_details)
setHelicalSymmetry(recIn, specPrep_1)
symmIn = recIn.get_applied_symmetry()
if symmIn is not None:
self.checkSet(symmIn.get_point_group, procSpec.set_appliedSymmetry)
elif emMethod == c.EMM_STOM:
procSpec = emdb_19.subTomType()
self.checkSet(recIn.get_number_subtomograms_used, procSpec.set_numSubtomograms)
savClsIn = impIn.get_final_three_d_classification()
if savClsIn is not None:
self.checkSet(savClsIn.get_number_classes, procSpec.set_numClassAverages)
self.checkSet(impIn.get_details, procSpec.set_details)
setCrystalParameters(impIn, specPrep_1)
setHelicalSymmetry(recIn, specPrep_1)
symmIn = recIn.get_applied_symmetry()
if symmIn is not None:
self.checkSet(symmIn.get_point_group, procSpec.set_appliedSymmetry)
if procSpec.hasContent_():
proc.set_subtomogramAveraging(procSpec)
elif emMethod == c.EMM_TOM:
procSpec = emdb_19.tomogrType()
proc.set_tomography(procSpec)
self.checkSet(recIn.get_number_images_used, procSpec.set_numSections)
self.checkSet(impIn.get_details, procSpec.set_details)
setCrystalParameters(impIn, specPrep_1)
setHelicalSymmetry(recIn, specPrep_1)
if tiltIncIn is not None:
procSpec.set_tiltAngleIncrement(tiltIncIn.get_valueOf_())
symmIn = recIn.get_applied_symmetry()
if symmIn is not None:
self.checkSet(symmIn.get_point_group, procSpec.set_appliedSymmetry)
elif emMethod == c.EMM_EC:
procSpec = emdb_19.xtal2DType()
proc.set_twoDCrystal(procSpec)
setCrystalParameters(impIn, specPrep_1)
setHelicalSymmetry(recIn, specPrep_1)
self.checkSet(impIn.get_details, procSpec.set_details)
elif emMethod == c.EMM_HEL:
algIn = recIn.get_algorithm()
if algIn is not None:
m = re.match(c.HEL_SP_PAT, algIn)
if m is not None:
matchGroups = m.groups()
hxMethodIn = matchGroups[1]
algStr = matchGroups[0] + matchGroups[2]
else:
hxMethodIn = c.EMM_HEL
algStr = algIn
rec.set_algorithm(algStr)
else:
hxMethodIn = c.EMM_HEL
if hxMethodIn == c.EMM_HEL:
procSpec = emdb_19.helixType()
proc.set_helical(procSpec)
setCrystalParameters(impIn, specPrep_1)
else:
procSpec = emdb_19.singPartType()
proc.set_singleParticle(procSpec)
self.checkSet(recIn.get_number_images_used, procSpec.set_numProjections)
self.checkSet(recIn.get_number_classes_used, procSpec.set_numClassAverages)
self.checkSet(impIn.get_details, procSpec.set_details)
setHelicalSymmetry(recIn, specPrep_1)
# Euler angles and ctf have to be set for all reconstruction objects
copyCTFAndEulerAngles(impIn, rec, procSpec)
if specPrep_1.hasContent_():
exp.set_specimenPreparation(specPrep_1)
# Write XML to file
f = open(outputFile, 'w') if outputFile else sys.stdout
f.write('<?xml version="1.0" encoding="UTF-8"?>\n')
xmlOut.export(f, 0, name_='emdEntry')
if f is not sys.stdout:
f.close()
def main():
    """
    Convert EMDB XML files from one schema version to another.

    Parses the command line, checks that an input file was given and that the
    requested (input schema, output schema) pair is supported, then calls the
    matching EMDBXMLTranslator method with the input file and the optional
    output file.

    Exits through sys.exit() with an explanatory message when no input file is
    specified or when the requested conversion is not supported.
    """
    # Handle command line options
    usage = """
emdb_xml_translate.py [options] inputFile
Convert EMDB XML files from one schema version to another
Examples:
python emdb_xml_translate.py inputFile
Typical run:
python emdb_xml_translate.py -f out.xml -i 1.9 -o 2.0 in.xml
in.xml is assumed to be a EMDB 1.9 XML file and converted to
an XML file following EMDB XML schema 2.0 and written out to out.xml
"""
    version = "0.6"

    # Single source of truth for the supported conversions: maps
    # (input schema, output schema) to the name of the translator method.
    # Method names are resolved lazily so that only the requested
    # conversion has to exist on the translator.
    conversions = {
        ("1.9", "2.0"): "translate_1_9_to_2_0",
        ("1.9", "1.9"): "translate_1_9_to_1_9",
        ("2.0", "1.9"): "translate_2_0_to_1_9",
    }

    parser = OptionParser(usage=usage, version=version)
    parser.add_option("-i", "--in-schema", action="store", type="string", metavar="SCHEMA", dest="inputSchema", default="1.9", help="Schema version of input file - 1.9 or 2.0 [default: %default]")
    parser.add_option("-o", "--out-schema", action="store", type="string", metavar="SCHEMA", dest="outputSchema", default="2.0", help="Schema version of output file - 1.9 or 2.0 [default: %default]")
    parser.add_option("-f", "--out-file", action="store", type="string", metavar="FILE", dest="outputFile", help="Write output to FILE")
    parser.add_option("-w", "--warning-level", action="store", type="int", dest="warningLevel", default=1, help="Level of warning output. 0 is none, 3 is max, default = 1")
    (options, args) = parser.parse_args()

    # Check for sensible/supported options
    if len(args) < 1:
        sys.exit("No input file specified!")
    inputFile = args[0]

    # Reject every pair that is not explicitly supported; previously some
    # unsupported combinations (e.g. 2.0 -> 2.0) passed the check and the
    # script then finished silently without converting anything.
    methodName = conversions.get((options.inputSchema, options.outputSchema))
    if methodName is None:
        sys.exit("Conversion from version %s to %s not supported!" % (options.inputSchema, options.outputSchema))

    # Call appropriate conversion routine
    translator = EMDBXMLTranslator()
    translator.setWarningLevel(options.warningLevel)
    getattr(translator, methodName)(inputFile, options.outputFile)
# Run the command-line converter only when this file is executed as a script,
# so that importing the module does not trigger argument parsing.
if __name__ == "__main__":
    main()