#!/usr/bin/env python
"""
emdb_20_to_json
Reads in an EMDB header file following the 2.0 schema and outputs summary information as JSON.
This is a stub example showing how to use emdb_da.py to read the header file.
TODO:
Version history:
Copyright [2014-2016] EMBL - European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the
"License"); you may not use this file except in
compliance with the License. You may obtain a copy of
the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
"""
# Module metadata
__author__ = 'Ardan Patwardhan'
__email__ = 'ardan@ebi.ac.uk'
__date__ = '2016-01-05'
import sys
import logging
import traceback
import json
from datetime import date
from optparse import OptionParser
import emdb_da
from emdb_settings import emdb_settings
class EMDBXML20JSONTranslator:
    """
    Class for translating summary info from EMDB 2.0 to a JSON file
    """

    def __init__(self):
        """
        Set the default warning level and configure logging from emdb_settings
        """
        self.warningLevel = 1  # 0 = min, 3 = max
        logging.basicConfig(level=emdb_settings.log_level, format=emdb_settings.log_format)

    def setWarningLevel(self, level):
        """
        Set the level of logging warnings. 0 = no warnings, 3 = max warnings, 1 = default
        Values outside the range are clamped to 0 or 3.

        Parameters
        @param level: warning level 0 -> 3
        """
        if level <= 0:
            self.warningLevel = 0
        elif level >= 3:
            self.warningLevel = 3
        else:
            self.warningLevel = level

    def warn(self, level, msg):
        """
        Log a warning message but take into account the warningLevel

        Parameters:
        @param level: the message is logged only if level <= warningLevel,
                      i.e. a higher warningLevel lets more messages through
        @param msg: warning message
        """
        if level <= self.warningLevel:
            logging.warning(msg)

    def translate(self, inputFile, outputFile):
        """
        Translate EMDB 2.0 to a JSON. Summary info only

        Parameters
        @param inputFile: input file in EMDB 2.0 XML
        @param outputFile: output JSON file; if empty or None the JSON is written to sys.stdout
        """
        def checkSet(getX, key, jsonDict, transform=None):
            """
            Set jsonDict[key] only if getX does not return None

            Parameters:
            @param getX: getter function that must return value
            @param key: key in dictionary to be set jsonDict[key]
            @param jsonDict: JSON dictionary whose key will be set, jsonDict[key]
            @param transform: Apply transform(x) to the value before storing it
            """
            value = getX()
            if value is None:
                return
            if transform is not None:
                try:
                    value = transform(value)
                except Exception:
                    # Best effort: a value that cannot be transformed is skipped
                    # and only reported at the most verbose warning level.
                    self.warn(3, "function checkSet: Transform function did not work: %s(%s)" % (transform, value))
                    self.warn(3, traceback.format_exc())
                    return
            jsonDict[key] = value

        def getAuthors(authListIn, simple=False):
            """
            Get authors from 2.0 and return comma separated string

            Parameters
            @param authListIn: list object of 2.0 author objects
            @param simple: boolean - True means that the authors in 2.0 are simple strings, otherwise they are journal authors
            @return: authors joined with ', ' (empty string if there are none)
            """
            if simple:
                authList = list(authListIn)
            else:
                authList = [authIn.get_valueOf_() for authIn in authListIn]
            # join() of an empty list already yields ''
            return ', '.join(authList)

        def copyCitation(refIn):
            """
            Return JSON object containing citation - more complex example...

            Parameters:
            @param refIn: Input citation in 2.0 schema
            @return: python dictionary with reference info
            """
            citeTypeIn = refIn.get_citation_type()
            refOut = {}
            if citeTypeIn.original_tagname_ == 'journal_citation':
                refOut['citationType'] = 'Journal'
                refOut['authors'] = getAuthors(citeTypeIn.get_author())
                checkSet(citeTypeIn.get_title, 'title', refOut)
                checkSet(citeTypeIn.get_journal, 'journal', refOut)
                checkSet(citeTypeIn.get_published, 'published', refOut)
                # This is a fix because of bad data - emd-1648.xml has an empty volume tag!
                vol = citeTypeIn.get_volume()
                if vol is not None and len(vol) > 0:
                    refOut['volume'] = vol
                checkSet(citeTypeIn.get_first_page, 'firstPage', refOut)
                checkSet(citeTypeIn.get_last_page, 'lastPage', refOut)
                checkSet(citeTypeIn.get_year, 'year', refOut)
            else:
                refOut['citationType'] = 'Non-journal'
                refOut['authors'] = getAuthors(citeTypeIn.get_author())
                checkSet(citeTypeIn.get_editor, 'editor', refOut)
                checkSet(citeTypeIn.get_book_chapter_title, 'chapterTitle', refOut)
                checkSet(citeTypeIn.get_book_title, 'title', refOut)
                checkSet(citeTypeIn.get_thesis_title, 'thesisTitle', refOut)
                checkSet(citeTypeIn.get_published, 'published', refOut)
                checkSet(citeTypeIn.get_publisher, 'publisher', refOut)
                checkSet(citeTypeIn.get_publication_location, 'location', refOut)
                checkSet(citeTypeIn.get_volume, 'volume', refOut)
                checkSet(citeTypeIn.get_first_page, 'firstPage', refOut)
                checkSet(citeTypeIn.get_last_page, 'lastPage', refOut)
                checkSet(citeTypeIn.get_year, 'year', refOut)
            return refOut

        xmlIn = emdb_da.parse(inputFile, silence=True)
        jsonOut = {}

        admIn = xmlIn.get_admin()
        datesIn = admIn.get_key_dates()
        statusIn = admIn.get_current_status()
        authListIn = admIn.get_authors_list()
        xRefIn = xmlIn.get_crossreferences()
        citeListIn = xRefIn.get_citation_list()
        citeIn = citeListIn.get_primary_citation()

        checkSet(xmlIn.get_emdb_id, 'emdbId', jsonOut)
        checkSet(admIn.get_title, 'title', jsonOut)
        checkSet(statusIn.get_code().get_valueOf_, 'status', jsonOut)
        jsonOut['authors'] = getAuthors(authListIn.get_author(), simple=True)

        # dates - converted with str() before being stored
        checkSet(datesIn.get_deposition, 'depositionDate', jsonOut, str)
        checkSet(datesIn.get_header_release, 'headerReleaseDate', jsonOut, str)
        checkSet(datesIn.get_map_release, 'mapReleaseDate', jsonOut, str)

        # primary citation
        jsonOut['citation'] = copyCitation(citeIn)

        if outputFile:
            # The context manager closes the file even if json.dump raises
            with open(outputFile, 'w') as fd:
                json.dump(jsonOut, fd)
        else:
            # sys.stdout is not ours to close
            json.dump(jsonOut, sys.stdout)
def main(argv=None):
    """
    Extract summary info from EMDB XML 2.0 file to JSON

    Parameters:
    @param argv: optional list of command line arguments (without the program name);
                 when None the option parser reads sys.argv[1:]
    """
    # Handle command line options
    usage = (
        "\n"
        "emdb_20_to_json.py [options] inputFile\n"
        "Convert EMDB XML\n"
        "Examples:\n"
        "python emdb_20_to_json.py inputFile\n"
        "Typical run:\n"
        "python emdb_20_to_json.py -f out.json in.xml\n"
        "in.xml is assumed to be a EMDB 2.0 XML file\n"
        "out.json is a JSON file created with the summary information\n"
    )
    version = "0.1"
    parser = OptionParser(usage=usage, version=version)
    parser.add_option("-f", "--out-file", action="store", type="string", metavar="FILE", dest="outputFile", help="Write output to FILE")
    parser.add_option("-w", "--warning-level", action="store", type="int", dest="warningLevel", default=1, help="Level of warning output. 0 is none, 3 is max, default = 1")
    (options, args) = parser.parse_args(argv)

    # Check for sensible/supported options
    if len(args) < 1:
        sys.exit("No input file specified!")
    inputFile = args[0]

    # Call appropriate conversion routine
    translator = EMDBXML20JSONTranslator()
    translator.setWarningLevel(options.warningLevel)
    translator.translate(inputFile, options.outputFile)
# Run the command line interface only when executed as a script, not on import
if __name__ == "__main__":
    main()