# Source code for emdbXMLTranslator.diff_all
#!/usr/bin/env python
"""
diff_all.py
Diff canonical EMDB v1.9 XML files against v1.9 XML files translated from v2.0 files
TODO:
Version history:
Copyright [2014-2016] EMBL - European Bioinformatics Institute
Licensed under the Apache License, Version 2.0 (the
"License"); you may not use this file except in
compliance with the License. You may obtain a copy of
the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on
an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
"""
__author__ = 'Ardan Patwardhan'
__email__ = 'ardan@ebi.ac.uk'
__date__ = '2014-11-30'
import glob
import os
import logging
import subprocess
from optparse import OptionParser
from emdb_settings import emdb_settings
# Configure the root logger with the level and message format taken from emdb_settings.
logging.basicConfig(level=emdb_settings.log_level, format=emdb_settings.log_format)
def diff_all(v19Dir, v20To19Dir, outDir):
    """
    Diff canonical v1.9 files against v1.9 files translated from v2.0 files

    Every file matching 'emd-*.xml' in v20To19Dir is compared, using
    'diff -b', with the file of the same name in v19Dir. The standard output
    of each diff is written to a file in outDir that has the same base name
    as the entry but with the suffix .txt. A summary of the run is logged
    once all files have been processed.

    Parameters:
    v19Dir     -- directory with the canonical EMDB 1.9 XML files
    v20To19Dir -- directory with EMDB 1.9 XML files converted from EMDB XML 2.0
    outDir     -- existing directory in which the diff output files are written
    """
    commandListBase = ['diff', '-b']
    filePathTemplate = os.path.join(v20To19Dir, 'emd-*.xml')
    # glob returns files in arbitrary order; sort so that runs are reproducible
    emdbFiles = sorted(glob.glob(filePathTemplate))
    numSuccess = 0
    errorList = []
    for emdbFile in emdbFiles:
        inf = os.path.basename(emdbFile)
        outf = os.path.join(outDir, os.path.splitext(inf)[0] + '.txt')
        v19File = os.path.join(v19Dir, inf)
        v20To19File = os.path.join(v20To19Dir, inf)
        logging.info("v1.9 file: %s, v2.0 to 1.9 file: %s, output file: %s", v19File, v20To19File, outf)
        commandList = commandListBase + [v19File, v20To19File]
        logging.info('Executing: %s', ' '.join(commandList))
        with open(outf, 'w') as outFile:
            exitCode = subprocess.call(commandList, stdout=outFile)
        # diff exits with 0 (no differences) or 1 (differences found) when it
        # ran properly. Anything else is a failure: 2 or more means trouble,
        # and a negative value means the process was killed by a signal.
        if exitCode in (0, 1):
            numSuccess += 1
        else:
            errorList.append(inf)
    logging.warning('%d files successfully processed!', numSuccess)
    if errorList:
        logging.warning('%d errors!', len(errorList))
        logging.warning('List of entries that were not translated')
        for entry in errorList:
            logging.warning(entry)
def main():
    """
    Command-line entry point: read the three directory options (falling back
    to the defaults in emdb_settings) and run diff_all on them
    """
    # One row per directory option: (short flag, long flag, dest, default, help)
    dirOptions = (
        ("-i", "--v19dir", "v19Dir", emdb_settings.emdb19To19Dir,
         "Input directory with the canonical EMDB 1.9 XML files (one part of the diff) [default: %default]"),
        ("-j", "--v20to19dir", "v20To19Dir", emdb_settings.emdb20To19Dir,
         "Input directory with EMDB 1.9 XML files converted from EMDB XML 2.0 (the other part of the diff) [default: %default]"),
        ("-o", "--out-dir", "outDir", emdb_settings.diffDir,
         "Output directory with the diff files with the same name as the entry but with the suffix .txt [default: %default]"),
    )

    usageText = """
python diff_all.py [options]
Diff v1.9 files with 1.9 files translated from 2.0 files
Examples:
python diff_all.py
Typical run:
python diff_all.py -i '/data/emdb19_to_19' -j '/data/emdb20_to_19' -o '/data/emdb_diff'
/data/emdb19_to_19 is the input directory with the canonical EMDB 1.9 XML files (one part of the diff)
/data/emdb20_to_19 is the input directory with EMDB 1.9 XML files converted from EMDB XML 2.0 (the other part of the diff)
/data/emdb_diff is the output directory with the diff files with the same name as the entry but with the suffix .txt
"""

    # Handle command line options
    cliParser = OptionParser(usage=usageText, version="0.1")
    for shortFlag, longFlag, destName, defaultDir, helpText in dirOptions:
        cliParser.add_option(shortFlag, longFlag, action="store", type="string",
                             metavar="DIR", dest=destName, default=defaultDir,
                             help=helpText)

    # Positional arguments are accepted by the parser but not used
    opts, _positional = cliParser.parse_args()
    diff_all(opts.v19Dir, opts.v20To19Dir, opts.outDir)
# Run the command-line entry point only when this file is executed as a script.
if __name__ == "__main__":
    main()