# Source code for tooldog.annotate.galaxy
#!/usr/bin/env python3
"""
Generation of XML for Galaxy from https://bio.tools based on the ToolDog model using
the galaxyxml library.
"""
# Import ------------------------------
# General libraries
import os
import copy
import logging
# External libraries
from lxml import etree
import galaxyxml.tool as gxt
import galaxyxml.tool.parameters as gxtp
from galaxyxml.tool.import_xml import GalaxyXmlParser
# Class and Objects
from .edam_to_galaxy import EdamToGalaxy
from tooldog import __version__
# Constant(s) ------------------------------
# Module-level logger, named after this module.
LOGGER = logging.getLogger(__name__)
# Template of the XML comment attached to every generated parameter. The single
# ``%s`` placeholder is filled in later with the bio.tools identifier of the tool.
PARAM_COMMENT = (
    "This parameter has been automatically generated from"
    " https://bio.tools/tool/%s by ToolDog v" + str(__version__) + "."
)
# XML comment reminding the maintainer to wire the parameter into the command.
FIXME = "FIXME: Please map this parameter to its command line argument."
# Class(es) ------------------------------
class GalaxyToolGen(object):
    """
    Class to support generation of XML from :class:`tooldog.biotool_model.Biotool` object.

    The instance wraps a galaxyxml tool (``self.tool``) which is either created
    from scratch or imported from an existing XML descriptor, and is then
    enriched through the ``add_*`` methods before being exported by
    :meth:`write_xml`.
    """

    def __init__(self, biotool, galaxy_url=None, edam_url=None, mapping_json=None,
                 existing_tool=None):
        """
        Initialize a [Tool] object from galaxyxml with the minimal information
        (a name, an id, a version, a description, the command, the command version
        and a help).

        :param biotool: Biotool object of an entry from https://bio.tools.
        :type biotool: :class:`tooldog.biotool_model.Biotool`
        :param galaxy_url: forwarded as is to :class:`EdamToGalaxy`.
        :param edam_url: forwarded as is to :class:`EdamToGalaxy`.
        :param mapping_json: forwarded as is to :class:`EdamToGalaxy`.
        :param existing_tool: existing tool XML handed to
            ``GalaxyXmlParser.import_xml``; when given (truthy) the tool is
            imported and annotated instead of being created from scratch.
        """
        # Initialize GalaxyInfo
        self.etog = EdamToGalaxy(galaxy_url=galaxy_url, edam_url=edam_url,
                                 mapping_json=mapping_json)
        # Initialize counters for inputs and outputs from bio.tools
        self.input_ct = 0
        self.output_ct = 0
        self.biotool_id = biotool.tool_id
        if existing_tool:
            LOGGER.info("Loading existing XML from %s", existing_tool)
            gxp = GalaxyXmlParser()
            self.tool = gxp.import_xml(existing_tool)
            # Fill in the description with the first sentence from bio.tools
            # when the imported <description> element has no text
            description_node = self.tool.root.find('description')
            if description_node.text is None:
                description_node.text = biotool.description.split('.')[0] + '.'
            # Add information about Tooldog version
            self.tool.add_comment("This tool descriptor has been annotated by ToolDog v" +
                                  __version__)
            # Help if missing or TODO
            if self.tool.help is None:
                self.tool.help = biotool.generate_galaxy_help()
            elif "TODO" in self.tool.help:
                LOGGER.info("TODO has been found in help, content has been replaced.")
                self.tool.help = biotool.generate_galaxy_help()
        else:
            LOGGER.info("Creating new GalaxyToolGen object...")
            # Initialize tool
            #   Get the first sentence of the description only
            description = biotool.description.split('.')[0] + '.'
            self.tool = gxt.Tool(biotool.name, biotool.tool_id, biotool.version,
                                 description, "COMMAND", version_command="COMMAND --version")
            self.tool.help = biotool.generate_galaxy_help()
            #   Add information about Galaxy and EDAM in the XML
            self.tool.add_comment("Information was obtained from the Galaxy instance: " +
                                  self.etog.galaxy_url + " v" +
                                  self.etog.galaxy_version + " and EDAM v" +
                                  self.etog.edam_version)
            # Add information about Tooldog version
            self.tool.add_comment("This tool descriptor has been generated by ToolDog v" +
                                  __version__)

    def _resolve_formats(self, data_uri, format_objs):
        """
        Build the comma separated list of Galaxy datatypes of an input/output.

        :param data_uri: EDAM id of the data type.
        :param format_objs: objects exposing ``get_edam_id()``; may be empty or None.
        :return: datatypes returned by ``self.etog.get_datatype`` joined with ``', '``.
        :rtype: STRING
        """
        if not format_objs:
            # No format described: resolve the datatype from the data type only
            datatypes = [self.etog.get_datatype(edam_data=data_uri)]
        else:
            datatypes = [self.etog.get_datatype(edam_data=data_uri,
                                                edam_format=format_obj.get_edam_id())
                         for format_obj in format_objs]
        return ', '.join(datatypes)

    def _add_param_comments(self, param):
        """
        Prepend the provenance and FIXME XML comments to a parameter node.

        :param param: galaxyxml parameter whose ``node`` receives the comments.
        """
        # Both comments are inserted at index 0, so the one inserted last
        # (the provenance comment) ends up first in the node.
        param.node.insert(0, etree.Comment(FIXME))
        param.node.insert(0, etree.Comment(PARAM_COMMENT % (self.biotool_id)))

    def add_edam_topic(self, topic):
        """
        Add the EDAM topic to the tool (XML: <edam_topics>).

        :param topic: Topic object.
        :type topic: :class:`tooldog.biotool_model.Topic`
        """
        edam_id = topic.get_edam_id()
        LOGGER.debug("Adding EDAM topic %s to GalaxyToolGen object.", edam_id)
        if not hasattr(self.tool, 'edam_topics'):
            # First time we add topics to the tool
            self.tool.edam_topics = gxtp.EdamTopics()
        # Skip topics that are already listed
        if not self.tool.edam_topics.has_topic(edam_id):
            self.tool.edam_topics.append(gxtp.EdamTopic(edam_id))

    def add_edam_operation(self, operation):
        """
        Add the EDAM operation to the tool (XML: <edam_operations>).

        :param operation: Operation object.
        :type operation: :class:`tooldog.biotool_model.Operation`
        """
        edam_id = operation.get_edam_id()
        LOGGER.debug("Adding EDAM operation %s to GalaxyToolGen object.", edam_id)
        if not hasattr(self.tool, 'edam_operations'):
            # First time we add operations to the tool
            self.tool.edam_operations = gxtp.EdamOperations()
        # Skip operations that are already listed
        if not self.tool.edam_operations.has_operation(edam_id):
            self.tool.edam_operations.append(gxtp.EdamOperation(edam_id))

    def add_input_file(self, input_obj):
        """
        Add an input to the tool (XML: <inputs>).

        The parameter is named ``INPUT<n>`` where ``<n>`` is the running count
        of inputs added through this object.

        :param input_obj: Input object.
        :type input_obj: :class:`tooldog.biotool_model.Input`
        """
        LOGGER.debug("Adding input to GalaxyToolGen object...")
        if not hasattr(self.tool, 'inputs'):
            self.tool.inputs = gxtp.Inputs()
        # Build parameter
        self.input_ct += 1
        data_uri = input_obj.data_type.get_edam_id()
        # Give unique name to the input
        name = 'INPUT' + str(self.input_ct)
        # Get all different format for this input
        formats = self._resolve_formats(data_uri, input_obj.formats)
        # Create the parameter
        param = gxtp.DataParam(name, label=input_obj.data_type.term,
                               help=input_obj.description, format=formats)
        # Override the corresponding arguments in the command line
        param.command_line_override = '--' + name + ' $' + name
        # Write comment about this param
        self._add_param_comments(param)
        # Appends parameter to inputs
        self.tool.inputs.append(param)

    def add_output_file(self, output):
        """
        Add an output to the tool (XML: <outputs>).

        The parameter is named ``OUTPUT<n>`` where ``<n>`` is the running count
        of outputs added through this object.

        :param output: Output object.
        :type output: :class:`tooldog.biotool_model.Output`
        """
        LOGGER.debug("Adding output to GalaxyToolGen object...")
        if not hasattr(self.tool, 'outputs'):
            self.tool.outputs = gxtp.Outputs()
        # Build parameter
        self.output_ct += 1
        data_uri = output.data_type.get_edam_id()
        # Give unique name to the output
        name = 'OUTPUT' + str(self.output_ct)
        # Get all different format for this output
        formats = self._resolve_formats(data_uri, output.formats)
        # Create the parameter; dots of the datatype are turned into slashes
        # in the file name looked up in the working directory
        param = gxtp.OutputData(name, format=formats,
                                from_work_dir=name + "." + formats.replace('.', '/'))
        param.command_line_override = ''
        # Write comment about this param
        self._add_param_comments(param)
        self.tool.outputs.append(param)

    def add_citation(self, publication):
        """
        Add publication(s) to the tool (XML: <citations>).

        Only a DOI is turned into a citation; a publication that only has a
        pmid or a pmcid is skipped with a warning.

        :param publication: Publication object.
        :type publication: :class:`tooldog.biotool_model.Publication`
        """
        LOGGER.debug("Adding citation to GalaxyToolGen object...")
        if not hasattr(self.tool, 'citations'):
            self.tool.citations = gxtp.Citations()
        # Add citation depending the type (doi, pmid...)
        if publication.doi is not None:
            if not self.tool.citations.has_citation('doi', publication.doi):
                self.tool.citations.append(gxtp.Citation('doi', publication.doi))
        # <citation> only supports doi and bibtex as a type
        elif publication.pmid is not None:
            LOGGER.warning('pmid is not supported by <citation>, citation skipped')
        elif publication.pmcid is not None:
            LOGGER.warning('pmcid is not supported by <citation>, citation skipped')

    def write_xml(self, out_file=None, index=None, keep_old_command=False):
        """
        Write XML to STDOUT or out_file(s).

        :param out_file: path to output file. Its extension is replaced by
            ``.xml``; when None the XML is printed to STDOUT.
        :type out_file: STRING
        :param index: Index in case more than one function is described. It is
            appended to the file name (before ``.xml``) or shown in a header
            line on STDOUT.
        :type index: INT
        :param keep_old_command: forwarded as is to the ``export`` method of the tool.
        """
        # Copy informations to avoid expansion of xml in case we write several XMLs
        export_tool = copy.deepcopy(self.tool)
        # Give XML on STDout
        if out_file is None:
            if index is not None:
                print('########## XML number ' + str(index) + ' ##########')
            LOGGER.info("Writing XML file to STDOUT")
            print(export_tool.export(keep_old_command).decode('utf-8'))
            return
        # Format name for output file(s)
        suffix = '' if index is None else str(index)
        out_file = os.path.splitext(out_file)[0] + suffix + '.xml'
        LOGGER.info("Writing XML file to %s", out_file)
        # The exported bytes are decoded as UTF-8, so write the file as UTF-8
        # too instead of relying on the platform default encoding
        with open(out_file, 'w', encoding='utf-8') as file_w:
            file_w.write(export_tool.export(keep_old_command).decode('utf-8'))