# Source code for tooldog.biotool_model
#! /usr/bin/env python3
"""
Model used to process the information contained in the JSON description of a tool
from https://bio.tools.
A description on https://bio.tools is provided as a JSON document, and this
model aims to store the different pieces of information it contains.
"""
import requests
import logging
from lxml import etree
from ruamel.yaml.scalarstring import PreservedScalarString
LOGGER = logging.getLogger(__name__)
# Class(es) ------------------------------
class Biotool(object):
    """
    Model of one tool entry from https://bio.tools.
    """

    def __init__(self, name, tool_id, version, description, homepage):
        """
        Store the basic fields of the entry and prepare its empty containers.

        :param name: Name of the tool.
        :type name: STRING
        :param tool_id: ID of the tool entry.
        :type tool_id: STRING
        :param version: Version of the tool entry.
        :type version: STRING
        :param description: Description of the tool entry.
        :type description: STRING
        :param homepage: URL to homepage.
        :type homepage: STRING

        The object starts with two empty lists:

        * functions: list of :class:`tooldog.biotool_model.Function`
        * topics: list of :class:`tooldog.biotool_model.Topic`

        and with a fresh :class:`tooldog.biotool_model.Informations` object, which
        can be filled using :meth:`tooldog.biotool_model.Biotool.set_informations`.
        When the homepage starts with ``https://github.com`` it is also recorded
        as a ``Repository`` link in the informations.
        """
        self.name, self.tool_id, self.version = name, tool_id, version
        self.description, self.homepage = description, homepage
        self.functions, self.topics = [], []
        self.informations = Informations()
        hosted_on_github = self.homepage.startswith('https://github.com')
        if hosted_on_github:
            repository = {'url': self.homepage, 'type': 'Repository', 'comment': ''}
            self.informations.links.append(Link(repository))

    def generate_galaxy_help(self):
        """
        Build the help message from the description, homepage and ID of the tool.

        :return: a help message for Galaxy XML.
        :rtype: STRING
        """
        pieces = [
            "\n\nWhat it is ?\n",
            "============\n\n",
            self.description,
            "\n\n",
            "External links:\n",
            "===============\n\n",
            "- Tool homepage_\n",
            "- bio.tools_ entry\n\n",
            ".. _homepage: ",
            self.homepage,
            "\n",
            ".. _bio.tools: https://bio.tools/tool/",
            self.tool_id,
        ]
        return "".join(pieces)

    def generate_cwl_doc(self):
        """
        Build the doc text from the description, homepage and ID of the tool.

        :return: a doc for CWL tool description.
        :rtype: STRING
        """
        pieces = [
            self.description,
            "\n\n",
            "External links:\n",
            "Tool homepage: ",
            self.homepage,
            "\n",
            "bio.tools entry: ",
            self.tool_id,
            "\n\n",
        ]
        return "".join(pieces)

    def set_informations(self, tool_credits, contacts, publications, docs,
                         language, links, download):
        """
        Fill the :class:`tooldog.biotool_model.Informations` object of the Biotool.

        :param tool_credits: list of different tool_credits.
        :type tool_credits: LIST of DICT
        :param contacts: list of different contacts.
        :type contacts: LIST of DICT
        :param publications: list of different IDs for publications.
        :type publications: LIST of DICT
        :param docs: list of different documentations.
        :type docs: LIST of DICT
        :param language: value stored as is in the ``language`` attribute.
        :param links: list of different links.
        :type links: LIST of DICT
        :param download: list of different downloads, stored as links as well.
        :type download: LIST of DICT
        """
        infos = self.informations
        infos.tool_credits.extend(Credit(entry) for entry in tool_credits)
        infos.contacts.extend(Contact(entry) for entry in contacts)
        infos.publications.extend(Publication(entry) for entry in publications)
        infos.documentations.extend(Documentation(entry) for entry in docs)
        infos.language = language
        # Links and downloads end up in the same list, links first.
        infos.links.extend(Link(entry) for entry in links)
        infos.links.extend(Link(entry) for entry in download)

    def add_functions(self, functions):
        """
        Append one :class:`tooldog.biotool_model.Function` object per given
        description to the list of functions of the Biotool object.

        :param functions: list of functions description from https://bio.tools.
        :type functions: LIST of DICT
        """
        for description in functions:
            # Build the Function with its operations, then its inputs and outputs
            new_function = Function(description['operation'])
            new_function.add_inputs(description['input'])
            new_function.add_outputs(description['output'])
            # Register it on the biotool
            self.functions.append(new_function)

    def add_topics(self, topics):
        """
        Append one :class:`tooldog.biotool_model.Topic` object per given
        description to the list of topics of the Biotool object.

        :param topics: list of topics description from https://bio.tools.
        :type topics: LIST of DICT
        """
        self.topics.extend(Topic(entry) for entry in topics)
class Informations(object):
    """
    Container for the additional information attached to a bio.tools entry.
    """

    def __init__(self):
        """
        :class:`tooldog.biotool_model.Informations` object is initialized with six
        empty lists:

        * publications: list of :class:`tooldog.biotool_model.Publication`
        * documentations: list of :class:`tooldog.biotool_model.Documentation`
        * contacts: list of :class:`tooldog.biotool_model.Contact`
        * tool_credits: list of :class:`tooldog.biotool_model.Credit`
        * language: list of coding language
        * links: list of :class:`tooldog.biotool_model.Link`
        """
        # Every attribute gets its own, independent list.
        self.publications, self.documentations = [], []
        self.contacts, self.tool_credits = [], []
        self.language, self.links = [], []
class Link(object):
    """
    Holder for one entry of the links or download content.
    """

    def __init__(self, link):
        """
        Copy the ``url``, ``type`` and ``comment`` values of the given entry.

        :param link: links or download content of the JSON from http://bio.tools.
        :type link: DICT
        """
        self.url, self.type, self.comment = link['url'], link['type'], link['comment']
class Credit(object):
    """
    Holder for one credit entry.
    """

    def __init__(self, credit):
        """
        Copy every credit field of the JSON onto an attribute of the object.

        :param credit: credit part of the JSON from http://bio.tools.
        :type credit: DICT
        """
        # (attribute name, key in the JSON), in the order the keys are read
        attribute_to_key = (
            ('comment', 'comment'),
            ('email', 'email'),
            ('grid_id', 'gridId'),
            ('name', 'name'),
            ('type_entity', 'typeEntity'),
            ('type_role', 'typeRole'),
            ('url', 'url'),
            ('orcid_id', 'orcidId'),
        )
        for attribute, json_key in attribute_to_key:
            setattr(self, attribute, credit[json_key])
class Publication(object):
    """
    Class to store one publication information.
    """

    def __init__(self, publication):
        """
        Store the identifiers of the publication; when no DOI is given, try to
        look it up from the PMID or the PMCID.

        :param publication: publication part of the JSON from http://bio.tools.
        :type publication: DICT
        """
        self.doi = publication['doi']  # [STRING]
        self.pmid = publication['pmid']  # [STRING]
        self.pmcid = publication['pmcid']  # [STRING]
        self.type = publication['type']  # [STRING]
        if self.doi is None:
            self._fetch_doi()

    def _fetch_doi(self):
        """
        Fetch the DOI from the PMID (preferred) or the PMCID using:
        https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0

        The lookup is best effort: when no identifier is available, when the
        request or the parsing fails, or when the answer holds no DOI,
        ``self.doi`` is left untouched.
        """
        id_query = self.pmid if self.pmid is not None else self.pmcid
        if id_query is None:
            # Neither a PMID nor a PMCID: there is nothing to convert.
            return
        # Identifiers are expected as strings; str() guards the concatenation.
        id_query = str(id_query)
        req = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?ids=" + id_query
        try:
            # A timeout prevents the construction of the model from hanging
            # forever on an unresponsive server.
            response = requests.get(req, timeout=30)
            # Parse the raw bytes: lxml rejects unicode strings that carry an
            # XML encoding declaration.
            xml_req = etree.fromstring(response.content)
        except (requests.RequestException, etree.XMLSyntaxError) as err:
            LOGGER.warning("Could not fetch doi corresponding to %s: %s", id_query, err)
            return
        record = xml_req.find('record')
        if record is None:
            return
        doi = record.get('doi')
        if doi is None:
            LOGGER.warning("Could not find doi corresponding to " + id_query)
            return
        self.doi = doi
class Documentation(object):
    """
    Holder for one documentation entry.
    """

    def __init__(self, documentation):
        """
        Copy the ``url``, ``type`` and ``comment`` values of the given entry.

        :param documentation: documentation part of the JSON from http://bio.tools.
        :type documentation: DICT
        """
        for field in ('url', 'type', 'comment'):
            # Attribute names match the JSON keys, all values are [STRING]
            setattr(self, field, documentation[field])
class Contact(object):
    """
    Holder for one contact entry.
    """

    def __init__(self, contact):
        """
        Copy the ``email`` and ``name`` values of the given entry.

        :param contact: contact part of the JSON from http://bio.tools.
        :type contact: DICT
        """
        # Both values are [STRING].
        # NOTE: 'contactRole', 'contactTel' and 'contactURL' are not stored.
        self.email, self.name = contact['email'], contact['name']
class Function(object):
    """
    One function of the entry, together with its operations, inputs and outputs.
    """

    def __init__(self, edams):
        """
        Wrap every given EDAM annotation in a
        :class:`tooldog.biotool_model.Operation` object.

        :param edams: EDAM ontology for operation(s) with uri and term.
        :type edams: LIST of DICT

        :class:`tooldog.biotool_model.Function` object is initialized with two empty
        lists of objects:

        * inputs: list of :class:`tooldog.biotool_model.Input`
        * outputs: list of :class:`tooldog.biotool_model.Output`
        """
        self.operations = [Operation(annotation) for annotation in edams]
        self.inputs, self.outputs = [], []

    def add_inputs(self, inputs):
        """
        Append one :class:`tooldog.biotool_model.Input` object per given entry.

        :param inputs: inputs part of one function from http://bio.tools.
        :type inputs: LIST of DICT
        """
        # Each entry provides its EDAM data type and its list of formats
        self.inputs.extend(Input(entry['data'], entry['format']) for entry in inputs)

    def add_outputs(self, outputs):
        """
        Append one :class:`tooldog.biotool_model.Output` object per given entry.

        :param outputs: outputs part of one function from http://bio.tools.
        :type outputs: LIST of DICT
        """
        # Each entry provides its EDAM data type and its list of formats
        self.outputs.extend(Output(entry['data'], entry['format']) for entry in outputs)
class Data(object):
    """
    Piece of data described with the EDAM ontology.
    """

    def __init__(self, data_type, formats, description=None):
        """
        Wrap the data type in a :class:`tooldog.biotool_model.DataType` object and
        every format in a ``Format`` object.

        :param data_type: EDAM ontology for the data type with uri and term.
        :type data_type: DICT
        :param formats: EDAM ontology for data formats with uri and term.
        :type formats: LIST of DICT
        :param description: description of the data (DEPRECATED)
        :type description: STRING
        """
        self.data_type = DataType(data_type)
        self.formats = [Format(annotation) for annotation in formats]
        self.description = description
class Input(Data):
    """
    Data consumed by a described function.
    """

    def __init__(self, data_type, formats, description=None):
        """
        Forward all the arguments to :class:`tooldog.biotool_model.Data`.

        :param data_type: EDAM ontology for the data type with uri and term.
        :type data_type: DICT
        :param formats: EDAM ontology for data formats with uri and term.
        :type formats: LIST of DICT
        :param description: description of the data (DEPRECATED)
        :type description: STRING
        """
        super(Input, self).__init__(data_type, formats, description)
class Output(Data):
    """
    Data produced by a described function.
    """

    def __init__(self, data_type, formats, description=None):
        """
        Forward all the arguments to :class:`tooldog.biotool_model.Data`.

        :param data_type: EDAM ontology for the data type with uri and term.
        :type data_type: DICT
        :param formats: EDAM ontology for data formats with uri and term.
        :type formats: LIST of DICT
        :param description: description of the data (DEPRECATED)
        :type description: STRING
        """
        super(Output, self).__init__(data_type, formats, description)
class Edam(object):
    """
    EDAM annotation: a uri and its corresponding term.
    """

    def __init__(self, edam):
        """
        Copy the ``uri`` and ``term`` values of the given annotation.

        :param edam: EDAM ontology with uri and term.
        :type edam: DICT
        """
        self.uri, self.term = edam['uri'], edam['term']

    def get_edam_id(self):
        """
        Extract the EDAM id, i.e. what follows the last ``/`` of the uri.

        :return: EDAM id from the uri.
        :rtype: STRING
        """
        # Without any '/' in the uri, the whole uri is returned
        _, _, edam_id = self.uri.rpartition('/')
        return edam_id
class Operation(Edam):
    """
    EDAM operation annotation attached to a function.
    """

    def __init__(self, edam):
        """
        Forward the annotation to :class:`tooldog.biotool_model.Edam`.

        :param edam: EDAM ontology with uri and term.
        :type edam: DICT
        """
        super(Operation, self).__init__(edam)
class DataType(Edam):
    """
    EDAM data annotation attached to either an input or an output.
    """

    def __init__(self, edam):
        """
        Forward the annotation to :class:`tooldog.biotool_model.Edam`.

        :param edam: EDAM ontology with uri and term.
        :type edam: DICT
        """
        super(DataType, self).__init__(edam)