# Source code for tooldog.biotool_model

#! /usr/bin/env python3

"""
Model used to process information contained in JSON from https://bio.tools description.

The content of a description on https://bio.tools is contained in a JSON file and this
model aims to store the different information.
"""

import requests
import logging
from lxml import etree
from ruamel.yaml.scalarstring import PreservedScalarString

# Module-level logger named after this module; Publication._fetch_doi uses it
# to report DOIs that could not be resolved.
LOGGER = logging.getLogger(__name__)

#  Class(es)  ------------------------------


class Biotool(object):
    """
    Representation of one entry from https://bio.tools.
    """

    def __init__(self, name, tool_id, version, description, homepage):
        """
        Store the basic fields of the entry and prepare empty containers.

        :param name: Name of the tool.
        :type name: STRING
        :param tool_id: ID of the tool entry.
        :type tool_id: STRING
        :param version: Version of the tool entry.
        :type version: STRING
        :param description: Description of the tool entry.
        :type description: STRING
        :param homepage: URL to homepage.
        :type homepage: STRING

        The new object starts with two empty lists:

        * functions: list of :class:`tooldog.biotool_model.Function`
        * topics: list of :class:`tooldog.biotool_model.Topic`

        and a fresh :class:`tooldog.biotool_model.Informations` object, which
        can be filled with
        :meth:`tooldog.biotool_model.Biotool.set_informations`. When the
        homepage starts with ``https://github.com`` it is also recorded as a
        ``Repository`` link in the informations.
        """
        self.name = name
        self.tool_id = tool_id
        self.version = version
        self.description = description
        self.homepage = homepage
        self.functions = []  # Function objects
        self.topics = []  # Topic objects
        self.informations = Informations()
        if not self.homepage.startswith('https://github.com'):
            return
        # A GitHub homepage doubles as the source repository of the tool.
        repository = Link({'url': self.homepage,
                           'type': 'Repository',
                           'comment': ''})
        self.informations.links.append(repository)

    def generate_galaxy_help(self):
        """
        Build the help text (reStructuredText) for the Galaxy XML.

        :return: a help message for Galaxy XML.
        :rtype: STRING
        """
        pieces = [
            "\n\nWhat it is ?\n",
            "============\n\n",
            self.description,
            "\n\n",
            "External links:\n",
            "===============\n\n",
            "- Tool homepage_\n",
            "- bio.tools_ entry\n\n",
            ".. _homepage: ",
            self.homepage,
            "\n",
            ".. _bio.tools: https://bio.tools/tool/",
            self.tool_id,
        ]
        return "".join(pieces)

    def generate_cwl_doc(self):
        """
        Build the ``doc`` text for the CWL tool description.

        :return: a doc for CWL tool description.
        :rtype: STRING
        """
        pieces = [
            self.description,
            "\n\n",
            "External links:\n",
            "Tool homepage: ",
            self.homepage,
            "\n",
            "bio.tools entry: ",
            self.tool_id,
            "\n\n",
        ]
        return "".join(pieces)

    def set_informations(self, tool_credits, contacts, publications, docs,
                         language, links, download):
        """
        Fill the :class:`tooldog.biotool_model.Informations` object of the
        Biotool.

        :param tool_credits: list of different tool_credits.
        :type tool_credits: LIST of DICT
        :param contacts: list of different contacts.
        :type contacts: LIST of DICT
        :param publications: list of different IDs for publications.
        :type publications: LIST of DICT
        :param docs: list of different documentations.
        :type docs: LIST of DICT
        :param language: value stored as-is in ``informations.language``.
        :param links: link descriptions, each wrapped in a ``Link``.
        :param download: download descriptions, also wrapped in ``Link`` and
                         appended to the same list after ``links``.
        """
        info = self.informations
        info.tool_credits.extend(Credit(entry) for entry in tool_credits)
        info.contacts.extend(Contact(entry) for entry in contacts)
        info.publications.extend(Publication(entry) for entry in publications)
        info.documentations.extend(Documentation(entry) for entry in docs)
        info.language = language
        info.links.extend(Link(entry) for entry in links)
        info.links.extend(Link(entry) for entry in download)

    def add_functions(self, functions):
        """
        Append one :class:`tooldog.biotool_model.Function` object per given
        description to the functions of the Biotool object.

        :param functions: list of functions description from https://bio.tools.
        :type functions: LIST of DICT
        """
        for description in functions:
            new_function = Function(description['operation'])
            new_function.add_inputs(description['input'])
            new_function.add_outputs(description['output'])
            self.functions.append(new_function)

    def add_topics(self, topics):
        """
        Append one :class:`tooldog.biotool_model.Topic` object per given
        description to the topics of the Biotool object.

        :param topics: list of topics description from https://bio.tools.
        :type topics: LIST of DICT
        """
        self.topics.extend(Topic(entry) for entry in topics)
class Informations(object):
    """
    Container for the additional information attached to a bio.tools entry.
    """

    def __init__(self):
        """
        Create the object with six independent empty lists:

        * publications: list of :class:`tooldog.biotool_model.Publication`
        * documentations: list of :class:`tooldog.biotool_model.Documentation`
        * contacts: list of :class:`tooldog.biotool_model.Contact`
        * tool_credits: list of :class:`tooldog.biotool_model.Credit`
        * language: list of coding language
        * links: list of :class:`tooldog.biotool_model.Link`
        """
        list_attributes = (
            'publications',
            'documentations',
            'contacts',
            'tool_credits',
            'language',
            'links',
        )
        for attribute in list_attributes:
            # A new list per attribute so that none of them is shared.
            setattr(self, attribute, [])
class Credit(object):
    """
    Holder for one credit entry.
    """

    def __init__(self, credit):
        """
        Copy the fields of a credit description onto the object.

        :param credit: credit part of the JSON from http://bio.tools.
        :type credit: DICT
        """
        # (attribute on the object, key in the JSON); all values are
        # documented as STRING in the original model.
        field_map = (
            ('comment', 'comment'),
            ('email', 'email'),
            ('grid_id', 'gridId'),
            ('name', 'name'),
            ('type_entity', 'typeEntity'),
            ('type_role', 'typeRole'),
            ('url', 'url'),
            ('orcid_id', 'orcidId'),
        )
        for attribute, key in field_map:
            setattr(self, attribute, credit[key])
class Publication(object):
    """
    Class to store one publication information.
    """

    # Seconds to wait for the NCBI ID converter before giving up.
    _IDCONV_TIMEOUT = 30

    def __init__(self, publication):
        """
        Store the publication identifiers; when no DOI is given, try to
        resolve one from the PMID or PMCID (this performs a network request).

        :param publication: publication part of the JSON from http://bio.tools.
        :type publication: DICT
        """
        self.doi = publication['doi']  # [STRING]
        self.pmid = publication['pmid']  # [STRING]
        self.pmcid = publication['pmcid']  # [STRING]
        self.type = publication['type']  # [STRING]
        if self.doi is None:
            self._fetch_doi()

    def _fetch_doi(self):
        """
        Fetch the DOI from the PMID (preferred) or the PMCID using:
        https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0

        This is a best-effort lookup: ``self.doi`` is left untouched when no
        identifier is available, when the service cannot be reached, or when
        its answer does not contain a DOI.
        """
        id_query = self.pmid if self.pmid is not None else self.pmcid
        if id_query is None:
            # Neither identifier is known: there is nothing to look up.
            return
        # Identifiers may come out of the JSON as numbers.
        id_query = str(id_query)
        req = "https://www.ncbi.nlm.nih.gov/pmc/utils/idconv/v1.0/?ids=" + id_query
        try:
            response = requests.get(req, timeout=self._IDCONV_TIMEOUT)
            # Parse the raw bytes: lxml refuses unicode strings that carry an
            # XML encoding declaration.
            xml_req = etree.fromstring(response.content)
        except (requests.RequestException, etree.XMLSyntaxError) as error:
            LOGGER.warning("Could not retrieve doi corresponding to %s: %s",
                           id_query, error)
            return
        record = xml_req.find('record')
        if record is None:
            return
        doi = record.get('doi')
        if doi is None:
            LOGGER.warning("Could not find doi corresponding to " + id_query)
            return
        self.doi = doi
class Documentation(object):
    """
    Holder for one documentation entry.
    """

    def __init__(self, documentation):
        """
        Copy the ``url``, ``type`` and ``comment`` fields onto the object.

        :param documentation: documentation part of the JSON from
                              http://bio.tools.
        :type documentation: DICT
        """
        # Attribute names match the JSON keys; values are documented as STRING.
        for field in ('url', 'type', 'comment'):
            setattr(self, field, documentation[field])
class Contact(object):
    """
    Holder for one contact entry.
    """

    def __init__(self, contact):
        """
        Copy the ``email`` and ``name`` fields onto the object.

        :param contact: contact part of the JSON from http://bio.tools.
        :type contact: DICT
        """
        # Attribute names match the JSON keys; values are documented as STRING.
        for field in ('email', 'name'):
            setattr(self, field, contact[field])
        # Fields that are currently not stored:
        # self.role = contact['contactRole']
        # self.tel = contact['contactTel']
        # self.url = contact['contactURL']
class Function(object):
    """
    One function of the entry, together with its inputs and outputs.
    """

    def __init__(self, edams):
        """
        Wrap every given EDAM annotation in an
        :class:`tooldog.biotool_model.Operation`.

        :param edams: EDAM ontology for operation(s) with uri and term.
        :type edams: LIST of DICT

        The object also starts with two empty lists:

        * inputs: list of :class:`tooldog.biotool_model.Input`
        * outputs: list of :class:`tooldog.biotool_model.Output`
        """
        self.operations = [Operation(annotation) for annotation in edams]
        self.inputs = []
        self.outputs = []

    def add_inputs(self, inputs):
        """
        Append one :class:`tooldog.biotool_model.Input` per given description.

        :param inputs: inputs part of one function from http://bio.tools.
        :type inputs: LIST of DICT
        """
        self.inputs.extend(
            Input(entry['data'], entry['format']) for entry in inputs
        )

    def add_outputs(self, outputs):
        """
        Append one :class:`tooldog.biotool_model.Output` per given description.

        :param outputs: outputs part of one function from http://bio.tools.
        :type outputs: LIST of DICT
        """
        self.outputs.extend(
            Output(entry['data'], entry['format']) for entry in outputs
        )
class Data(object):
    """
    Piece of data annotated with the EDAM ontology.
    """

    def __init__(self, data_type, formats, description=None):
        """
        Wrap the raw annotations in ``DataType`` and ``Format`` objects.

        :param data_type: EDAM ontology for the data type with uri and term.
        :type data_type: DICT
        :param formats: EDAM ontology for data formats with uri and term.
        :type formats: LIST of DICT
        :param description: description of the data (DEPRECATED)
        :type description: STRING
        """
        self.data_type = DataType(data_type)
        self.formats = [Format(annotation) for annotation in formats]
        self.description = description
class Input(Data):
    """
    Data consumed by a described function.
    """

    def __init__(self, data_type, formats, description=None):
        """
        Forward every argument unchanged to ``Data``.

        :param data_type: EDAM ontology for the data type with uri and term.
        :type data_type: DICT
        :param formats: EDAM ontology for data formats with uri and term.
        :type formats: LIST of DICT
        :param description: description of the data (DEPRECATED)
        :type description: STRING
        """
        super(Input, self).__init__(data_type, formats, description)
class Output(Data):
    """
    Data produced by a described function.
    """

    def __init__(self, data_type, formats, description=None):
        """
        Forward every argument unchanged to ``Data``.

        :param data_type: EDAM ontology for the data type with uri and term.
        :type data_type: DICT
        :param formats: EDAM ontology for data formats with uri and term.
        :type formats: LIST of DICT
        :param description: description of the data (DEPRECATED)
        :type description: STRING
        """
        super(Output, self).__init__(data_type, formats, description)
class Edam(object):
    """
    EDAM annotation: a uri and the term it stands for.
    """

    def __init__(self, edam):
        """
        Read the ``uri`` and ``term`` fields of the annotation.

        :param edam: EDAM ontology with uri and term.
        :type edam: DICT
        """
        self.uri, self.term = edam['uri'], edam['term']

    def get_edam_id(self):
        """
        Return what follows the last ``/`` of the uri (the whole uri when it
        contains no ``/``).

        :return: EDAM id from the uri.
        :rtype: STRING
        """
        _, _, edam_id = self.uri.rpartition('/')
        return edam_id
class Operation(Edam):
    """
    EDAM operation attached to a function.
    """

    def __init__(self, edam):
        """
        Forward the annotation unchanged to ``Edam``.

        :param edam: EDAM ontology with uri and term.
        :type edam: DICT
        """
        super(Operation, self).__init__(edam)
class DataType(Edam):
    """
    EDAM data attached to either an input or an output.
    """

    def __init__(self, edam):
        """
        Forward the annotation unchanged to ``Edam``.

        :param edam: EDAM ontology with uri and term.
        :type edam: DICT
        """
        super(DataType, self).__init__(edam)
class Format(Edam):
    """
    EDAM format attached to either an input or an output.
    """

    def __init__(self, edam):
        """
        Forward the annotation unchanged to ``Edam``.

        :param edam: EDAM ontology with uri and term.
        :type edam: DICT
        """
        super(Format, self).__init__(edam)
class Topic(Edam):
    """
    EDAM topic attached to the entry.
    """

    def __init__(self, edam):
        """
        Forward the annotation unchanged to ``Edam``.

        :param edam: EDAM ontology with uri and term.
        :type edam: DICT
        """
        super(Topic, self).__init__(edam)