Source code for tooldog.analyse.code_collector

#!/usr/bin/env python3

import logging
import os
import urllib.parse
import urllib.request
import tarfile
from tooldog import TMP_DIR

from .utils import *

# Module-level logger, named after this module (``__name__``).
LOGGER = logging.getLogger(__name__)


class CodeCollector(object):
    """
    Class to download source code from a https://bio.tools entry

    Each link of the entry is dispatched to a ``_get_from_<link type>``
    method; the first one returning something other than None wins.
    """

    # Name used for the downloaded archive, both for the file written in
    # TMP_DIR and for the member stored inside the tar archive.
    ZIP_NAME = "tool.zip"
    # Name of the tar archive (written in TMP_DIR) wrapping the zip file.
    TAR_NAME = "tool.tar"
    # Not referenced by any method of this class.
    TMP_NAME = "tmp"

    def __init__(self, biotool):
        """
        :param biotool: Biotool object
        :type biotool: :class:`tooldog.biotool_model.Biotool`
        """
        self.biotool = biotool

    def _make_tar(self, file_path, tarname):
        """
        Wrap a single file into a new, uncompressed tar archive.

        The file is stored in the archive under the name ``ZIP_NAME``,
        whatever its name on disk is.

        :param file_path: path of the file to add to the archive
        :type file_path: STRING
        :param tarname: path of the tar archive to create
        :type tarname: STRING
        """
        with tarfile.open(tarname, mode='w') as archive:
            archive.add(file_path, arcname=self.ZIP_NAME)

    def _get_from_repository(self, url):
        """
        Get source code from a repository link

        :param url: url of the repository
        :type url: STRING

        :return: path of the tar archive, or None if the repository is not
                 handled or the download failed
        """
        # Here we deal with repository, have to use regex to test the url and
        # use appropriate strategy to get the code depending the type of
        # repository
        if "github.com" in url:
            return self._get_from_github(url)
        LOGGER.warning("The url ({}) is not a Github url".format(url))
        LOGGER.warning("ToolDog only deals with Github repository for the moment...")
        return None

    def _get_from_github(self, url):
        """
        Download the master branch of a Github repository as a zip file and
        wrap it into a tar archive.

        This is best effort: any failure is logged as a warning and None is
        returned instead of raising.

        :param url: url of the Github repository
        :type url: STRING

        :return: path of the tar archive, or None if something went wrong
        """
        # Build the URL with plain string operations: os.path.join uses the
        # OS path separator, which is a backslash on Windows.
        # NOTE(review): only the 'master' branch is requested; a repository
        # without a branch of that name ends up in the warning path below.
        zip_url = url.rstrip('/') + '/archive/master.zip'
        try:
            # Context manager so the HTTP response is always closed.
            with urllib.request.urlopen(zip_url) as response:
                data = response.read()
            LOGGER.info('Writing data to zip file...')
            zip_path = os.path.join(TMP_DIR, self.ZIP_NAME)
            tar_path = os.path.join(TMP_DIR, self.TAR_NAME)
            write_to_file(zip_path, data, 'wb')
            LOGGER.info('Making tar...')
            self._make_tar(zip_path, tar_path)
            return tar_path
        except Exception:
            # Exception (not a bare except) so that KeyboardInterrupt and
            # SystemExit are not swallowed.
            LOGGER.warning('Something went wrong with the following Github repository: {}'.format(zip_url))
            LOGGER.debug('Details of the failure:', exc_info=True)
            return None

    def _get_from_source_code(self, url):
        """
        Get source code from a source code link

        Not implemented yet: always returns None.

        :param url: url of the source code
        :type url: STRING
        """
        return None

    def get_source(self):
        """
        Retrieve source code of the tool using links provided in
        https://bio.tools

        :return: whatever the first successful ``_get_from_*`` method
                 returned, or None if no link gave a result
        """
        source_code = None
        for link in self.biotool.informations.links:
            # "Source code" -> "source_code", "Repository" -> "repository"...
            link_type = link.type.lower().replace(' ', '_')
            # Look the handler up separately from calling it, so that an
            # AttributeError raised *inside* a handler is not mistaken for
            # an unsupported link type.
            handler = getattr(self, '_get_from_{}'.format(link_type), None)
            if handler is None:
                LOGGER.warning(link_type + ' link type is not processed yet by ToolDog.')
                continue
            source_code = handler(link.url)
            if source_code is not None:
                # For the moment, consider that if a source code has been
                # found, we just leave the loop.
                break
        return source_code