#!/usr/bin/env python3
"""Module containing the CanalUnzip class and the command line interface."""
import re
import zipfile
import shutil
from typing import Optional
from biobb_common.generic.biobb_object import BiobbObject
from biobb_common.tools import file_utils as fu
from biobb_common.tools.file_utils import launchlogger
[docs]
class CanalUnzip(BiobbObject):
"""
| biobb_dna CanalUnzip
| Tool for extracting biobb_canal output files.
| Unzips a Canal output file contained within a zip file.
Args:
input_zip_file (str): Zip file with Canal output files. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/curvesplus/canal_output.zip>`_. Accepted formats: zip (edam:format_3987).
output_path (str): Canal output file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.ser>`_. Accepted formats: ser (edam:format_2330), his (edam:format_3905), cor (edam:format_3465).
output_list_path (str) (Optional): Text file with a list of all Canal output files contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/curvesplus/canal_unzip_output.txt>`_. Accepted formats: txt (edam:format_2330).
properties (dic):
* **type** (*str*) - (None) Type of file. Values: series, histo, corr.
* **helpar_name** (*str*) - (None) Helical parameter name, only for 'series' and 'histo' types. Values: alphaC, alphaW, ampC, ampW, ax-bend, betaC, betaW, buckle, chiC, chiW, curv, deltaC, deltaW, epsilC, epsilW, gammaC, gammaW, h-ris, h-twi, inclin, majd, majw, mind, minw, opening, phaseC, phaseW, propel, reg, rise, roll, shear, shift, slide, stagger, stretch, tbend, tilt, tip, twist, xdisp, ydisp, zetaC, zetaW.
* **correlation** (*str*) - (None) Correlation indexes separated by underscore (ie '98_165'), only for 'corr' type.
* **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
* **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
* **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
Examples:
This is a use example of how to use the building block from Python::
from biobb_dna.curvesplus.canal_unzip import canal_unzip
prop = {
'type': 'series',
'helpar_name': 'alphaC'
}
canal_unzip(
input_zip_file='/path/to/canal/output.zip',
output_path='/path/to/output.ser',
output_list_path='/path/to/output.txt'
properties=prop)
Info:
* wrapped_software:
* name: In house
* license: Apache-2.0
* ontology:
* name: EDAM
* schema: http://edamontology.org/EDAM.owl
"""
def __init__(self, input_zip_file,
output_path, output_list_path=None, properties=None, **kwargs) -> None:
properties = properties or {}
# Call parent class constructor
super().__init__(properties)
self.locals_var_dict = locals().copy()
# Input/Output files
self.io_dict = {
'in': {
'input_zip_file': input_zip_file
},
'out': {
'output_path': output_path,
'output_list_path': output_list_path
}
}
# Properties specific for BB
self.type = properties.get('type', None)
self.helpar_name = properties.get('helpar_name', None)
self.correlation = properties.get('correlation', None)
self.properties = properties
# Check the properties
self.check_properties(properties)
self.check_arguments()
[docs]
@launchlogger
def launch(self) -> int:
"""Execute the :class:`CanalUnzip <biobb_dna.curvesplus.canal_unzip.CanalUnzip>` object."""
# Setup Biobb
if self.check_restart():
return 0
self.stage_files()
# Check that both properties are set
if self.type is None:
fu.log("Property 'type' is mandatory to run CanalUnzip. Please set it.",
self.out_log, self.global_log)
exit(1)
# Check that helpar_name is set if type is series or histo
if self.type in ["series", "histo"] and self.helpar_name is None:
fu.log("Property 'helpar_name' is mandatory to run CanalUnzip with type 'series' or 'histo'. Please set it.",
self.out_log, self.global_log)
exit(1)
# Check that correlation is set if type is corr
if self.type == "corr" and self.correlation is None:
fu.log("Property 'correlation' is mandatory to run CanalUnzip with type 'corr'. Please set it.",
self.out_log, self.global_log)
exit(1)
extensions = {
"series": "ser",
"histo": "his",
"corr": "cor"
}
# Check that the type is valid
if self.type not in extensions:
fu.log(f"Type {self.type} not valid. Valid types are: {', '.join(extensions.keys())}.",
self.out_log, self.global_log)
exit(1)
# generate sufix
sufix = ""
if self.type == "corr":
# Check that the correlation is valid
pattern = r'\d+_\d+'
if not re.match(pattern, self.correlation):
fu.log(f"Correlation {self.correlation} not valid. It should match the pattern <number_number>.",
self.out_log, self.global_log)
exit(1)
sufix = self.correlation
else:
# Check that the helpar_name is valid
if self.helpar_name not in ["alphaC", "alphaW", "ampC", "ampW", "ax-bend", "betaC", "betaW", "buckle",
"chiC", "chiW", "curv", "deltaC", "deltaW", "epsilC", "epsilW", "gammaC",
"gammaW", "h-ris", "h-twi", "inclin", "majd", "majw", "mind", "minw",
"opening", "phaseC", "phaseW", "propel", "reg", "rise", "roll", "shear",
"shift", "slide", "stagger", "stretch", "tbend", "tilt", "tip", "twist",
"xdisp", "ydisp", "zetaC", "zetaW"]:
fu.log(f"Parameter {self.helpar_name} not valid. Valid parameters are: alphaC, alphaW, ampC, ampW, ax-bend, betaC, betaW, buckle, chiC, chiW, curv, deltaC, deltaW, epsilC, epsilW, gammaC, gammaW, h-ris, h-twi, inclin, majd, majw, mind, minw, opening, phaseC, phaseW, propel, reg, rise, roll, shear, shift, slide, stagger, stretch, tbend, tilt, tip, twist, xdisp, ydisp, zetaC, zetaW.",
self.out_log, self.global_log)
exit(1)
sufix = self.helpar_name
# Generate the filename
filename = f"canal_output_{sufix}.{extensions[self.type]}"
# Unzip the file
with zipfile.ZipFile(self.stage_io_dict["in"]["input_zip_file"], 'r') as zip_ref:
# Check if the file exists in the zip file
if filename in zip_ref.namelist():
# Extract the file
fu.log(f'{filename} exists, copying into {self.stage_io_dict["out"]["output_path"]}.',
self.out_log, self.global_log)
with zip_ref.open(filename) as source, open(self.stage_io_dict["out"]["output_path"], "wb") as target:
shutil.copyfileobj(source, target)
else:
fu.log(f"File {filename} not found in the zip file.", self.out_log, self.global_log)
exit(1)
# Write the list of files
if self.stage_io_dict["out"]["output_list_path"]:
with open(self.stage_io_dict["out"]["output_list_path"], "w") as f:
for name in zip_ref.namelist():
f.write(f"{name}\n")
# Run Biobb block
# self.run_biobb()
# Copy files to host
self.copy_to_host()
# Remove temporary file(s)
self.remove_tmp_files()
self.check_arguments(output_files_created=True, raise_exception=False)
return self.return_code
[docs]
def canal_unzip(
input_zip_file: str,
output_path: str,
output_list_path: Optional[str] = None,
properties: Optional[dict] = None,
**kwargs) -> int:
"""Create :class:`CanalUnzip <biobb_dna.curvesplus.canal_unzip.CanalUnzip>` class and
execute the :meth:`launch() <biobb_dna.curvesplus.canal_unzip.CanalUnzip.launch>` method."""
return CanalUnzip(**dict(locals())).launch()
canal_unzip.__doc__ = CanalUnzip.__doc__
main = CanalUnzip.get_main(canal_unzip, "Tool for extracting biobb_canal output files.")
if __name__ == '__main__':
main()