Source code for dna.dna_timeseries_unzip

#!/usr/bin/env python3

"""Module containing the DnaTimeseriesUnzip class and the command line interface."""
import re
import zipfile
import shutil
from typing import Optional

from biobb_dna.utils import constants
from biobb_common.generic.biobb_object import BiobbObject
from biobb_common.tools import file_utils as fu
from biobb_common.tools.file_utils import launchlogger


class DnaTimeseriesUnzip(BiobbObject):
    """
    | biobb_dna DnaTimeseriesUnzip
    | Tool for extracting dna_timeseries output files.
    | Unzips a zip file containing dna_timeseries output files and extracts the csv and jpg files.

    Args:
        input_zip_file (str): Zip file with dna_timeseries output files. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/dna/timeseries_output.zip>`_. Accepted formats: zip (edam:format_3987).
        output_path_csv (str): dna_timeseries output csv file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.csv>`_. Accepted formats: csv (edam:format_3752).
        output_path_jpg (str): dna_timeseries output jpg file contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.jpg>`_. Accepted formats: jpg (edam:format_3579).
        output_list_path (str) (Optional): Text file with a list of all dna_timeseries output files contained within input_zip_file. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/dna/dna_timeseries_unzip.txt>`_. Accepted formats: txt (edam:format_2330).
        properties (dic):
            * **type** (*str*) - (None) Type of analysis, series or histogram. Values: series, hist.
            * **parameter** (*str*) - (None) Type of parameter. Values: majd, majw, mind, minw, inclin, tip, xdisp, ydisp, shear, stretch, stagger, buckle, propel, opening, rise, roll, twist, shift, slide, tilt, alphaC, alphaW, betaC, betaW, gammaC, gammaW, deltaC, deltaW, epsilC, epsilW, zetaC, zetaW, chiC, chiW, phaseC, phaseW.
            * **sequence** (*str*) - (None) Nucleic acid sequence used for generating dna_timeseries output file.
            * **index** (*int*) - (1) Base pair index in the parameter 'sequence', starting from 1.
            * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
            * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
            * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.

    Examples:
        This is a use example of how to use the building block from Python::

            from biobb_dna.dna.dna_timeseries_unzip import dna_timeseries_unzip

            prop = {
                'type': 'hist',
                'parameter': 'shift',
                'sequence': 'CGCGAATTCGCG',
                'index': 5
            }
            dna_timeseries_unzip(
                input_zip_file='/path/to/dna_timeseries/output.zip',
                output_path_csv='/path/to/output.csv',
                output_path_jpg='/path/to/output.jpg',
                output_list_path='/path/to/output.txt',
                properties=prop)
    Info:
        * wrapped_software:
            * name: In house
            * license: Apache-2.0
        * ontology:
            * name: EDAM
            * schema: http://edamontology.org/EDAM.owl

    """

    def __init__(self, input_zip_file, output_path_csv, output_path_jpg,
                 output_list_path=None, properties=None, **kwargs) -> None:
        properties = properties or {}

        # Call parent class constructor
        super().__init__(properties)
        # Snapshot of the constructor arguments, taken before any other
        # local name is created in this scope.
        self.locals_var_dict = locals().copy()

        # Input/Output files
        self.io_dict = {
            'in': {
                'input_zip_file': input_zip_file
            },
            'out': {
                'output_path_csv': output_path_csv,
                'output_path_jpg': output_path_jpg,
                'output_list_path': output_list_path
            }
        }

        # Properties specific for BB
        self.type = properties.get('type', None)
        self.parameter = properties.get('parameter', None)
        self.sequence = properties.get('sequence', None)
        self.index = properties.get('index', 1)
        self.properties = properties

        # Check the properties
        self.check_properties(properties)
        self.check_arguments()

    def _abort(self, message: str) -> None:
        """Log ``message`` and terminate with exit status 1.

        Raises:
            SystemExit: always, with code 1. Raised directly instead of calling
                the ``site``-provided ``exit()`` helper, which also closes
                ``sys.stdin`` and is missing when Python runs with ``-S``.
        """
        fu.log(message, self.out_log, self.global_log)
        raise SystemExit(1)

    def _extract_member(self, zip_ref, members, member: str, out_key: str) -> None:
        """Copy ``member`` from the open archive into the staged output ``out_key``.

        Args:
            zip_ref: Open ``zipfile.ZipFile`` to read from.
            members: Names contained in the archive (``zip_ref.namelist()``).
            member: File name to extract.
            out_key: Key of the destination path in ``self.stage_io_dict["out"]``.

        Raises:
            SystemExit: if ``member`` is not present in the archive.
        """
        if member not in members:
            self._abort(f"File {member} not found in the zip file.")

        target_path = self.stage_io_dict["out"][out_key]
        fu.log(f'{member} exists, copying into {target_path}.',
               self.out_log, self.global_log)
        with zip_ref.open(member) as source, open(target_path, "wb") as target:
            shutil.copyfileobj(source, target)

    @launchlogger
    def launch(self) -> int:
        """Execute the :class:`DnaTimeseriesUnzip <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip>` object."""

        # Setup Biobb
        if self.check_restart():
            return 0
        self.stage_files()

        # Check that the three mandatory properties are set
        if self.type is None or self.parameter is None or self.sequence is None:
            self._abort("Properties 'type', 'parameter' and 'sequence' are mandatory to run DnaTimeseriesUnzip. Please set them.")

        # Check that the type is valid
        if self.type not in ["series", "hist"]:
            self._abort(f"Type {self.type} not valid. Valid types are: series, hist.")

        # Check that the parameter is valid
        if self.parameter not in constants.helical_parameters:
            self._abort(f"Parameter {self.parameter} not valid. Valid parameters are: {constants.helical_parameters}.")

        # Check that the sequence is valid. fullmatch is used because '$' in
        # re.match would also accept a sequence ending in a newline.
        if not re.fullmatch(r'[ACGT]+', self.sequence):
            self._abort(f"Sequence {self.sequence} not valid. Only 'A', 'C', 'G' or 'T' bases allowed.")

        # Check that the index is valid: it is 1-based and must leave a
        # following base available to build the base-pair step label.
        if self.index < 1 or self.index >= len(self.sequence) - 1:
            self._abort(f"Index {self.index} not valid. It should be between 1 and {len(self.sequence) - 2}.")

        # Get index sequence base and next base
        bp = self.sequence[self.index-1] + self.sequence[self.index]

        # Get the filename
        filename = f"{self.type}_{self.parameter}_{self.index}_{bp}"
        csv_file = f"{filename}.csv"
        jpg_file = f"{filename}.jpg"

        # Unzip the file
        with zipfile.ZipFile(self.stage_io_dict["in"]["input_zip_file"], 'r') as zip_ref:
            # Read the archive listing once and reuse it for every lookup
            members = zip_ref.namelist()

            # Extract the csv file first, then the jpg file; a missing member
            # aborts the run at that point.
            self._extract_member(zip_ref, members, csv_file, "output_path_csv")
            self._extract_member(zip_ref, members, jpg_file, "output_path_jpg")

            # Write the list of files
            if self.stage_io_dict["out"]["output_list_path"]:
                with open(self.stage_io_dict["out"]["output_list_path"], "w") as f:
                    f.writelines(f"{name}\n" for name in members)

        # Run Biobb block
        # self.run_biobb()

        # Copy files to host
        self.copy_to_host()

        # Remove temporary file(s)
        self.remove_tmp_files()

        self.check_arguments(output_files_created=True, raise_exception=False)

        return self.return_code
def dna_timeseries_unzip(
        input_zip_file: str,
        output_path_csv: str,
        output_path_jpg: str,
        output_list_path: Optional[str] = None,
        properties: Optional[dict] = None,
        **kwargs) -> int:
    """Create :class:`DnaTimeseriesUnzip <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip>` class and execute the :meth:`launch() <biobb_dna.dna.dna_timeseries_unzip.DnaTimeseriesUnzip.launch>` method."""
    # Capture the call arguments first, while the local namespace holds
    # nothing but this function's parameters (including 'kwargs' itself).
    call_arguments = dict(locals())
    building_block = DnaTimeseriesUnzip(**call_arguments)
    return building_block.launch()
# The functional wrapper shares the class documentation.
dna_timeseries_unzip.__doc__ = DnaTimeseriesUnzip.__doc__

# Command line entry point obtained from the class's get_main.
main = DnaTimeseriesUnzip.get_main(
    dna_timeseries_unzip,
    "Tool for extracting dna_timeseries output files.",
)

if __name__ == '__main__':
    main()