Source code for intrabp_correlations.intrahpcorr

#!/usr/bin/env python3

"""Module containing the IntraHelParCorrelation class and the command line interface."""
import argparse
from typing import Optional

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

from biobb_common.generic.biobb_object import BiobbObject
from biobb_common.configuration import settings
from biobb_common.tools.file_utils import launchlogger
from biobb_dna.utils.loader import load_data


[docs] class IntraHelParCorrelation(BiobbObject): """ | biobb_dna IntraHelParCorrelation | Calculate correlation between helical parameters for a single intra-base pair. | Calculate correlation between helical parameters for a single intra-base pair. Args: input_filename_shear (str): Path to .csv file with data for helical parameter 'shear'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_shear_A.csv>`_. Accepted formats: csv (edam:format_3752). input_filename_stretch (str): Path to .csv file with data for helical parameter 'stretch'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_stretch_A.csv>`_. Accepted formats: csv (edam:format_3752). input_filename_stagger (str): Path to .csv file with data for helical parameter 'stagger'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_stagger_A.csv>`_. Accepted formats: csv (edam:format_3752). input_filename_buckle (str): Path to .csv file with data for helical parameter 'buckle'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_buckle_A.csv>`_. Accepted formats: csv (edam:format_3752). input_filename_propel (str): Path to .csv file with data for helical parameter 'propeller'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_propel_A.csv>`_. Accepted formats: csv (edam:format_3752). input_filename_opening (str): Path to .csv file with data for helical parameter 'opening'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/correlation/series_opening_A.csv>`_. Accepted formats: csv (edam:format_3752). output_csv_path (str): Path to directory where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/correlation/intra_hpcorr_ref.csv>`_. Accepted formats: csv (edam:format_3752). output_jpg_path (str): Path to .jpg file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/correlation/intra_hpcorr_ref.jpg>`_. Accepted formats: jpg (edam:format_3579). properties (dict): * **base** (*str*) - (None) Name of base analyzed. * **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files. * **restart** (*bool*) - (False) [WF property] Do not execute if output files exist. * **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory. Examples: This is a use example of how to use the building block from Python:: from biobb_dna.intrabp_correlations.intrahpcorr import intrahpcorr prop = { 'base': 'A', } intrahpcorr( input_filename_shear='path/to/shear.csv', input_filename_stretch='path/to/stretch.csv', input_filename_stagger='path/to/stagger.csv', input_filename_buckle='path/to/buckle.csv', input_filename_propel='path/to/propel.csv', input_filename_opening='path/to/opening.csv', output_csv_path='path/to/output/file.csv', output_jpg_path='path/to/output/file.jpg', properties=prop) Info: * wrapped_software: * name: In house * license: Apache-2.0 * ontology: * name: EDAM * schema: http://edamontology.org/EDAM.owl """ def __init__( self, input_filename_shear, input_filename_stretch, input_filename_stagger, input_filename_buckle, input_filename_propel, input_filename_opening, output_csv_path, output_jpg_path, properties=None, **kwargs) -> None: properties = properties or {} # Call parent class constructor super().__init__(properties) self.locals_var_dict = locals().copy() # Input/Output files self.io_dict = { 'in': { 'input_filename_shear': input_filename_shear, 'input_filename_stretch': input_filename_stretch, 'input_filename_stagger': input_filename_stagger, 'input_filename_buckle': input_filename_buckle, 'input_filename_propel': input_filename_propel, 'input_filename_opening': input_filename_opening }, 'out': { 'output_csv_path': output_csv_path, 'output_jpg_path': output_jpg_path } } self.properties = properties self.base = properties.get("base", None) # Check the properties self.check_properties(properties) self.check_arguments()
[docs] @launchlogger def launch(self) -> int: """Execute the :class:`IntraHelParCorrelation <intrabp_correlations.intrahpcorr.IntraHelParCorrelation>` object.""" # Setup Biobb if self.check_restart(): return 0 self.stage_files() # read input shear = load_data(self.stage_io_dict["in"]["input_filename_shear"]) stretch = load_data(self.stage_io_dict["in"]["input_filename_stretch"]) stagger = load_data(self.stage_io_dict["in"]["input_filename_stagger"]) buckle = load_data(self.stage_io_dict["in"]["input_filename_buckle"]) propel = load_data(self.stage_io_dict["in"]["input_filename_propel"]) opening = load_data(self.stage_io_dict["in"]["input_filename_opening"]) # get base if self.base is None: self.base = shear.columns[0] # make matrix # coordinates = ["shear", "stretch", "stagger", "buckle", "propel", "opening"] coordinates = [ "shear", "stretch", "stagger", "buckle", "propel", "opening"] corr_matrix = pd.DataFrame( np.eye(6, 6), index=coordinates, columns=coordinates) # shear # corr_matrix["shear"]["stretch"] = shear.corrwith(stretch, method="pearson") corr_matrix.loc["stretch", "shear"] = shear.corrwith(stretch, method="pearson").values[0] # corr_matrix["shear"]["stagger"] = shear.corrwith(stagger, method="pearson") corr_matrix.loc["stagger", "shear"] = shear.corrwith(stagger, method="pearson").values[0] # corr_matrix["shear"]["buckle"] = shear.corrwith(buckle, method=self.circlineal) corr_matrix.loc["buckle", "shear"] = shear.corrwith(buckle, method=self.circlineal).values[0] # type: ignore # corr_matrix["shear"]["propel"] = shear.corrwith(propel, method=self.circlineal) corr_matrix.loc["propel", "shear"] = shear.corrwith(propel, method=self.circlineal).values[0] # type: ignore # corr_matrix["shear"]["opening"] = shear.corrwith(opening, method=self.circlineal) corr_matrix.loc["opening", "shear"] = shear.corrwith(opening, method=self.circlineal).values[0] # type: ignore # symmetric values # corr_matrix["stretch"]["shear"] = corr_matrix["shear"]["stretch"] corr_matrix.loc["shear", "stretch"] = corr_matrix.loc["stretch", "shear"] # corr_matrix["stagger"]["shear"] = corr_matrix["shear"]["stagger"] corr_matrix.loc["shear", "stagger"] = corr_matrix.loc["stagger", "shear"] # corr_matrix["buckle"]["shear"] = corr_matrix["shear"]["buckle"] corr_matrix.loc["shear", "buckle"] = corr_matrix.loc["buckle", "shear"] # corr_matrix["propel"]["shear"] = corr_matrix["shear"]["propel"] corr_matrix.loc["shear", "propel"] = corr_matrix.loc["propel", "shear"] # corr_matrix["opening"]["shear"] = corr_matrix["shear"]["opening"] corr_matrix.loc["shear", "opening"] = corr_matrix.loc["opening", "shear"] # stretch # corr_matrix["stretch"]["stagger"] = stretch.corrwith(stagger, method="pearson") corr_matrix.loc["stagger", "stretch"] = stretch.corrwith(stagger, method="pearson").values[0] # corr_matrix["stretch"]["buckle"] = stretch.corrwith(buckle, method=self.circlineal) corr_matrix.loc["buckle", "stretch"] = stretch.corrwith(buckle, method=self.circlineal).values[0] # type: ignore # corr_matrix["stretch"]["propel"] = stretch.corrwith(propel, method=self.circlineal) corr_matrix.loc["propel", "stretch"] = stretch.corrwith(propel, method=self.circlineal).values[0] # type: ignore # corr_matrix["stretch"]["opening"] = stretch.corrwith(opening, method=self.circlineal) corr_matrix.loc["opening", "stretch"] = stretch.corrwith(opening, method=self.circlineal).values[0] # type: ignore # symmetric values # corr_matrix["stagger"]["stretch"] = corr_matrix["stretch"]["stagger"] corr_matrix.loc["stretch", "stagger"] = corr_matrix.loc["stagger", "stretch"] # corr_matrix["buckle"]["stretch"] = corr_matrix["stretch"]["buckle"] corr_matrix.loc["stretch", "buckle"] = corr_matrix.loc["buckle", "stretch"] # corr_matrix["propel"]["stretch"] = corr_matrix["stretch"]["propel"] corr_matrix.loc["stretch", "propel"] = corr_matrix.loc["propel", "stretch"] # corr_matrix["opening"]["stretch"] = corr_matrix["stretch"]["opening"] corr_matrix.loc["stretch", "opening"] = corr_matrix.loc["opening", "stretch"] # stagger # corr_matrix["stagger"]["buckle"] = stagger.corrwith(buckle, method=self.circlineal) corr_matrix.loc["buckle", "stagger"] = stagger.corrwith(buckle, method=self.circlineal).values[0] # type: ignore # corr_matrix["stagger"]["propel"] = stagger.corrwith(propel, method=self.circlineal) corr_matrix.loc["propel", "stagger"] = stagger.corrwith(propel, method=self.circlineal).values[0] # type: ignore # corr_matrix["stagger"]["opening"] = stagger.corrwith(opening, method=self.circlineal) corr_matrix.loc["opening", "stagger"] = stagger.corrwith(opening, method=self.circlineal).values[0] # type: ignore # symmetric values # corr_matrix["buckle"]["stagger"] = corr_matrix["stagger"]["buckle"] corr_matrix.loc["stagger", "buckle"] = corr_matrix.loc["buckle", "stagger"] # corr_matrix["propel"]["stagger"] = corr_matrix["stagger"]["propel"] corr_matrix.loc["stagger", "propel"] = corr_matrix.loc["propel", "stagger"] # corr_matrix["opening"]["stagger"] = corr_matrix["stagger"]["opening"] corr_matrix.loc["stagger", "opening"] = corr_matrix.loc["opening", "stagger"] # buckle # corr_matrix["buckle"]["propel"] = buckle.corrwith(propel, method=self.circular) corr_matrix.loc["propel", "buckle"] = buckle.corrwith(propel, method=self.circular).values[0] # type: ignore # corr_matrix["buckle"]["opening"] = buckle.corrwith(opening, method=self.circular) corr_matrix.loc["opening", "buckle"] = buckle.corrwith(opening, method=self.circular).values[0] # type: ignore # symmetric values # corr_matrix["propel"]["buckle"] = corr_matrix["buckle"]["propel"] corr_matrix.loc["buckle", "propel"] = corr_matrix.loc["propel", "buckle"] # corr_matrix["opening"]["buckle"] = corr_matrix["buckle"]["opening"] corr_matrix.loc["buckle", "opening"] = corr_matrix.loc["opening", "buckle"] # propel # corr_matrix["propel"]["opening"] = propel.corrwith(opening, method=self.circular) corr_matrix.loc["opening", "propel"] = propel.corrwith(opening, method=self.circular).values[0] # type: ignore # symmetric values # corr_matrix["opening"]["propel"] = corr_matrix["propel"]["opening"] corr_matrix.loc["propel", "opening"] = corr_matrix.loc["opening", "propel"] # save csv data corr_matrix.to_csv(self.stage_io_dict["out"]["output_csv_path"]) # create heatmap fig, axs = plt.subplots(1, 1, dpi=300, tight_layout=True) axs.pcolor(corr_matrix) # Loop over data dimensions and create text annotations. for i in range(len(corr_matrix)): for j in range(len(corr_matrix)): axs.text( j+.5, i+.5, f"{corr_matrix[coordinates[j]].loc[coordinates[i]]:.2f}", ha="center", va="center", color="w") axs.set_xticks([i + 0.5 for i in range(len(corr_matrix))]) axs.set_xticklabels(corr_matrix.columns, rotation=90) axs.set_yticks([i+0.5 for i in range(len(corr_matrix))]) axs.set_yticklabels(corr_matrix.index) axs.set_title( "Helical Parameter Correlation " f"for Base Pair Step \'{self.base}\'") fig.tight_layout() fig.savefig( self.stage_io_dict['out']['output_jpg_path'], format="jpg") plt.close() # Copy files to host self.copy_to_host() # Remove temporary file(s) # self.tmp_files.extend([ # self.stage_io_dict.get("unique_dir", "") # ]) self.remove_tmp_files() self.check_arguments(output_files_created=True, raise_exception=False) return 0
[docs] def get_corr_method(self, corrtype1, corrtype2): if corrtype1 == "circular" and corrtype2 == "linear": method = self.circlineal if corrtype1 == "linear" and corrtype2 == "circular": method = self.circlineal elif corrtype1 == "circular" and corrtype2 == "circular": method = self.circular else: method = "pearson" return method
[docs] @staticmethod def circular(x1, x2): x1 = x1 * np.pi / 180 x2 = x2 * np.pi / 180 diff_1 = np.sin(x1 - x1.mean()) diff_2 = np.sin(x2 - x2.mean()) num = (diff_1 * diff_2).sum() den = np.sqrt((diff_1 ** 2).sum() * (diff_2 ** 2).sum()) return num / den
[docs] @staticmethod def circlineal(x1, x2): x2 = x2 * np.pi / 180 rc = np.corrcoef(x1, np.cos(x2))[1, 0] rs = np.corrcoef(x1, np.sin(x2))[1, 0] rcs = np.corrcoef(np.sin(x2), np.cos(x2))[1, 0] num = (rc ** 2) + (rs ** 2) - 2 * rc * rs * rcs den = 1 - (rcs ** 2) correlation = np.sqrt(num / den) if np.corrcoef(x1, x2)[1, 0] < 0: correlation *= -1 return correlation
[docs] def intrahpcorr( input_filename_shear: str, input_filename_stretch: str, input_filename_stagger: str, input_filename_buckle: str, input_filename_propel: str, input_filename_opening: str, output_csv_path: str, output_jpg_path: str, properties: Optional[dict] = None, **kwargs) -> int: """Create :class:`IntraHelParCorrelation <intrabp_correlations.intrahpcorr.IntraHelParCorrelation>` class and execute the :meth:`launch() <intrabp_correlations.intrahpcorr.IntraHelParCorrelation.launch>` method.""" return IntraHelParCorrelation( input_filename_shear=input_filename_shear, input_filename_stretch=input_filename_stretch, input_filename_stagger=input_filename_stagger, input_filename_buckle=input_filename_buckle, input_filename_propel=input_filename_propel, input_filename_opening=input_filename_opening, output_csv_path=output_csv_path, output_jpg_path=output_jpg_path, properties=properties, **kwargs).launch() intrahpcorr.__doc__ = IntraHelParCorrelation.__doc__
[docs] def main(): """Command line execution of this building block. Please check the command line documentation.""" parser = argparse.ArgumentParser(description='Load helical parameter file and save base data individually.', formatter_class=lambda prog: argparse.RawTextHelpFormatter(prog, width=99999)) parser.add_argument('--config', required=False, help='Configuration file') required_args = parser.add_argument_group('required arguments') required_args.add_argument('--input_filename_shear', required=True, help='Path to csv file with inputs. Accepted formats: csv.') required_args.add_argument('--input_filename_stretch', required=True, help='Path to csv file with inputs. Accepted formats: csv.') required_args.add_argument('--input_filename_stagger', required=True, help='Path to csv file with inputs. Accepted formats: csv.') required_args.add_argument('--input_filename_buckle', required=True, help='Path to csv file with inputs. Accepted formats: csv.') required_args.add_argument('--input_filename_propel', required=True, help='Path to csv file with inputs. Accepted formats: csv.') required_args.add_argument('--input_filename_opening', required=True, help='Path to csv file with inputs. Accepted formats: csv.') required_args.add_argument('--output_csv_path', required=True, help='Path to output file. Accepted formats: csv.') required_args.add_argument('--output_jpg_path', required=True, help='Path to output file. Accepted formats: csv.') args = parser.parse_args() args.config = args.config or "{}" properties = settings.ConfReader(config=args.config).get_prop_dic() intrahpcorr( input_filename_shear=args.input_filename_shear, input_filename_stretch=args.input_filename_stretch, input_filename_stagger=args.input_filename_stagger, input_filename_buckle=args.input_filename_buckle, input_filename_propel=args.input_filename_propel, input_filename_opening=args.input_filename_opening, output_csv_path=args.output_csv_path, output_jpg_path=args.output_jpg_path, properties=properties)
if __name__ == '__main__': main()