#!/usr/bin/env python3
"""Module containing the InterHelParCorrelation class and the command line interface."""
from typing import Optional
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from biobb_common.generic.biobb_object import BiobbObject
from biobb_common.tools.file_utils import launchlogger
from biobb_dna.utils.loader import load_data
[docs]
class InterHelParCorrelation(BiobbObject):
"""
| biobb_dna InterHelParCorrelation
| Calculate correlation between helical parameters for a single inter-base pair.
| Calculate correlation between helical parameters for a single inter-base pair.
Args:
input_filename_shift (str): Path to .csv file with data for helical parameter 'shift'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/series_shift_AA.csv>`_. Accepted formats: csv (edam:format_3752).
input_filename_slide (str): Path to .csv file with data for helical parameter 'slide'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/series_slide_AA.csv>`_. Accepted formats: csv (edam:format_3752).
input_filename_rise (str): Path to .csv file with data for helical parameter 'rise'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/series_rise_AA.csv>`_. Accepted formats: csv (edam:format_3752).
input_filename_tilt (str): Path to .csv file with data for helical parameter 'tilt'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/series_tilt_AA.csv>`_. Accepted formats: csv (edam:format_3752).
input_filename_roll (str): Path to .csv file with data for helical parameter 'roll'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/series_roll_AA.csv>`_. Accepted formats: csv (edam:format_3752).
input_filename_twist (str): Path to .csv file with data for helical parameter 'twist'. File type: input. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/data/stiffness/series_twist_AA.csv>`_. Accepted formats: csv (edam:format_3752).
output_csv_path (str): Path to directory where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/correlation/inter_hpcorr_ref.csv>`_. Accepted formats: csv (edam:format_3752).
output_jpg_path (str): Path to .jpg file where output is saved. File type: output. `Sample file <https://raw.githubusercontent.com/bioexcel/biobb_dna/master/biobb_dna/test/reference/correlation/inter_hpcorr_ref.jpg>`_. Accepted formats: jpg (edam:format_3579).
properties (dict):
* **basepair** (*str*) - (None) Name of basepair analyzed.
* **remove_tmp** (*bool*) - (True) [WF property] Remove temporal files.
* **restart** (*bool*) - (False) [WF property] Do not execute if output files exist.
* **sandbox_path** (*str*) - ("./") [WF property] Parent path to the sandbox directory.
Examples:
This is a use example of how to use the building block from Python::
from biobb_dna.interbp_correlations.interhpcorr import interhpcorr
prop = {
'basepair': 'AA',
}
interhpcorr(
input_filename_shift='path/to/shift.csv',
input_filename_slide='path/to/slide.csv',
input_filename_rise='path/to/rise.csv',
input_filename_tilt='path/to/tilt.csv',
input_filename_roll='path/to/roll.csv',
input_filename_twist='path/to/twist.csv',
output_csv_path='path/to/output/file.csv',
output_jpg_path='path/to/output/file.jpg',
properties=prop)
Info:
* wrapped_software:
* name: In house
* license: Apache-2.0
* ontology:
* name: EDAM
* schema: http://edamontology.org/EDAM.owl
"""
def __init__(
self, input_filename_shift, input_filename_slide,
input_filename_rise, input_filename_tilt,
input_filename_roll, input_filename_twist,
output_csv_path, output_jpg_path,
properties=None, **kwargs) -> None:
properties = properties or {}
# Call parent class constructor
super().__init__(properties)
self.locals_var_dict = locals().copy()
# Input/Output files
self.io_dict = {
'in': {
'input_filename_shift': input_filename_shift,
'input_filename_slide': input_filename_slide,
'input_filename_rise': input_filename_rise,
'input_filename_tilt': input_filename_tilt,
'input_filename_roll': input_filename_roll,
'input_filename_twist': input_filename_twist
},
'out': {
'output_csv_path': output_csv_path,
'output_jpg_path': output_jpg_path
}
}
self.properties = properties
self.basepair = properties.get("basepair", None)
# Check the properties
self.check_properties(properties)
self.check_arguments()
[docs]
@launchlogger
def launch(self) -> int:
"""Execute the :class:`InterHelParCorrelation <interbp_correlations.interhpcorr.InterHelParCorrelation>` object."""
# Setup Biobb
if self.check_restart():
return 0
self.stage_files()
# read input
shift = load_data(self.stage_io_dict["in"]["input_filename_shift"])
slide = load_data(self.stage_io_dict["in"]["input_filename_slide"])
rise = load_data(self.stage_io_dict["in"]["input_filename_rise"])
tilt = load_data(self.stage_io_dict["in"]["input_filename_tilt"])
roll = load_data(self.stage_io_dict["in"]["input_filename_roll"])
twist = load_data(self.stage_io_dict["in"]["input_filename_twist"])
# get basepair
if self.basepair is None:
self.basepair = shift.columns[0]
# make matrix
coordinates = ["shift", "slide", "rise", "tilt", "roll", "twist"]
corr_matrix = pd.DataFrame(
np.eye(6, 6), index=coordinates, columns=coordinates)
# shift
# corr_matrix["shift"]["slide"] = shift.corrwith(slide, method="pearson")
corr_matrix.loc["slide", "shift"] = shift.corrwith(slide, method="pearson").values[0]
# corr_matrix["shift"]["rise"] = shift.corrwith(rise, method="pearson")
corr_matrix.loc["rise", "shift"] = shift.corrwith(rise, method="pearson").values[0]
# corr_matrix["shift"]["tilt"] = shift.corrwith(tilt, method=self.circlineal)
corr_matrix.loc["tilt", "shift"] = shift.corrwith(tilt, method=self.circlineal).values[0] # type: ignore
# corr_matrix["shift"]["roll"] = shift.corrwith(roll, method=self.circlineal)
corr_matrix.loc["roll", "shift"] = shift.corrwith(roll, method=self.circlineal).values[0] # type: ignore
# corr_matrix["shift"]["twist"] = shift.corrwith(twist, method=self.circlineal)
corr_matrix.loc["twist", "shift"] = shift.corrwith(twist, method=self.circlineal).values[0] # type: ignore
# symmetric values
# corr_matrix["slide"]["shift"] = corr_matrix["shift"]["slide"]
corr_matrix.loc["shift", "slide"] = corr_matrix.loc["slide", "shift"]
# corr_matrix["rise"]["shift"] = corr_matrix["shift"]["rise"]
corr_matrix.loc["shift", "rise"] = corr_matrix.loc["rise", "shift"]
# corr_matrix["tilt"]["shift"] = corr_matrix["shift"]["tilt"]
corr_matrix.loc["shift", "tilt"] = corr_matrix.loc["tilt", "shift"]
# corr_matrix["roll"]["shift"] = corr_matrix["shift"]["roll"]
corr_matrix.loc["shift", "roll"] = corr_matrix.loc["roll", "shift"]
# corr_matrix["twist"]["shift"] = corr_matrix["shift"]["twist"]
corr_matrix.loc["shift", "twist"] = corr_matrix.loc["twist", "shift"]
# slide
# corr_matrix["slide"]["rise"] = slide.corrwith(rise, method="pearson")
corr_matrix.loc["rise", "slide"] = slide.corrwith(rise, method="pearson").values[0]
# corr_matrix["slide"]["tilt"] = slide.corrwith(tilt, method=self.circlineal)
corr_matrix.loc["tilt", "slide"] = slide.corrwith(tilt, method=self.circlineal).values[0] # type: ignore
# corr_matrix["slide"]["roll"] = slide.corrwith(roll, method=self.circlineal)
corr_matrix.loc["roll", "slide"] = slide.corrwith(roll, method=self.circlineal).values[0] # type: ignore
# corr_matrix["slide"]["twist"] = slide.corrwith(twist, method=self.circlineal)
corr_matrix.loc["twist", "slide"] = slide.corrwith(twist, method=self.circlineal).values[0] # type: ignore
# symmetric values
# corr_matrix["rise"]["slide"] = corr_matrix["slide"]["rise"]
corr_matrix.loc["slide", "rise"] = corr_matrix.loc["rise", "slide"]
# corr_matrix["tilt"]["slide"] = corr_matrix["slide"]["tilt"]
corr_matrix.loc["slide", "tilt"] = corr_matrix.loc["tilt", "slide"]
# corr_matrix["roll"]["slide"] = corr_matrix["slide"]["roll"]
corr_matrix.loc["slide", "roll"] = corr_matrix.loc["roll", "slide"]
# corr_matrix["twist"]["slide"] = corr_matrix["slide"]["twist"]
corr_matrix.loc["slide", "twist"] = corr_matrix.loc["twist", "slide"]
# rise
# corr_matrix["rise"]["tilt"] = rise.corrwith(tilt, method=self.circlineal)
corr_matrix.loc["tilt", "rise"] = rise.corrwith(tilt, method=self.circlineal).values[0] # type: ignore
# corr_matrix["rise"]["roll"] = rise.corrwith(roll, method=self.circlineal)
corr_matrix.loc["roll", "rise"] = rise.corrwith(roll, method=self.circlineal).values[0] # type: ignore
# corr_matrix["rise"]["twist"] = rise.corrwith(twist, method=self.circlineal)
corr_matrix.loc["twist", "rise"] = rise.corrwith(twist, method=self.circlineal).values[0] # type: ignore
# symmetric values
# corr_matrix["tilt"]["rise"] = corr_matrix["rise"]["tilt"]
corr_matrix.loc["rise", "tilt"] = corr_matrix.loc["tilt", "rise"]
# corr_matrix["roll"]["rise"] = corr_matrix["rise"]["roll"]
corr_matrix.loc["rise", "roll"] = corr_matrix.loc["roll", "rise"]
# corr_matrix["twist"]["rise"] = corr_matrix["rise"]["twist"]
corr_matrix.loc["rise", "twist"] = corr_matrix.loc["twist", "rise"]
# tilt
# corr_matrix["tilt"]["roll"] = tilt.corrwith(roll, method=self.circular)
corr_matrix.loc["roll", "tilt"] = tilt.corrwith(roll, method=self.circular).values[0] # type: ignore
# corr_matrix["tilt"]["twist"] = tilt.corrwith(twist, method=self.circular)
corr_matrix.loc["twist", "tilt"] = tilt.corrwith(twist, method=self.circular).values[0] # type: ignore
# symmetric values
# corr_matrix["roll"]["tilt"] = corr_matrix["tilt"]["roll"]
corr_matrix.loc["tilt", "roll"] = corr_matrix.loc["roll", "tilt"]
# corr_matrix["twist"]["tilt"] = corr_matrix["tilt"]["twist"]
corr_matrix.loc["tilt", "twist"] = corr_matrix.loc["twist", "tilt"]
# roll
# corr_matrix["roll"]["twist"] = roll.corrwith(twist, method=self.circular)
corr_matrix.loc["twist", "roll"] = roll.corrwith(twist, method=self.circular).values[0] # type: ignore
# symmetric values
# corr_matrix["twist"]["roll"] = corr_matrix["roll"]["twist"]
corr_matrix.loc["roll", "twist"] = corr_matrix.loc["twist", "roll"]
# save csv data
corr_matrix.to_csv(self.stage_io_dict["out"]["output_csv_path"])
# create heatmap
fig, axs = plt.subplots(1, 1, dpi=300, tight_layout=True)
axs.pcolor(corr_matrix)
# Loop over data dimensions and create text annotations.
for i in range(len(corr_matrix)):
for j in range(len(corr_matrix)):
axs.text(
j+.5,
i+.5,
f"{corr_matrix[coordinates[j]].loc[coordinates[i]]:.2f}",
ha="center",
va="center",
color="w")
axs.set_xticks([i + 0.5 for i in range(len(corr_matrix))])
axs.set_xticklabels(corr_matrix.columns, rotation=90)
axs.set_yticks([i+0.5 for i in range(len(corr_matrix))])
axs.set_yticklabels(corr_matrix.index)
axs.set_title(
"Helical Parameter Correlation "
f"for Base Pair Step \'{self.basepair}\'")
fig.tight_layout()
fig.savefig(
self.stage_io_dict['out']['output_jpg_path'],
format="jpg")
plt.close()
# Copy files to host
self.copy_to_host()
# Remove temporary file(s)
self.remove_tmp_files()
self.check_arguments(output_files_created=True, raise_exception=False)
return 0
[docs]
def get_corr_method(self, corrtype1, corrtype2):
if corrtype1 == "circular" and corrtype2 == "linear":
method = self.circlineal
if corrtype1 == "linear" and corrtype2 == "circular":
method = self.circlineal
elif corrtype1 == "circular" and corrtype2 == "circular":
method = self.circular
else:
method = "pearson"
return method
[docs]
@staticmethod
def circular(x1, x2):
x1 = x1 * np.pi / 180
x2 = x2 * np.pi / 180
diff_1 = np.sin(x1 - x1.mean())
diff_2 = np.sin(x2 - x2.mean())
num = (diff_1 * diff_2).sum()
den = np.sqrt((diff_1 ** 2).sum() * (diff_2 ** 2).sum())
return num / den
[docs]
@staticmethod
def circlineal(x1, x2):
x2 = x2 * np.pi / 180
rc = np.corrcoef(x1, np.cos(x2))[1, 0]
rs = np.corrcoef(x1, np.sin(x2))[1, 0]
rcs = np.corrcoef(np.sin(x2), np.cos(x2))[1, 0]
num = (rc ** 2) + (rs ** 2) - 2 * rc * rs * rcs
den = 1 - (rcs ** 2)
correlation = np.sqrt(num / den)
if np.corrcoef(x1, x2)[1, 0] < 0:
correlation *= -1
return correlation
[docs]
def interhpcorr(
input_filename_shift: str, input_filename_slide: str,
input_filename_rise: str, input_filename_tilt: str,
input_filename_roll: str, input_filename_twist: str,
output_csv_path: str, output_jpg_path: str,
properties: Optional[dict] = None, **kwargs) -> int:
"""Create :class:`InterHelParCorrelation <interbp_correlations.interhpcorr.InterHelParCorrelation>` class and
execute the :meth:`launch() <interbp_correlations.interhpcorr.InterHelParCorrelation.launch>` method."""
return InterHelParCorrelation(**dict(locals())).launch()
interhpcorr.__doc__ = InterHelParCorrelation.__doc__
main = InterHelParCorrelation.get_main(interhpcorr, "Load helical parameter file and save base data individually.")
if __name__ == '__main__':
main()