# 786
# Aldy source: common.py
# This file is subject to the terms and conditions defined in
# file 'LICENSE', which is part of this source code package.
from typing import Iterable, Any, List
import importlib_resources
import re
import time
import pprint
import logbook
import textwrap
import collections
# fmt: off
PROTEINS = {
    # Rows are grouped by the second base (T, C, A, G), then by the third base.
    "TTT": "F", "CTT": "L", "ATT": "I", "GTT": "V",
    "TTC": "F", "CTC": "L", "ATC": "I", "GTC": "V",
    "TTA": "L", "CTA": "L", "ATA": "I", "GTA": "V",
    "TTG": "L", "CTG": "L", "ATG": "M", "GTG": "V",
    "TCT": "S", "CCT": "P", "ACT": "T", "GCT": "A",
    "TCC": "S", "CCC": "P", "ACC": "T", "GCC": "A",
    "TCA": "S", "CCA": "P", "ACA": "T", "GCA": "A",
    "TCG": "S", "CCG": "P", "ACG": "T", "GCG": "A",
    "TAT": "Y", "CAT": "H", "AAT": "N", "GAT": "D",
    "TAC": "Y", "CAC": "H", "AAC": "N", "GAC": "D",
    "TAA": "X", "CAA": "Q", "AAA": "K", "GAA": "E",
    "TAG": "X", "CAG": "Q", "AAG": "K", "GAG": "E",
    "TGT": "C", "CGT": "R", "AGT": "S", "GGT": "G",
    "TGC": "C", "CGC": "R", "AGC": "S", "GGC": "G",
    "TGA": "X", "CGA": "R", "AGA": "R", "GGA": "G",
    "TGG": "W", "CGG": "R", "AGG": "R", "GGG": "G",
}
# fmt: on
"""Codon-to-amino-acid lookup table (stop codons are mapped to X)."""
# Each base on the first string pairs with the base at the same position on the second.
REV_COMPLEMENT = dict(zip("ATCG", "TAGC"))
"""Complement of each DNA base (lookup table used for reverse-complementing)."""
# Module-wide logbook logger, created under the name "Aldy".
log = logbook.Logger("Aldy")
"""Default console logger."""
SOLUTION_PRECISION = 0.01
"""
Tolerance for comparing solution values: two values whose absolute difference is
below this threshold are treated as equal.
"""
[docs]
class AldyException(Exception):
    """Exception type raised for Aldy-specific errors."""
[docs]
class GRange(collections.namedtuple("GRange", "chr start end")):
    """
    Immutable reference genome range (e.g. `chr22:10-20`) with the fields `chr`,
    `start` and `end`.
    """

    def __str__(self):
        # NOTE(review): `samtools` is not defined in the visible part of this class;
        # presumably it is provided elsewhere -- confirm before relying on str().
        region = self.samtools(0, 0, "")
        return region
[docs]
def allele_name(x: str) -> str:
    """
    :returns: Allele name without the gene prefix: everything after the first `*`
        (if there is one), with each `/` replaced by `_`
        (e.g. `'CYP2D6*4'` -> `'4'`).
    """
    _, star, suffix = x.partition("*")
    # Without a star the whole input is treated as the name.
    name = suffix if star else x
    return name.replace("/", "_")
[docs]
def rev_comp(seq: str) -> str:
    """
    :returns: Reverse-complemented DNA sequence. Symbols that have no entry in
        `REV_COMPLEMENT` are copied unchanged.
    """
    complemented = (REV_COMPLEMENT.get(base, base) for base in reversed(seq))
    return "".join(complemented)
[docs]
def seq_to_amino(seq: str) -> str:
    """
    :returns: Protein sequence formed by translating consecutive codons of the
        provided DNA sequence via `PROTEINS`. A trailing incomplete codon is ignored.
    """
    # Only whole codons are translated, so cut the length down to a multiple of three.
    usable = len(seq) - len(seq) % 3
    codons = (seq[pos : pos + 3] for pos in range(0, usable, 3))
    return "".join(PROTEINS[codon] for codon in codons)
[docs]
def freezekey(x):
    """
    Hashing support for dictionaries.

    :param x: Sequence whose first (and optionally second) element is a dictionary.
    :returns: Tuple with the values of the first dictionary ordered by its sorted
        items, followed by the same for the second dictionary if it is present.
        Elements past the second one are ignored.
    """
    frozen = tuple(value for _, value in sorted(x[0].items()))
    if len(x) <= 1:
        return frozen
    return frozen + tuple(value for _, value in sorted(x[1].items()))
[docs]
def sorted_tuple(x: Iterable) -> tuple:
    """:returns: Tuple holding the elements of the iterable in ascending order."""
    ordered = list(x)
    ordered.sort()
    return tuple(ordered)
[docs]
def td(s: str) -> str:
    """
    Shorthand for `textwrap.dedent`: strips the common leading whitespace from every
    line of a multi-line string.
    """
    dedented = textwrap.dedent(s)
    return dedented
[docs]
class Timing:
    """
    Context manager that measures the wall-clock time spent in a `with` block and
    reports it through a callback once the block is left.

    :param name: Label used in the report. Default is `"Block"`.
    :param fn: Callable that receives the report string. Default is `log.debug`.
    """

    def __init__(self, name="Block", fn=None):
        self.name = name
        # Any falsy `fn` falls back to the module logger's debug channel.
        self.fn = fn or log.debug

    def __enter__(self):
        self.start = time.time()
        return self

    def __exit__(self, *_):
        # Report unconditionally; returning None lets exceptions propagate.
        self.end = time.time()
        elapsed = self.end - self.start
        self.fn(f"{self.name} took {elapsed:.2f}s")
[docs]
def pp(x) -> str:
    """:returns: String with the `pprint`-formatted representation of the variable."""
    formatted = pprint.pformat(x)
    return formatted
[docs]
def script_path(key: str) -> str:
    """
    Obtain the full path of a resource.

    :param key: resource to be extracted in `package/path/file` format
        (e.g., `aldy.resources/test.txt`). Everything before the first `/` names the
        package; the remainder is the path of the resource inside that package.
    :returns: Full path of the resource. The path is only composed here; whether the
        resource exists is not checked.
    :raises: :py:class:`aldy.common.AldyException` if the key does not contain a `/`
        that separates the package from the resource.
    """
    package, separator, resource = key.partition("/")
    if not separator:
        raise AldyException(f'"{key}" is not a valid resource name')
    return str(importlib_resources.files(package) / resource)
[docs]
def colorize(text: str, color: str = "green") -> str:
    """
    :returns: Text wrapped by logbook's terminal colorizer using the given color
        name. Default color is `"green"`.
    """
    # Imported lazily, inside the function, as in the original implementation.
    from logbook import _termcolors as termcolors

    return termcolors.colorize(color, text)
[docs]
def parse_cn_region(cn_region):
    """
    Parse a user-provided copy-number neutral region.

    :param cn_region: Region in Samtools format (i.e., `chr1:100-200`) or `None`.
    :returns: :py:class:`GRange` object that represents the region, with a leading
        `chr` removed from the chromosome name; `None` if no region was provided.
    :raises: :py:class:`aldy.common.AldyException` if the region is invalid.
    """
    if cn_region is None:
        return None

    parsed = re.match(r"^(.+?):(\d+)-(\d+)$", cn_region)
    if parsed is None:
        raise AldyException(
            f"Parameter --cn-neutral={cn_region} cannot be parsed. "
            + "Must be chr:start-end (where start and end are numbers)"
        )

    chromosome, start, end = parsed.groups()
    if chromosome.startswith("chr"):
        chromosome = chromosome[3:]
    return GRange(chromosome, int(start), int(end))
[docs]
def chr_prefix(ch: str, chrs: List[str]) -> str:
    """
    Decide whether a chromosome name needs the "chr" prefix to match one of the
    available chromosomes.

    :param ch: Chromosome name to look up.
    :param chrs: Available chromosome names.
    :returns: `"chr"` if `ch` is not available as-is but `"chr" + ch` is;
        empty string otherwise.
    """
    needs_prefix = ch not in chrs and "chr" + ch in chrs
    return "chr" if needs_prefix else ""
[docs]
class JsonDict(dict):
    """
    Dictionary whose item lookup never fails: reading a missing key stores a new empty
    `JsonDict` under that key and returns it. Used to ease handling and populating
    nested JSON objects.
    """

    def __getitem__(self, key):
        if key in self:
            return self.get(key)
        # First access to this key: create the nested dictionary and keep it.
        child = JsonDict()
        self[key] = child
        return child
# Module-level JsonDict instance.
# NOTE(review): the name matches the standard-library `json` module -- take care when
# both are imported into the same namespace.
json: Any = JsonDict()