From 23b41cd19520f5a61161a2169dc070546e8d2796 Mon Sep 17 00:00:00 2001 From: Mayank Murali Date: Tue, 28 Feb 2023 15:56:11 -0500 Subject: [PATCH] Removed sqtl files --- biosurfer/analysis/sqtl.py | 69 ------------------------------ biosurfer/analysis/sqtl_old.py | 78 ---------------------------------- 2 files changed, 147 deletions(-) delete mode 100644 biosurfer/analysis/sqtl.py delete mode 100644 biosurfer/analysis/sqtl_old.py diff --git a/biosurfer/analysis/sqtl.py b/biosurfer/analysis/sqtl.py deleted file mode 100644 index a468086..0000000 --- a/biosurfer/analysis/sqtl.py +++ /dev/null @@ -1,69 +0,0 @@ -from itertools import product -from more_itertools import only, partition -from statistics import median -from typing import TYPE_CHECKING, Iterable, List -from biosurfer.core.alignments import TranscriptAlignment, CodonAlignment, ProteinAlignment - -from biosurfer.core.constants import (AnnotationFlag, - SequenceAlignmentCategory, - ProteinRegion, Strand) -from biosurfer.core.models.nonpersistent import GenomeRange, Junction -from biosurfer.core.splice_events import ExonBypassEvent - -if TYPE_CHECKING: - from biosurfer.core.splice_events import BasicTranscriptEvent - from biosurfer.core.alignments import CodonAlignmentBlock, ProteinAlignmentBlock - from biosurfer.core.models.nonpersistent import Junction, Protein, Transcript - - -def split_transcripts_on_junction_usage(junction: 'Junction', transcripts: Iterable['Transcript']): - return partition(lambda transcript: junction in transcript.junctions, transcripts) - -def pairwise_align_protein_isoforms(protein_group_a: Iterable['Protein'], protein_group_b: Iterable['Protein']): - return [ - ( - TranscriptAlignment.from_transcripts(anchor.transcript, other.transcript), - CodonAlignment.from_proteins(anchor, other), - ProteinAlignment.from_proteins(anchor, other) - ) for anchor, other in product(protein_group_a, protein_group_b) - ] - -def get_transcript_events_associated_with_junction(junction: 'Junction', tx_aln: 'TranscriptAlignment'): - return [ - event for event in tx_aln.basic_events - if ( - junction in getattr(event, 'anchor_junctions', ()) + getattr(event, 'other_junctions', ()) - or isinstance(event, ExonBypassEvent) and (event.exon & GenomeRange(junction.donor - 1, junction.acceptor + 1)) - ) - ] - -def get_cblocks_attributed_to_transcript_event(tx_event: 'BasicTranscriptEvent', cd_aln: 'CodonAlignment'): - tx_aln = TranscriptAlignment.from_transcripts(cd_aln.anchor.transcript, cd_aln.other.transcript) - tblock = tx_aln.event_to_block.get(tx_event) - return cd_aln.tblock_to_cblocks.get(tblock, ()) - -# def junction_has_drastic_effect_in_pair( -# junction: 'Junction' = None, -# anchor: 'Protein' = None, -# other: 'Protein' = None, -# pblocks: Iterable['ProteinAlignmentBlock'] = None, -# threshold_delta_length: int = None) -> bool: - -# if pblocks is None: -# pblocks = get_pblocks_attributed_to_junction(junction, [Alignment(anchor, other)]) -# else: -# anchor = pblocks[0].anchor -# other = pblocks[0].other - -# if len(pblocks) == 0: -# return False -# if threshold_delta_length is None: -# threshold_delta_length = max(anchor.length, other.length) * 2 // 5 -# # FIXME: this will return a false positive if junction-related pblock is deletion, but another pblock is a similar-length insertion -# return (anchor.orf.nmd ^ other.orf.nmd or -# abs(median(pblock.delta_length for pblock in pblocks)) >= abs(threshold_delta_length)) - -# def get_event_counts(pblocks: Iterable['ProteinAlignmentBlock']): -# events = [event for event in AnnotationFlag.__members__.values() if event is not AnnotationFlag.NONE] -# counts = {event: sum(event & pblock.flags == event for pblock in pblocks) for event in events} -# return counts diff --git a/biosurfer/analysis/sqtl_old.py b/biosurfer/analysis/sqtl_old.py deleted file mode 100644 index 23d089b..0000000 --- a/biosurfer/analysis/sqtl_old.py +++ /dev/null @@ -1,78 +0,0 @@ -from more_itertools import partition -from statistics import median -from typing import TYPE_CHECKING, Iterable, List - -from biosurfer.core.alignments import (FeatureAlignment, - pairwise_align_protein_sets) -from biosurfer.core.constants import (AnnotationFlag, - SequenceAlignmentCategory, - ProteinRegion, Strand) - -if TYPE_CHECKING: - from biosurfer.core.alignments import (ProteinAlignmentBlock, - Alignment) - from biosurfer.core.models.nonpersistent import Junction, Protein, Transcript - - -def split_transcripts_on_junction_usage(junction: 'Junction', transcripts: Iterable['Transcript']): - # def contains_both_splice_sites(transcript): - # return (transcript.start <= junction.donor <= transcript.stop and - # transcript.start <= junction.acceptor <= transcript.stop) - def uses_junction(transcript): - return junction in transcript.junctions - return partition(uses_junction, transcripts) - -def pairwise_align_on_junction_usage(junction: 'Junction', transcripts: Iterable['Transcript']): - using, not_using = split_transcripts_on_junction_usage(junction, transcripts) - alns = pairwise_align_protein_sets((tx.protein for tx in not_using), (tx.protein for tx in using)) - return alns, using, not_using - -def get_pblocks_related_to_junction(junction: 'Junction', alns: Iterable['Alignment']): - pblocks: List['ProteinAlignmentBlock'] = [] - for aln in alns: - up_exon, down_exon = aln.other.transcript.get_exons_from_junction(junction) - def is_related_to_junc(pblock): - result = False - if pblock.category is SequenceAlignmentCategory.DELETION: - start = pblock.anchor_residues[0].codon[1].coordinate - stop = pblock.anchor_residues[-1].codon[1].coordinate - else: - start = pblock.other_residues[0].codon[1].coordinate - stop = pblock.other_residues[-1].codon[1].coordinate - if junction.strand is Strand.PLUS: - result = junction.donor.coordinate <= stop + 1 and start - 1 <= junction.acceptor.coordinate - elif junction.strand is Strand.MINUS: - result = junction.donor.coordinate >= stop - 1 and start + 1 >= junction.acceptor.coordinate - if pblock.region is ProteinRegion.NTERMINUS: - result |= down_exon in pblock.other_exons - return result - junc_related_pblocks = [pblock for pblock in aln.protein_blocks - if pblock.category is not SequenceAlignmentCategory.MATCH and is_related_to_junc(pblock)] - pblocks.extend(junc_related_pblocks) - return pblocks - -def junction_has_drastic_effect_in_pair( - junction: 'Junction' = None, - anchor: 'Protein' = None, - other: 'Protein' = None, - pblocks: Iterable['ProteinAlignmentBlock'] = None, - threshold_delta_length: int = None) -> bool: - - if pblocks is None: - pblocks = get_pblocks_related_to_junction(junction, [Alignment(anchor, other)]) - else: - anchor = pblocks[0].anchor - other = pblocks[0].other - - if len(pblocks) == 0: - return False - if threshold_delta_length is None: - threshold_delta_length = max(anchor.length, other.length) * 2 // 5 - # FIXME: this will return a false positive if junction-related pblock is deletion, but another pblock is a similar-length insertion - return (anchor.orf.nmd ^ other.orf.nmd or - abs(median(pblock.delta_length for pblock in pblocks)) >= abs(threshold_delta_length)) - -def get_event_counts(pblocks: Iterable['ProteinAlignmentBlock']): - events = [event for event in AnnotationFlag.__members__.values() if event is not AnnotationFlag.NONE] - counts = {event: sum(event & pblock.flags == event for pblock in pblocks) for event in events} - return counts