Merge branch 'development'
wishabc committed Nov 11, 2019
2 parents 84f2f0b + 97178bd commit 3ff07f8
Showing 9 changed files with 74 additions and 50 deletions.
17 changes: 10 additions & 7 deletions scripts/ASBcalling/Aggregation.py
@@ -62,7 +62,7 @@ def get_another_agr(path, what_for):
tables = cell_lines_dict[key_name]
if what_for == "TF":
tables = tf_dict[key_name]
- print('Reading datasets for {} '.format(what_for) + key_name)
+ print('Reading datasets for {} {}'.format(what_for, key_name))
common_snps = dict()
for table in tables:
if os.path.isfile(table):
@@ -89,15 +89,15 @@ def get_another_agr(path, what_for):
dip_qual, lq, rq, seg_c, p_ref, p_alt,
table_name, another_agr)]

- print('Writing ', key_name)
+ print('Writing {}'.format(key_name))

with open(results_path + what_for + "_P-values/" + key_name + '_common_table.tsv', 'w') as out:
out.write(pack(['#chr', 'pos', 'ID', 'ref', 'alt', 'repeat_type', 'total_callers', 'unique_callers', 'm_ploidy',
'm_q', 'm_dipq', 'm_segc', 'm_datasets', 'maxdepth_ref/alt', 'maxdepth_ploidy', 'maxdepth_m1',
'maxdepth_m2', 'mostsig_ref/alt', 'mostsig_ploidy', 'mostsig_m1', 'mostsig_m2',
'min_cover', 'max_cover', 'med_cover', 'mean_cover', 'total_cover', 'm1_ref', 'm1_alt',
'm2_ref', 'm2_alt',
- 'm_hpref', 'm_hpalt', 'm_fpref', 'm_fpalt', 'm_stpref', 'm_stpalt']))
+ 'm_hpref', 'm_hpalt', 'm_fpref', 'm_fpalt', 'm_logpref', 'm_logpalt', 'm_stpref', 'm_stpalt']))

filtered_snps = dict()
for key in common_snps:
@@ -107,7 +107,7 @@ def get_another_agr(path, what_for):
break

counter = 0
- print(len(filtered_snps), 'snps')
+ print('{} snps'.format(len(filtered_snps)))

if len(filtered_snps) == 0:
sys.exit(0)
@@ -120,7 +120,7 @@ def get_another_agr(path, what_for):
value = filtered_snps[key]
counter += 1
if counter % 10000 == 0:
- print(counter, 'done')
+ print('done {}'.format(counter))
c_uniq_callers = dict(zip(callers_names, [False]*len(callers_names)))
m_total_callers = 0
c_ploidy = []
@@ -184,6 +184,8 @@ def get_another_agr(path, what_for):
m_hpalt = stats.hmean(c_palt)
m_fpref = stats.combine_pvalues(c_pref, method='fisher')[1]
m_fpalt = stats.combine_pvalues(c_palt, method='fisher')[1]
+ m_logpref = stats.combine_pvalues(c_pref, method='mudholkar_george')[1]
+ m_logpalt = stats.combine_pvalues(c_palt, method='mudholkar_george')[1]
m_stpref = stats.combine_pvalues(c_pref, method='stouffer')[1]
m_stpalt = stats.combine_pvalues(c_palt, method='stouffer')[1]

@@ -248,6 +250,7 @@ def get_another_agr(path, what_for):
m1_ref, m1_alt, m2_ref, m2_alt,
m_hpref, m_hpalt,
m_fpref, m_fpalt,
+ m_logpref, m_logpalt,
m_stpref, m_stpalt]))
origin_of_snp_dict["\t".join(map(str, key))] = {'aligns': c_table_names,
expected_args[what_for]: c_another_agr,
@@ -257,9 +260,9 @@ def get_another_agr(path, what_for):
print("Counting FDR")
with open(results_path + what_for + "_P-values/" + key_name + '_common_table.tsv', 'r') as f:
table = pd.read_table(f)
- bool_ar_ref, p_val_ref, _, _ = statsmodels.stats.multitest.multipletests(table["m_fpref"],
+ bool_ar_ref, p_val_ref, _, _ = statsmodels.stats.multitest.multipletests(table["m_logpref"],
alpha=0.05, method='fdr_bh')
- bool_ar_alt, p_val_alt, _, _ = statsmodels.stats.multitest.multipletests(table["m_fpalt"],
+ bool_ar_alt, p_val_alt, _, _ = statsmodels.stats.multitest.multipletests(table["m_logpalt"],
alpha=0.05, method='fdr_bh')
table["m_fdr_ref"] = pd.Series(p_val_ref)
table["m_fdr_alt"] = pd.Series(p_val_alt)
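
Note on the Aggregation.py change above: per-dataset p-values are now additionally combined with the Mudholkar-George (logit) method, and the FDR step switches from the Fisher columns to the new m_logpref/m_logpalt columns. A minimal, self-contained sketch of that pipeline with made-up p-values (not the repository's code; assumes SciPy >= 1.4, which provides method='mudholkar_george'):

from scipy import stats
from statsmodels.stats.multitest import multipletests

# Hypothetical per-SNP lists of per-dataset p-values
pvals_per_snp = [[0.01, 0.20, 0.03],
                 [0.50, 0.70, 0.90]]

# combine_pvalues returns (statistic, p-value); keep the p-value, as the diff does
m_logp = [stats.combine_pvalues(p, method='mudholkar_george')[1]
          for p in pvals_per_snp]

# Benjamini-Hochberg correction over the combined column, as in the FDR block above
reject, p_fdr, _, _ = multipletests(m_logp, alpha=0.05, method='fdr_bh')
print(p_fdr)
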
13 changes: 8 additions & 5 deletions scripts/CORRELATIONanalysis/Annotate_SNPs_with_ploidy.py
@@ -8,7 +8,10 @@


def unpack_ploidy_segments(line):
+ if line[0] == '#':
+ return [''] * 6
line = line.strip().split('\t')

return [line[0], int(line[1]), int(line[2]), float(line[3]), int(line[4]), int(line[7])]


@@ -33,14 +36,14 @@ def unpack_snps(line):
lab = file_name.split('!')[1][:-4]

try:
- aligns = list(set(aligns_by_cell_type[file_name[:-4]])) # .tsv
- datasetsn = len(aligns)
- al_list = [align[29:-7] for align in aligns]
+ aligns = aligns_by_cell_type[file_name[:-4]] # .tsv
+ al_list = [align[29:-7] for align in aligns if os.path.isfile(align)]
+ datasetsn = len(al_list)
except KeyError:
datasetsn = 'nan'
al_list = []
print(file_name)

names, _ = read_synonims()
if name in names:
table_path = ploidy_path + file_name
@@ -50,7 +53,7 @@ def unpack_snps(line):
ploidy_file_path = ploidy_path + mode + '/' + name + '!' + lab + '_ploidy.tsv'
out_path = correlation_path + mode + '_tables/' + name + '_' + lab.replace('_', '-') + '.tsv'
print(out_path)

with open(table_path, 'r') as table, open(ploidy_file_path, 'r') as ploidy, open(out_path, 'w') as out:
out.write('#' + str(datasetsn) + '!' + lab + '!' + '>'.join(al_list) + '\n')
for chr, pos, ref, alt, in_intersection, segment_ploidy, qual, segn \
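
Note on the Annotate_SNPs_with_ploidy.py change above: the dataset count is now derived from alignments whose files actually exist on disk rather than from the raw list. A toy sketch of the new filter with hypothetical paths (the align[29:-7] slice is the repository's own name trimming and is left out here):

import os

aligns = ['/hypothetical/align_1.table', '/hypothetical/align_2.table']
al_list = [a for a in aligns if os.path.isfile(a)]  # keep only files present on disk
datasetsn = len(al_list)                            # count datasets from the filtered list
print(datasetsn)
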
80 changes: 49 additions & 31 deletions scripts/CORRELATIONanalysis/CorStats.py
@@ -3,7 +3,7 @@
from scipy.stats import kendalltau

sys.path.insert(1, '/home/abramov/ASB-Project')
- from scripts.HELPERS.helpers import CorrelationReader, Intersection, pack, read_synonims
+ from scripts.HELPERS.helpers import CorrelationReader, Intersection, pack, read_synonims, ChromPos
from scripts.HELPERS.paths import parameters_path, correlation_path, heatmap_data_path

CGH_path = parameters_path + 'CHIP_hg38.sorted.bed'
@@ -23,7 +23,7 @@ def count_cosmic_segments():

def unpack_cosmic_segments(line, mode='normal'):
if line[0] == '#':
- return []
+ return [''] * len(line.strip().split('\t'))
line = line.strip().split('\t')

if line[0] != cosmic_names[cell_line_name]:
@@ -41,32 +41,51 @@ def unpack_cosmic_segments(line, mode='normal'):
return [line[1], int(line[2]), int(line[3]), value]


- def correlation_with_cosmic(SNPs_iterator, mode, heatmap_file_path=None):
- heat_map = None
- if heatmap_file_path is not None:
- heat_map = open(heatmap_file_path, 'w')
+ def correlation_with_cosmic(SNP_objects, mode, heatmap_data_file=None):
+ if heatmap_data_file is not None:
+ heatmap = open(heatmap_data_file, 'w')
+ cosmic_segments = []
with open(cosmic_path, 'r') as cosmic_file:
- snp_bad_list = []
- cosmic_bad_list = []
- for chr, pos, ploidy, qual, segn, in_intersect, cosmic_bad \
- in Intersection(SNPs_iterator, cosmic_file, write_intersect=True,
- unpack_segments_function=lambda x: unpack_cosmic_segments(x, mode=mode),
- write_segment_args=True):
- if not in_intersect:
- continue
- snp_bad_list.append(ploidy)
- cosmic_bad_list.append(cosmic_bad)
-
- if heat_map is not None:
- heat_map.write(pack([chr, pos, ploidy, cosmic_bad]))
- if heat_map is not None:
- heat_map.close()
-
- if len(snp_bad_list) != 0:
- return kendalltau(snp_bad_list, cosmic_bad_list)[0]
+ for line in cosmic_file:
+ # TODO: change v name
+ v = unpack_cosmic_segments(line, mode=mode)
+ if v:
+ cosmic_segments.append(v)
+ snp_ploidy = []
+ cosm_ploidy = []
+ for chr, pos, ploidy, qual, segn, in_intersect, cosmic_ploidy \
+ in Intersection(SNP_objects, cosmic_segments, write_intersect=True,
+ write_segment_args=True):
+ if not in_intersect:
+ continue
+ snp_ploidy.append(ploidy)
+ cosm_ploidy.append(cosmic_ploidy)
+
+ if heatmap_data_file is not None:
+ heatmap.write(pack([chr, pos, ploidy, cosmic_ploidy]))
+ if heatmap_data_file is not None:
+ heatmap.close()
+
+ if len(snp_ploidy) != 0:
+ return kendalltau(snp_ploidy, cosm_ploidy)[0]
return 'NaN'


+ def find_nearest_probe_to_SNP(SNP_objects, CGH_objects):
+ nearest_probes = []
+ if not CGH_objects:
+ return []
+ i = 0
+ for SNP in SNP_objects:
+ SNP = [ChromPos(SNP[0], SNP[1])] + SNP[2:]
+ current_distance = SNP[0].distance(ChromPos(CGH_objects[i][0], CGH_objects[i][1]))
+ while SNP[0].distance(ChromPos(CGH_objects[i + 1][0], CGH_objects[i + 1][1])) <= current_distance:
+ current_distance = SNP[0].distance(ChromPos(CGH_objects[i + 1][0], CGH_objects[i + 1][1]))
+ i += 1
+ nearest_probes.append(CGH_objects[i])
+ return nearest_probes


if __name__ == '__main__':
file_name = sys.argv[1]
print(file_name)
@@ -112,22 +131,19 @@ def correlation_with_cosmic(SNPs_iterator, mode, heatmap_file_path=None):
segment_numbers[model] = segments_number
corr_to_objects[model] = correlation_with_cosmic(SNP_objects,
mode='normal',
- heatmap_file_path=heatmap_data_file)
+ heatmap_data_file=heatmap_data_file)

for naive_mode in naive_modes:
number_of_datasets, lab, SNP_objects, aligns, segments_number = reader.read_SNPs(method=naive_mode)

corr_to_objects[naive_mode] = correlation_with_cosmic(SNP_objects, mode='normal')

- # TODO: add 3-5 neighbours naive
-
- # TODO: add closest chip COR
-
- # print('reading CGH')
CGH_objects = reader.read_CGH(cgh_names[cell_line_name])
+ nearest_cgh_objects = find_nearest_probe_to_SNP(SNP_objects, CGH_objects)

corr_to_objects_chip = correlation_with_cosmic(CGH_objects, mode='total')

+ corr_to_objects_chip_nearest = correlation_with_cosmic(nearest_cgh_objects, mode='total')
out_line = '\t'.join(map(lambda x: '\t'.join(map(str, x)),

[[cell_line_name, lab, aligns, len(SNP_objects), number_of_datasets,
@@ -139,7 +155,9 @@ def correlation_with_cosmic(SNPs_iterator, mode, heatmap_file_path=None):
[[corr_to_objects[naive_mode]]
for naive_mode in naive_modes] +

- [[corr_to_objects_chip]]
+ [[corr_to_objects_chip]] +
+
+ [[corr_to_objects_chip_nearest]]

)) + '\n'
out.write(out_line)
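
Note on the new find_nearest_probe_to_SNP above: since SNPs and CGH probes are both sorted by genomic position, one forward-moving index finds each SNP's closest probe in a single pass, and the resulting probe track is then correlated with COSMIC ploidy via Kendall's tau like the other tracks. A simplified, self-contained sketch of the idea using plain integer positions on one chromosome (the real code compares ChromPos objects; the bounds check on i + 1 is added here for the toy lists):

from scipy.stats import kendalltau

def nearest_probes(snp_pos, probe_pos):
    # both inputs assumed sorted; the probe index only ever moves forward
    nearest = []
    i = 0
    for s in snp_pos:
        while i + 1 < len(probe_pos) and abs(probe_pos[i + 1] - s) <= abs(probe_pos[i] - s):
            i += 1
        nearest.append(probe_pos[i])
    return nearest

snps = [100, 250, 900]         # hypothetical sorted SNP positions
probes = [90, 300, 850, 1000]  # hypothetical sorted probe positions
print(nearest_probes(snps, probes))  # -> [90, 300, 850]

# Kendall's tau, as used in correlation_with_cosmic
tau, _ = kendalltau([1, 2, 2, 3], [1.5, 2.0, 2.5, 3.5])
print(tau)
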
2 changes: 1 addition & 1 deletion scripts/CORRELATIONanalysis/JoinThreads.py
@@ -21,7 +21,7 @@ def get_name_by_dir(dir_name):
'cor_by_snp' + get_name_by_dir(snp_dir)]
for snp_dir in snp_dirs] +
[['cor_by_snp_naive',
- 'cor_by_probe_CGH']]
+ 'cor_by_probe_CGH', 'cor_by_snp_probe_CGH']]
)) + '\n')

for file_name in os.listdir(Correlation_path):
3 changes: 2 additions & 1 deletion scripts/HELPERS/helpers.py
@@ -103,7 +103,6 @@ def get_next_snp(self):
def get_next_segment(self):
try:
seg_chr, start_pos, end_pos, *self.seg_args = self.unpack_segments_function(next(self.segments))
-
self.segment_start = ChromPos(seg_chr, start_pos)
self.segment_end = ChromPos(seg_chr, end_pos)
except StopIteration:
@@ -312,6 +311,8 @@ def read_CGH(self, cgh_name):
'LC:NCI-H460', 'LC:NCI-H522', 'OV:IGROV1', 'OV:OVCAR-3', 'OV:OVCAR-4', 'OV:OVCAR-5', 'OV:OVCAR-8',
'OV:SK-OV-3', 'OV:NCI/ADR-RES', 'PR:PC-3', 'PR:DU-145', 'RE:786-0', 'RE:A498', 'RE:ACHN',
'RE:CAKI-1', 'RE:RXF 393', 'RE:SN12C', 'RE:TK-10', 'RE:UO-31']
+ if cgh_name not in cgnames:
+ return []
idx = cgnames.index(cgh_name) + 3
with open(self.CGH_path, 'r') as file:
result = []
2 changes: 1 addition & 1 deletion scripts/PARAMETERS/cor.sh
@@ -1,6 +1,6 @@
#!/bin/bash

- ParametersListsFolder="/home/abramov/ParallelParameters/stats/"
+ ParametersListsFolder="/home/abramov/ParallelParameters/"
ScriptsFolder="/home/abramov/ASB-Project/scripts/"

njobs=$1
1 change: 0 additions & 1 deletion scripts/PLOIDYcalling/PloidyEstimation.py
@@ -22,7 +22,6 @@ def __init__(self):
self.border_numbers = None
self.positions = []

- # TODO: make abstract
self.LINES = None
self.last_snp_number = None
self.candidate_numbers = None
4 changes: 2 additions & 2 deletions scripts/SNPcalling/ProcessLine.sh
@@ -42,8 +42,8 @@ if [ "$TF" != "None" ]; then
OutPath=${AlignmentsPath}"EXP/$TF/$ExpName/"
AlignmentFullPath=${AlignmentsPath}"EXP/$TF/$ExpName/$AlignName.bam"
else
- if ! [ -d ${AlignmentsPath}"CTRL/$EXP" ]; then
- if ! mkdir ${AlignmentsPath}"CTRL/$EXP"
+ if ! [ -d ${AlignmentsPath}"CTRL/$ExpName" ]; then
+ if ! mkdir ${AlignmentsPath}"CTRL/$ExpName"
then
echo "Failed to make dir $ExpName"
exit 1
2 changes: 1 addition & 1 deletion scripts/SNPcalling/SNPcalling.sh
@@ -75,7 +75,7 @@ if ! $Java $JavaParameters -jar "$PICARD" \
REMOVE_DUPLICATES=true \
M="$OutPath/${BamName}_metrics.txt"
then
echo "Failed to picard.MarkDplicates"
echo "Failed to picard.MarkDuplicates"
exit 1
fi

