Skip to content

Commit

Permalink
#46: passes sample
Browse files Browse the repository at this point in the history
  • Loading branch information
weka511 committed Nov 25, 2020
1 parent cfdc1d4 commit f65a6c3
Showing 1 changed file with 78 additions and 15 deletions.
93 changes: 78 additions & 15 deletions qrtd.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,31 +18,94 @@
import argparse
import os
import time
from helpers import read_strings
from helpers import read_strings
from phylogeny import create_adj, parse

def create_edges(tree):
edges = []
def dfs(tree):
id = tree['id']
name = tree['name']
children = tree['children']
parentid = tree['parentid']

for child in children:
edges.append((id,child['id'] if len(child['name'])==0 else child['name']))
dfs(child)
dfs(tree)
return edges

def extract_quartets(species,edges,adj):
def get_leaves(x):
def dfs(u):
if type(u)==int:
for v in adj[u]:
dfs(v)
else:
leaves.append(u)

leaves = []
dfs(x)
return leaves

def split(a,b):
s_b = get_leaves(b)
s_a = [s for s in species if s not in s_b]
return [(s_a[i],s_a[j],s_b[k],s_b[l]) for i in range(len(s_a))
for j in range(i)
for k in range(len(s_b))
for l in range(k)]

def order(a,b,c,d):
if b<a:
b,a = a,b
if d<c:
c,d = d,c
if c<a:
a,c = c,a
b,d = d,b
return (a,b,c,d)

quartets= [q for a,b in edges if type(b)==int for q in split(a,b)]
return [order(a,b,c,d) for a,b,c,d in quartets]


def qrtd(species,newick1,newick2):

def q(n):
return (n*(n-1)*(n-2)*(n-3))/(4*3*2*1)
#def q(n):
#return (n*(n-1)*(n-2)*(n-3))/(4*3*2*1)

def replace_leaves(adj):
return {parent:sorted([seiceps[child] if child in seiceps else child for child in children]) for parent,children in adj.items() }
#def replace_leaves(adj):
#return {parent:sorted([seiceps[child] if child in seiceps else child for child in children]) for parent,children in adj.items() }

def dq(T1,T2):
return q(count_leaves(T1)) + q(count_leaves(T2)) -2 * shared_quartets(T1,T2)
#def dq(T1,T2):
#return q(count_leaves(T1)) + q(count_leaves(T2)) -2 * shared_quartets(T1,T2)

def count_leaves(adj):
return sum([1 for children in adj.values() for child in children if child<n])
#def count_leaves(adj):
#return sum([1 for children in adj.values() for child in children if child<n])

def shared_quartets(T1,T2):
return 0
#def shared_quartets(T1,T2):
#return 0



n = len(species)
seiceps = {species[i]:i for i in range(n)}
tree1 = replace_leaves(create_adj(parse(newick1,start=n)))
tree2 = replace_leaves(create_adj(parse(newick2,start=n)))
return dq(tree1,tree2)
#seiceps = {species[i]:i for i in range(n)}
#tree1 = replace_leaves(create_adj(parse(newick1,start=n)))
#tree2 = replace_leaves(create_adj(parse(newick2,start=n)))
#return dq(tree1,tree2)
tree1 = parse(newick1,start=n)
edges1 = create_edges(tree1)
adj1 = create_adj(tree1)
q1 = set(extract_quartets(species,edges1,adj1))
tree2 = parse(newick2,start=n)
edges2 = create_edges(tree2)
adj2 = create_adj(tree2)
q2 = list(set(extract_quartets(species,edges2,adj2)))
count_shared = 0
for q in q2:
if q in(q1): count_shared+=1
return 2*(n - count_shared)

if __name__=='__main__':
start = time.time()
Expand Down

0 comments on commit f65a6c3

Please sign in to comment.