Skip to content

Commit

Permalink
adding local neighborhood
Browse files Browse the repository at this point in the history
  • Loading branch information
sumedhars committed Jun 14, 2024
1 parent 5616f76 commit a45e982
Show file tree
Hide file tree
Showing 15 changed files with 208 additions and 10 deletions.
10 changes: 10 additions & 0 deletions .github/workflows/test-spras.yml
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,7 @@ jobs:
docker pull reedcompbio/mincostflow:latest
docker pull reedcompbio/allpairs:latest
docker pull reedcompbio/domino:latest
docker pull sumedhars/local-neighborhood:latest
- name: Build Omics Integrator 1 Docker image
uses: docker/build-push-action@v1
with:
Expand Down Expand Up @@ -155,6 +156,15 @@ jobs:
tags: v2
cache_froms: reedcompbio/py4cytoscape:v2
push: false
- name: Build Local Neighborhood Docker image
uses: docker/build-push-action@v1
with:
path: docker-wrappers/LocalNeighborhood/.
dockerfile: docker-wrappers/LocalNeighborhood/Dockerfile
repository: sumedhars/local-neighborhood
tags: v2
cache_froms: sumedhars/local-neighborhood:latest
push: false

# Run pre-commit checks on source files
pre-commit:
Expand Down
20 changes: 12 additions & 8 deletions config/config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,15 @@ container_registry:
base_url: docker.io
# The owner or project of the registry
# For example, "reedcompbio" if the image is available as docker.io/reedcompbio/allpairs
owner: reedcompbio
owner: sumedhars

# This list of algorithms should be generated by a script which checks the filesystem for installs.
# It shouldn't be changed by mere mortals. (alternatively, we could add a path to executable for each algorithm
# in the list to reduce the number of assumptions of the program at the cost of making the config a little more involved)
# Each algorithm has an 'include' parameter. By toggling 'include' to true/false the user can change
# which algorithms are run in a given experiment.
#
# algorithm-specific parameters are embedded in lists so that users can specify multiple. If multiple
# algorithm-specific parameters are embedded in lists so that users can specify multiple. If multiple
# parameters are specified then the algorithm will be run as many times as needed to cover all parameter
# combinations. For instance if we have the following:
# - name: "myAlg"
Expand All @@ -43,15 +43,15 @@ algorithms:

- name: "omicsintegrator1"
params:
include: true
include: false
run1:
b: [5, 6]
w: np.linspace(0,5,2)
d: [10]

- name: "omicsintegrator2"
params:
include: true
include: false
run1:
b: [4]
g: [0]
Expand All @@ -61,29 +61,33 @@ algorithms:

- name: "meo"
params:
include: true
include: false
run1:
max_path_length: [3]
local_search: ["Yes"]
rand_restarts: [10]

- name: "mincostflow"
params:
include: true
include: false
run1:
flow: [1] # The flow must be an int
capacity: [1]

- name: "allpairs"
params:
include: true
include: false

- name: "domino"
params:
include: true
include: false
run1:
slice_threshold: [0.3]
module_threshold: [0.05]

- name: "localneighborhood"
params:
include: true


# Here we specify which pathways to run and other file location information.
Expand Down
13 changes: 13 additions & 0 deletions docker-wrappers/LocalNeighborhood/Dockerfile
Original file line number Diff line number Diff line change
@@ -1 +1,14 @@
# Docker image for the Local Neighborhood algorithm

# Local Neighborhood wrapper

FROM python:3.12.3-alpine3.20

# The SPRAS wrapper invokes the script as /LocalNeighborhood/local_neighborhood.py
WORKDIR /LocalNeighborhood

# Copy the py file to the working directory
COPY local_neighborhood.py .

# Copy the example network and node input files to the working directory
COPY ln-network.txt .
COPY ln-nodes.txt .
5 changes: 5 additions & 0 deletions docker-wrappers/LocalNeighborhood/ln-bad-network.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
A|B|E
C|B
C|D
D|E
A|E
5 changes: 5 additions & 0 deletions docker-wrappers/LocalNeighborhood/ln-network.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
A|B
C|B
C|D
D|E
A|E
2 changes: 2 additions & 0 deletions docker-wrappers/LocalNeighborhood/ln-nodes.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
A
B
3 changes: 3 additions & 0 deletions docker-wrappers/LocalNeighborhood/ln-output.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
A|B
C|B
A|E
3 changes: 3 additions & 0 deletions docker-wrappers/LocalNeighborhood/output1.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
A|B
C|B
A|E
136 changes: 136 additions & 0 deletions spras/local_neighborhood.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
from spras.prm import PRM
from pathlib import Path
from spras.containers import prepare_volume, run_container
from spras.util import add_rank_column
import pandas as pd
from spras.interactome import reinsert_direction_col_undirected

__all__ = ['LocalNeighborhood']


class LocalNeighborhood(PRM):
    """
    SPRAS wrapper for the Local Neighborhood pathway reconstruction algorithm.

    Writes the algorithm's input files from a dataset, runs the algorithm inside
    a container, and converts its raw output into the universal pathway format.
    """
    required_inputs = ['network', 'nodetypes']

    @staticmethod
    def generate_inputs(data, filename_map):
        """
        Access fields from the dataset and write the required input files
        @param data: dataset
        @param filename_map: a dict mapping file types in the required_inputs to the filename for that type
        @return: None, the node and network files are written to the paths given in filename_map
        @raise ValueError: if a required filename is missing or the dataset has neither a prize column
        nor the active, sources, and targets columns
        """
        for input_type in LocalNeighborhood.required_inputs:
            if input_type not in filename_map:
                raise ValueError(f"{input_type} filename is missing")

        # Prefer prizes when the dataset has them, otherwise fall back to the active/source/target annotations
        if data.contains_node_columns('prize'):
            node_df = data.request_node_columns(['prize'])
        elif data.contains_node_columns(['active', 'sources', 'targets']):
            node_df = data.request_node_columns(['active', 'sources', 'targets'])
        else:
            raise ValueError("Local Neighborhood requires node prizes or sources and targets")

        # The node file lists identifiers only, one per line, so no prize values need to be derived here
        node_df.to_csv(filename_map['nodetypes'], sep='\t', index=False, columns=['NODEID'], header=False)

        # The network file is written as one edge per line in the form Interactor1|Interactor2
        edges_df = data.get_interactome()
        edges_df.to_csv(filename_map['network'], sep='|', index=False,
                        columns=['Interactor1', 'Interactor2'],
                        header=False)

    @staticmethod
    def run(nodetypes=None, network=None, output_file=None, container_framework="docker"):
        """
        Run Local Neighborhood in a container
        @param nodetypes: input node file as written by generate_inputs (required)
        @param network: input network file as written by generate_inputs (required)
        @param output_file: path to the output pathway file (required)
        @param container_framework: choose the container runtime framework, currently supports "docker" or "singularity" (optional)
        @raise ValueError: if any of the required arguments is missing
        """
        if not nodetypes or not network or not output_file:
            raise ValueError('Required Local Neighborhood arguments are missing')

        work_dir = '/spras'

        # Each volume is a tuple (src, dest)
        volumes = list()

        bind_path, node_file = prepare_volume(nodetypes, work_dir)
        volumes.append(bind_path)

        bind_path, network_file = prepare_volume(network, work_dir)
        volumes.append(bind_path)

        # Make sure the output file's parent directory exists before it is mapped into the container
        Path(output_file).parent.mkdir(parents=True, exist_ok=True)
        bind_path, mapped_out_file = prepare_volume(output_file, work_dir)
        volumes.append(bind_path)

        # The script path matches the WORKDIR and COPY destination used in the image's Dockerfile
        command = ['python',
                   '/LocalNeighborhood/local_neighborhood.py',
                   '--network', network_file,
                   '--nodes', node_file,
                   '--output', mapped_out_file]

        print(f"Running Local Neighborhood with arguments: {' '.join(command)}", flush=True)

        container_suffix = "local-neighborhood"
        out = run_container(container_framework,
                            container_suffix,
                            command,
                            volumes,
                            work_dir)
        print(out)

    @staticmethod
    def parse_output(raw_pathway_file, standardized_pathway_file):
        """
        Convert a predicted pathway into the universal format
        @param raw_pathway_file: pathway file produced by an algorithm's run function
        @param standardized_pathway_file: the same pathway written in the universal format
        """
        try:
            df = pd.read_csv(raw_pathway_file, sep='|', header=None)
        except pd.errors.EmptyDataError:
            # An empty raw pathway yields an empty standardized pathway file
            with open(standardized_pathway_file, 'w'):
                pass
            return

        # The raw output only has the two endpoint columns, so add the rank and direction columns
        df = add_rank_column(df)
        df = reinsert_direction_col_undirected(df)
        df.to_csv(standardized_pathway_file, index=False, header=False, sep='\t')

1 change: 1 addition & 0 deletions spras/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from spras.omicsintegrator1 import OmicsIntegrator1 as omicsintegrator1
from spras.omicsintegrator2 import OmicsIntegrator2 as omicsintegrator2
from spras.pathlinker import PathLinker as pathlinker
from spras.local_neighborhood import LocalNeighborhood as localneighborhood


def run(algorithm, params):
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
A|B
C|B
A|E
3 changes: 2 additions & 1 deletion test/generate-inputs/test_generate_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@
'omicsintegrator2': 'edges',
'domino': 'network',
'pathlinker': 'network',
'allpairs': 'network'
'allpairs': 'network',
'local_neighborhood':'network'
}


Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
A B 1 U
B C 1 U
A D 1 U
C D 1 U
C E 1 U
C F 1 U
6 changes: 6 additions & 0 deletions test/parse-outputs/input/local_neighborhood-raw-pathway.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
A|B
B|C
A|D
C|D
C|E
C|F
2 changes: 1 addition & 1 deletion test/parse-outputs/test_parse_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
# the DOMINO output of the network dip.sif and the nodes tnfa_active_genes_file.txt
# from https://github.com/Shamir-Lab/DOMINO/tree/master/examples

algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino']
algorithms = ['mincostflow', 'meo', 'omicsintegrator1', 'omicsintegrator2', 'pathlinker', 'allpairs', 'domino','local_neighborhood']


class TestParseOutputs:
Expand Down

0 comments on commit a45e982

Please sign in to comment.