Skip to content

Commit

Permalink
#1072 adding some feedback for generate_protein_network file and add …
Browse files Browse the repository at this point in the history
…truncate function for loader.py file
  • Loading branch information
ntran18 committed Nov 28, 2023
1 parent 8b1797f commit 63005b7
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@
# Get Network Data from Yeastmine

def getPhysicalInteractions(gene):
print("Query data from Yeastmine to get Physical Interactions")
query = service.new_query("Gene")
query.add_constraint("interactions.participant2", "Gene")
query.add_view(
Expand All @@ -53,6 +54,7 @@ def getPhysicalInteractions(gene):
return query

def getProteinFromGene(gene):
print("Query data from Yeastmine to get Protein information")
query = service.new_query("Gene")
query.add_view(
"primaryIdentifier", "proteins.symbol", "sgdAlias", "proteins.length",
Expand All @@ -64,6 +66,7 @@ def getProteinFromGene(gene):
return query

def getAllProteins():
print("Query data from Yeastmine to get all proteins")

query = service.new_query("Protein")

Expand Down Expand Up @@ -93,8 +96,7 @@ def getAllProteins():
# proteins : {protein standard name : {protein info}}
# }
}
print("COLLECTING PROTEINS\n")
count = 0
print("COLLECTING PROTEINS FROM QUERY RESULTS\n")

for row in query.rows():
gene_systematic_name = row["genes.secondaryIdentifier"]
Expand All @@ -111,12 +113,16 @@ def getAllProteins():
"PI": PI
}
}


print("COLLECTING/WRITING INTERACTIONS\n")
file = open(PHYSICAL_INTERACTION_FILE,"w")
print(f"Open file {PHYSICAL_INTERACTION_FILE} and write data into that file")
file.write(f"Protein1\tProtein2\tInteraction Detection Methods Identifier\tExperiment Name\tTime_Stamp\tSource\n")


exceptions = []
print("Processing Physical Interactions")
for gene in genes:
query = getPhysicalInteractions(gene)
first_row = True
Expand All @@ -138,6 +144,7 @@ def getAllProteins():
else:
exceptions.append(gene2)

print("Handling Exceptions")
failed_genes = []
while exceptions != None:
acceptable_genes = []
Expand All @@ -160,7 +167,7 @@ def getAllProteins():
}
}
if len(rows) == 0:
failed_genes.append(gene)
failed_genes.append(gene)

more_exceptions = []
for gene in acceptable_genes:
Expand Down Expand Up @@ -190,7 +197,6 @@ def getAllProteins():

file.close()


# Source Table

print(f"Completed {PHYSICAL_INTERACTION_FILE} Starting{SOURCE_DESTINATION}")
Expand All @@ -204,21 +210,18 @@ def getAllProteins():
species = "Saccharomyces cerevisiae"
taxon_id = "559292"

# create gene csv
print(f"Completed {SOURCE_DESTINATION} Starting{GENE_FILE}")
file = open(GENE_FILE,"w")
file.write(f"Gene ID\tDisplay Gene ID\tSpecies\tTaxon ID\n")
for gene in genes:
file.write(f"{gene}\t{genes[gene]['standard_name']}\t{species}\t{taxon_id}\n")
file.close()

# create protein csv
print(f"Completed {GENE_FILE} Starting{PROTEIN_FILE}")
file = open(PROTEIN_FILE, "w")
file.write(f"Standard Name\tGene Systematic Name\tLength\tMolecular Weight\tPI\tTaxon ID\n")
for gene in genes:
file.write(f"{genes[gene]['protein']['standard_name']}\t{gene}\t{genes[gene]['protein']['length']}\t{genes[gene]['protein']['molecular_weight']}\t{genes[gene]['protein']['PI']}\t{taxon_id}\n")
file.close()


# create gene csv

# create protein csv
file.close()
9 changes: 8 additions & 1 deletion database/protein-protein-database/scripts/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
This function Loads Protein-Protein Network Data Sources into the database
"""
def LOAD_SOURCES():
print('COPY protein_protein_interactions.source (time_stamp, source, display_name) FROM stdin;')
print('COPY protein_protein_interactions (time_stamp, source, display_name) FROM stdin;')
NETWORK_DATA_SOURCE = '../script-results/processed-loader-files/source.csv'
with open(NETWORK_DATA_SOURCE, 'r+') as f:
reader = csv.reader(f)
Expand Down Expand Up @@ -95,6 +95,13 @@ def LOAD_PHYSICAL_INTERACTIONS():
print(f'{protein1}\t{protein2}\t{idmi}\t{exp_name}\t{timestamp}\t{source}')
row_num += 1
print('\\.')

def TRUNCATE_TABLES():
    """Print the SQL statement that empties the protein-protein tables.

    Like the other loader steps in this script, this function only writes
    SQL text to stdout; it does not talk to a database itself.

    Every target is schema-qualified.  The previous statement listed the
    bare name ``protein_protein_interactions`` (the schema) as its last
    target instead of the ``source`` table, so the source table was never
    cleared.
    NOTE(review): assumes the tables are ``physical_interactions``,
    ``protein``, ``gene`` and ``source`` -- confirm against the schema file.

    Returns:
        None
    """
    schema = "protein_protein_interactions"
    # A single TRUNCATE naming all tables clears them in one statement.
    tables = ("physical_interactions", "protein", "gene", "source")
    targets = ", ".join(f"{schema}.{table}" for table in tables)
    print(f"TRUNCATE TABLE {targets};")

# Call the TRUNCATE_TABLES function before loading data
TRUNCATE_TABLES()

LOAD_SOURCES()
LOAD_GENES()
Expand Down

0 comments on commit 63005b7

Please sign in to comment.