-
Notifications
You must be signed in to change notification settings - Fork 0
/
Build Adjacency Matrix
78 lines (57 loc) · 5.56 KB
/
Build Adjacency Matrix
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
# Packages that provide the helpers called further down: gconvert() and the
# rba_string_*() functions. Without these the script fails in a fresh session.
# NOTE(review): gconvert() is assumed to come from gprofiler2 (the maintained
# successor of gProfileR) - confirm against the project's environment.
library(gprofiler2)
library(rbioapi)
# Let the user choose the input file interactively.
file_path <- file.choose()
# Read the whitespace-delimited table into a data frame. header = TRUE means
# the first line of the file supplies the column names, so row 1 of
# data_matrix is already the first data row.
data_matrix <- read.table(file_path, header = TRUE)
# Column indices of data_matrix to process, one output file per entry.
# Column 1 is read as the identifier column by the loop below, and each output
# file is labelled with col_num - 1 (trait number = column index - 1).
# The literal is intentionally kept verbatim; it spans two physical lines.
A <- c(10, 11, 12, 14, 42, 47, 54, 62, 63, 64, 68, 69, 70, 71, 72, 73, 74, 75, 95, 96, 97, 109, 114, 119, 127, 128, 143, 144, 147, 156, 157, 162, 163, 168, 169, 174, 175, 192, 194, 207, 209, 1005, 1068, 1194, 1195, 1203, 1204, 1205, 2025, 2030, 2031, 2032, 2035, 2038, 2053, 2176, 3186, 3187, 3188, 3189, 3191, 3192, 3196, 3201, 3202, 3211, 3218, 3220, 3221, 3229, 3230, 3231, 3232, 3233, 3234, 3237, 3242, 3243, 3245, 3247, 3251, 3252, 3254, 3255, 3256, 3257, 3258, 3259, 3262, 3271, 3272, 3274, 3275, 3276, 3277, 3283, 3284, 3285, 3286, 3287, 3288, 3289, 3290, 3291, 3292, 3293, 3301, 3302, 3305, 3306, 3308, 3309, 3311, 3313, 3314, 3315, 3316, 3320, 3325, 3329, 3333, 3340, 3341, 3343, 3344, 3367, 3373, 3380, 3381, 3410, 3413, 3414, 3415, 3416, 3418, 3419, 3426, 3432, 3436, 3437, 3438, 3439, 3441, 3442, 3443, 3444, 3445, 3446, 3447, 3448, 3449, 3450, 3451, 3452, 3453, 3454, 3455, 3456, 3457, 3458, 3459, 3460, 3461, 3462, 3463, 3464, 3465, 3466, 3467, 3468, 3469, 3470, 3471, 3472, 3486, 3495, 3496, 3497, 3498, 3499, 3500, 3501, 3504, 3530, 3532, 3540, 3545, 3550, 3552, 3553, 3554, 3555, 3556, 3560, 3571, 3579, 3591, 3592, 3598, 3600, 3602, 3603, 3606, 3607, 3609, 3614, 3626, 3655, 3659, 3661, 3666, 3669, 3686, 3687, 3689, 3692, 3696, 3786, 3790, 3792, 3794, 3795, 3796, 3798, 3799, 3816, 3819, 3820, 3821, 3823, 3824, 3825, 3833, 3834, 3835, 3836, 3837, 3838, 3839, 3840, 3841, 3842, 3843, 3844, 3845, 3846, 3847, 3848, 3849, 3850, 3851, 3852, 3853, 3854, 3855, 3856, 3857, 3858, 3859, 3860, 3861, 3862, 3863, 3864, 3865, 3866, 3867, 3868, 3869, 3870, 3871, 3872, 3873, 3874, 3875, 3876, 3877, 3878, 3879, 3880, 3881, 3882, 3883, 3884, 3885, 3886, 3887, 3888, 3889, 3890, 3891, 3892, 3893, 3894, 3895, 3896, 3897, 3898, 3899, 3900, 3901, 3902, 3903, 3904, 3905, 3909, 3911, 3926, 3940, 3944, 3967, 3968, 3969, 3972, 3980, 3983, 3991, 3992, 3993, 3994, 3995, 3996, 3997, 3998, 4000, 4003, 4012, 4019, 4039, 4041, 4044, 4045, 4046, 4051, 4064, 4067, 4068, 4069, 4070, 4071, 4072, 4073, 
4074, 4075, 4076, 4077, 4078, 4079, 4080, 4081, 4082, 4083, 4086, 4087, 4088, 4092, 4093, 4094, 4095, 4099, 4100, 4101, 4102, 4105, 4107, 4108, 4109, 4110, 4111, 4112, 4120, 4121, 4122, 4123, 4127, 4130, 4136, 4139, 4140, 4141, 4142, 4143, 4145, 4167, 4168, 4176, 4177, 4179, 4182, 4184, 4185, 4188, 4201, 4206, 4207, 4208, 4209, 4216, 4218, 4222, 4223, 4224, 4225, 4226, 4227, 4228, 4233, 4236, 4237, 4267, 4268, 4269, 4270, 4273, 4276, 4279, 4280, 4281, 4282, 4283, 4284, 4285, 4286, 4291, 4294, 4295, 4296, 4300, 4302, 4303, 4304, 4305, 4306, 4307, 4308, 4309, 4310, 4312, 4315, 4316, 4318, 4322, 4323, 4326, 4327, 4328, 4329, 4346, 4347, 4348, 4349, 4352, 4353, 4358, 4359, 4362, 4364, 4366, 4373, 4378, 4379)
# For every selected trait column: take the rows with the smallest p-values,
# convert their identifiers with gconvert(), map the resulting symbols to
# STRING IDs, fetch the interaction network, and write a symmetric weighted
# adjacency matrix to "adj_matrix_a_col_num_<col_num - 1>.csv".

# Number of lowest p-values kept per trait column.
top_n <- 500

for (col_num in A) {
  # p-values of this trait. read.table(header = TRUE) already consumed the
  # header line, so every row (including row 1) is data and none is dropped.
  p_values <- data_matrix[[col_num]]

  # Row indices of the top_n smallest p-values. order() ranks ascending;
  # na.last = NA discards missing values; head() caps the selection at exactly
  # top_n rows even when p-values tie; sort() restores the file's row order.
  indices <- sort(head(order(p_values, na.last = NA), top_n))

  # Identifiers (column 1) of the selected rows. These indices address
  # data_matrix directly, so no header offset is needed.
  related_names <- as.character(data_matrix[indices, 1])

  # Convert the identifiers with gconvert() and keep the `name` column of the
  # result as the list of protein symbols.
  X <- gconvert(query = related_names, organism = "hsapiens", target = "ENSG")
  proteins <- as.character(X$name)

  # Nothing to map for this column: report it and move on instead of sending
  # an empty query to STRING.
  if (length(proteins) == 0) {
    warning("No identifiers converted for column ", col_num, "; skipped",
            call. = FALSE)
    next
  }

  # Map protein symbols to STRING identifiers (species 9606).
  proteins_mapped <- rba_string_map_ids(ids = proteins, species = 9606)
  proteins_ids <- proteins_mapped$stringId

  # Fetch the interaction network among the mapped proteins, keeping only
  # interactions with a score of at least 900.
  int_net <- rba_string_interactions_network(
    ids = proteins_ids,
    species = 9606,
    network_type = "functional",
    required_score = 900
  )

  # Edge list selected by position: columns 3 and 4 are used as the two node
  # labels and column 6 as the edge weight.
  # NOTE(review): presumably preferredName_A, preferredName_B and score in the
  # rbioapi result - confirm against the installed version.
  edges <- int_net[, c(3, 4, 6)]

  # Node set: unique values of column 2 of the mapping table.
  # NOTE(review): presumably the submitted query symbol; edges whose labels do
  # not appear in this set are skipped below - confirm this is intended.
  all_proteins <- as.character(unique(proteins_mapped[, 2]))

  # Zero-filled square matrix labelled by node on both axes.
  adj_matrix <- matrix(
    0,
    nrow = length(all_proteins),
    ncol = length(all_proteins),
    dimnames = list(all_proteins, all_proteins)
  )

  # Fill in edge weights row by row; a later row overwrites an earlier one for
  # the same pair. seq_len() makes an empty edge list a no-op (1:0 would not).
  for (i in seq_len(nrow(edges))) {
    protein1 <- as.character(edges[i, 1])
    protein2 <- as.character(edges[i, 2])
    if (protein1 %in% all_proteins && protein2 %in% all_proteins) {
      adj_matrix[protein1, protein2] <- edges[i, 3]
      adj_matrix[protein2, protein1] <- edges[i, 3] # undirected graph
    }
  }

  # File name carries the trait number (column index minus one).
  file_name <- paste0("adj_matrix_a_col_num_", col_num - 1, ".csv")
  # Save the adjacency matrix, keeping the node labels as row names.
  write.csv(adj_matrix, file = file_name, row.names = TRUE)
  # Report progress.
  print(paste("Adjacency matrix saved to", file_name))
}