From a146fd6d4ccdf051d4f5dcd87308996e86bf8cec Mon Sep 17 00:00:00 2001 From: jvfe Date: Mon, 26 Sep 2022 16:03:35 -0300 Subject: [PATCH 1/5] Add script to get biotypes for DGE/DTE --- .gitignore | 3 +- scripts/summarise_biotypes.R | 82 ++++++++++++++++++++++++++++++++++++ 2 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 scripts/summarise_biotypes.R diff --git a/.gitignore b/.gitignore index dbe2dcc..4ccc2ae 100644 --- a/.gitignore +++ b/.gitignore @@ -19,4 +19,5 @@ renv/* slurm* run* -data/* \ No newline at end of file +data/* +results/diff_exp/* diff --git a/scripts/summarise_biotypes.R b/scripts/summarise_biotypes.R new file mode 100644 index 0000000..28cf2f9 --- /dev/null +++ b/scripts/summarise_biotypes.R @@ -0,0 +1,82 @@ +library(dplyr) +library(purrr) +library(ggplot2) +library(rtracklayer) +library(GenomicFeatures) + +# 1. Carregando GTF --------------------------------------- +gtf <- "./data/Homo_sapiens.GRCh38.97.chr_patch_hapl_scaff.gtf.gz" + +gtf_data <- import(gtf) + +# 2. Lendo DGE/DTE e pegando os biotipos --------------------------------------- +load("./results/diff_exp/diff_df.rda") + +dge_genes <- diff_df %>% + filter(type == "DGE") + +# 2.1. Pegando biotipo dos DGE --------------------------------------- +dge_w_biotype <- gtf_data[, c("gene_id", "gene_biotype")] %>% + as.data.frame() %>% + filter(gene_id %in% dge_genes$gene) %>% + dplyr::select(gene_id, gene_biotype) %>% + right_join(dge_genes, by = c("gene_id" = "gene")) %>% + distinct() %>% + dplyr::select(gene_id, gene_biotype, group) + +readr::write_csv(dge_w_biotype, "results/diff_exp/dge_w_biotype.csv") + +# 2.2. Pegando biotipo dos DTE --------------------------------------- +load("./results/diff_exp/diff_tx_corrected.rda") + +dte_genes <- df_res_padj_tx %>% + dplyr::select(txID, transcript, group) %>% + filter(transcript < 0.01) + +dte_w_biotype <- + gtf_data[, c("transcript_id", "transcript_biotype")] %>% + as.data.frame() %>% + filter(transcript_id %in% dte_genes$txID) %>% + dplyr::select(transcript_id, transcript_biotype) %>% + right_join(dte_genes, by = c("transcript_id" = "txID")) %>% + distinct() %>% + dplyr::select(transcript_id, transcript_biotype, group) + +readr::write_csv(dte_w_biotype, "results/diff_exp/dte_w_biotype.csv") + +# 2.3. Pegando biotipo dos DTU --------------------------------------- + + + +# 3. Plotando as porcentagens --------------------------------------- +plot_biotype_bar <- function(data, id_col, n_col) { + + id_col <- enquo(id_col) + n_col <- enquo(n_col) + + data %>% + ggplot(aes(x = reorder(!!id_col, dplyr::desc(!!n_col)), y = !!n_col)) + + geom_col() + + scale_y_continuous(labels = scales::percent_format(scale = 1)) + + coord_flip() + + labs( + y = "Porcentagem de Genes", + x = "Biotipo", + ) + +} + +dge_plot <- dge_w_biotype %>% + group_by(gene_biotype) %>% + summarise(biotype_n = n() / length(unique(dge_w_biotype$gene_id)) * 100) %>% + ungroup() %>% + plot_biotype_bar(. , id_col = gene_biotype, n_col = biotype_n) + +dte_plot <- dte_w_biotype %>% + group_by(transcript_biotype) %>% + summarise(biotype_n = n() / length(unique(dte_w_biotype$transcript_id))* 100) %>% + ungroup() %>% + plot_biotype_bar(., id_col = transcript_biotype, n_col = biotype_n) + +ggsave(dge_plot, filename = "results/diff_exp/dge_biotypes.pdf") +ggsave(dte_plot, filename = "results/diff_exp/dte_biotypes.pdf") \ No newline at end of file From c206ad95ea0af16eedef8cd0b18a7845f80bedec Mon Sep 17 00:00:00 2001 From: jvfe Date: Mon, 26 Sep 2022 16:26:17 -0300 Subject: [PATCH 2/5] Add script to compile all results from DTU --- .gitignore | 1 + scripts/compile_DTU.R | 42 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) create mode 100644 scripts/compile_DTU.R diff --git a/.gitignore b/.gitignore index 4ccc2ae..1aaeee8 100644 --- a/.gitignore +++ b/.gitignore @@ -21,3 +21,4 @@ slurm* run* data/* results/diff_exp/* +results/ISA/ diff --git a/scripts/compile_DTU.R b/scripts/compile_DTU.R new file mode 100644 index 0000000..1240f5c --- /dev/null +++ b/scripts/compile_DTU.R @@ -0,0 +1,42 @@ +library(dplyr) +library(purrr) +library(IsoformSwitchAnalyzeR) + +# 1. Get all results from ISA ------ + +files <- + list.files( + "results/ISA/", + pattern = "pass2", + recursive = T, + full.names = T + ) +isa_df <- map_dfr(files, ~ { + load(.x) + SwitchList_2$isoformFeatures +}) + +# 2. Filter results by conditions and save full result ----- + +condition_1_male <- grepl("CTRL_male", isa_df$condition_1) +condition_2_male <- grepl("MDD_male", isa_df$condition_2) +condition_1_female <- grepl("CTRL_female", isa_df$condition_1) +condition_2_female <- grepl("MDD_female", isa_df$condition_2) + +isa_df <- isa_df[(condition_1_male & condition_2_male) | + (condition_1_female & condition_2_female), ] + +save(isa_df, file = "results/ISA/DTU_df.rda") + +# 3. Get dataframe with biotypes ---- + +dtu_w_biotypes <- isa_df %>% + mutate( + gene_id = gsub("\\.\\d+", "", gene_id), + isoform_id = gsub("\\.\\d+", "", isoform_id) + ) %>% + filter(isoform_switch_q_value <= 0.05) %>% + mutate(group = gsub("_CTRL", "", condition_1)) %>% + dplyr::select(isoform_id, iso_biotype, group) + +readr::write_csv(dtu_w_biotypes, file = "results/ISA/dtu_w_biotype.csv") From db5d36381fb22b6292c3854c7dfbcc24935faec4 Mon Sep 17 00:00:00 2001 From: jvfe Date: Mon, 26 Sep 2022 16:33:58 -0300 Subject: [PATCH 3/5] Add DTU results to summarise biotypes script --- scripts/summarise_biotypes.R | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/scripts/summarise_biotypes.R b/scripts/summarise_biotypes.R index 28cf2f9..fc025a1 100644 --- a/scripts/summarise_biotypes.R +++ b/scripts/summarise_biotypes.R @@ -46,7 +46,7 @@ readr::write_csv(dte_w_biotype, "results/diff_exp/dte_w_biotype.csv") # 2.3. Pegando biotipo dos DTU --------------------------------------- - +dtu_w_biotype <- readr::read_csv("results/ISA/dtu_w_biotype.csv") # 3. Plotando as porcentagens --------------------------------------- plot_biotype_bar <- function(data, id_col, n_col) { @@ -78,5 +78,12 @@ dte_plot <- dte_w_biotype %>% ungroup() %>% plot_biotype_bar(., id_col = transcript_biotype, n_col = biotype_n) +dtu_plot <- dtu_w_biotype %>% + group_by(iso_biotype) %>% + summarise(biotype_n = n() / length(unique(dtu_w_biotype$isoform_id))* 100) %>% + ungroup() %>% + plot_biotype_bar(., id_col = iso_biotype, n_col = biotype_n) + ggsave(dge_plot, filename = "results/diff_exp/dge_biotypes.pdf") -ggsave(dte_plot, filename = "results/diff_exp/dte_biotypes.pdf") \ No newline at end of file +ggsave(dte_plot, filename = "results/diff_exp/dte_biotypes.pdf") +ggsave(dtu_plot, filename = "results/diff_exp/dtu_biotypes.pdf") From 72858103a7bf27414621989f11899ce7c01f80f7 Mon Sep 17 00:00:00 2001 From: Iara Souza Date: Fri, 14 Apr 2023 15:28:54 -0300 Subject: [PATCH 4/5] fix: rebuilt conda env, added java-dependent packages --- environment.yml | 100 ------------------------------------------------ renv.lock | 47 ++++++++++++++++++++++- 2 files changed, 46 insertions(+), 101 deletions(-) delete mode 100644 environment.yml diff --git a/environment.yml b/environment.yml deleted file mode 100644 index 037613c..0000000 --- a/environment.yml +++ /dev/null @@ -1,100 +0,0 @@ -name: mdd-env -channels: - - conda-forge - - defaults -dependencies: - - _libgcc_mutex=0.1=conda_forge - - _openmp_mutex=4.5=1_gnu - - _r-mutex=1.0.1=anacondar_1 - - binutils_impl_linux-64=2.36.1=h193b22a_2 - - binutils_linux-64=2.36=hf3e587d_3 - - bwidget=1.9.14=ha770c72_1 - - bzip2=1.0.8=h7f98852_4 - - c-ares=1.18.1=h7f98852_0 - - ca-certificates=2021.10.8=ha878542_0 - - cairo=1.16.0=ha00ac49_1009 - - curl=7.80.0=h494985f_1 - - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 - - font-ttf-inconsolata=3.000=h77eed37_0 - - font-ttf-source-code-pro=2.038=h77eed37_0 - - font-ttf-ubuntu=0.83=hab24e00_0 - - fontconfig=2.13.1=hba837de_1005 - - fonts-conda-ecosystem=1=0 - - fonts-conda-forge=1=0 - - freetype=2.10.4=h0708190_1 - - fribidi=1.0.10=h36c2ea0_0 - - gcc_impl_linux-64=9.4.0=h03d3576_11 - - gcc_linux-64=9.4.0=h391b98a_3 - - gettext=0.19.8.1=h73d1719_1008 - - gfortran_impl_linux-64=9.4.0=h0003116_11 - - gfortran_linux-64=9.4.0=hf0ab688_3 - - graphite2=1.3.13=h58526e2_1001 - - gsl=2.7=he838d99_0 - - gxx_impl_linux-64=9.4.0=h03d3576_11 - - gxx_linux-64=9.4.0=h0316aca_3 - - harfbuzz=3.2.0=hb4a5f5f_0 - - icu=69.1=h9c3ff4c_0 - - jbig=2.1=h7f98852_2003 - - jpeg=9d=h36c2ea0_0 - - kernel-headers_linux-64=2.6.32=he073ed8_15 - - krb5=1.19.2=h48eae69_3 - - ld_impl_linux-64=2.36.1=hea4e1c9_2 - - lerc=3.0=h9c3ff4c_0 - - libblas=3.9.0=12_linux64_openblas - - libcblas=3.9.0=12_linux64_openblas - - libcurl=7.80.0=h494985f_1 - - libdeflate=1.8=h7f98852_0 - - libedit=3.1.20191231=he28a2e2_2 - - libev=4.33=h516909a_1 - - libffi=3.4.2=h7f98852_5 - - libgcc-devel_linux-64=9.4.0=hd854feb_11 - - libgcc-ng=11.2.0=h1d223b6_11 - - libgfortran-ng=11.2.0=h69a702a_11 - - libgfortran5=11.2.0=h5c6108e_11 - - libglib=2.70.2=h174f98d_1 - - libgomp=11.2.0=h1d223b6_11 - - libiconv=1.16=h516909a_0 - - liblapack=3.9.0=12_linux64_openblas - - libnghttp2=1.43.0=ha19adfc_1 - - libopenblas=0.3.18=pthreads_h8fe5266_0 - - libpng=1.6.37=h21135ba_2 - - libsanitizer=9.4.0=h79bfe98_11 - - libssh2=1.10.0=ha35d2d1_2 - - libstdcxx-devel_linux-64=9.4.0=hd854feb_11 - - libstdcxx-ng=11.2.0=he4da1e4_11 - - libtiff=4.3.0=h6f004c6_2 - - libuuid=2.32.1=h7f98852_1000 - - libwebp-base=1.2.1=h7f98852_0 - - libxcb=1.13=h7f98852_1004 - - libxml2=2.9.12=h885dcf4_1 - - libzlib=1.2.11=h36c2ea0_1013 - - lz4-c=1.9.3=h9c3ff4c_1 - - make=4.3=hd18ef5c_1 - - ncurses=6.2=h58526e2_4 - - openssl=3.0.0=h7f98852_2 - - pango=1.48.10=h54213e6_2 - - pcre=8.45=h9c3ff4c_0 - - pcre2=10.37=h032f7d1_0 - - pixman=0.40.0=h36c2ea0_0 - - pthread-stubs=0.4=h36c2ea0_1001 - - r-base=4.1.2=hde4fec0_0 - - readline=8.1=h46c0cb4_0 - - sed=4.8=he412f7d_0 - - sysroot_linux-64=2.12=he073ed8_15 - - tk=8.6.11=h27826a3_1 - - tktable=2.10=hb7b940f_3 - - xorg-kbproto=1.0.7=h7f98852_1002 - - xorg-libice=1.0.10=h7f98852_0 - - xorg-libsm=1.2.3=hd9c2040_1000 - - xorg-libx11=1.7.2=h7f98852_0 - - xorg-libxau=1.0.9=h7f98852_0 - - xorg-libxdmcp=1.1.3=h7f98852_0 - - xorg-libxext=1.3.4=h7f98852_1 - - xorg-libxrender=0.9.10=h7f98852_1003 - - xorg-libxt=1.2.1=h7f98852_2 - - xorg-renderproto=0.11.1=h7f98852_1002 - - xorg-xextproto=7.3.0=h7f98852_1002 - - xorg-xproto=7.0.31=h7f98852_1007 - - xz=5.2.5=h516909a_1 - - zlib=1.2.11=h36c2ea0_1013 - - zstd=1.5.1=ha95c52a_0 diff --git a/renv.lock b/renv.lock index ec24f2c..fcf95df 100644 --- a/renv.lock +++ b/renv.lock @@ -905,6 +905,21 @@ "S4Vectors" ] }, + "TreeAndLeaf": { + "Package": "TreeAndLeaf", + "Version": "1.6.1", + "Source": "Bioconductor", + "git_url": "https://git.bioconductor.org/packages/TreeAndLeaf", + "git_branch": "RELEASE_3_14", + "git_last_commit": "171d519", + "git_last_commit_date": "2021-10-27", + "Hash": "b8f10ab9082a4c3cbcc136006706ef4e", + "Requirements": [ + "RedeR", + "ape", + "igraph" + ] + }, "UpSetR": { "Package": "UpSetR", "Version": "1.4.0", @@ -2156,12 +2171,13 @@ "Package": "ggtree", "Version": "3.2.1", "Source": "Bioconductor", + "RemoteType": "bioconductor", "Remotes": "GuangchuangYu/treeio", "git_url": "https://git.bioconductor.org/packages/ggtree", "git_branch": "RELEASE_3_14", "git_last_commit": "d3747e6", "git_last_commit_date": "2021-11-14", - "Hash": "f156c85173024c88e2fdfd63ccca3fd7", + "Hash": "5711c057a04e53ed1c70909939dd9ad9", "Requirements": [ "ape", "aplot", @@ -3291,6 +3307,14 @@ "reshape2" ] }, + "rJava": { + "Package": "rJava", + "Version": "1.0-6", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "0415819f6baa75d86d52483f7292b623", + "Requirements": [] + }, "ranger": { "Package": "ranger", "Version": "0.13.1", @@ -4237,6 +4261,27 @@ "Hash": "e2e5fb1a74fbb68b27d6efc5372635dc", "Requirements": [] }, + "xlsx": { + "Package": "xlsx", + "Version": "0.6.5", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "d24d579f59a3b6da1e1cf4660425443e", + "Requirements": [ + "rJava", + "xlsxjars" + ] + }, + "xlsxjars": { + "Package": "xlsxjars", + "Version": "0.6.1", + "Source": "Repository", + "Repository": "CRAN", + "Hash": "4c4b3bc29a916f33f1298dd951133351", + "Requirements": [ + "rJava" + ] + }, "xml2": { "Package": "xml2", "Version": "1.3.3", From abe25979b7a603e76b8c3a94d8196eafebe3a837 Mon Sep 17 00:00:00 2001 From: Iara Souza Date: Fri, 14 Apr 2023 15:43:06 -0300 Subject: [PATCH 5/5] fix: added new env file --- environment.yml | 106 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 106 insertions(+) create mode 100644 environment.yml diff --git a/environment.yml b/environment.yml new file mode 100644 index 0000000..ea11b24 --- /dev/null +++ b/environment.yml @@ -0,0 +1,106 @@ +name: mdd-paper-env +channels: + - conda-forge + - defaults +dependencies: + - _libgcc_mutex=0.1=conda_forge + - _openmp_mutex=4.5=2_gnu + - _r-mutex=1.0.1=anacondar_1 + - binutils_impl_linux-64=2.39=he00db2b_1 + - binutils_linux-64=2.39=h5fc0e48_12 + - bwidget=1.9.14=ha770c72_1 + - bzip2=1.0.8=h7f98852_4 + - c-ares=1.18.1=h7f98852_0 + - ca-certificates=2022.12.7=ha878542_0 + - cairo=1.16.0=ha12eb4b_1010 + - curl=7.87.0=h5eee18b_0 + - expat=2.5.0=hcb278e6_1 + - font-ttf-dejavu-sans-mono=2.37=hab24e00_0 + - font-ttf-inconsolata=3.000=h77eed37_0 + - font-ttf-source-code-pro=2.038=h77eed37_0 + - font-ttf-ubuntu=0.83=hab24e00_0 + - fontconfig=2.14.2=h14ed4e7_0 + - fonts-conda-ecosystem=1=0 + - fonts-conda-forge=1=0 + - freetype=2.12.1=hca18f0e_1 + - fribidi=1.0.10=h36c2ea0_0 + - gcc_impl_linux-64=9.5.0=h99780fb_19 + - gcc_linux-64=9.5.0=h4258300_12 + - gettext=0.21.1=h27087fc_0 + - gfortran_impl_linux-64=9.5.0=hf1096a2_19 + - gfortran_linux-64=9.5.0=hdb51d14_12 + - git=2.34.1=pl5262hc120c5b_0 + - graphite2=1.3.13=h58526e2_1001 + - gsl=2.7=he838d99_0 + - gxx_impl_linux-64=9.5.0=h99780fb_19 + - gxx_linux-64=9.5.0=h43f449f_12 + - harfbuzz=4.2.0=h40b6f09_0 + - icu=69.1=h9c3ff4c_0 + - jpeg=9e=h0b41bf4_3 + - kernel-headers_linux-64=2.6.32=he073ed8_15 + - keyutils=1.6.1=h166bdaf_0 + - krb5=1.19.3=h3790be6_0 + - ld_impl_linux-64=2.39=hcc3a1bd_1 + - lerc=4.0.0=h27087fc_0 + - libblas=3.9.0=16_linux64_openblas + - libcblas=3.9.0=16_linux64_openblas + - libcurl=7.87.0=h91b91d3_0 + - libdeflate=1.14=h166bdaf_0 + - libedit=3.1.20191231=he28a2e2_2 + - libev=4.33=h516909a_1 + - libexpat=2.5.0=hcb278e6_1 + - libffi=3.4.2=h7f98852_5 + - libgcc-devel_linux-64=9.5.0=h0a57e50_19 + - libgcc-ng=12.2.0=h65d4601_19 + - libgfortran-ng=12.2.0=h69a702a_19 + - libgfortran5=12.2.0=h337968e_19 + - libglib=2.74.1=h7a41b64_0 + - libgomp=12.2.0=h65d4601_19 + - libiconv=1.17=h166bdaf_0 + - liblapack=3.9.0=16_linux64_openblas + - libnghttp2=1.46.0=hce63b2e_0 + - libnsl=2.0.0=h7f98852_0 + - libopenblas=0.3.21=pthreads_h78a6416_3 + - libpng=1.6.39=h753d276_0 + - libsanitizer=9.5.0=h2f262e1_19 + - libssh2=1.10.0=haa6b8db_3 + - libstdcxx-devel_linux-64=9.5.0=h0a57e50_19 + - libstdcxx-ng=12.2.0=h46fd767_19 + - libtiff=4.4.0=h82bc61c_5 + - libuuid=2.38.1=h0b41bf4_0 + - libwebp-base=1.3.0=h0b41bf4_0 + - libxcb=1.13=h7f98852_1004 + - libxml2=2.9.14=haae042b_4 + - libzlib=1.2.13=h166bdaf_4 + - make=4.3=hd18ef5c_1 + - ncurses=6.2=h58526e2_4 + - openjdk=8.0.332=h166bdaf_0 + - openssl=1.1.1t=h0b41bf4_0 + - pango=1.50.7=hbd2fdc8_0 + - pcre2=10.37=hc3806b6_1 + - perl=5.32.1=2_h7f98852_perl5 + - pixman=0.40.0=h36c2ea0_0 + - pthread-stubs=0.4=h36c2ea0_1001 + - r-base=4.1.2=h2553ce4_1 + - r-rjava=1.0_6=r41h06615bd_5 + - readline=8.1=h46c0cb4_0 + - sed=4.8=he412f7d_0 + - sysroot_linux-64=2.12=he073ed8_15 + - tk=8.6.12=h27826a3_0 + - tktable=2.10=hb7b940f_3 + - xorg-kbproto=1.0.7=h7f98852_1002 + - xorg-libice=1.0.10=h7f98852_0 + - xorg-libsm=1.2.3=hd9c2040_1000 + - xorg-libx11=1.8.4=h0b41bf4_0 + - xorg-libxau=1.0.9=h7f98852_0 + - xorg-libxdmcp=1.1.3=h7f98852_0 + - xorg-libxext=1.3.4=h0b41bf4_2 + - xorg-libxrender=0.9.10=h7f98852_1003 + - xorg-libxt=1.2.1=h7f98852_2 + - xorg-renderproto=0.11.1=h7f98852_1002 + - xorg-xextproto=7.3.0=h0b41bf4_1003 + - xorg-xproto=7.0.31=h7f98852_1007 + - xz=5.2.6=h166bdaf_0 + - zlib=1.2.13=h166bdaf_4 + - zstd=1.5.2=h3eb15da_6 +prefix: /home/iaradsouza/miniconda3/envs/mdd-paper-env