update summaries

AquaAuma · Jan 11, 2023 · 0ac6ca0 · 0ac6ca0
1 parent 628a0ce
commit 0ac6ca0
Show file tree

Hide file tree

Showing 17 changed files with 1,380 additions and 1,300 deletions.
diff --git a/summary/dfo-wcvi.log b/summary/dfo-wcvi.log
diff --git a/summary/dfo-wcvi.pdf b/summary/dfo-wcvi.pdf
diff --git a/summary/neus.Rmd b/summary/neus.Rmd
diff --git a/summary/neus.log b/summary/neus.log
diff --git a/summary/neus.pdf b/summary/neus.pdf
diff --git a/summary/nigfs.Rmd b/summary/nigfs.Rmd
@@ -56,7 +56,7 @@ This document presents the cleaning code and summary of the Irish Sea bottom tra
 ## Data cleaning in R
 
 
-```{r cleaning_code, code = readLines(here("./cleaning.codes/get.datras.R")), eval=FALSE}
+```{r cleaning_code, code = readLines(here("./cleaning_codes/get_datras.R")), eval=FALSE}
 ```
 
 
@@ -71,9 +71,8 @@ This document presents the cleaning code and summary of the Irish Sea bottom tra
 # survey <- read.csv(here("./summary/DATRAS_v3_clean.csv")) %>% 
 #   filter(survey == "NIGFS")
 
-load(here("outputs/Cleaned_data/DATRAS_v3_clean.RData"))
-survey <- data %>% 
-  filter(survey == "NIGFS")
+load(here("outputs/Cleaned_data/NIGFS_clean.RData"))
+survey <- data 
 rm(data)
 
 ### Map data
@@ -94,22 +93,22 @@ World_map <- rnaturalearth::ne_countries(scale = 'medium', returnclass = c("sf")
 ## 1. Overview of the survey data table
 
 ```{r head_survey, eval = T, echo = F}
-kable(survey[1:5,1:8], format = "latex", booktabs = T) %>%
+kable(survey[1:5,1:6], format = "latex", booktabs = T) %>%
 kable_styling(latex_options = c("striped","HOLD_position"))
 
-kable(survey[1:5,9:15], format = "latex", booktabs = T) %>%
+kable(survey[1:5,7:15], format = "latex", booktabs = T) %>%
 kable_styling(latex_options = c("striped","HOLD_position"))
 
-kable(survey[1:5,16:21], format = "latex", booktabs = T) %>%
+kable(survey[1:5,16:23], format = "latex", booktabs = T) %>%
 kable_styling(latex_options = c("striped","HOLD_position"))
 
-kable(survey[1:5,22:27], format = "latex", booktabs = T) %>%
+kable(survey[1:5,24:30], format = "latex", booktabs = T) %>%
 kable_styling(latex_options = c("striped","HOLD_position"))
 
-kable(survey[1:5,28:32], format = "latex", booktabs = T) %>%
+kable(survey[1:5,31:35], format = "latex", booktabs = T) %>%
 kable_styling(latex_options = c("striped","HOLD_position"))
 
-kable(survey[1:5,33:39], format = "latex", booktabs = T) %>%
+kable(survey[1:5,36:42], format = "latex", booktabs = T) %>%
 kable_styling(latex_options = c("striped","HOLD_position"))
 
 
@@ -188,18 +187,18 @@ var_plot
 
 Here we display the yearly total and average across hauls of the following variables recorded in the data:
 
-- *num_cpue*, number of individuals (abundance) in $\frac{individuals}{km^2}$
-- *num_h*, number of individuals (abundance) in $\frac{individuals}{h}$
+- *num_cpua*, number of individuals (abundance) in $\frac{individuals}{km^2}$
+- *num_cpue*, number of individuals (abundance) in $\frac{individuals}{h}$
 - *num*, number of individuals (abundance)
-- *wgt_cpue*, weight in $\frac{kg}{km^2}$
-- *wgt_h*, weight in $\frac{kg}{h}$
+- *wgt_cpua*, weight in $\frac{kg}{km^2}$
+- *wgt_cpue*, weight in $\frac{kg}{h}$
 - *wgt*, weight in ${kg}$
 
 ```{r summary_var_plot, eval = T, echo = F, message =  F,warning =  F}
 
 var_plot <-  survey %>%
   group_by(year) %>% 
-  summarise_at(vars(num:wgt_cpue),
+  summarise_at(vars(num:wgt_cpua),
                funs(sum,mean),na.rm=T) %>% 
   # head()
   gather("var","val",2:13) %>% 
@@ -238,15 +237,15 @@ var_plot
 
 Here we show a yearly total distribution of the biomass data to visualize outliers:
 
-- *wgt*, total weight in ${kg}$ per haul and year per haul and year, if available in the survey data
-- *num*, total number of individuals, if available in the survey data
+- *num_cpue*, number of individuals (abundance) in $\frac{individuals}{km^2}$
+- *wgt_cpue*, weight in $\frac{kg}{km^2}$
 
 ```{r extreme_biomass, eval = T, echo = F, message =  F,warning =  F}
 
-if(!is.na(mean(survey$num_cpue, na.rm=T)) & !is.na(mean(survey$wgt_cpue, na.rm=T))){
+if(!is.na(mean(survey$num_cpua, na.rm=T)) & !is.na(mean(survey$wgt_cpua, na.rm=T))){
   var_plot <-  survey %>%
   group_by(year, haul_id) %>% 
-  summarize(Weight = sum(wgt_cpue), Abundance = sum(num_cpue)) %>% 
+  summarize(Weight = sum(wgt_cpua), Abundance = sum(num_cpua)) %>% 
   gather("var","val",3:4) %>% 
   ggplot() +
   geom_boxplot(
@@ -263,10 +262,10 @@ if(!is.na(mean(survey$num_cpue, na.rm=T)) & !is.na(mean(survey$wgt_cpue, na.rm=T
   theme(axis.text.x = element_text(angle = 90))
 }
 
-if(!is.na(mean(survey$num_cpue, na.rm=T)) & is.na(mean(survey$wgt_cpue, na.rm=T))){
+if(!is.na(mean(survey$num_cpua, na.rm=T)) & is.na(mean(survey$wgt_cpua, na.rm=T))){
 var_plot <-  survey %>%
   group_by(year, haul_id) %>% 
-  summarize(Abundance = sum(num_cpue)) %>% 
+  summarize(Abundance = sum(num_cpua)) %>% 
   # head()
   ggplot() +
   geom_boxplot(
@@ -282,10 +281,10 @@ var_plot <-  survey %>%
   theme(axis.text.x = element_text(angle = 90))
 } 
 
-if(is.na(mean(survey$num_cpue, na.rm=T)) & !is.na(mean(survey$wgt_cpue, na.rm=T))){
+if(is.na(mean(survey$num_cpua, na.rm=T)) & !is.na(mean(survey$wgt_cpua, na.rm=T))){
 var_plot <-  survey %>%
   group_by(year, haul_id) %>% 
-  summarize(Weight = sum(wgt_cpue)) %>% 
+  summarize(Weight = sum(wgt_cpua)) %>% 
   # head()
   ggplot() +
   geom_boxplot(
@@ -301,7 +300,7 @@ var_plot <-  survey %>%
   theme(axis.text.x = element_text(angle = 90))
 } 
 var_plot
-rm(var_plot)
+
 ```
 
 
@@ -312,16 +311,17 @@ rm(var_plot)
 Here we show the total abundance and number of taxa relationships with the area swept:
 
 - *nbr_taxa*, number of marine fish taxa after taxonomic data cleaning
-- *num*, number of individuals, if available in the survey data
-- *wgt*, weight in ${kg}$, if available in the survey data
+- *num_cpua*, number of individuals (abundance) in $\frac{individuals}{km^2}$
+- *wgt_cpua*, weight in $\frac{kg}{km^2}$
+
 
 
 ```{r summary_var_swept, eval = T, echo = F, message =  F,warning =  F}
 
-if(!is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
+if(!is.na(mean(survey$num_cpua, na.rm=T)) & !is.na(mean(survey$wgt_cpua, na.rm=T))){
   var_plot <-  survey %>%
   group_by(haul_id, haul_dur, area_swept) %>%
-  summarize(Number_Taxa = length(accepted_name), Abundance = sum(num),Weight = sum(wgt)) %>% 
+  summarize(Number_Taxa = length(accepted_name), Abundance = sum(num_cpua),Weight = sum(wgt_cpua)) %>% 
   gather("var","val",4:6) %>% 
   # head()
   ggplot() +
@@ -331,10 +331,10 @@ if(!is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
   theme_bw()
 }
 
-if(!is.na(mean(survey$num, na.rm=T)) & is.na(mean(survey$wgt, na.rm=T))){
+if(!is.na(mean(survey$num_cpue, na.rm=T)) & mean(survey$wgt_cpue, na.rm=T)){
   var_plot <-  survey %>%
   group_by(haul_id, haul_dur, area_swept) %>%
-  summarize(Number_Taxa = length(accepted_name), Abundance = sum(num)) %>% 
+  summarize(Number_Taxa = length(accepted_name), Abundance = sum(num_cpue)) %>% 
   gather("var","val",4:5) %>% 
   # head()
   ggplot() +
@@ -344,10 +344,10 @@ if(!is.na(mean(survey$num, na.rm=T)) & is.na(mean(survey$wgt, na.rm=T))){
   theme_bw()
 }
 
-if(is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
+if(is.na(mean(survey$num_cpua, na.rm=T)) & !is.na(mean(survey$wgt_cpua, na.rm=T))){
   var_plot <-  survey %>%
   group_by(haul_id, haul_dur, area_swept) %>%
-  summarize(Number_Taxa = length(accepted_name), Weight = sum(wgt)) %>% 
+  summarize(Number_Taxa = length(accepted_name), Weight = sum(wgt_cpua)) %>% 
   gather("var","val",4:5) %>% 
   # head()
   ggplot() +
@@ -358,7 +358,6 @@ if(is.na(mean(survey$num, na.rm=T)) & !is.na(mean(survey$wgt, na.rm=T))){
 }
 
 var_plot
-
 ```
 
 \clearpage
@@ -367,10 +366,10 @@ var_plot
 
 ```{r abundant_spp, eval=T, echo=F, message=F, warning=F}
 
-if(!is.na(mean(survey$wgt_cpue, na.rm=T))){
+if(!is.na(mean(survey$num_cpua, na.rm=T))){
 spp <- survey %>% 
   group_by(year, accepted_name) %>% 
-  summarize(wgt = sum(wgt_cpue), nbr_years = length(year)) %>% 
+  summarize(wgt = sum(wgt_cpua), nbr_years = length(year)) %>% 
   filter(nbr_years>10) %>% 
   group_by(accepted_name) %>% 
   summarize(wgt = median(wgt)) %>% 
@@ -381,7 +380,7 @@ spp <- survey %>%
 spp_plot <- survey %>% 
   filter(accepted_name %in% spp) %>% 
   group_by(year, accepted_name) %>%
-  summarize(wgt = sum(wgt_cpue, na.rm=T)) %>% 
+  summarize(wgt = sum(wgt_cpua, na.rm=T)) %>% 
   ggplot() +
   geom_point( aes(x = year, y = wgt), size=0.5 ) +
   geom_line(aes(x = year,y = wgt), size=0.5) +
@@ -390,10 +389,10 @@ spp_plot <- survey %>%
   ylab("Species Weight (kg)") + xlab("Year")
 }
 
-if(is.na(mean(survey$wgt_cpue, na.rm=T))){
+if(is.na(mean(survey$wgt_cpua, na.rm=T))){
   spp <- survey %>% 
   group_by(year, accepted_name) %>% 
-  summarize(num = sum(num_cpue), nbr_years = length(year)) %>% 
+  summarize(num = sum(num_cpua), nbr_years = length(year)) %>% 
   filter(nbr_years>10) %>% 
   group_by(accepted_name) %>% 
   summarize(num = median(num)) %>% 
@@ -404,7 +403,7 @@ if(is.na(mean(survey$wgt_cpue, na.rm=T))){
 spp_plot <- survey %>% 
   filter(accepted_name %in% spp) %>% 
   group_by(year, accepted_name) %>%
-  summarize(num = sum(num_cpue, na.rm=T)) %>% 
+  summarize(num = sum(num_cpua, na.rm=T)) %>% 
   ggplot() +
   geom_point( aes(x = year, y = num), size=0.5 ) +
   geom_line(aes(x = year,y = num), size=0.5) +
@@ -414,7 +413,6 @@ spp_plot <- survey %>%
 }
 
 spp_plot
-
 ```
 
 \clearpage
@@ -446,6 +444,7 @@ survey %>%
 ```
 
 
+\clearpage
 
 ## 9. Taxonomic flagging
 
@@ -454,16 +453,18 @@ This species flagging method was adapted from https://github.com/pinskylab/Ocean
 Visualization of flagged taxa
 
 ```{r, echo=FALSE, out.width = '80%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "taxonomic_flagging", paste0(survey$survey[1],"_taxonomic_flagging.png")))
+knitr::include_graphics(here::here("outputs", "Flags","taxonomic_flagging", paste0(survey$survey[1],"_taxonomic_flagging.png")))
 ```
 
 Statistics related to the taxonomic flagging outputs
 
 ```{r, echo=FALSE}
-df <- read.csv(here::here("standardization_steps", "outputs", "taxonomic_flagging", paste0(survey$survey[1],'_stats.csv')))
+df <- read.csv(here::here("outputs", "Flags","taxonomic_flagging", paste0(survey$survey[1],'_stats.csv')))
 knitr::kable(df, col.names = NULL)
 ```
 
+\clearpage 
+
 ## 10. Spatio-temporal standardization
 
 ### a. Standardization method 1
@@ -475,32 +476,32 @@ It was run for hex resolution 7 and 8.
 Plot of number of cells x years with overlaid flagging options
 
 ```{r, echo=FALSE, out.width = '80%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_plot.png")))
+knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_plot.png")))
 ```
 ```{r, echo=FALSE, out.width = '80%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_plot.png")))
+knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_plot.png")))
 ```
 
 Map of hauls retained and removed per flagging method and threshold
 
 ```{r, echo=FALSE, out.width = '100%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res7",  paste0(survey$survey[1],"_hex_res_7_map_per_haul.png")))
+knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res7",  paste0(survey$survey[1],"_hex_res_7_map_per_haul.png")))
 ```
 
 ```{r, echo=FALSE, out.width = '100%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_haul.png")))
+knitr::include_graphics(here::here("outputs", "Flags", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_haul.png")))
 ```
 
 
 Map of numbers of years removed per grid cell and flagging method/threshold
 
 ```{r, echo=FALSE, out.width = '100%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_map_per_grid_nyears.png")))
+knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_map_per_grid_nyears.png")))
 ```
 
 
 ```{r, echo=FALSE, out.width = '100%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_grid_nyears.png")))
+knitr::include_graphics(here::here("outputs", "Flags","trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_map_per_grid_nyears.png")))
 ```
 
 
@@ -511,7 +512,7 @@ This standardization method was adapted from BioTIME code from https://github.co
 Map of hauls retained and removed
 
 ```{r, echo=FALSE, out.width = '100%'}
-knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming_method2", 
+knitr::include_graphics(here::here("outputs", "Flags","trimming_method2", 
                                    paste0(survey$survey[1],"_map_per_haul.png")))
 ```
 
@@ -520,9 +521,9 @@ knitr::include_graphics(here::here("standardization_steps", "outputs", "trimming
 Statistics of hauls removed for each standardization method
 
 ```{r, echo=FALSE}
-met1_7 <- read.csv(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_stats_hauls.csv")))
-met1_8 <- read.csv(here::here("standardization_steps", "outputs", "trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_stats_hauls.csv")))
-met2 <- read.csv(here::here("standardization_steps", "outputs", "trimming_method2", 
+met1_7 <- read.csv(here::here("outputs", "Flags","trimming_method1", "hex_res7", paste0(survey$survey[1],"_hex_res_7_stats_hauls.csv")))
+met1_8 <- read.csv(here::here("outputs", "Flags","trimming_method1", "hex_res8", paste0(survey$survey[1],"_hex_res_8_stats_hauls.csv")))
+met2 <- read.csv(here::here("outputs", "Flags", "trimming_method2", 
                             paste0(survey$survey[1],"_stats_hauls.csv")))
 knitr::kable(cbind(met1_7, met1_8[,2:3], met2[,2]), 
              col.names = c("summary", "grid cell 7, 0% threshold", "grid cell 7, 2% threshold",