Skip to content

Commit

Permalink
update england data
Browse files Browse the repository at this point in the history
  • Loading branch information
jalapic committed Nov 5, 2022
1 parent 7099f05 commit f3061c1
Show file tree
Hide file tree
Showing 9 changed files with 4,479 additions and 278 deletions.
4 changes: 0 additions & 4 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -31,10 +31,6 @@ export(spain_current)
export(totgoals)
export(turkey_current)
export(worstlosses)
importFrom(dplyr,"case_when")
importFrom(dplyr,"mutate")
importFrom(magrittr,"%>%")
importFrom(rvest,"html_table")
importFrom(utils,"head")
importFrom(utils,"read.csv")
importFrom(xml2,"read_html")
6 changes: 3 additions & 3 deletions R/england.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
#' English league results 1888-2020
#' English league results 1888-2022
#'
#' All results for English soccer games in the top 4 tiers
#' from 1888/89 season to 2019/20 season. For playoff games
#' from 1888/89 season to 2021/22 season. For playoff games
#' see separate dataset `englandplayoffs`
#'
#' @format A data frame with 199884 rows and 12 variables:
#' @format A data frame with 203956 rows and 12 variables:
#' \describe{
#' \item{Date}{Date of match}
#' \item{Season}{Season of match - refers to starting year}
Expand Down
267 changes: 153 additions & 114 deletions R/england_current.R
Original file line number Diff line number Diff line change
@@ -1,130 +1,169 @@
#' Get current England season data for all tiers
#'
#' Downloads the results files for the top four English divisions
#' (`E0.csv` to `E3.csv`) from football-data.co.uk for the requested season,
#' converts each with `getCurrentData()`, binds them together and maps the
#' team names through the `teamnames` lookup table.
#'
#' @return a dataframe with results for current
#' season for top four divisions. Team names that have no entry in
#' `teamnames$name_other` are returned as `NA`.
#' @param Season the current Season, given as the starting year
#' (e.g. `2022` for the 2022/23 season)
#' @importFrom utils "read.csv"
#' @examples
#' england_current()
#' @export

england_current <- function(Season = 2022) {

  # The remote folder name is the two-digit start year followed by the
  # two-digit end year, e.g. 2022 -> "2223". Zero-pad both parts and wrap the
  # end year at the century so that 2008 -> "0809" and 1999 -> "9900" instead
  # of the malformed "89" / "99100" that plain numeric pasting produces.
  start_yy <- as.numeric(substr(Season, 3, 4))
  season_code <- sprintf("%02d%02d", start_yy, (start_yy + 1) %% 100)

  # Tier k lives in file E<k-1>.csv; division and tier are passed as the same
  # number, exactly as in the four explicit calls this loop replaces.
  tiers <- c(1, 2, 3, 4)
  per_tier <- lapply(tiers, function(tier) {
    csv_url <- paste0(
      "https://www.football-data.co.uk/mmz4281/", season_code,
      "/E", tier - 1, ".csv"
    )
    engsoccerdata::getCurrentData(read.csv(csv_url), tier, tier, Season = Season)
  })
  df1 <- do.call(rbind, per_tier)

  df1$Date <- as.Date(df1$Date, format = "%Y-%m-%d")

  # Warn when the latest downloaded match date equals the latest date already
  # shipped in the packaged `england` dataset.
  eng <- engsoccerdata::england
  if (identical(max(df1$Date), max(eng$Date))) {
    warning("The returned dataframe contains data already included in 'england' dataframe")
  }

  # Translate the source's team names to the package's canonical names.
  # match() yields NA for names missing from `name_other`, so those rows end
  # up with NA in home/visitor.
  tm <- engsoccerdata::teamnames
  df1$home <- tm$name[match(df1$home, tm$name_other)]
  df1$visitor <- tm$name[match(df1$visitor, tm$name_other)]

  df1
}


## this is a nightmare with the teamnames Tranmere, Forest Green Rovers, Harrogate, Salford, Lincoln

# s1 <- s2 <- myseason <- tm <- df1 <- df <- . <- Date <- tier <- home <- visitor <- hgoal <- vgoal <- goaldif <- FT <- division <- result <- name <- name_other <- most_recent <- country <- NULL
#
# myseason <- Season
# s2 <- as.numeric(substr(myseason, 3, 4))
# s1 <- s2 + 1
#
# df <- rbind(read.csv(paste0("http://www.football-data.co.uk/mmz4281/ ", s2, s1, "/E0.csv")),
# read.csv(paste0("http://www.football-data.co.uk/mmz4281/ ", s2, s1, "/E1.csv")),
# read.csv(paste0("http://www.football-data.co.uk/mmz4281/ ", s2, s1, "/E2.csv")),
# read.csv(paste0("http://www.football-data.co.uk/mmz4281/ ", s2, s1, "/E3.csv"))
# )
#
# df <- df[1:8]
# df$Date <- as.Date(df$Date, "%d/%m/%y")
#
#
# df1 <- data.frame(Date = df$Date,
# Season = myseason,
# home = as.character(df$HomeTeam),
# visitor = as.character(df$AwayTeam),
# FT = paste0(df$FTHG,"-", df$FTAG),
# hgoal = df$FTHG,
# vgoal = df$FTAG,
# division = as.numeric(factor(df$Div)),
# tier = as.numeric(factor(df$Div)),
# totgoal = df$FTHG + df$FTAG,
# goaldif = df$FTHG - df$FTAG,
# result = as.character(df$FTR)
# )
#
# i <- sapply(df1, is.factor)
# df1[i] <- lapply(df1[i], as.character)
# df1$Date <- as.character(df1$Date)
#
# #fix teamnames
# df1$home <- as.character(as.character(teamnames$name)[match(as.character(df1$home), as.character(teamnames$name_other))])
# df1$visitor <- as.character(as.character(teamnames$name)[match(as.character(df1$visitor), as.character(teamnames$name_other))])
#
# return(df1)
#################
#' Get current England season data for all tiers
#'
#' @return a dataframe with results for current
#' season for all top four divisions
#' @importFrom rvest "html_table"
#' @importFrom xml2 "read_html"
#' @importFrom dplyr "case_when"
#' @importFrom dplyr "mutate"
#' @importFrom magrittr "%>%"
#' @examples
#' england_current()
#' @export

# england_current <- function(){
#
#
# home<-visitor<-hgoal<-vgoal<-goaldif<-FT<-Season<-division<-result<-NULL
#
# url1 <- "https://www.11v11.com/competitions/premier-league/2022/matches/"
# url2 <- "https://www.11v11.com/competitions/league-championship/2022/matches/"
# url3 <- "https://www.11v11.com/competitions/league-one/2022/matches/"
# url4 <- "https://www.11v11.com/competitions/league-two/2022/matches/"
#
# x1 <- xml2::read_html(url1) %>% rvest::html_table(fill = TRUE)
# x2 <- xml2::read_html(url2) %>% rvest::html_table(fill = TRUE)
# x3 <- xml2::read_html(url3) %>% rvest::html_table(fill = TRUE)
# x4 <- xml2::read_html(url4) %>% rvest::html_table(fill = TRUE)
#
# make_data <- function(x){
# x <- x[[1]][,1:4]
# # x <-x[grepl("([0-9]+).*$", x[,1]),]#get rid of months text
# x <-x[grepl("([0-9]+).*$", unlist(x[,3])),]#get rid of months text
# colnames(x)<-c("Date","home","FT","visitor")
# x$Date <- as.character(as.Date(x$Date, format="%d %b %Y"))
# x$Season <- 2021
# x$FT <- gsub(":", "-", x$FT)
# x <- x[nchar(x$FT)>1,]
# hgvg <- matrix(unlist(strsplit(x$FT, "-")), ncol=2, byrow = T)
# x$hgoal <- as.numeric(hgvg[,1])
# x$vgoal <- as.numeric(hgvg[,2])
# x$totgoal <- x$hgoal+x$vgoal
# x$goaldif <- x$hgoal-x$vgoal
# x$result <- ifelse(x$hgoal>x$vgoal, "H", ifelse(x$hgoal<x$vgoal, "A", "D"))
# return(x)
# }
#
# x1d <- make_data(x1)
# x2d <- make_data(x2)
# x3d <- make_data(x3)
# x4d <- make_data(x4)
#
# x1d$division <- 1
# x1d$tier <- 1
# x2d$division <- 2
# x2d$tier <- 2
# x3d$division <- 3
# x3d$tier <- 3
# x4d$division <- 4
# x4d$tier <- 4
#
# xd <- rbind(x1d,x2d,x3d,x4d)
# xd <- xd[colnames(engsoccerdata::england)]
#
# xd %>%
# dplyr::mutate(home = dplyr::case_when(
# grepl("Brighton and Hove", home) ~ "Brighton & Hove Albion",
# grepl("Cheltenham Town", home) ~ "Cheltenham",
# grepl("Stevenage", home) ~ "Stevenage Borough",
# grepl("Harrogate Town", home) ~ "Harrogate Town A.F.C.",
# grepl("Macclesfield Town", home) ~ "Macclesfield",
# grepl("Yeovil", home) ~ "Yeovil",
# TRUE ~ home
# )) %>%
# dplyr::mutate(visitor = dplyr::case_when(
# grepl("Brighton and Hove", visitor) ~ "Brighton & Hove Albion",
# grepl("Cheltenham Town", visitor) ~ "Cheltenham",
# grepl("Stevenage", visitor) ~ "Stevenage Borough",
# grepl("Macclesfield Town", visitor) ~ "Macclesfield",
# grepl("Harrogate Town", visitor) ~ "Harrogate Town A.F.C.",
# grepl("Yeovil", visitor) ~ "Yeovil",
# TRUE ~ visitor
# )) -> xd
#
# return(xd)
#
# }
#
#
# ## this is a nightmare with the teamnames Tranmere, Forest Green Rovers, Harrogate, Salford, Lincoln
#
# # s1 <- s2 <- myseason <- tm <- df1 <- df <- . <- Date <- tier <- home <- visitor <- hgoal <- vgoal <- goaldif <- FT <- division <- result <- name <- name_other <- most_recent <- country <- NULL
# #
# # myseason <- Season
# # s2 <- as.numeric(substr(myseason, 3, 4))
# # s1 <- s2 + 1
# #
# # df <- rbind(read.csv(paste0("http://www.football-data.co.uk/mmz4281/ ", s2, s1, "/E0.csv")),
# # read.csv(paste0("http://www.football-data.co.uk/mmz4281/ ", s2, s1, "/E1.csv")),
# # read.csv(paste0("http://www.football-data.co.uk/mmz4281/ ", s2, s1, "/E2.csv")),
# # read.csv(paste0("http://www.football-data.co.uk/mmz4281/ ", s2, s1, "/E3.csv"))
# # )
# #
# # df <- df[1:8]
# # df$Date <- as.Date(df$Date, "%d/%m/%y")
# #
# #
# # df1 <- data.frame(Date = df$Date,
# # Season = myseason,
# # home = as.character(df$HomeTeam),
# # visitor = as.character(df$AwayTeam),
# # FT = paste0(df$FTHG,"-", df$FTAG),
# # hgoal = df$FTHG,
# # vgoal = df$FTAG,
# # division = as.numeric(factor(df$Div)),
# # tier = as.numeric(factor(df$Div)),
# # totgoal = df$FTHG + df$FTAG,
# # goaldif = df$FTHG - df$FTAG,
# # result = as.character(df$FTR)
# # )
# #
# # i <- sapply(df1, is.factor)
# # df1[i] <- lapply(df1[i], as.character)
# # df1$Date <- as.character(df1$Date)
# #
# # #fix teamnames
# # df1$home <- as.character(as.character(teamnames$name)[match(as.character(df1$home), as.character(teamnames$name_other))])
# # df1$visitor <- as.character(as.character(teamnames$name)[match(as.character(df1$visitor), as.character(teamnames$name_other))])
# #
# # return(df1)
#
#
#
Loading

0 comments on commit f3061c1

Please sign in to comment.