-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathanalyze_data.R
71 lines (57 loc) · 2.18 KB
/
analyze_data.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
## top 20 packages
# table() counts how often each package name occurs in downloads$package;
# sort() is ascending, so the 20 largest counts are the last 20 entries.
tail(sort(table(downloads$package)), 20)
## create a data frame of packages and download numbers
package_downloads <- as.data.frame(table(downloads$package))
names(package_downloads) <- c("package", "downloads")
library(dplyr)
# sort by download number (largest first); inside arrange() the name
# `downloads` refers to the column of package_downloads, not the raw log
package_downloads <- arrange(package_downloads, desc(downloads))
# bar chart of the ten packages with the highest counts
barplot(
  head(package_downloads$downloads, 10),
  names.arg = head(package_downloads$package, 10)
)
# add rank column: the table is already sorted, so rank is the row position.
# Derived from the actual row count rather than a fixed number of packages,
# so the script keeps working when the set of packages changes.
package_downloads <- mutate(
  package_downloads,
  rank = seq_len(nrow(package_downloads))
)
# reverse dependencies and reverse imports
library(tools)
# rv_dp holds, per package, the number of entries returned by a reverse lookup
# over the Depends, Imports and LinkingTo fields. No `db` is passed, so
# package_dependencies() uses its default package database.
package_downloads <- package_downloads %>%
  mutate(
    rv_dp = lengths(
      package_dependencies(
        package,
        which = c("Depends", "Imports", "LinkingTo"),
        reverse = TRUE
      )
    )
  )
# persist the enriched table for later use
save(package_downloads, file = "package_downloads.Rdata")
## operating systems
# frequency of each distinct value found in downloads$r_os
os <- as.data.frame(table(downloads$r_os))
## r versions
# frequency of each distinct value found in downloads$r_version; the version
# strings end up in column Var1, the counts in Freq
r_vers <- as.data.frame(table(downloads$r_version))
# The three expressions below only display values (nothing is assigned);
# they are useful when the script is run interactively.
# first character of every version string
substr(r_vers$Var1, 1, 1)
# version strings with the dots removed; fixed = TRUE is required because an
# unescaped "." is a regular-expression wildcard and would delete everything
gsub(".", "", r_vers$Var1, fixed = TRUE)
r_vers$Var1
## countries
# Tabulate downloads$country, label the two resulting columns and order the
# rows so that the most frequent country comes first.
countries <- arrange(
  setNames(
    as.data.frame(table(downloads$country)),
    c("country", "freq")
  ),
  desc(freq)
)
## countries and packages
library(dplyr)
# Build `popular`: one row per country (same order as `countries`), holding the
# names of that country's ten most frequent packages in columns p1..p10.
n_top <- 10
popular <- data.frame(matrix(ncol = n_top, nrow = length(countries$country)))
for (i in seq_along(countries$country)) {
  message(i)  # progress indicator
  # NOTE(review): the bare `country` inside `[` only resolves to a column when
  # `downloads` is a data.table -- confirm against the code that creates it.
  country_rows <- downloads[country == countries$country[i], ]
  package_counts <- arrange(
    as.data.frame(table(country_rows$package)),
    desc(Freq)
  )
  top_packages <- as.character(head(package_counts[, 1], n_top))
  # Pad with NA when a country has fewer than n_top packages; otherwise the
  # row assignment would silently recycle the names or stop with an error.
  length(top_packages) <- n_top
  popular[i, ] <- top_packages
}
# Prepend the country column. The original bound `cp` here, which is not
# created until later in the script; the table built above is what belongs.
popular <- cbind(countries$country, popular)
names(popular) <- c("country", paste0("p", seq_len(n_top)))
save(popular, file = "popular.Rdata")
## countries and packages with downloads
# Build `cp`: a list with one element per row of `countries`, named after the
# country. Each element is a data frame with the columns `package` and
# `downloads` for that country's (up to) ten most frequent packages.
# `countries` must still be the per-country frequency table at this point.
# The list is sized from the data instead of a fixed country count so that
# the names always line up with the elements.
cp <- vector("list", length(countries$country))
names(cp) <- as.character(countries$country)
for (i in seq_along(cp)) {
  message(i)  # progress indicator
  # NOTE(review): the bare `country` inside `[` only resolves to a column when
  # `downloads` is a data.table -- confirm against the code that creates it.
  country_rows <- downloads[country == countries$country[i], ]
  package_counts <- arrange(
    as.data.frame(table(country_rows$package)),
    desc(Freq)
  )
  top_rows <- head(package_counts, 10)
  cp[[i]] <- data.frame(
    package = as.character(top_rows[, 1]),
    downloads = top_rows[, 2]
  )
}
save(cp, file = "cp.Rdata")
## countries data structure
# Flatten `cp` into one long table with the columns country, rank, package and
# downloads. Note that this overwrites the earlier `countries` frequency table.
# Row counts are taken from each list element rather than assumed to be 10, so
# the country and rank columns stay aligned with the stacked rows even when a
# country has fewer than ten packages.
rows_per_country <- vapply(cp, nrow, integer(1))
country <- rep(names(cp), times = rows_per_country)
# sequence() restarts at 1 for every country: 1..k for a country with k rows
rank <- sequence(rows_per_country)
# rbindlist() comes from data.table; it is namespace-qualified because no
# library(data.table) call is visible in this script
countries <- data.table::rbindlist(cp)
countries <- cbind(country, rank, countries)
save(countries, file = "countries.Rdata")