-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathday07.R
173 lines (128 loc) · 5.37 KB
/
day07.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
# Read the puzzle input; every terminal line becomes one vector element.
# day7 <- readLines(con = "inputs/day07_ex.txt") # test with example data
day7 <- readLines(con = "inputs/day07.txt")
## META LEVEL DESCRIPTION ------------------------------------------------------
# This code is a bit clunky, but here's what it does:
# I first find all directories that don't contain any subdirectories (i.e. only
# files). I calculate their size, extract their name and store that in a table.
# Then, I delete the files of those subdirectories, and replace the directories
# for which I know the size with their size. That means they will be treated
# as files in later iterations, allowing me to calculate the size of the upper
# level folders correctly.
# For example, say I have the following structure:
# $ cd /
# $ ls
# 113975 bqpslnv
# 50243 btttmt.nmb
# dir gbjh
# dir hlpzbht
# 43500 lblt
# $ cd gbjh
# $ ls
# 64971 dhmprc.qpl
# 123 abc
# gbjh only contains files (total size 65094). I delete all those files ...
# $ cd /
# $ ls
# 113975 bqpslnv
# 50243 btttmt.nmb
# dir gbjh
# dir hlpzbht
# 43500 lblt
# ... and replace dir gbjh (line 5) with its size
# $ cd /
# $ ls
# 113975 bqpslnv
# 50243 btttmt.nmb
# 65094
# dir hlpzbht
# 43500 lblt
# Now I can calculate the size of /, because it only consists of "files"
## PART 1 ----------------------------------------------------------------------
tic <- Sys.time()

# Call the root directory "cd main", because I don't want to deal with the "/"
day7[day7 == "$ cd /"] <- "$ cd main"

# Caution: In the original data, some dirs have the same name! (But they are
# different dirs.) If we assume each dir is only cd-ed once, we can give them
# unique IDs.
# "$ cd .." is not matched here, because ".." contains no letters.
is_cd <- grepl("cd [a-z]+", day7)
all_cds <- day7[is_cd]
# All cds that appear more than once
cd_counts <- table(all_cds)
more_than_once <- cd_counts[cd_counts > 1]
# The IDs are single letters, so only 26 same-named dirs can be told apart.
# Fail loudly instead of silently creating "-NA" suffixes.
if (any(more_than_once > length(letters))) {
  stop(
    "More than ", length(letters), " directories share the same name; ",
    "cannot build unique letter IDs.",
    call. = FALSE
  )
}
# Give them individual names ("$ cd x-a", "$ cd x-b", ...). The suffix has to
# be a letter: entries that contain a digit are treated as files later on.
for (i in names(more_than_once)) {
  hits <- which(day7 == i)
  day7[hits] <- paste0(i, "-", letters[seq_along(hits)])
}
# Now all cd calls have unique names, enabling us to distinguish folders
# with the same name. However, the dir names ("dir x") are not unique at this
# point. This is handled later in the code.

# Where we will store the dir sizes later: one row per cd-ed directory, in
# order of appearance. Renaming did not move any cd line, so is_cd still fits.
dir_table <- data.frame(
  dir_name = sub("\\$ cd ", "", day7[is_cd]),
  size = NA_real_
)

# Files are the entries that start with their size ("64971 dhmprc.qpl"). Keep
# only that leading number to make it easier to calculate the size later.
# (Stripping every non-digit from the line would also glue digits that occur
# in the file name onto the size.)
is_file <- grepl("^[0-9]+ ", day7)
day7[is_file] <- sub(" .*$", "", day7[is_file])

# For my approach, I can ignore $ cd .. or $ ls
day7 <- day7[!day7 %in% c("$ cd ..", "$ ls")]
# Loop through the commands until dir_table is filled. The while condition is
# only re-evaluated at the top of each pass, hence the explicit break below.
while (any(is.na(dir_table$size))) {
  # Files count as FALSE, and non-files count as TRUE (handy for sum
  # calculation). The pattern is deliberately not anchored: sizes written back
  # by as.character() below can look like "1e+05" and must still count as
  # files.
  day7_nonfiles <- !grepl("([0-9]+)", day7)
  # Count cds: every entry gets the running number of the cd group it is in
  cd_no <- cumsum(grepl("\\$ cd", day7))
  # Number of non-file entries per group, ignoring the first element (which is
  # the cd command). Zero means the directory only contains files.
  dir_stats <- by(day7_nonfiles, cd_no, function(x) sum(x[-1]))
  # Name of directories is in the first element of each group
  dir_names <- by(day7, cd_no, function(x) sub("\\$ cd ", "", x[1]))
  # Sum of all files (only meaningful for our target directories)
  dir_sizes <- by(
    day7, cd_no,
    function(x) sum(as.numeric(x[grepl("[0-9]+", x)]))
  )

  # Check which dirs only contain files (index sum == 0). Group numbers are
  # converted to numeric once, so they can be used for indexing and comparing.
  target_dirs <- as.numeric(names(dir_stats)[dir_stats == 0])

  # Without any such directory nothing below would change day7 and the loop
  # would run forever (e.g. when a listed directory is never cd-ed into).
  if (length(target_dirs) == 0) {
    stop(
      "Cannot resolve the remaining directories: each of them still lists a ",
      "subdirectory whose size is unknown.",
      call. = FALSE
    )
  }

  # Store the size of the target directories in our table
  for (i in target_dirs) {
    temp_name <- dir_names[i]
    dir_table$size[dir_table$dir_name == temp_name] <- dir_sizes[i]
  }

  # Exit here when while condition is fulfilled. This also keeps the root
  # directory, which has no "dir" entry to replace, out of the loop below.
  if (!any(is.na(dir_table$size))) break

  # Remove the directories for which we identified a size and replace them with
  # a string containing their size. Start with the last group - otherwise, we'll
  # mess up the indices by removing stuff.
  for (i in rev(target_dirs)) {
    temp_idx <- which(cd_no == i)
    # Delete the cd command and the files of the directory
    day7 <- day7[-temp_idx]
    # Replace directory name ("dir x") with its size.
    # Problem: We have directories that have the same name (but are different).
    # At the cd level, I gave them unique names, but that is not the case for
    # dir. So strip the unique identifier again and look for the closest
    # matching "dir x" entry BEFORE the group we just removed.
    # NOTE(review): if two same-named directories with different parents are
    # resolved in the same pass, the closest preceding "dir x" can belong to
    # the other parent. The common ancestor still gets the right total, but
    # an intermediate directory may not - verify for inputs with such a layout.
    temp_dir_name <- dir_names[i]
    dir_idx <- which(day7 == paste0("dir ", sub("-[a-z]", "", temp_dir_name)))
    parent_idx <- dir_idx[dir_idx < min(temp_idx)]
    # max() of an empty vector is -Inf, which is not a usable index
    if (length(parent_idx) == 0) {
      stop(
        "No 'dir' entry found for directory '", temp_dir_name, "'.",
        call. = FALSE
      )
    }
    day7[max(parent_idx)] <- as.character(dir_sizes[i])
  }
}
# Add up the sizes of all directories that are no larger than 100000
with(dir_table, sum(size[size <= 100000]))
# 2061777
# Elapsed time since the start of part 1
difftime(Sys.time(), tic)
## PART 2 ----------------------------------------------------------------------
tic <- Sys.time()

# Total size of the disk and the amount of space that is needed
disk_space <- 70000000
space_needed <- 30000000

# Everything the root directory ("main") does not occupy is unused
unused_space <- disk_space - with(dir_table, size[dir_name == "main"])

# Deleting a single directory has to free at least the missing amount
min_delete <- space_needed - unused_space

# Sort the table by size (ascending) and report the smallest directory that is
# still big enough
dir_table <- dir_table[order(dir_table[["size"]]), ]
with(dir_table, min(size[size >= min_delete]))
# 4473403

# Elapsed time since the start of part 2
difftime(Sys.time(), tic)