-
Notifications
You must be signed in to change notification settings - Fork 6
/
Copy pathoptimize.c
156 lines (140 loc) · 4.24 KB
/
optimize.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
#include <stdlib.h>
#include <string.h>
#include "config.h"
#include "optimize.h"
#ifdef INLINE_UNSIGNED
const static uint32_t unsigned_tag = 0x80000000;
#endif
struct id_count {
uint32_t id;
uint32_t count;
};
struct strings_frequency {
struct id_count *counts;
size_t size;
uint32_t max_id;
bool sorted_by_count;
};
struct strings_frequency *strings_frequency_new() {
struct strings_frequency *frequency = malloc(sizeof(*frequency));
if (!frequency) {
return NULL;
}
frequency->counts = malloc(sizeof(*frequency->counts));
if (!frequency->counts) {
free(frequency);
return NULL;
}
frequency->counts[0].count = 0;
frequency->max_id = 0;
frequency->size = 1;
frequency->sorted_by_count = false;
return frequency;
}
void strings_frequency_free(struct strings_frequency *frequency) {
free(frequency->counts);
free(frequency);
}
__attribute__ ((noinline))
static bool resize_counts(struct strings_frequency *frequency,
size_t required_size) {
size_t new_size = frequency->size;
while (new_size < required_size) {
new_size *= 2;
}
struct id_count *counts = realloc(frequency->counts,
sizeof(*counts) * new_size);
if (!counts) {
return false;
}
memset(counts + frequency->size, 0,
(new_size - frequency->size) * sizeof(struct id_count));
frequency->counts = counts;
frequency->size = new_size;
return true;
}
static int count_comparator(const void *a_, const void *b_) {
const struct id_count *a = (const struct id_count *)a_;
const struct id_count *b = (const struct id_count *)b_;
return b->count > a->count ? 1 : (a->count > b->count ? -1 : 0);
}
static int id_comparator(const void *a_, const void *b_) {
const struct id_count *a = (const struct id_count *)a_;
const struct id_count *b = (const struct id_count *)b_;
return b->id > a->id ? -1 : (a->id > b->id ? 1 : 0);
}
static void sort_by_count(struct strings_frequency *frequency) {
frequency->sorted_by_count = true;
qsort(frequency->counts, frequency->max_id, sizeof(struct id_count),
count_comparator);
}
static void sort_by_id(struct strings_frequency *frequency) {
frequency->sorted_by_count = false;
qsort(frequency->counts, frequency->max_id, sizeof(struct id_count),
id_comparator);
}
bool strings_frequency_add(struct strings_frequency *frequency, uint32_t id) {
#ifdef INLINE_UNSIGNED
if (id > unsigned_tag) {
return true;
}
#endif
if (!id) {
return true;
}
if (frequency->sorted_by_count) {
sort_by_id(frequency);
}
if (frequency->max_id < id) {
if (frequency->size < id) {
if (!resize_counts(frequency, id)) {
return false;
}
}
frequency->max_id = id;
}
frequency->counts[id - 1].count++;
return true;
}
bool strings_frequency_add_all(struct strings_frequency *frequency,
const struct strings *strings) {
if (frequency->sorted_by_count) {
sort_by_id(frequency);
}
uint32_t total = strings_count(strings);
if (frequency->size < total && !resize_counts(frequency, total)) {
return false;
}
frequency->max_id = total;
for (uint32_t id = 0; id < total; id++) {
frequency->counts[id].count++;
}
return true;
}
struct strings *strings_optimize(struct strings *strings,
struct strings_frequency *frequency) {
struct strings *optimized = strings_new();
if (!optimized) {
return NULL;
}
if (!frequency->sorted_by_count) {
for (uint32_t i = 0; i < frequency->max_id; i++) {
frequency->counts[i].id = i + 1;
}
sort_by_count(frequency);
}
for (uint32_t i = 0; i < frequency->max_id; i++) {
const struct id_count *id_count = &frequency->counts[i];
if (!id_count->count) {
break;
}
const char *string = strings_lookup_id(strings, id_count->id);
if (!string || !strings_intern(optimized, string)) {
goto error;
}
}
return optimized;
error:
strings_free(optimized);
return NULL;
}