forked from sorbet/sorbet
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFlatten.cc
427 lines (380 loc) · 19 KB
/
Flatten.cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
#include "rewriter/Flatten.h"
#include "ast/Helpers.h"
#include "ast/ast.h"
#include "ast/treemap/treemap.h"
#include "core/core.h"
#include <utility>
using namespace std;
// This DSL pass flattens nested methods, so that once we've reached a non-definition AST node (i.e. not a ClassDef or a
// MethodDef) then we know that there are no MethodDefs lurking deeper in the tree. In order to work correctly, this
// also needs to move some non-method-def things as well, specifically `sig`s and sends for method visibility
// (e.g. `private` and the like), and it also updates the static-ness of some MethodDefs based on where they have
// appeared in a nested context.
//
// So, a file like the following
//
// class A
// sig{void}
// private def foo
// sig{void}
// def self.bar; end
// end
// end
//
// will morally be transformed into the following
//
// class A
// sig{void}
// private def foo; :bar; end
// sig{void}
// def bar; end # notice the lack of `self.` here
// end
//
// So no nested methods exist any longer, and additionally, the nested method `bar` has had the `self.` qualifier
// removed: if you run the above code in Ruby, you'll find that `bar` is not defined as a class method on `A`, but
// rather as a not-always-available instance method on `A`, so introducing it as a static method is not at all
// correct. Finally, because methods evaluate to their corresponding symbols, the former location of `bar`'s definition
// has been replaced with the verbatim symbol `:bar`.
namespace sorbet::rewriter {
class FlattenWalk {
const bool compiledFile;
enum class ScopeType { ClassScope, StaticMethodScope, InstanceMethodScope };
struct ScopeInfo {
// This tells us how many `class << self` levels we're nested inside
uint32_t staticLevel;
// this corresponds to the thing we're moving
ScopeType scopeType;
ScopeInfo(uint32_t staticLevel, ScopeType scopeType) : staticLevel(staticLevel), scopeType(scopeType) {}
};
// This is what we keep on the stack: we need to know whether an item should be moved or not (i.e. whether it's
// nested or not) and keep a stack of the current 'staticness' level (i.e. how many levels of `def self.something`
// we're inside)
struct MethodData {
// If this is non-nullopt, then it means that we've allocated space for an expression and this is the index to
// that space. The reason we do this is so that we can keep the moved expressions in the same order we
// originally see them in: in the following example
//
// def foo
// sig{void}
// def bar
// sig{returns(Integer)}
// def baz; 1; end
// end
// end
//
// we want to end up with the following (modulo a few symbols):
//
// def foo; end
// sig{void}
// def bar; end
// sig{returns(Integer)}
// def baz; 1; end
//
// which means we want `sig{void} `first, then `def bar`, then... and so forth. But we can only replace these
// definitions in the post-traversal pass. The preTransform methods for the things which need to be moved will
// always fire in the order that we want, but the postTransform methods won't: we'll get all the sigs in their
// entirety before any postTransform invocation for a `methodDef`. So, this is a way of storing the order that
// we want, and the `ClassScope` class below uses ENFORCEs to make sure that we always use these indices
// correctly.
optional<int> targetLocation;
// this is all the metadata about this specific stack frame, and what scope it puts it into
ScopeInfo scopeInfo;
MethodData(optional<int> targetLocation, ScopeInfo scopeInfo)
: targetLocation(targetLocation), scopeInfo(scopeInfo){};
};
// This represents something that needs to be moved to the end of the class scope as well as information about how
// can reconstruct the preorder traversal)'static' it should be: i.e. whether it should have a `self.` qualifier and
// whether it should be in a `class << self` block.
struct MovedItem {
ast::ExpressionPtr expr;
uint32_t staticLevel;
MovedItem(ast::ExpressionPtr expr, uint32_t staticLevel) : expr(move(expr)), staticLevel(staticLevel){};
MovedItem() = default;
};
// This corresponds to a class scope, which in turn might have nested method scopes, as well as a queue of things to
// move to the end of the class scope when we get to it.
struct ClassScope {
// this is the queue of methods that we want to move
vector<MovedItem> moveQueue;
// this is where we track the current state of which methods we're contained in
vector<MethodData> stack;
ClassScope() = default;
// push a method scope, possibly noting whether
void pushScope(ScopeInfo info) {
stack.emplace_back(moveQueue.size(), info);
moveQueue.emplace_back();
}
// Pop a method scope, and if the scope corresponded to something which we need to move then return the
// information we need to move it. We expect that pushScope and popScope are called the same number of times,
// and this should be exercised by ENFORCEs.
optional<MethodData> popScope() {
ENFORCE(stack.size() > 0);
if (auto idx = stack.back().targetLocation) {
// we have a non-nullopt target location, which means the element corresponding to this scope should get
// moved to the end of the enclosing ClassDef
// We should have allocated space in the move queue for this already
ENFORCE(moveQueue.size() > *idx);
// This space also should still be unoccupied
ENFORCE(moveQueue[*idx].expr == nullptr);
auto back = stack.back();
stack.pop_back();
return MethodData(back);
} else {
stack.pop_back();
return std::nullopt;
}
}
// this moves an expression to the already-allocated space
void addExpr(MethodData md, ast::ExpressionPtr expr) {
ENFORCE(md.targetLocation);
int idx = *md.targetLocation;
ENFORCE(moveQueue.size() > idx);
ENFORCE(moveQueue[idx].expr == nullptr);
auto staticLevel = md.scopeInfo.staticLevel;
// the staticLevel on the stack corresponded to how many `class << self` blocks we were inside of, which is
// why we don't count `self.foo` methods as increasing the number there. We do want to treat them as static
// once we replace them, so we add 1 to the computed staticLevel if we're moving a static method here.
if (md.scopeInfo.scopeType == ScopeType::StaticMethodScope) {
staticLevel += 1;
}
moveQueue[idx] = {move(expr), staticLevel};
}
};
// each entry on this corresponds to a class or module scope in which we might in turn have nested methods. We only
// care about the innermost class scope at a given time, but the outer class may still have definitions which we
// need to move to the end, so we keep that below on the stack.
vector<ClassScope> classScopes;
// compute the new scope information.
ScopeInfo computeScopeInfo(ScopeType scopeType) {
auto &methods = curMethodSet();
// if we're in instance scope, then we'll always stay in it. This is an incorrect approximation, but it's as
// close as we can get with our specific model: in particular, if we have something like
//
// def foo; def bar; end; end
//
// then both #foo and #bar will be instance methods, but if we have
//
// def foo; def self.bar; end; end
//
// then #foo will be an instance method while #bar will be an instance method stored on the singleton class for
// the instances on which #foo has been called: that is to say, it will act as an instance method, but it won't
// be an instance method available on every instance of the enclosing class. We desugar it as though it's an
// instance method.
if (!methods.stack.empty() && methods.stack.back().scopeInfo.scopeType == ScopeType::InstanceMethodScope &&
methods.stack.back().scopeInfo.staticLevel == 0) {
return ScopeInfo(0, ScopeType::InstanceMethodScope);
} else {
// if we're not in an instance scope, then we carry on the staticLevel from the stack
auto existingLevel = methods.stack.empty() ? 0 : methods.stack.back().scopeInfo.staticLevel;
if (scopeType == ScopeType::ClassScope) {
return ScopeInfo(existingLevel + 1, scopeType);
} else {
return ScopeInfo(existingLevel, scopeType);
}
}
}
void newMethodSet() {
classScopes.emplace_back();
}
ClassScope &curMethodSet() {
ENFORCE(!classScopes.empty());
return classScopes.back();
}
void popCurMethodSet() {
ENFORCE(!classScopes.empty());
classScopes.pop_back();
}
// grab all the final moved items once we're done with everything: this will grab anything that needs to be moved at
// the top level
vector<MovedItem> popCurMethodDefs() {
auto ret = std::move(curMethodSet().moveQueue);
ENFORCE(curMethodSet().stack.empty());
popCurMethodSet();
return ret;
};
// extract all the methods from the current queue and put them at the end of the class's current body
ast::ClassDef::RHS_store addClassDefMethods(core::Context ctx, ast::ClassDef::RHS_store rhs, core::LocOffsets loc) {
auto currentMethodDefs = popCurMethodDefs();
// this adds all the methods at the appropriate 'staticness' level
int highestLevel = 0;
for (int i = 0; i < currentMethodDefs.size(); i++) {
auto &expr = currentMethodDefs[i];
if (highestLevel < expr.staticLevel) {
highestLevel = expr.staticLevel;
}
// we need to make sure that we keep sends with their attached methods, so fix that up here
if (i > 0) {
auto send = ast::cast_tree<ast::Send>(currentMethodDefs[i - 1].expr);
if (send != nullptr && send->fun == core::Names::sig()) {
currentMethodDefs[i - 1].staticLevel = expr.staticLevel;
}
}
}
// these will store the bodies of the `class << self` blocks we create at the end
vector<ast::ClassDef::RHS_store> nestedClassBodies;
for (int level = 2; level <= highestLevel; level++) {
nestedClassBodies.emplace_back();
}
// This vector contains all the possible RHS_store target locations that we might move to.
// Both vectors are indexed by staticLevel.
vector<ast::ClassDef::RHS_store *> targets;
// staticLevel 0 (no surrounding class << self) just goes into the top-level rhs.
// do it while preserving order
targets.emplace_back(&rhs);
// staticLevel 1 (one surrounding class << self) also goes into the top-level rhs, but we want to interleave
// them in source order so there's some special handling. nullptr here ensures that this fails early.
targets.emplace_back(nullptr);
// staticLevel 2 and up go into the to-be-created `class << self` blocks
for (auto &target : nestedClassBodies) {
targets.emplace_back(&target);
}
vector<vector<ast::ExpressionPtr>> expressionsToBePutInTargets;
// This makes each element be a length-0 vector
expressionsToBePutInTargets.resize(targets.size());
// move everything to its appropriate target
for (auto &expr : currentMethodDefs) {
if (auto methodDef = ast::cast_tree<ast::MethodDef>(expr.expr)) {
methodDef->flags.isSelfMethod = expr.staticLevel > 0;
}
auto targetLevel = expr.staticLevel == 1 ? 0 : expr.staticLevel;
expressionsToBePutInTargets[targetLevel].emplace_back(std::move(expr.expr));
}
int targetLevel = -1;
for (auto &target : targets) {
targetLevel += 1;
if (targetLevel == 1) {
ENFORCE(expressionsToBePutInTargets[targetLevel].empty());
} else {
target->insert(target->begin(), make_move_iterator(expressionsToBePutInTargets[targetLevel].begin()),
make_move_iterator(expressionsToBePutInTargets[targetLevel].end()));
}
}
// generate the nested `class << self` blocks as needed and add them to the class
for (auto &body : nestedClassBodies) {
auto classDef = ast::MK::Class(loc, loc,
ast::make_expression<ast::UnresolvedIdent>(core::LocOffsets::none(),
ast::UnresolvedIdent::Kind::Class,
core::Names::singleton()),
{}, std::move(body));
rhs.emplace_back(std::move(classDef));
}
return rhs;
}
public:
FlattenWalk(core::Context ctx) : compiledFile(ctx.file.data(ctx).compiledLevel == core::CompiledLevel::True) {
newMethodSet();
}
~FlattenWalk() {
ENFORCE(classScopes.empty());
}
void preTransformClassDef(core::Context ctx, ast::ExpressionPtr &tree) {
if (!curMethodSet().stack.empty()) {
curMethodSet().pushScope(computeScopeInfo(ScopeType::InstanceMethodScope));
}
newMethodSet();
}
void postTransformClassDef(core::Context ctx, ast::ExpressionPtr &tree) {
auto &classDef = ast::cast_tree_nonnull<ast::ClassDef>(tree);
classDef.rhs = addClassDefMethods(ctx, std::move(classDef.rhs), classDef.loc);
auto &methods = curMethodSet();
if (curMethodSet().stack.empty()) {
return;
}
// if this class is dirrectly nested inside a method, we want to steal it
auto md = methods.popScope();
ENFORCE(md);
methods.addExpr(*md, move(tree));
tree = ast::MK::EmptyTree();
};
void preTransformSend(core::Context ctx, ast::ExpressionPtr &tree) {
auto &send = ast::cast_tree_nonnull<ast::Send>(tree);
// we might want to move sigs, so we mostly use the same logic that we use for methods. The one exception is
// that we don't know the 'staticness level' of a sig, as it depends on the method that follows it (whether that
// method has a `self.` or not), so we'll fill that information in later
if (send.fun == core::Names::sig()) {
curMethodSet().pushScope(computeScopeInfo(ScopeType::StaticMethodScope));
}
}
void postTransformSend(core::Context ctx, ast::ExpressionPtr &tree) {
auto &send = ast::cast_tree_nonnull<ast::Send>(tree);
auto &methods = curMethodSet();
// if it's not a send, then we didn't make a stack frame for it
if (send.fun != core::Names::sig()) {
return;
}
// if we get a MethodData back, then we need to move this and replace it with a `nil`
// (`nil` instead of `EmptyTree` so that we still have a loc for the thing, because the
// `nil` might appear in an error message)
if (auto md = methods.popScope()) {
methods.addExpr(*md, move(tree));
tree = ast::MK::Nil(send.loc);
return;
}
}
void preTransformMethodDef(core::Context ctx, ast::ExpressionPtr &tree) {
auto &methodDef = ast::cast_tree_nonnull<ast::MethodDef>(tree);
// add a new scope for this method def
curMethodSet().pushScope(computeScopeInfo(methodDef.flags.isSelfMethod ? ScopeType::StaticMethodScope
: ScopeType::InstanceMethodScope));
}
void postTransformMethodDef(core::Context ctx, ast::ExpressionPtr &tree) {
auto &methodDef = ast::cast_tree_nonnull<ast::MethodDef>(tree);
auto &methods = curMethodSet();
// if we get a MethodData back, then we need to move this and replace it
auto md = methods.popScope();
ENFORCE(md);
// Stash some stuff from the methodDef before we move it
auto loc = methodDef.declLoc;
auto name = methodDef.name;
auto discardable = methodDef.flags.discardDef;
methods.addExpr(*md, move(tree));
if (discardable) {
tree = ast::MK::EmptyTree();
return;
} else if (!this->compiledFile) {
// We need to return something here so things like `module_function` and method
// visibility tracking can work correctly.
tree = ast::MK::RuntimeMethodDefinition(loc, name, methodDef.flags.isSelfMethod);
return;
} else {
auto keepName = methodDef.flags.isSelfMethod ? core::Names::keepSelfDef() : core::Names::keepDef();
auto kind = methodDef.flags.genericPropGetter ? core::Names::genericPropGetter()
: methodDef.flags.isAttrReader ? core::Names::attrReader()
: core::Names::normal();
tree = ast::MK::Send3(loc, ast::MK::Constant(loc, core::Symbols::Sorbet_Private_Static()), keepName,
loc.copyWithZeroLength(), ast::MK::Self(loc), ast::MK::Symbol(loc, name),
ast::MK::Symbol(loc, kind));
return;
}
};
ast::ExpressionPtr addTopLevelMethods(core::Context ctx, ast::ExpressionPtr tree) {
auto &methods = curMethodSet().moveQueue;
if (methods.empty()) {
ENFORCE(popCurMethodDefs().empty());
return tree;
}
if (methods.size() == 1 && ast::isa_tree<ast::EmptyTree>(tree)) {
// It was only 1 method to begin with, put it back
ast::ExpressionPtr methodDef = std::move(popCurMethodDefs()[0].expr);
return methodDef;
}
auto insSeq = ast::cast_tree<ast::InsSeq>(tree);
if (insSeq == nullptr) {
ast::InsSeq::STATS_store stats;
tree = ast::make_expression<ast::InsSeq>(tree.loc(), std::move(stats), std::move(tree));
return addTopLevelMethods(ctx, std::move(tree));
}
for (auto &method : popCurMethodDefs()) {
ENFORCE(method.expr != nullptr);
insSeq->stats.emplace_back(std::move(method.expr));
}
return tree;
}
};
ast::ExpressionPtr Flatten::run(core::Context ctx, ast::ExpressionPtr tree) {
FlattenWalk flatten(ctx);
ast::TreeWalk::apply(ctx, flatten, tree);
tree = flatten.addTopLevelMethods(ctx, std::move(tree));
return tree;
}
} // namespace sorbet::rewriter