Skip to content

Commit

Permalink
Refactor and cleanup some of the code (#40)
Browse files Browse the repository at this point in the history
* improve some names

* rename

* remove unnecessary gotos

* reorder

* richer intermediate step

* wip

* use enum codes

* comment

* bump
  • Loading branch information
ericphanson authored Apr 26, 2024
1 parent 548a7d3 commit f166f50
Show file tree
Hide file tree
Showing 3 changed files with 107 additions and 71 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "ExplicitImports"
uuid = "7d51a73a-1435-4ff3-83d9-f097790105c7"
authors = ["Eric P. Hanson"]
version = "1.4.2"
version = "1.4.3"

[deps]
AbstractTrees = "1520ce14-60c1-5f80-bbc7-55ef81b5835c"
Expand Down
148 changes: 92 additions & 56 deletions src/get_names_used.jl
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ function is_anonymous_function_definition_arg(leaf)
return child_index(get_parent(leaf, 3)) == 2
elseif parents_match(leaf, (K"::",))
# we must be on the LHS, otherwise we're a type
is_hastype_LHS(leaf) || return false
is_double_colon_LHS(leaf) || return false
# Ok, let's just step up one level and see again
return is_anonymous_function_definition_arg(parent(leaf))
elseif parents_match(leaf, (K"=",))
Expand Down Expand Up @@ -92,10 +92,10 @@ function is_struct_field_name(leaf)
kind(leaf) == K"Identifier" || return false
if parents_match(leaf, (K"::", K"block", K"struct"))
# we want to be on the LHS of the `::`
return is_hastype_LHS(leaf)
return is_double_colon_LHS(leaf)
elseif parents_match(leaf, (K"::", K"=", K"block", K"struct"))
# if we are in a `Base.@kwdef`, we may be on the LHS of an `=`
return is_hastype_LHS(leaf) && child_index(parent(leaf)) == 1
return is_double_colon_LHS(leaf) && child_index(parent(leaf)) == 1
else
return false
end
Expand Down Expand Up @@ -149,7 +149,7 @@ function is_non_anonymous_function_definition_arg(leaf)
return is_non_anonymous_function_definition_arg(parent(leaf))
elseif parents_match(leaf, (K"::",))
# we must be on the LHS, otherwise we're a type
is_hastype_LHS(leaf) || return false
is_double_colon_LHS(leaf) || return false
# Ok, let's just step up one level and see again
return is_non_anonymous_function_definition_arg(parent(leaf))
else
Expand All @@ -158,7 +158,7 @@ function is_non_anonymous_function_definition_arg(leaf)
end

# matches `x` in `x::Y`, but not `Y`, nor `foo(::Y)`
function is_hastype_LHS(leaf)
function is_double_colon_LHS(leaf)
parents_match(leaf, (K"::",)) || return false
unary = has_flags(get_parent(leaf), JuliaSyntax.PREFIX_OP_FLAG)
unary && return false
Expand All @@ -168,13 +168,13 @@ end

# Here we use the magic of AbstractTrees' `TreeCursor` so we can start at
# a leaf and follow the parents up to see what scopes our leaf is in.
# TODO- cleanup with parsing utilities (?)
# TODO- cleanup. This basically has two jobs: check whether the name is a function arg etc., and figure out the scope/module path.
# We could do these two things separately for more clarity.
function analyze_name(leaf; debug=false)
# Ok, we have a "name". Let us work our way up and try to figure out if it is in local scope or not
function_arg = is_function_definition_arg(leaf)
struct_arg = is_struct_type_param(leaf) || is_struct_field_name(leaf)
for_arg = is_for_arg(leaf)
global_scope = !function_arg && !struct_arg && !for_arg
struct_field_or_type_param = is_struct_type_param(leaf) || is_struct_field_name(leaf)
for_loop_index = is_for_arg(leaf)
module_path = Symbol[]
scope_path = JuliaSyntax.SyntaxNode[]
is_assignment = false
Expand All @@ -189,12 +189,10 @@ function analyze_name(leaf; debug=false)

debug && println(val, ": ", k)
if k in (K"let", K"for", K"function", K"struct")
global_scope = false
push!(scope_path, nodevalue(node).node)
# try to detect presence in RHS of inline function definition
elseif idx > 3 && k == K"=" && !isempty(args) &&
kind(first(args)) == K"call"
global_scope = false
push!(scope_path, nodevalue(node).node)
end

Expand All @@ -216,16 +214,16 @@ function analyze_name(leaf; debug=false)
kids = children(nodevalue(node))
if !isempty(kids)
c = first(kids)
is_assignment = c == nodevalue(leaf)
is_assignment |= c == nodevalue(leaf)
end
end

node = parent(node)

# finished climbing to the root
node === nothing &&
return (; function_arg, global_scope, is_assignment, module_path, scope_path,
struct_arg, for_arg)
return (; function_arg, is_assignment, module_path, scope_path,
struct_field_or_type_param, for_loop_index)
idx += 1
end
end
Expand All @@ -252,10 +250,10 @@ function analyze_all_names(file; debug=false)

per_usage_info = @NamedTuple{name::Symbol,qualified::Bool,import_type::Symbol,
location::String,
function_arg::Bool,global_scope::Bool,is_assignment::Bool,
function_arg::Bool,is_assignment::Bool,
module_path::Vector{Symbol},
scope_path::Vector{JuliaSyntax.SyntaxNode},
struct_arg::Bool,for_arg::Bool}[]
struct_field_or_type_param::Bool,for_loop_index::Bool}[]

# we need to keep track of all names that we see, because we could
# miss entire modules if it is an `include` we cannot follow.
Expand Down Expand Up @@ -307,63 +305,101 @@ function analyze_all_names(file; debug=false)
(; name, qualified, import_type, location, ret...,))
end
untainted_modules = setdiff!(seen_modules, tainted_modules)
return per_usage_info, untainted_modules
return analyze_per_usage_info(per_usage_info), untainted_modules
end

function get_global_names(per_usage_info)
# Walk outward through the scopes enclosing `scope_path` and report whether `name`
# has already been recorded in `seen` as a non-global name in one of them.
#
# `scope_path` is ordered innermost-first, so dropping its first entry moves one
# scope outward. `seen` maps `(; name, scope_path)` keys to a flag that is `true`
# when the name was classified as global in that scope.
#
# Returns `true` only when the nearest enclosing scope that has an entry for `name`
# recorded it as not global. Returns `false` if a module boundary is reached first,
# or if no enclosing scope has an entry.
function is_name_internal_in_higher_local_scope(name, scope_path, seen)
    enclosing = scope_path
    while !isempty(enclosing)
        # Never climb out of a module: we would just end up in a different module.
        kind(first(enclosing)) == K"module" && return false
        # Step one scope outward and look the name up there.
        enclosing = enclosing[2:end]
        is_global = get(seen, (; name, scope_path=enclosing), nothing)
        # No entry for this scope yet: keep climbing. Otherwise the stored flag
        # says whether the name is global, so negate it.
        is_global === nothing || return !is_global
    end
    # Ran out of scopes without finding a local introduction.
    return false
end

# Classification attached to each usage of a name by `analyze_per_usage_info`.
@enum AnalysisCode begin
    IgnoredNonFirst      # the name was already classified in this scope
    IgnoredQualified     # the usage is qualified
    IgnoredImportRHS     # the name is on the RHS of an import
    InternalHigherScope  # the name was found to be local in an enclosing scope
    InternalFunctionArg  # the name is a function argument
    InternalAssignment   # the name is being assigned to
    InternalStruct       # the name is a struct field or type parameter
    InternalForLoop      # the name is a `for` loop index
    External             # none of the above: counts as an external global name
end

# Annotate every entry of `per_usage_info` with three extra fields:
#   * `first_usage_in_scope`: `false` if this (name, scope) pair was already classified
#   * `external_global_name`: `true`/`false` for classified usages, `missing` for ignored ones
#   * `analysis_code`: the `AnalysisCode` explaining the decision
#
# Entries are processed in order, and earlier decisions are remembered per
# (name, scope), so the result depends on the order of `per_usage_info`.
function analyze_per_usage_info(per_usage_info)
    # For each scope, we want to understand if there are any global usages of the name
    # in that scope.
    # First, throw away all qualified usages, they are irrelevant.
    # Next, if a name is on the RHS of an import, we don't care, so throw it away.
    # Next, if the name is being used at global scope, obviously it is a global.
    # Otherwise, we are in local scope:
    # 1. If the name is a function arg, then this is not a global name
    #    (essentially the first usage is an assignment).
    # 2. Otherwise, if the first usage is an assignment, then it is local,
    #    otherwise it is global.
    seen = Dict{@NamedTuple{name::Symbol,scope_path::Vector{JuliaSyntax.SyntaxNode}},Bool}()
    return map(per_usage_info) do nt
        key = (; nt.name, nt.scope_path)
        if haskey(seen, key)
            return (; nt..., first_usage_in_scope=false, external_global_name=missing,
                    analysis_code=IgnoredNonFirst)
        elseif nt.qualified
            return (; nt..., first_usage_in_scope=true, external_global_name=missing,
                    analysis_code=IgnoredQualified)
        elseif nt.import_type == :import_RHS
            return (; nt..., first_usage_in_scope=true, external_global_name=missing,
                    analysis_code=IgnoredImportRHS)
        end

        # At this point, we have an unqualified name, which is not the RHS of an import,
        # and it is the first time we have seen this name in this scope.
        # Is it global or local? The checks below are tried in order:
        # * The name could be local due to syntax: it is a function argument, the LHS
        #   of an assignment, a struct field or type param, or a loop index.
        # * Otherwise it could already be used in a "higher" local scope. It is possible
        #   we have not yet processed that scope fully, but we will assume we have
        #   (TODO). So we recurse up and check if it is a local name there.
        # * If none of those apply, it is an external global name.
        analysis_code = if nt.function_arg
            InternalFunctionArg
        elseif nt.is_assignment
            InternalAssignment
        elseif nt.struct_field_or_type_param
            InternalStruct
        elseif nt.for_loop_index
            InternalForLoop
        elseif is_name_internal_in_higher_local_scope(nt.name, nt.scope_path, seen)
            InternalHigherScope
        else
            External
        end

        external_global_name = analysis_code == External
        # Remember the decision so later usages in this scope (and in nested scopes)
        # can consult it.
        seen[key] = external_global_name
        return (; nt..., first_usage_in_scope=true, external_global_name, analysis_code)
    end
end

function get_global_names(per_usage_info)
names_used_for_global_bindings = Set{@NamedTuple{name::Symbol,
module_path::Vector{Symbol},
location::String}}()
seen = Dict{@NamedTuple{name::Symbol,scope_path::Vector{JuliaSyntax.SyntaxNode}},Bool}()

for nt in per_usage_info
(; nt.name, nt.scope_path) in keys(seen) && continue
nt.qualified && continue
nt.import_type == :import_RHS && continue

# Ok, at this point it counts!
push!(seen, (; nt.name, nt.scope_path) => nt.global_scope)

if nt.global_scope
if nt.external_global_name === true
push!(names_used_for_global_bindings, (; nt.name, nt.module_path, nt.location))
else
# we are in local scope.
# If we were e.g. an assignment in a higher local scope though, it could still be a local name, as opposed to a global one.
# We will recurse up the `scope_path`. Note the order is "reversed",
# so the first entry of `scope_path` is deepest.
scope_path = nt.scope_path
while !isempty(scope_path)
# First, if we are directly in a module, then we don't want to recurse further.
# We will just end up in a different module.
if kind(first(scope_path)) == K"module"
@goto inner
end
# Ok, now pop off the first scope and check.
scope_path = scope_path[2:end]
ret = get(seen, (; nt.name, scope_path), nothing)
if ret === false # local usage found earlier
@goto outer
elseif ret === true
# We hit global scope, time to bail
@goto inner
end
# else, continue recursing
end
@label inner
if !(nt.function_arg || nt.is_assignment || nt.struct_arg || nt.for_arg)
push!(names_used_for_global_bindings,
(; nt.name, nt.module_path, nt.location))
end
end
@label outer
end
return names_used_for_global_bindings
end
Expand Down
28 changes: 14 additions & 14 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,8 @@ using Aqua
using Logging
using AbstractTrees
using ExplicitImports: is_function_definition_arg, SyntaxNodeWrapper, get_val
using ExplicitImports: is_struct_type_param, is_struct_field_name, is_for_arg
using ExplicitImports: is_struct_type_param, is_struct_field_name, is_for_arg,
analyze_per_usage_info
using TestPkg, Markdown

# DataFrames version of `filter_to_module`
Expand Down Expand Up @@ -177,9 +178,8 @@ end

function get_per_scope(per_usage_info)
per_usage_df = DataFrame(per_usage_info)
subset!(per_usage_df, :qualified => ByRow(!), :import_type => ByRow(==(:not_import)))
return combine(groupby(per_usage_df, [:name, :scope_path, :module_path, :global_scope]),
:is_assignment => first => :assigned_first)
dropmissing!(per_usage_df, :external_global_name)
return per_usage_df
end

# TODO- unit tests for `analyze_import_type`, `is_qualified`, `analyze_name`, etc.
Expand All @@ -204,19 +204,19 @@ end
per_usage_info, _ = analyze_all_names("TestModA.jl")
df = get_per_scope(per_usage_info)
locals = contains.(string.(df.name), Ref("local"))
@test all(!, df.global_scope[locals])
@test all(!, df.external_global_name[locals])

# we use `x` in two scopes; first time is global scope, second time is local
# we use `x` in two scopes
xs = subset(df, :name => ByRow(==(:x)))
@test xs[1, :global_scope]
@test !xs[2, :global_scope]
@test xs[2, :assigned_first]
@test !xs[1, :external_global_name]
@test !xs[2, :external_global_name]
@test xs[2, :analysis_code] == ExplicitImports.InternalAssignment

# we use `exported_a` in two scopes; both times refer to the global name
exported_as = subset(df, :name => ByRow(==(:exported_a)))
@test exported_as[1, :global_scope]
@test !exported_as[2, :global_scope]
@test !exported_as[2, :assigned_first]
@test exported_as[1, :external_global_name]
@test exported_as[2, :external_global_name]
@test !exported_as[2, :is_assignment]

# Test submodules
@test using_statement.(explicit_imports_nonrecursive(TestModA.SubModB, "TestModA.jl")) ==
Expand All @@ -228,8 +228,8 @@ end
sub_df = restrict_to_module(df, TestModA.SubModB)

h = only(subset(sub_df, :name => ByRow(==(:h))))
@test h.global_scope
@test !h.assigned_first
@test h.external_global_name
@test !h.is_assignment

# Nested submodule with same name as outer module...
@test using_statement.(explicit_imports_nonrecursive(TestModA.SubModB.TestModA,
Expand Down

2 comments on commit f166f50

@ericphanson
Copy link
Owner Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/105700

Tip: Release Notes

Did you know you can add release notes too? Just add markdown formatted text underneath the comment after the text
"Release notes:" and it will be added to the registry PR, and if TagBot is installed it will also be added to the
release that TagBot creates. i.e.

@JuliaRegistrator register

Release notes:

## Breaking changes

- blah

To add them here just re-invoke and the PR will be updated.

Tagging

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v1.4.3 -m "<description of version>" f166f50e9bf3566596d98411d93e69b1715c0914
git push origin v1.4.3

Please sign in to comment.