Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

rebar_compiler: fix DAG and speed-up analysis for large repositories #2442

Merged
merged 1 commit into from
Dec 7, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions src/rebar_compiler.erl
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,9 @@ analyze_all({Compiler, G}, Apps) ->
rebar_compiler_dag:populate_deps(G, SrcExt, OutExt),
rebar_compiler_dag:propagate_stamps(G),

[$a, Sep, $b] = filename:join("a", "b"),
AppPaths = [{rebar_app_info:name(AppInfo),
rebar_utils:to_list(rebar_app_info:dir(AppInfo))}
rebar_utils:to_list(rebar_app_info:dir(AppInfo)) ++ [Sep]}
|| AppInfo <- Apps],
AppNames = rebar_compiler_dag:compile_order(G, AppPaths),
{Contexts, sort_apps(AppNames, Apps)}.
Expand Down Expand Up @@ -205,7 +206,7 @@ prepare_compiler_env(Compiler, Apps) ->
EbinDir = rebar_utils:to_list(rebar_app_info:ebin_dir(AppInfo)),
%% Make sure that outdir is on the path
ok = rebar_file_utils:ensure_dir(EbinDir),
true = code:add_patha(filename:absname(EbinDir))
true = code:add_pathz(filename:absname(EbinDir))
end,
Apps
),
Expand Down
113 changes: 47 additions & 66 deletions src/rebar_compiler_dag.erl
Original file line number Diff line number Diff line change
Expand Up @@ -242,10 +242,40 @@ propagate_stamps(G) ->
compile_order(_, AppDefs) when length(AppDefs) =< 1 ->
[Name || {Name, _Path} <- AppDefs];
compile_order(G, AppDefs) ->
Edges = [{V1,V2} || E <- digraph:edges(G),
{_,V1,V2,_} <- [digraph:edge(G, E)]],
AppPaths = prepare_app_paths(AppDefs),
compile_order(Edges, AppPaths, #{}).
%% Build a digraph for following topo-sort, and populate
%% FileToApp map as a side effect for caching
AppDAG = digraph:new([acyclic]), % ignore cycles and hope it works
lists:foldl(
fun(E, FileToApp) ->
case digraph:edge(G, E) of
{_, _, _, artifact} ->
%% skip artifacts, they don't affect compile order
FileToApp;
{_, V1, V2, _} ->
%% V1 depends on V2, and we know V1 is in our repo,
%% otherwise this edge would've never appeared
{App, FTA1} = find_and_cache(V1, AppDefs, FileToApp),
case find_and_cache(V2, AppDefs, FTA1) of
{[], FTA2} ->
%% dependency on a file outside of the repo
FTA2;
{App, FTA2} ->
%% dependency within the same app
FTA2;
{AnotherApp, FTA2} ->
%% actual dependency
%% unfortunately digraph has non-functional API depending on side effects
digraph:add_vertex(AppDAG, App), %% ignore errors for duplicate inserts
digraph:add_vertex(AppDAG, AnotherApp),
digraph:add_edge(AppDAG, App, AnotherApp),
FTA2
end
end
end, #{}, digraph:edges(G)),
Sorted = lists:reverse(digraph_utils:topsort(AppDAG)),
digraph:delete(AppDAG),
Standalone = [Name || {Name, _} <- AppDefs] -- Sorted,
Standalone ++ Sorted.

-dialyzer({no_opaque, maybe_store/5}). % optimized digraph usage breaks opacity
%% @doc Store the DAG on disk if it was dirty
Expand Down Expand Up @@ -409,72 +439,23 @@ propagate_stamps(G, [File|Files]) ->
end,
propagate_stamps(G, Files).

%% Do the actual reversal; be aware that only working from the edges
%% may omit files, so we have to add all non-dependant apps manually
%% to make sure we don't drop em. Since they have no deps, they're
%% safer to put first (and compile first)
compile_order([], AppPaths, AppDeps) ->
%% use a digraph so we don't reimplement topsort by hand.
G = digraph:new([acyclic]), % ignore cycles and hope it works
Tups = maps:keys(AppDeps),
{Va,Vb} = lists:unzip(Tups),
[digraph:add_vertex(G, V) || V <- Va],
[digraph:add_vertex(G, V) || V <- Vb],
[digraph:add_edge(G, V1, V2) || {V1, V2} <- Tups],
Sorted = lists:reverse(digraph_utils:topsort(G)),
digraph:delete(G),
Standalone = [Name || {_, Name} <- AppPaths] -- Sorted,
Standalone ++ Sorted;
compile_order([{P1,P2}|T], AppPaths, AppDeps) ->
%% Assume most dependencies are between files of the same app
%% so ask to see if it's the same before doing a deeper check:
case find_app(P1, AppPaths) of
not_found -> % system lib probably! not in the repo
compile_order(T, AppPaths, AppDeps);
{P1App, P1Path} ->
case find_cached_app(P2, {P1App, P1Path}, AppPaths) of
{P2App, _} when P2App =/= P1App ->
compile_order(T, AppPaths, AppDeps#{{P1App,P2App} => true});
_ ->
compile_order(T, AppPaths, AppDeps)
end
end.

%% Swap app name with paths in the order, and sort there; this lets us
%% bail out early in a search where a file won't be found.
prepare_app_paths(AppPaths) ->
lists:sort([{filename:split(Path), Name} || {Name, Path} <- AppPaths]).

%% Look for the app to which the path belongs; needed to
%% go from an edge between files in the DAG to building
%% app-related orderings
find_app(Path, AppPaths) ->
find_app_(filename:split(Path), AppPaths).

%% A cached search for the app to which a path belongs;
%% the assumption is that sorted edges and common relationships
%% are going to be between local files within an app most
%% of the time; so we first look for the same path as a
%% prior match to avoid searching _all_ potential candidates.
%% If it doesn't work, go for the normal search.
find_cached_app(Path, {Name, AppPath}, AppPaths) ->
Split = filename:split(Path),
case find_app_(Split, [{AppPath, Name}]) of
not_found -> find_app_(Split, AppPaths);
LastEntry -> LastEntry
find_and_cache(File, AppDefs, FileToApp) ->
case maps:find(File, FileToApp) of
error ->
App = find(File, AppDefs),
{App, FileToApp#{File => App}};
{ok, App} ->
{App, FileToApp}
end.

%% Do the actual recursive search
find_app_(_Path, []) ->
not_found;
find_app_(Path, [{AppPath, AppName}|Rest]) ->
case lists:prefix(AppPath, Path) of
find(_File, []) ->
[];
find(File, [{App, Dir} | Tail]) ->
case lists:prefix(Dir, File) of
true ->
{AppName, AppPath};
false when AppPath > Path ->
not_found;
App;
false ->
find_app_(Path, Rest)
find(File, Tail)
end.

%% @private Return what should be the base name of an erl file, relocated to the
Expand Down
4 changes: 3 additions & 1 deletion src/rebar_compiler_erl.erl
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,9 @@ needed_files(Graph, FoundFiles, _, AppInfo) ->

%% Make sure that the ebin dir is on the path
ok = rebar_file_utils:ensure_dir(EbinDir),
true = code:add_patha(filename:absname(EbinDir)),
%% this is only needed to provide behaviours/parse_transforms for
%% applications that will be compiled next
true = code:add_pathz(filename:absname(EbinDir)),

{ParseTransforms, Rest} = split_source_files(FoundFiles, ErlOpts),
NeededErlFiles = case needed_files(Graph, ErlOpts, RebarOpts, OutDir, EbinDir, ParseTransforms) of
Expand Down
6 changes: 5 additions & 1 deletion src/rebar_prv_compile.erl
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,11 @@ prepare_project_app(_State, _Providers, AppInfo) ->
copy_app_dirs(AppInfo,
rebar_app_info:dir(AppInfo),
rebar_app_info:out_dir(AppInfo)),
code:add_pathsa([rebar_app_info:ebin_dir(AppInfo)]),
%% application code path must be added to the source
%% otherwise code_server will remember 'lib_dir' for
%% this application, and all `-include_lib` directives
%% will actually go into _build/profile/lib/...
code:add_pathsz([rebar_app_info:dir(AppInfo)]),
AppInfo.

prepare_compile(State, Providers, AppInfo) ->
Expand Down