Skip to content

Commit

Permalink
Fix DAG pruning perf for extra_src_dirs
Browse files Browse the repository at this point in the history
We run the analysis for an extra_src_dir by making a fake app for it,
but send in the app alone for analysis.

In there, the DAG pruning routine looks at the DAG and files submitted
and goes "this right here is a project with 99% of its apps deleted". It
then tries to prune the whole DAG except for some test files. The code
"works" simply because there's a false-positive check that makes sure
the file is on disk before removing it for the DAG.

This ends up making extra runs where ~80% of the time is spent
double-checking the false positives for file deletions.

This commit fixes this by merging in all extra_src fake apps and making them
run in a single analysis phase, meaning we only pay the cost of the DAG
pruning once for the whole project, making it faster than any sparse
repo.

There's also a small patch needed for the root-level extra src dirs;
turns out that since the context-handling in the `rebar_compiler` uses a
map to store content, running single-pass analysis clobbered entries for
a given app if they had more than one extra_src_dir in there.

I also took the time to clean up the ordering of that file.
  • Loading branch information
ferd committed May 13, 2020
1 parent 9a2898a commit a2f10e3
Show file tree
Hide file tree
Showing 3 changed files with 125 additions and 81 deletions.
147 changes: 82 additions & 65 deletions src/rebar_compiler.erl
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
-module(rebar_compiler).

-export([analyze_all/2,
analyze_all_extras/2,
compile_analyzed/3,
compile_all/2,
clean/2,
Expand Down Expand Up @@ -61,7 +62,7 @@ analyze_all({Compiler, G}, Apps) ->
OutExt = maps:get(artifact_exts, Contexts),

rebar_compiler_dag:prune(
G, SrcExt, OutExt, lists:append(AbsSources), AppOutPaths
G, SrcExt, OutExt, lists:append(AbsSources), lists:append(AppOutPaths)
),
rebar_compiler_dag:populate_deps(G, SrcExt, OutExt),
rebar_compiler_dag:propagate_stamps(G),
Expand All @@ -72,6 +73,78 @@ analyze_all({Compiler, G}, Apps) ->
AppNames = rebar_compiler_dag:compile_order(G, AppPaths),
{Contexts, sort_apps(AppNames, Apps)}.

%% @doc same as analyze_all/2, but over extra_src_apps,
%% which are a big cheat.
-spec analyze_all_extras(DAG, [App, ...]) -> {map(), [App]} when
DAG :: {module(), digraph:graph()},
App :: rebar_app_info:t().
analyze_all_extras(DAG, Apps) ->
case lists:append([annotate_extras(App) || App <- Apps]) of
[] -> {#{}, []};
ExtraApps -> analyze_all(DAG, ExtraApps)
end.

-spec compile_analyzed({module(), digraph:graph()}, rebar_app_info:t(), map()) -> ok.
compile_analyzed({Compiler, G}, AppInfo, Contexts) -> % > 3.13.2
run(G, Compiler, AppInfo, Contexts),
ok.

-spec compile_all([module(), ...], rebar_app_info:t()) -> ok.
compile_all(Compilers, AppInfo) -> % =< 3.13.0 interface; plugins use this!
%% Support the old-style API by re-declaring a local DAG for the
%% compile steps needed.
lists:foreach(fun(Compiler) ->
OutDir = rebar_app_info:out_dir(AppInfo),
G = rebar_compiler_dag:init(OutDir, Compiler, undefined, []),
{Ctx, _} = analyze_all({Compiler, G}, [AppInfo]),
compile_analyzed({Compiler, G}, AppInfo, Ctx),
rebar_compiler_dag:maybe_store(G, OutDir, Compiler, undefined, []),
rebar_compiler_dag:terminate(G)
end, Compilers).

%% @doc remove compiled artifacts from an AppDir.
-spec clean([module()], rebar_app_info:t()) -> 'ok'.
clean(Compilers, AppInfo) ->
lists:foreach(fun(CompilerMod) ->
clean_(CompilerMod, AppInfo, undefined),
Extras = annotate_extras(AppInfo),
[clean_(CompilerMod, ExtraApp, "extra") || ExtraApp <- Extras]
end, Compilers).


%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%% COMPILER UTIL EXPORTS %%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% These functions are here for the ultimate goal of getting rid of
%% rebar_base_compiler. This can't be done because of existing plugins.

-spec needs_compile(filename:all(), extension(), [{extension(), file:dirname()}]) -> boolean().
needs_compile(Source, OutExt, Mappings) ->
Ext = filename:extension(Source),
BaseName = filename:basename(Source, Ext),
{_, OutDir} = lists:keyfind(OutExt, 1, Mappings),
Target = filename:join(OutDir, BaseName++OutExt),
filelib:last_modified(Source) > filelib:last_modified(Target).

ok_tuple(Source, Ws) ->
rebar_base_compiler:ok_tuple(Source, Ws).

error_tuple(Source, Es, Ws, Opts) ->
rebar_base_compiler:error_tuple(Source, Es, Ws, Opts).

maybe_report(Reportable) ->
rebar_base_compiler:maybe_report(Reportable).

format_error_source(Path, Opts) ->
rebar_base_compiler:format_error_source(Path, Opts).

report(Messages) ->
rebar_base_compiler:report(Messages).

%%%%%%%%%%%%%%%
%%% PRIVATE %%%
%%%%%%%%%%%%%%%

gather_contexts(Compiler, Apps) ->
Default = default_ctx(),
Contexts = [{rebar_app_info:name(AppInfo),
Expand Down Expand Up @@ -121,37 +194,14 @@ analyze_app({Compiler, G}, Contexts, AppInfo) ->
rebar_compiler_dag:populate_sources(
G, Compiler, InDirs, AbsSources, DepOpts
),
{{BaseDir, ArtifactDir}, AbsSources}.
{[{filename:join([BaseDir, SrcDir]), ArtifactDir} || SrcDir <- SrcDirs],
AbsSources}.

sort_apps(Names, Apps) ->
NamedApps = [{rebar_app_info:name(App), App} || App <- Apps],
[App || Name <- Names,
{_, App} <- [lists:keyfind(Name, 1, NamedApps)]].

-spec compile_analyzed({module(), digraph:graph()}, rebar_app_info:t(), map()) -> ok.
compile_analyzed({Compiler, G}, AppInfo, Contexts) -> % > 3.13.2
run(G, Compiler, AppInfo, Contexts),
%% Extras are tricky and get their own mini-analysis
ExtraApps = annotate_extras(AppInfo),
[begin
{ExtraCtx, [SortedExtra]} = analyze_all({Compiler, G}, [ExtraAppInfo]),
run(G, Compiler, SortedExtra, ExtraCtx)
end || ExtraAppInfo <- ExtraApps],
ok.

-spec compile_all([module(), ...], rebar_app_info:t()) -> ok.
compile_all(Compilers, AppInfo) -> % =< 3.13.0 interface; plugins use this!
%% Support the old-style API by re-declaring a local DAG for the
%% compile steps needed.
lists:foreach(fun(Compiler) ->
OutDir = rebar_app_info:out_dir(AppInfo),
G = rebar_compiler_dag:init(OutDir, Compiler, undefined, []),
{Ctx, _} = analyze_all({Compiler, G}, [AppInfo]),
compile_analyzed({Compiler, G}, AppInfo, Ctx),
rebar_compiler_dag:maybe_store(G, OutDir, Compiler, undefined, []),
rebar_compiler_dag:terminate(G)
end, Compilers).

prepare_compiler_env(Compiler, Apps) ->
lists:foreach(
fun(AppInfo) ->
Expand Down Expand Up @@ -293,16 +343,6 @@ compile_handler({Error, Source}, [Config | _Rest]) ->
maybe_report(Error),
?FAIL.


%% @doc remove compiled artifacts from an AppDir.
-spec clean([module()], rebar_app_info:t()) -> 'ok'.
clean(Compilers, AppInfo) ->
lists:foreach(fun(CompilerMod) ->
clean_(CompilerMod, AppInfo, undefined),
Extras = annotate_extras(AppInfo),
[clean_(CompilerMod, ExtraApp, "extra") || ExtraApp <- Extras]
end, Compilers).

clean_(CompilerMod, AppInfo, _Label) ->
#{src_dirs := SrcDirs,
src_ext := SrcExt} = CompilerMod:context(AppInfo),
Expand All @@ -313,21 +353,18 @@ clean_(CompilerMod, AppInfo, _Label) ->
CompilerMod:clean(FoundFiles, AppInfo),
ok.

-spec needs_compile(filename:all(), extension(), [{extension(), file:dirname()}]) -> boolean().
needs_compile(Source, OutExt, Mappings) ->
Ext = filename:extension(Source),
BaseName = filename:basename(Source, Ext),
{_, OutDir} = lists:keyfind(OutExt, 1, Mappings),
Target = filename:join(OutDir, BaseName++OutExt),
filelib:last_modified(Source) > filelib:last_modified(Target).

annotate_extras(AppInfo) ->
ExtraDirs = rebar_dir:extra_src_dirs(rebar_app_info:opts(AppInfo), []),
OldSrcDirs = rebar_app_info:get(AppInfo, src_dirs, ["src"]),
AppDir = rebar_app_info:dir(AppInfo),
lists:map(fun(Dir) ->
EbinDir = filename:join(rebar_app_info:out_dir(AppInfo), Dir),
AppInfo1 = rebar_app_info:ebin_dir(AppInfo, EbinDir),
%% need a unique name to prevent lookup issues that clobber entries
AppName = unicode:characters_to_binary(
[rebar_app_info:name(AppInfo), "_", Dir]
),
AppInfo0 = rebar_app_info:name(AppInfo, AppName),
AppInfo1 = rebar_app_info:ebin_dir(AppInfo0, EbinDir),
AppInfo2 = rebar_app_info:set(AppInfo1, src_dirs, [Dir]),
AppInfo3 = rebar_app_info:set(AppInfo2, extra_src_dirs, OldSrcDirs),
add_to_includes( % give access to .hrl in app's src/
Expand All @@ -339,26 +376,6 @@ annotate_extras(AppInfo) ->
filelib:is_dir(filename:join(AppDir, ExtraDir))]
).

%% These functions are here for the ultimate goal of getting rid of
%% rebar_base_compiler. This can't be done because of existing plugins.

ok_tuple(Source, Ws) ->
rebar_base_compiler:ok_tuple(Source, Ws).

error_tuple(Source, Es, Ws, Opts) ->
rebar_base_compiler:error_tuple(Source, Es, Ws, Opts).

maybe_report(Reportable) ->
rebar_base_compiler:maybe_report(Reportable).

format_error_source(Path, Opts) ->
rebar_base_compiler:format_error_source(Path, Opts).

report(Messages) ->
rebar_base_compiler:report(Messages).

%%% private functions

find_source_files(BaseDir, SrcExt, SrcDirs, Opts) ->
SourceExtRe = "^(?!\\._).*\\" ++ SrcExt ++ [$$],
lists:flatmap(fun(SrcDir) ->
Expand Down
43 changes: 28 additions & 15 deletions src/rebar_compiler_dag.erl
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ init(Dir, Compiler, Label, CritMeta) ->
%% The file must be in one of the directories that may contain source files
%% for an OTP application; source files found in the DAG `G' that lie outside
%% of these directories may be used in other circumstances (i.e. options affecting
%% visibility).
%% visibility, extra_src_dirs).
%% Prune out files that have no corresponding sources
prune(G, SrcExt, ArtifactExt, Sources, AppPaths) ->
%% Collect source files that may have been removed. These files:
Expand All @@ -50,14 +50,28 @@ prune(G, SrcExt, ArtifactExt, Sources, AppPaths) ->
%% In the process, prune header files - those don't have ArtifactExt
%% extension - using side effect in is_deleted_source/5.
case [Del || Del <- (digraph:vertices(G) -- Sources),
is_deleted_source(G, Del, filename:extension(Del), SrcExt, ArtifactExt)] of
is_deleted_source(G, Del, filename:extension(Del), SrcExt, ArtifactExt)] of
[] ->
ok; %% short circuit without sorting AppPaths
Deleted ->
prune_source_files(G, SrcExt, ArtifactExt,
lists:sort(AppPaths), lists:sort(Deleted))
SafeAppPaths = safe_dirs(AppPaths),
OutFiles = filter_prefix(G, lists:sort(SafeAppPaths), lists:sort(Deleted)),
[maybe_rm_artifact_and_edge(G, Out, SrcExt, ArtifactExt, File)
|| {File, Out} <- OutFiles],
ok
end.

%% Some app paths may be prefixes of one another; for example,
%% `/some/app/directory' may be seen as a prefix
%% of `/some/app/directory_trick' and cause pruning outside
%% of the proper scopes.
safe_dirs(AppPaths) ->
[{safe_dir(AppDir), Path} || {AppDir, Path} <- AppPaths].

safe_dir([]) -> "/";
safe_dir("/") -> "/";
safe_dir([H|T]) -> [H|safe_dir(T)].

is_deleted_source(_G, _F, Extension, Extension, _ArtifactExt) ->
%% source file
true;
Expand All @@ -74,22 +88,21 @@ is_deleted_source(G, F, _Extension, _SrcExt, _ArtifactExt) ->
%% AppDirs & Fs are sorted, and to check if File is outside of
%% App, lists:prefix is checked. When the App with File in it
%% exists, verify file is still there on disk.
prune_source_files(_G, _SrcExt, _ArtifactExt, [], _) ->
ok;
prune_source_files(_G, _SrcExt, _ArtifactExt, _, []) ->
ok;
prune_source_files(G, SrcExt, ArtifactExt, [AppDir | AppTail], Fs) when is_atom(AppDir) ->
filter_prefix(_G, [], _) ->
[];
filter_prefix(_G, _, []) ->
[];
filter_prefix(G, Apps, [F|Fs]) when is_atom(F) ->
%% dirty bit shenanigans
prune_source_files(G, SrcExt, ArtifactExt, AppTail, Fs);
prune_source_files(G, SrcExt, ArtifactExt, [{App, Out} | AppTail] = AppPaths, [File | FTail]) ->
filter_prefix(G, Apps, Fs);
filter_prefix(G, [{App, Out} | AppTail] = AppPaths, [File | FTail]) ->
case lists:prefix(App, File) of
true ->
maybe_rm_artifact_and_edge(G, Out, SrcExt, ArtifactExt, File),
prune_source_files(G, SrcExt, ArtifactExt, AppPaths, FTail);
[{File, Out} | filter_prefix(G, AppPaths, FTail)];
false when App < File ->
prune_source_files(G, SrcExt, ArtifactExt, AppTail, [File|FTail]);
filter_prefix(G, AppTail, [File|FTail]);
false ->
prune_source_files(G, SrcExt, ArtifactExt, AppPaths, FTail)
filter_prefix(G, AppPaths, FTail)
end.

%% @doc this function scans all the source files found and looks into
Expand Down
16 changes: 15 additions & 1 deletion src/rebar_prv_compile.erl
Original file line number Diff line number Diff line change
Expand Up @@ -251,7 +251,13 @@ extra_virtual_apps(State, VApp0, [Dir|Dirs]) ->
VApp2 = rebar_app_info:ebin_dir(VApp1, OutDir),
Opts = rebar_state:opts(State),
VApp3 = rebar_app_info:opts(VApp2, Opts),
[rebar_app_info:set(VApp3, src_dirs, [OutDir])
%% ensure we don't end up running a similar extra fake app while
%% compiling root-level extras by marking it explicitly null
VApp4 = rebar_app_info:set(VApp3, extra_src_dirs, []),
%% need a unique name to prevent lookup issues that clobber entries
AppName = unicode:characters_to_binary(["extra_", Dir]),
VApp5 = rebar_app_info:name(VApp4, AppName),
[rebar_app_info:set(VApp5, src_dirs, [OutDir])
| extra_virtual_apps(State, VApp0, Dirs)]
end.

Expand Down Expand Up @@ -303,6 +309,7 @@ build_rebar3_apps(DAGs, Apps, State) ->
LastDAG = lists:last(DAGs),
%% we actually need to compile each DAG one after the other to prevent
%% issues where a .yrl file that generates a .erl file gets to be seen.
?INFO("Analyzing applications", []),
[begin
{Ctx, ReorderedApps} = rebar_compiler:analyze_all(DAG, Apps),
lists:foreach(
Expand All @@ -312,6 +319,13 @@ build_rebar3_apps(DAGs, Apps, State) ->
rebar_compiler:compile_analyzed(DAG, AppInfo, Ctx)
end,
ReorderedApps
),
{ExtraCtx, ReorderedExtraApps} = rebar_compiler:analyze_all_extras(DAG, Apps),
lists:foreach(
fun(AppInfo) ->
rebar_compiler:compile_analyzed(DAG, AppInfo, ExtraCtx)
end,
ReorderedExtraApps
)
end || DAG <- DAGs],
ok.
Expand Down

0 comments on commit a2f10e3

Please sign in to comment.