Skip to content

Commit

Permalink
No public description
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 703621700
  • Loading branch information
Propeller Team authored and copybara-github committed Dec 6, 2024
1 parent 1e1e9de commit 6aa5e31
Show file tree
Hide file tree
Showing 10 changed files with 261 additions and 8 deletions.
5 changes: 5 additions & 0 deletions propeller/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -269,6 +269,7 @@ cc_library(
deps = [
":addr2cu",
":status_macros",
"@abseil-cpp//absl/container:btree",
"@abseil-cpp//absl/container:flat_hash_map",
"@abseil-cpp//absl/log",
"@abseil-cpp//absl/log:check",
Expand All @@ -280,6 +281,7 @@ cc_library(
"@llvm-project//llvm:DebugInfo",
"@llvm-project//llvm:Object",
"@llvm-project//llvm:Support",
"@llvm-project//llvm:TargetParser",
],
)

Expand Down Expand Up @@ -485,6 +487,7 @@ cc_library(
":binary_address_branch",
":binary_address_branch_path",
":binary_content",
":branch_aggregation",
":propeller_options_cc_proto",
":propeller_statistics",
":status_macros",
Expand Down Expand Up @@ -1080,6 +1083,7 @@ cc_test(
name = "binary_content_test",
srcs = ["binary_content_test.cc"],
data = [
"//propeller/testdata:fake_thunks.bin",
"//propeller/testdata:llvm_function_samples.binary",
"//propeller/testdata:propeller_barebone_nopie_buildid",
"//propeller/testdata:propeller_barebone_pie_nobuildid_bin",
Expand Down Expand Up @@ -1167,6 +1171,7 @@ cc_test(
"//propeller/testdata:clang_v0_labels.binary",
"//propeller/testdata:duplicate_symbols_bin",
"//propeller/testdata:duplicate_unique_names",
"//propeller/testdata:fake_thunks.bin",
"//propeller/testdata:sample.bin",
"//propeller/testdata:sample_section.bin",
],
Expand Down
80 changes: 73 additions & 7 deletions propeller/binary_address_mapper.cc
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
#include "absl/algorithm/container.h"
#include "absl/base/attributes.h"
#include "absl/base/nullability.h"
#include "absl/container/btree_map.h"
#include "absl/container/btree_set.h"
#include "absl/container/flat_hash_map.h"
#include "absl/container/flat_hash_set.h"
Expand All @@ -32,6 +33,7 @@
#include "propeller/binary_address_branch.h"
#include "propeller/binary_address_branch_path.h"
#include "propeller/binary_content.h"
#include "propeller/branch_aggregation.h"
#include "propeller/propeller_options.pb.h"
#include "propeller/propeller_statistics.h"
#include "propeller/status_macros.h"
Expand Down Expand Up @@ -84,7 +86,9 @@ class BinaryAddressMapperBuilder {
symtab,
std::vector<llvm::object::BBAddrMap> bb_addr_map, PropellerStats &stats,
absl::Nonnull<const PropellerOptions *> options
ABSL_ATTRIBUTE_LIFETIME_BOUND);
ABSL_ATTRIBUTE_LIFETIME_BOUND,
std::optional<absl::btree_map<uint64_t, llvm::object::ELFSymbolRef>>
thunk_map = std::nullopt);

BinaryAddressMapperBuilder(const BinaryAddressMapperBuilder &) = delete;
BinaryAddressMapperBuilder &operator=(const BinaryAddressMapper &) = delete;
Expand Down Expand Up @@ -131,6 +135,9 @@ class BinaryAddressMapperBuilder {
int FilterDuplicateNameFunctions(
absl::btree_set<int> &selected_functions) const;

// Create a sorted vector of thunks in the binary from `thunk_map_`.
std::optional<std::vector<ThunkInfo>> GetThunks();

// BB address map of functions.
std::vector<llvm::object::BBAddrMap> bb_addr_map_;
// Non-zero sized function symbols from elf symbol table, indexed by
Expand All @@ -144,6 +151,10 @@ class BinaryAddressMapperBuilder {

PropellerStats *stats_;
const PropellerOptions *options_;

// Map of thunks by address.
std::optional<absl::btree_map<uint64_t, llvm::object::ELFSymbolRef>>
thunk_map_;
};

// Helper class for extracting intra-function paths from binary-address paths.
Expand Down Expand Up @@ -504,6 +515,42 @@ bool BinaryAddressMapper::CanFallThrough(int from, int to) const {
return true;
}

std::optional<ThunkInfo> BinaryAddressMapper::GetThunkInfoUsingBinaryAddress(
uint64_t address) const {
std::optional<int> index = FindThunkInfoIndexUsingBinaryAddress(address);
if (!index.has_value()) return std::nullopt;
return thunks_->at(*index);
}

// Find thunk by binary address
std::optional<int> BinaryAddressMapper::FindThunkInfoIndexUsingBinaryAddress(
uint64_t address) const {
if (!thunks_.has_value()) return std::nullopt;
auto it = absl::c_upper_bound(*thunks_, address,
[](uint64_t addr, const ThunkInfo &thunk) {
return addr < thunk.address;
});
if (it == thunks_->begin()) return std::nullopt;
it = std::prev(it);
uint64_t thunk_end_address = it->address + it->symbol.getSize();
if (address >= thunk_end_address) return std::nullopt;
return it - thunks_->begin();
}

void BinaryAddressMapper::UpdateThunkTargets(
const BranchAggregation &branch_aggregation) {
if (!thunks_.has_value()) return;
for (auto [branch, weight] : branch_aggregation.branch_counters) {
std::optional<int> thunk_index =
FindThunkInfoIndexUsingBinaryAddress(branch.from);

if (!thunk_index.has_value()) continue;

ThunkInfo &thunk_info = thunks_->at(*thunk_index);
thunk_info.target = branch.to;
}
}

// For each lbr record addr1->addr2, find function1/2 that contain addr1/addr2
// and add function1/2's index into the returned set.
absl::btree_set<int> BinaryAddressMapperBuilder::CalculateHotFunctions(
Expand Down Expand Up @@ -638,6 +685,17 @@ absl::btree_set<int> BinaryAddressMapperBuilder::SelectFunctions(
return selected_functions;
}

std::optional<std::vector<ThunkInfo>> BinaryAddressMapperBuilder::GetThunks() {
if (!thunk_map_.has_value()) return std::nullopt;
std::vector<ThunkInfo> thunks;
for (const auto &thunk_entry : *thunk_map_) {
uint64_t thunk_address = thunk_entry.first;
llvm::object::ELFSymbolRef thunk_symbol = thunk_entry.second;
thunks.push_back({.address = thunk_address, .symbol = thunk_symbol});
}
return thunks;
}

std::vector<BbHandleBranchPath> BinaryAddressMapper::ExtractIntraFunctionPaths(
const BinaryAddressBranchPath &address_path) const {
return IntraFunctionPathsExtractor(this).Extract(address_path);
Expand All @@ -647,12 +705,15 @@ BinaryAddressMapperBuilder::BinaryAddressMapperBuilder(
absl::flat_hash_map<uint64_t, llvm::SmallVector<llvm::object::ELFSymbolRef>>
symtab,
std::vector<llvm::object::BBAddrMap> bb_addr_map, PropellerStats &stats,
absl::Nonnull<const PropellerOptions *> options)
absl::Nonnull<const PropellerOptions *> options,
std::optional<absl::btree_map<uint64_t, llvm::object::ELFSymbolRef>>
thunk_map)
: bb_addr_map_(std::move(bb_addr_map)),
symtab_(std::move(symtab)),
symbol_info_map_(GetSymbolInfoMap(symtab_, bb_addr_map_)),
stats_(&stats),
options_(options) {
options_(options),
thunk_map_(std::move(thunk_map)) {
stats_->bbaddrmap_stats.bbaddrmap_function_does_not_have_symtab_entry +=
bb_addr_map_.size() - symbol_info_map_.size();
}
Expand All @@ -661,11 +722,13 @@ BinaryAddressMapper::BinaryAddressMapper(
absl::btree_set<int> selected_functions,
std::vector<llvm::object::BBAddrMap> bb_addr_map,
std::vector<BbHandle> bb_handles,
absl::flat_hash_map<int, FunctionSymbolInfo> symbol_info_map)
absl::flat_hash_map<int, FunctionSymbolInfo> symbol_info_map,
std::optional<std::vector<ThunkInfo>> thunks)
: selected_functions_(std::move(selected_functions)),
bb_handles_(std::move(bb_handles)),
bb_addr_map_(std::move(bb_addr_map)),
symbol_info_map_(std::move(symbol_info_map)) {}
symbol_info_map_(std::move(symbol_info_map)),
thunks_(std::move(thunks)) {}

absl::StatusOr<std::unique_ptr<BinaryAddressMapper>> BuildBinaryAddressMapper(
const PropellerOptions &options, const BinaryContent &binary_content,
Expand All @@ -676,14 +739,16 @@ absl::StatusOr<std::unique_ptr<BinaryAddressMapper>> BuildBinaryAddressMapper(
ASSIGN_OR_RETURN(bb_addr_map, ReadBbAddrMap(binary_content));

return BinaryAddressMapperBuilder(ReadSymbolTable(binary_content),
std::move(bb_addr_map), stats, &options)
std::move(bb_addr_map), stats, &options,
ReadThunkSymbols(binary_content))
.Build(hot_addresses);
}

std::unique_ptr<BinaryAddressMapper> BinaryAddressMapperBuilder::Build(
const absl::flat_hash_set<uint64_t> *hot_addresses) && {
std::optional<uint64_t> last_function_address;
std::vector<BbHandle> bb_handles;
std::optional<std::vector<ThunkInfo>> thunks = GetThunks();
absl::btree_set<int> selected_functions = SelectFunctions(hot_addresses);
DropNonSelectedFunctions(selected_functions);
for (int function_index : selected_functions) {
Expand All @@ -696,9 +761,10 @@ std::unique_ptr<BinaryAddressMapper> BinaryAddressMapperBuilder::Build(
bb_handles.push_back({function_index, bb_index});
last_function_address = function_bb_addr_map.getFunctionAddress();
}

return std::make_unique<BinaryAddressMapper>(
std::move(selected_functions), std::move(bb_addr_map_),
std::move(bb_handles), std::move(symbol_info_map_));
std::move(bb_handles), std::move(symbol_info_map_), std::move(thunks));
}

} // namespace propeller
32 changes: 31 additions & 1 deletion propeller/binary_address_mapper.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,10 +18,12 @@
#include "absl/time/time.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Object/ELFObjectFile.h"
#include "llvm/Object/ELFTypes.h"
#include "propeller/bb_handle.h"
#include "propeller/binary_address_branch_path.h"
#include "propeller/binary_content.h"
#include "propeller/branch_aggregation.h"
#include "propeller/propeller_options.pb.h"
#include "propeller/propeller_statistics.h"

Expand Down Expand Up @@ -103,6 +105,12 @@ struct BbHandleBranchPath {
}
};

struct ThunkInfo {
uint64_t address;
uint64_t target;
llvm::object::ELFSymbolRef symbol;
};

// Finds basic block entries from binary addresses.
class BinaryAddressMapper {
public:
Expand All @@ -120,7 +128,8 @@ class BinaryAddressMapper {
absl::btree_set<int> selected_functions,
std::vector<llvm::object::BBAddrMap> bb_addr_map,
std::vector<BbHandle> bb_handles,
absl::flat_hash_map<int, FunctionSymbolInfo> symbol_info_map);
absl::flat_hash_map<int, FunctionSymbolInfo> symbol_info_map,
std::optional<std::vector<ThunkInfo>> thunks = std::nullopt);

BinaryAddressMapper(const BinaryAddressMapper &) = delete;
BinaryAddressMapper &operator=(const BinaryAddressMapper &) = delete;
Expand All @@ -141,6 +150,10 @@ class BinaryAddressMapper {
return selected_functions_;
}

const std::optional<std::vector<ThunkInfo>> &thunks() const {
return thunks_;
}

// Returns the `bb_handles_` index associated with the binary address
// `address` given a branch from/to this address based on `direction`.
// It returns nullopt if the no `bb_handles_` index can be mapped.
Expand Down Expand Up @@ -186,6 +199,20 @@ class BinaryAddressMapper {
bool CanFallThrough(int function_index, int from_bb_index,
int to_bb_index) const;

// Returns the index of the thunk that contains the given binary address.
// Returns nullopt if no thunk contains the address.
std::optional<int> FindThunkInfoIndexUsingBinaryAddress(
uint64_t address) const;

// Returns the thunk that contains the given binary address. Returns nullopt
// if no thunk contains the address.
std::optional<ThunkInfo> GetThunkInfoUsingBinaryAddress(
uint64_t address) const;

// Sets the targets of thunks in `binary_address_mapper_` to the targets of
// their corresponding branches in `branch_aggregation`.
void UpdateThunkTargets(const BranchAggregation &branch_aggregation);

// Returns the full function's BB address map associated with the given
// `bb_handle`.
const llvm::object::BBAddrMap &GetFunctionEntry(BbHandle bb_handle) const {
Expand Down Expand Up @@ -268,6 +295,9 @@ class BinaryAddressMapper {
// A map from function indices to their symbol info (function names and
// section name).
absl::flat_hash_map<int, FunctionSymbolInfo> symbol_info_map_;

// A vector of thunks in the binary, ordered in increasing order of address.
std::optional<std::vector<ThunkInfo>> thunks_;
};

// Builds a `BinaryAddressMapper` for binary represented by `binary_content` and
Expand Down
44 changes: 44 additions & 0 deletions propeller/binary_address_mapper_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -606,5 +606,49 @@ TEST(BinaryAddressMapper, ExtractPathsCoalescesCallees) {
.bb_index = 0}}}},
.returns_to = {{.function_index = 2, .bb_index = 0}}})));
}

TEST(LlvmBinaryAddressMapper, GetThunkInfoUsingBinaryAddress) {
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(GetPropellerTestDataFilePath("fake_thunks.bin")));
PropellerStats stats;
PropellerOptions options;
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<BinaryAddressMapper> binary_address_mapper,
BuildBinaryAddressMapper(options, *binary_content, stats,
/*hot_addresses=*/nullptr));

// Match thunk address only
EXPECT_THAT(binary_address_mapper->GetThunkInfoUsingBinaryAddress(0x107bc),
Optional(FieldsAre(0x107bc, _, _)));
EXPECT_THAT(binary_address_mapper->GetThunkInfoUsingBinaryAddress(0x107be),
Optional(FieldsAre(0x107bc, _, _)));

EXPECT_THAT(binary_address_mapper->GetThunkInfoUsingBinaryAddress(0x107cc),
Optional(FieldsAre(0x107cc, _, _)));
}

TEST(LlvmBinaryAddressMapper, FindThunkInfoIndexUsingBinaryAddress) {
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<BinaryContent> binary_content,
GetBinaryContent(GetPropellerTestDataFilePath("fake_thunks.bin")));
PropellerStats stats;
PropellerOptions options;
ASSERT_OK_AND_ASSIGN(
std::unique_ptr<BinaryAddressMapper> binary_address_mapper,
BuildBinaryAddressMapper(options, *binary_content, stats,
/*hot_addresses=*/nullptr));

EXPECT_THAT(
binary_address_mapper->FindThunkInfoIndexUsingBinaryAddress(0x107bc),
Optional(0));
EXPECT_THAT(
binary_address_mapper->FindThunkInfoIndexUsingBinaryAddress(0x107be),
Optional(0));

EXPECT_THAT(
binary_address_mapper->FindThunkInfoIndexUsingBinaryAddress(0x107cc),
Optional(1));
}
} // namespace
} // namespace propeller
Loading

0 comments on commit 6aa5e31

Please sign in to comment.