Skip to content

Commit

Permalink
feat: Add counters for opening file (#12480)
Browse files Browse the repository at this point in the history
Summary:

Emit runtime counters for opening files by passing stats as an optional property in FileOptions. This will be used in Hive connector when o files.

Differential Revision: D70338343
  • Loading branch information
yuandagits authored and facebook-github-bot committed Feb 28, 2025
1 parent b3f34e6 commit e8ffda6
Show file tree
Hide file tree
Showing 9 changed files with 62 additions and 23 deletions.
48 changes: 37 additions & 11 deletions velox/common/caching/CachedFactory.h
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ template <
typename Value,
typename Generator,
typename Properties = void,
typename Statistics = void,
typename Sizer = DefaultSizer<Value>,
typename Comparator = std::equal_to<Key>,
typename Hash = std::hash<Key>>
Expand Down Expand Up @@ -178,7 +179,8 @@ class CachedFactory {
/// will probably mess with your memory model, so really try to avoid it.
CachedPtr<Key, Value, Comparator, Hash> generate(
const Key& key,
const Properties* properties = nullptr);
const Properties* properties = nullptr,
Statistics* stats = nullptr);

/// Looks up the cache entry of the given key if it exists, otherwise returns
/// null.
Expand Down Expand Up @@ -358,17 +360,25 @@ template <
typename Value,
typename Generator,
typename Properties,
typename Statistics,
typename Sizer,
typename Comparator,
typename Hash>
CachedPtr<Key, Value, Comparator, Hash>
CachedFactory<Key, Value, Generator, Properties, Sizer, Comparator, Hash>::
generate(const Key& key, const Properties* properties) {
CachedPtr<Key, Value, Comparator, Hash> CachedFactory<
Key,
Value,
Generator,
Properties,
Statistics,
Sizer,
Comparator,
Hash>::
generate(const Key& key, const Properties* properties, Statistics* stats) {
process::TraceContext trace("CachedFactory::generate");
if (cache_ == nullptr) {
return CachedPtr<Key, Value, Comparator, Hash>{
/*fromCache=*/false,
(*generator_)(key, properties).release(),
(*generator_)(key, properties, stats).release(),
nullptr,
std::make_unique<Key>(key)};
}
Expand Down Expand Up @@ -397,7 +407,7 @@ CachedFactory<Key, Value, Generator, Properties, Sizer, Comparator, Hash>::
}
pendingLock.unlock();
// Regenerates in the edge case.
return generate(key, properties);
return generate(key, properties, stats);
}

pending_.insert(key);
Expand All @@ -408,7 +418,7 @@ CachedFactory<Key, Value, Generator, Properties, Sizer, Comparator, Hash>::
pendingCv_.notify_all();
};

std::unique_ptr<Value> generatedValue = (*generator_)(key, properties);
std::unique_ptr<Value> generatedValue = (*generator_)(key, properties, stats);
const uint64_t valueSize = Sizer()(*generatedValue);
Value* rawValue = generatedValue.release();
const bool inserted = addCache(key, rawValue, valueSize);
Expand All @@ -433,12 +443,19 @@ template <
typename Value,
typename Generator,
typename Properties,
typename Statistics,
typename Sizer,
typename Comparator,
typename Hash>
CachedPtr<Key, Value, Comparator, Hash>
CachedFactory<Key, Value, Generator, Properties, Sizer, Comparator, Hash>::get(
const Key& key) {
CachedPtr<Key, Value, Comparator, Hash> CachedFactory<
Key,
Value,
Generator,
Properties,
Statistics,
Sizer,
Comparator,
Hash>::get(const Key& key) {
if (cache_ == nullptr) {
return {};
}
Expand All @@ -460,10 +477,19 @@ template <
typename Value,
typename Generator,
typename Properties,
typename Statistics,
typename Sizer,
typename Comparator,
typename Hash>
void CachedFactory<Key, Value, Generator, Properties, Sizer, Comparator, Hash>::
void CachedFactory<
Key,
Value,
Generator,
Properties,
Statistics,
Sizer,
Comparator,
Hash>::
retrieveCached(
const std::vector<Key>& keys,
std::vector<std::pair<Key, CachedPtr<Key, Value, Comparator, Hash>>>&
Expand Down
9 changes: 6 additions & 3 deletions velox/common/caching/tests/CachedFactoryTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,8 @@ namespace {
struct DoublerGenerator {
std::unique_ptr<int> operator()(
const int& value,
const void* properties = nullptr) {
const void* properties = nullptr,
void* statistics = nullptr) {
++generated;
return std::make_unique<int>(value * 2);
}
Expand All @@ -40,7 +41,8 @@ struct DoublerGenerator {
struct IdentityGenerator {
std::unique_ptr<int> operator()(
const int& value,
const void* properties = nullptr) {
const void* properties = nullptr,
void* statistics = nullptr) {
return std::make_unique<int>(value);
}
};
Expand Down Expand Up @@ -113,7 +115,8 @@ TEST(CachedFactoryTest, basicGeneration) {
struct DoublerWithExceptionsGenerator {
std::unique_ptr<int> operator()(
const int& value,
const void* properties = nullptr) {
const void* properties = nullptr,
void* statistics = nullptr) {
if (value == 3) {
VELOX_FAIL("3 is bad");
}
Expand Down
5 changes: 5 additions & 0 deletions velox/common/file/FileSystems.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ class ConfigBase;
}
class ReadFile;
class WriteFile;
namespace filesystems::File {
class IoStats;
}
} // namespace facebook::velox

namespace facebook::velox::filesystems {
Expand Down Expand Up @@ -69,6 +72,8 @@ struct FileOptions {
/// S3.
std::optional<std::unordered_map<std::string, std::string>> properties{
std::nullopt};

File::IoStats* stats{nullptr};
};

/// Defines directory options
Expand Down
4 changes: 3 additions & 1 deletion velox/connectors/hive/FileHandle.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,8 @@ std::string groupName(const std::string& filename) {

std::unique_ptr<FileHandle> FileHandleGenerator::operator()(
const std::string& filename,
const FileProperties* properties) {
const FileProperties* properties,
filesystems::File::IoStats* stats) {
// We have seen cases where drivers are stuck when creating file handles.
// Adding a trace here to spot this more easily in future.
process::TraceContext trace("FileHandleGenerator::operator()");
Expand All @@ -51,6 +52,7 @@ std::unique_ptr<FileHandle> FileHandleGenerator::operator()(
MicrosecondTimer timer(&elapsedTimeUs);
fileHandle = std::make_unique<FileHandle>();
filesystems::FileOptions options;
options.stats = stats;
if (properties) {
options.fileSize = properties->fileSize;
}
Expand Down
4 changes: 3 additions & 1 deletion velox/connectors/hive/FileHandle.h
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ class FileHandleGenerator {
: properties_(std::move(properties)) {}
std::unique_ptr<FileHandle> operator()(
const std::string& filename,
const FileProperties* properties);
const FileProperties* properties,
filesystems::File::IoStats* stats);

private:
const std::shared_ptr<const config::ConfigBase> properties_;
Expand All @@ -80,6 +81,7 @@ using FileHandleFactory = CachedFactory<
FileHandle,
FileHandleGenerator,
FileProperties,
filesystems::File::IoStats,
FileHandleSizer>;

using FileHandleCachedPtr = CachedPtr<std::string, FileHandle>;
Expand Down
4 changes: 2 additions & 2 deletions velox/connectors/hive/SplitReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -249,8 +249,8 @@ void SplitReader::createReader() {
try {
fileHandleCachePtr = fileHandleFactory_->generate(
hiveSplit_->filePath,
hiveSplit_->properties.has_value() ? &*hiveSplit_->properties
: nullptr);
hiveSplit_->properties.has_value() ? &*hiveSplit_->properties : nullptr,
fsStats_ ? fsStats_.get() : nullptr);
VELOX_CHECK_NOT_NULL(fileHandleCachePtr.get());
} catch (const VeloxRuntimeError& e) {
if (e.errorCode() == error_code::kFileNotFound &&
Expand Down
3 changes: 2 additions & 1 deletion velox/dwio/common/Throttler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,8 @@ uint64_t Throttler::calculateBackoffDurationAndUpdateThrottleCache(
std::unique_ptr<Throttler::ThrottleSignal>
Throttler::ThrottleSignalGenerator::operator()(
const std::string& /*unused*/,
const void* /*unused*/) {
const void* /*unused*/,
void* /*unused*/) {
return std::unique_ptr<ThrottleSignal>(new ThrottleSignal{1});
}

Expand Down
3 changes: 2 additions & 1 deletion velox/dwio/common/Throttler.h
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@ class Throttler {

std::unique_ptr<ThrottleSignal> operator()(
const std::string& /*unused*/,
const void* /*unused*/);
const void* /*unused*/,
void* /*unused*/);
};

using CachedThrottleSignalPtr = CachedPtr<std::string, ThrottleSignal>;
Expand Down
5 changes: 2 additions & 3 deletions velox/experimental/wave/common/KernelCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -94,9 +94,8 @@ class AsyncCompiledKernel : public CompiledKernel {

class KernelGenerator {
public:
std::unique_ptr<ModulePtr> operator()(
const std::string,
const KernelGenFunc* gen) {
std::unique_ptr<ModulePtr>
operator()(const std::string, const KernelGenFunc* gen, void* /*unused*/) {
using ModulePromise = folly::Promise<ModulePtr>;
struct PromiseHolder {
ModulePromise promise;
Expand Down

0 comments on commit e8ffda6

Please sign in to comment.