Skip to content

Commit

Permalink
feat: Implement fb_unescape Function in Velox
Browse files Browse the repository at this point in the history
Summary:
Implement the fb_unescape function in Velox, based on Presto's implementation.
[FacebookEscapeFunctions java code](https://www.internalfb.com/code/fbsource/[4a8b76c84fcb]/fbcode/github/presto-facebook-trunk/presto-facebook-functions/src/main/java/com/facebook/presto/facebook/FacebookEscapeFunctions.java)

Differential Revision: D69147813
  • Loading branch information
duxiao1212 authored and facebook-github-bot committed Feb 28, 2025
1 parent 8ca7f33 commit 0fd2001
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 4 deletions.
48 changes: 45 additions & 3 deletions velox/common/fuzzer/ConstrainedGenerators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,8 +139,8 @@ folly::dynamic JsonInputGenerator::convertVariantToDynamic(
}
}

std::vector<std::string> getControlCharacters() {
static std::vector<std::string> controlCharacters = {
const std::vector<std::string>& getControlCharacters() {
static const std::vector<std::string> controlCharacters = {
"\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06",
"\x07", "\x08", "\x09", "\x0A", "\x0B", "\x0C", "\x0D",
"\x0E", "\x0F", "\x10", "\x11", "\x12", "\x13", "\x14",
Expand All @@ -156,7 +156,7 @@ std::vector<std::string> getControlCharacters() {

void JsonInputGenerator::makeRandomVariation(std::string json) {
if (coinToss(rng_, 0.1)) {
const auto controlCharacters = getControlCharacters();
const auto& controlCharacters = getControlCharacters();
const auto index = rand<uint32_t>(rng_, 0, controlCharacters.size() - 1);
const auto& controlCharacter = controlCharacters[index];
const auto indexToInsert = rand<uint32_t>(rng_, 0, json.size());
Expand All @@ -167,6 +167,48 @@ void JsonInputGenerator::makeRandomVariation(std::string json) {
}
}

StringEscapingInputGenerator::StringEscapingInputGenerator(
size_t seed,
const TypePtr& type,
double nullRatio,
std::shared_ptr<RandomInputGenerator<StringView>> randomStrGenerator)
: AbstractInputGenerator(seed, type, nullptr, nullRatio),
randomStrGenerator_(std::move(randomStrGenerator)) {}

// Produces the next fuzzed value: either a NULL of this generator's type kind,
// or a string obtained from the wrapped generator after makeRandomVariation()
// has been applied to it.
variant StringEscapingInputGenerator::generate() {
  // The null coin toss is drawn first, so the wrapped generator is consulted
  // only when this generator did not already pick NULL.
  if (!coinToss(rng_, nullRatio_)) {
    auto baseValue = randomStrGenerator_->generate();
    if (baseValue.hasValue()) {
      std::string mutated = baseValue.value<TypeKind::VARCHAR>();
      makeRandomVariation(mutated);
      return variant(std::move(mutated));
    }
  }

  // Either the coin toss selected NULL or the wrapped generator had no value.
  return variant::null(type_->kind());
}

void StringEscapingInputGenerator::makeRandomVariation(std::string& randonStr) {
if (coinToss(rng_, 0.5)) {
const auto& controlCharacters = getControlCharacters();
const auto index = rand<uint32_t>(rng_, 0, controlCharacters.size() - 1);
const auto& controlCharacter = controlCharacters[index];
const auto indexToInsert = rand<uint32_t>(rng_, 0, randonStr.size());
randonStr.insert(indexToInsert, controlCharacter);
} else if (coinToss(rng_, 0.1)) {
const auto size = rand<uint32_t>(rng_, 0, randonStr.size());
randonStr.resize(size);
} else if (!randonStr.empty() && coinToss(rng_, 0.1)) {
const auto start = rand<uint32_t>(rng_, 0, randonStr.size() - 1);
randonStr = randonStr.substr(start);
} else if (coinToss(rng_, 0.5)) {
// Add escaped string test cases
// make the string suitable for representation as a C string literal
randonStr = folly::cEscape<std::string>(randonStr);
}
}

// Utility functions
template <bool, TypeKind KIND>
std::unique_ptr<AbstractInputGenerator> getRandomInputGeneratorPrimitive(
Expand Down
17 changes: 17 additions & 0 deletions velox/common/fuzzer/ConstrainedGenerators.h
Original file line number Diff line number Diff line change
Expand Up @@ -360,4 +360,21 @@ class JsonInputGenerator : public AbstractInputGenerator {
folly::json::serialization_opts opts_;
};

// Input generator for string values that wraps a
// RandomInputGenerator<StringView>: generate() draws a string from the wrapped
// generator and applies makeRandomVariation() to it before returning it.
class StringEscapingInputGenerator : public AbstractInputGenerator {
public:
// 'seed', 'type' and 'nullRatio' are passed on to AbstractInputGenerator.
// 'randomStrGenerator' supplies the strings that generate() mutates.
StringEscapingInputGenerator(
size_t seed,
const TypePtr& type,
double nullRatio,
std::shared_ptr<RandomInputGenerator<StringView>> randomStrGenerator);

~StringEscapingInputGenerator() override = default;

// Returns either a NULL variant of this generator's type kind or a mutated
// string drawn from the wrapped generator.
variant generate() override;

private:
// Applies at most one random mutation to 'str' in place (control-character
// insertion, truncation, prefix removal, or C-style escaping).
void makeRandomVariation(std::string& str);
// Source of the strings that generate() mutates.
std::shared_ptr<RandomInputGenerator<StringView>> randomStrGenerator_;
};

} // namespace facebook::velox::fuzzer
31 changes: 30 additions & 1 deletion velox/expression/fuzzer/ArgValuesGenerators.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,35 @@ std::vector<core::TypedExprPtr> JsonParseArgValuesGenerator::generate(
inputExpressions[0] = std::make_shared<core::FieldAccessTypedExpr>(
signature.args[0], state.inputRowNames_.back());
return inputExpressions;
}
};

// Sets up custom input generation for a function that takes a single string
// argument.
//
// A StringEscapingInputGenerator (wrapping a RandomInputGenerator<StringView>)
// is appended to 'state.customInputGenerators_'. Both generators are
// constructed with the same seed drawn from 'rng', the argument type
// 'signature.args[0]', and 'options.nullRatio'.
//
// @param signature Signature of the function being fuzzed; must have exactly
// one argument, otherwise VELOX_CHECK_EQ fails.
// @param options Fuzzer options; only 'nullRatio' is read here.
// @param rng Random source used to draw the generators' seed.
// @param state Fuzzer state updated by populateInputTypesAndNames() and
// extended with the new custom input generator.
// @return One expression per argument; the only entry is a field access on
// 'state.inputRowNames_.back()' with the argument's type.
std::vector<core::TypedExprPtr> StringEscapeArgValuesGenerator::generate(
    const CallableSignature& signature,
    const VectorFuzzer::Options& options,
    FuzzerGenerator& rng,
    ExpressionFuzzerState& state) {
  VELOX_CHECK_EQ(signature.args.size(), 1);
  populateInputTypesAndNames(signature, state);

  const auto seed = rand<uint32_t>(rng);
  const auto nullRatio = options.nullRatio;

  state.customInputGenerators_.emplace_back(
      std::make_shared<fuzzer::StringEscapingInputGenerator>(
          seed,
          signature.args[0],
          nullRatio,
          std::make_shared<fuzzer::RandomInputGenerator<StringView>>(
              seed, signature.args[0], nullRatio)));

  // Populate inputExpressions for the argument that requires custom
  // generation. A nullptr should be left at inputExpressions[i] if the i-th
  // argument does not require custom input generation.
  // Parentheses select the (count, value) constructor explicitly rather than
  // relying on brace-initialization overload resolution.
  std::vector<core::TypedExprPtr> inputExpressions(
      signature.args.size(), nullptr);
  // NOTE(review): presumably populateInputTypesAndNames() appended the name
  // read here via inputRowNames_.back() - confirm against its definition.
  inputExpressions[0] = std::make_shared<core::FieldAccessTypedExpr>(
      signature.args[0], state.inputRowNames_.back());
  return inputExpressions;
}
} // namespace facebook::velox::fuzzer
11 changes: 11 additions & 0 deletions velox/expression/fuzzer/ArgValuesGenerators.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,15 @@ class JsonParseArgValuesGenerator : public ArgValuesGenerator {
ExpressionFuzzerState& state) override;
};

// ArgValuesGenerator for functions that take a single string argument. Its
// generate() registers a fuzzer::StringEscapingInputGenerator for that
// argument.
class StringEscapeArgValuesGenerator : public ArgValuesGenerator {
public:
~StringEscapeArgValuesGenerator() override = default;

// Appends a custom input generator to 'state' and returns one expression per
// argument of 'signature' (which must have exactly one argument).
std::vector<core::TypedExprPtr> generate(
const CallableSignature& signature,
const VectorFuzzer::Options& options,
FuzzerGenerator& rng,
ExpressionFuzzerState& state) override;
};

} // namespace facebook::velox::fuzzer

0 comments on commit 0fd2001

Please sign in to comment.