Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fixed auto-scheduler for recursive rules using Selinger w/ vector #2258

Merged
merged 1 commit into from
Apr 6, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 6 additions & 3 deletions src/ast/analysis/ProfileUse.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -70,9 +70,12 @@ std::size_t ProfileUseAnalysis::getNonRecursiveUniqueKeys(
return reader->getNonRecursiveCountUniqueKeys(rel, attributes, constants);
}

std::size_t ProfileUseAnalysis::getRecursiveUniqueKeys(
const std::string& rel, const std::string& attributes, const std::string& constants) const {
return reader->getRecursiveCountUniqueKeys(rel, attributes, constants);
/**
 * Unique-key count of a recursive relation for one iteration, as reported
 * by the profile reader. Every argument is handed to the reader unchanged.
 */
std::size_t ProfileUseAnalysis::getRecursiveUniqueKeys(const std::string& rel, const std::string& attributes,
        const std::string& constants, const std::string& iteration) const {
    const std::size_t uniqueKeys = reader->getRecursiveCountUniqueKeys(rel, attributes, constants, iteration);
    return uniqueKeys;
}

/** Iteration count the profile reader reports for relation `rel`. */
std::size_t ProfileUseAnalysis::getIterations(const std::string& rel) const {
    const std::size_t iterationCount = reader->getIterations(rel);
    return iterationCount;
}
} // namespace souffle::ast::analysis
6 changes: 4 additions & 2 deletions src/ast/analysis/ProfileUse.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,10 @@ class ProfileUseAnalysis : public Analysis {
std::size_t getNonRecursiveUniqueKeys(
const std::string& rel, const std::string& attributes, const std::string& constants) const;

std::size_t getRecursiveUniqueKeys(
const std::string& rel, const std::string& attributes, const std::string& constants) const;
/**
 * Unique-key count for a recursive relation; `iteration` names the
 * iteration whose figure is requested (the lookup is done by the profile reader).
 */
std::size_t getRecursiveUniqueKeys(const std::string& rel, const std::string& attributes,
const std::string& constants, const std::string& iteration) const;

/** Iteration count the profile reader reports for relation `rel`. */
std::size_t getIterations(const std::string& rel) const;

private:
/** performance model of profile run */
Expand Down
108 changes: 69 additions & 39 deletions src/ast/utility/SipsMetric.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
auto* prof = profileUseAnalysis;
auto getRelationSize = [&prof](bool isRecursive, const ast::QualifiedName& rel,
const std::vector<std::size_t>& joinColumns,
const std::map<std::size_t, std::string>& constantsMap) {
const std::map<std::size_t, std::string>& constantsMap,
const std::string& iteration) {
std::set<std::size_t> joinKeys(joinColumns.begin(), joinColumns.end());
for (auto& [k, _] : constantsMap) {
joinKeys.insert(k);
Expand All @@ -127,7 +128,7 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
constants[constants.size() - 1] = ']';

if (isRecursive) {
return prof->getRecursiveUniqueKeys(rel.toString(), attributes, constants);
return prof->getRecursiveUniqueKeys(rel.toString(), attributes, constants, iteration);
}

return prof->getNonRecursiveUniqueKeys(rel.toString(), attributes, constants);
Expand Down Expand Up @@ -243,6 +244,17 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(

std::unordered_map<AtomIdx, std::map<ArgIdx, std::string>> atomToIdxConstants;

std::size_t iterations = 1;
for (std::size_t i = 0; i < atoms.size(); ++i) {
auto* atom = atoms[i];
std::string name = getClauseAtomName(*clause, atom, sccAtoms, version, mode);
bool isRecursive = recursiveInCurrentStratum.count(i) > 0;
if (isRecursive) {
iterations = prof->getIterations(name);
break;
}
}

AtomIdx atomIdx = 0;
for (auto* atom : atoms) {
std::string name = getClauseAtomName(*clause, atom, sccAtoms, version, mode);
Expand Down Expand Up @@ -271,11 +283,17 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
// start by storing the access cost for each individual relation
std::vector<AtomIdx> empty;
bool isRecursive = recursiveInCurrentStratum.count(atomIdx) > 0;
std::size_t tuples = getRelationSize(isRecursive, name, empty, idxConstant);
double cost = static_cast<double>(tuples * atom->getArity());
AtomSet singleton = {atomIdx};
std::vector<AtomIdx> plan = {atomIdx};
cache[1].insert(std::make_pair(singleton, PlanTuplesCost(plan, tuples, cost)));
PlanTuplesCost p;
p.plan = plan;
for (std::size_t iter = 0; iter < iterations; ++iter) {
std::size_t tuples = getRelationSize(isRecursive, name, empty, idxConstant, std::to_string(iter));
double cost = static_cast<double>(tuples * atom->getArity());
p.tuplesPerIteration.push_back(tuples);
p.costsPerIteration.push_back(cost);
}
cache[1].insert(std::make_pair(singleton, p));
++atomIdx;
}

Expand All @@ -295,12 +313,6 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
smallerSubset.insert(subset[j]);
}

// lookup the cost in the cache
auto& planTuplesCost = cache[K - 1].at(smallerSubset);
auto& oldPlan = planTuplesCost.plan;
auto oldTuples = planTuplesCost.tuples;
auto oldCost = planTuplesCost.cost;

// compute the grounded variables from the subset
VarSet groundedVariablesFromSubset;
for (auto idx : smallerSubset) {
Expand Down Expand Up @@ -350,49 +362,67 @@ std::vector<std::size_t> SelingerProfileSipsMetric::getReordering(
}
}

// lookup the cost in the cache
auto& planTuplesCost = cache[K - 1].at(smallerSubset);
auto& oldPlan = planTuplesCost.plan;
auto oldTuples = planTuplesCost.tuplesPerIteration;
auto oldCost = planTuplesCost.costsPerIteration;

PlanTuplesCost p;
bool isRecursive = recursiveInCurrentStratum.count(atomIdx) > 0;
std::vector<ArgIdx> empty;
double expectedTuples = 0;

if (numBound == atom->getArity()) {
expectedTuples = 1;
} else {
auto relSizeWithConstants = getRelationSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), empty,
atomToIdxConstants[atomIdx]);

if (joinColumns.empty()) {
expectedTuples = static_cast<double>(relSizeWithConstants);
double newTotalCost = 0.0;
for (std::size_t iter = 0; iter < iterations; ++iter) {
if (numBound == atom->getArity()) {
expectedTuples = 1;
} else {
auto uniqueKeys = getRelationSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), joinColumns,
atomToIdxConstants[atomIdx]);

bool normalize = (uniqueKeys > 0);
expectedTuples =
static_cast<double>(relSizeWithConstants) / (normalize ? uniqueKeys : 1);
auto relSizeWithConstants = getRelationSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), empty,
atomToIdxConstants[atomIdx], std::to_string(iter));

if (joinColumns.empty()) {
expectedTuples = static_cast<double>(relSizeWithConstants);
} else {
auto uniqueKeys = getRelationSize(isRecursive,
getClauseAtomName(*clause, atom, sccAtoms, version, mode), joinColumns,
atomToIdxConstants[atomIdx], std::to_string(iter));

bool normalize = (uniqueKeys > 0);
expectedTuples =
static_cast<double>(relSizeWithConstants) / (normalize ? uniqueKeys : 1);
}
}
}

// calculate new number of tuples
std::size_t newTuples = static_cast<std::size_t>(oldTuples * expectedTuples);
// calculate new number of tuples
std::size_t newTuples = static_cast<std::size_t>(oldTuples[iter] * expectedTuples);

// calculate new cost
double newCost = oldCost[iter] + newTuples * atom->getArity();

// calculate new cost
double newCost = oldCost + newTuples * atom->getArity();
// add to vector of costs/tuples
p.tuplesPerIteration.push_back(newTuples);
p.costsPerIteration.push_back(newCost);
newTotalCost += newCost;
}

// calculate new plan
std::vector<AtomIdx> newPlan(oldPlan.begin(), oldPlan.end());
newPlan.push_back(atomIdx);
p.plan = newPlan;

// if no plan then insert it
AtomSet currentSet(subset.begin(), subset.end());
if (cache[K].count(currentSet) == 0) {
cache[K].insert(std::make_pair(currentSet, PlanTuplesCost(newPlan, newTuples, newCost)));
}
// if we have a lower cost
else if (cache[K].at(currentSet).cost >= newCost) {
cache[K].erase(currentSet);
cache[K].insert(std::make_pair(currentSet, PlanTuplesCost(newPlan, newTuples, newCost)));
cache[K].insert(std::make_pair(currentSet, p));
} else {
// if we have a lower cost
auto& costVector = cache[K].at(currentSet).costsPerIteration;
double oldTotalCost = std::accumulate(costVector.begin(), costVector.end(), 0.0);
if (oldTotalCost >= newTotalCost) {
cache[K].erase(currentSet);
cache[K].insert(std::make_pair(currentSet, p));
}
}
}
}
Expand Down
7 changes: 2 additions & 5 deletions src/ast/utility/SipsMetric.h
Original file line number Diff line number Diff line change
Expand Up @@ -74,12 +74,9 @@ class SelingerProfileSipsMetric : public SipsMetric {
private:
/* helper struct for Selinger */
struct PlanTuplesCost {
PlanTuplesCost(const std::vector<std::size_t>& givenPlan, std::size_t givenTuples, double givenCost)
: plan(givenPlan), tuples(givenTuples), cost(givenCost) {}

std::vector<std::size_t> plan;
std::size_t tuples;
double cost;
std::vector<std::size_t> tuplesPerIteration;
std::vector<double> costsPerIteration;
};

const PowerSet& getSubsets(std::size_t N, std::size_t K) const;
Expand Down
22 changes: 14 additions & 8 deletions src/include/souffle/profile/Reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -428,16 +428,22 @@ class Reader {
return countNonRecursiveUniqueKeysMap.at(key);
}

std::size_t getRecursiveCountUniqueKeys(
const std::string& rel, const std::string& attributes, const std::string& constants) {
/**
 * Report how many per-iteration entries are stored for relation `rel`.
 *
 * Each key of countRecursiveUniqueKeysMap is matched on the text in front of
 * its first space; the first key encountered whose leading token equals `rel`
 * decides the result. Asserts when no key matches (and yields 0 if
 * assertions are compiled out).
 */
std::size_t getIterations(const std::string& rel) {
    for (auto& [mapKey, perIteration] : countRecursiveUniqueKeysMap) {
        // compare only the leading token (the whole key when it holds no space)
        const auto nameLength = mapKey.find(' ');
        if (mapKey.compare(0, nameLength, rel) != 0) {
            continue;
        }
        return perIteration.size();
    }
    assert(false);
    return 0;
}

std::size_t getRecursiveCountUniqueKeys(const std::string& rel, const std::string& attributes,
const std::string& constants, const std::string& iteration) {
auto key = rel + " " + attributes + " " + constants;
auto& m = countRecursiveUniqueKeysMap.at(key);
double total = 0.0;
for (auto [_, count] : m) {
total += count;
}
double average = ceil(total / m.size());
return static_cast<std::size_t>(average);
return static_cast<std::size_t>(m.at(iteration));
}

void addRelation(const DirectoryEntry& relation) {
Expand Down