Skip to content

Commit

Permalink
Add array_frequency Presto function (#3807)
Browse files Browse the repository at this point in the history
Summary:
Pull Request resolved: #3807

Implement `array_frequency` in velox with same signature [as presto](https://prestodb.io/docs/current/functions/array.html#array_frequency).

Fixes #3752

Reviewed By: Yuhta, mbasmanova

Differential Revision: D42712223

fbshipit-source-id: 48c73e9ab0574c01945af91f85913628bd5e01ad
  • Loading branch information
vermapratyush authored and facebook-github-bot committed Jan 26, 2023
1 parent 045d5df commit 14e9932
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 0 deletions.
10 changes: 10 additions & 0 deletions velox/docs/functions/array.rst
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@ Array Functions
SELECT array_except(ARRAY [1, 2, 2], ARRAY [1, 3, 4]); -- [2]
SELECT array_except(ARRAY [1, NULL, NULL], ARRAY [1, 1, NULL]); -- []

.. function:: array_frequency(array(E) x) -> map(E, int)

Returns a map whose keys are the unique elements of the array and whose values are the number of times each key appears.
Null elements are ignored. An empty array returns an empty map. E must be bigint or varchar. ::

SELECT array_frequency(ARRAY [1, 1, 2, 2, 2, 2]); -- {1 -> 2, 2 -> 4}
SELECT array_frequency(ARRAY [1, 1, NULL, NULL, NULL]); -- {1 -> 2}
SELECT array_frequency(ARRAY ['knock', 'knock', 'who', '?']); -- {'knock' -> 2, 'who' -> 1, '?' -> 1}
SELECT array_frequency(ARRAY []); -- {}

.. function:: array_has_duplicates(array(E)) -> boolean

Returns a boolean: whether array has any elements that occur more than once.
Expand Down
27 changes: 27 additions & 0 deletions velox/functions/prestosql/ArrayFunctions.h
Original file line number Diff line number Diff line change
Expand Up @@ -399,4 +399,31 @@ struct ArrayHasDuplicatesFunction {
}
};

// Function Signature: array<T> -> map<T, int>, where T is ("bigint", "varchar")
// Builds a map from each distinct non-null element of the input array to the
// number of times that element occurs in it.
template <typename TExecParams, typename T>
struct ArrayFrequencyFunction {
  VELOX_DEFINE_FUNCTION_TYPES(TExecParams);

  FOLLY_ALWAYS_INLINE void call(
      out_type<velox::Map<T, int>>& out,
      arg_type<velox::Array<T>> inputArray) {
    // Per-call tally: element value -> occurrence count.
    folly::F14FastMap<arg_type<T>, int> counts;

    if (!inputArray.mayHaveNulls()) {
      // The array reports that it cannot contain nulls, so each element is
      // dereferenced directly without a per-element null check.
      for (const auto& element : inputArray) {
        ++counts[element.value()];
      }
    } else {
      // Nulls are possible: iterate through the skipNulls() view so that null
      // entries never become keys in the tally.
      for (const auto& value : inputArray.skipNulls()) {
        ++counts[value];
      }
    }

    out.copy_from(counts);
  }
};

} // namespace facebook::velox::functions
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ inline void registerArrayHasDuplicatesFunctions() {
Array<T>>({"array_has_duplicates"});
}

template <typename T>
inline void registerArrayFrequencyFunctions() {
registerFunction<
ParameterBinder<ArrayFrequencyFunction, T>,
Map<T, int>,
Array<T>>({"array_frequency"});
}

void registerArrayFunctions() {
registerArrayConstructor("array_constructor");
VELOX_REGISTER_VECTOR_FUNCTION(udf_array_distinct, "array_distinct");
Expand Down Expand Up @@ -120,5 +128,8 @@ void registerArrayFunctions() {
registerArrayHasDuplicatesFunctions<int32_t>();
registerArrayHasDuplicatesFunctions<int64_t>();
registerArrayHasDuplicatesFunctions<Varchar>();

registerArrayFrequencyFunctions<int64_t>();
registerArrayFrequencyFunctions<Varchar>();
}
}; // namespace facebook::velox::functions
101 changes: 101 additions & 0 deletions velox/functions/prestosql/tests/ArrayFrequencyTest.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "velox/functions/prestosql/tests/utils/FunctionBaseTest.h"

using namespace facebook::velox;
using namespace facebook::velox::test;

namespace {
// Test fixture for the array_frequency function.
class ArrayFrequencyTest : public functions::test::FunctionBaseTest {
 protected:
  // Evaluates "array_frequency(C0)" over a row vector whose only child is
  // `input` and asserts that the outcome equals `expected`.
  void testExpr(const VectorPtr& expected, const VectorPtr& input) {
    const auto row = makeRowVector({input});
    const auto actual = evaluate<BaseVector>("array_frequency(C0)", row);
    assertEqualVectors(expected, actual);
  }
};
} // namespace

// Integer input containing duplicates, an empty array, null elements and the
// largest int64_t value.
TEST_F(ArrayFrequencyTest, integerArray) {
  constexpr int64_t kMax = std::numeric_limits<int64_t>::max();

  const auto input = makeNullableArrayVector<int64_t>(
      {{2, 1, 1, -2},
       {},
       {1, 2, 1, 1, 1, 1},
       {-1, std::nullopt, -1, -1},
       {kMax, kMax, 1, std::nullopt, 0, 1, std::nullopt, 0}});

  // One expected map per input row; null elements contribute no key.
  const auto expectedCounts = makeMapVector<int64_t, int>(
      {{{1, 2}, {2, 1}, {-2, 1}},
       {},
       {{1, 5}, {2, 1}},
       {{-1, 3}},
       {{kMax, 2}, {1, 2}, {0, 2}}});

  testExpr(expectedCounts, input);
}

// Integer input built with makeArrayVector, i.e. without any null elements.
TEST_F(ArrayFrequencyTest, integerArrayWithoutNull) {
  const auto input = makeArrayVector<int64_t>({
      {2, 1, 1, -2},
      {},
      {1, 2, 1, 1, 1, 1},
  });

  // One expected map per input row.
  const auto expectedCounts = makeMapVector<int64_t, int>({
      {{1, 2}, {2, 1}, {-2, 1}},
      {},
      {{1, 5}, {2, 1}},
  });

  testExpr(expectedCounts, input);
}

// Varchar input containing duplicates, an empty array, null elements and a
// 20-character string alongside 1- and 5-character ones.
TEST_F(ArrayFrequencyTest, varcharArray) {
  const auto input = makeNullableArrayVector<StringView>(
      {{"hello"_sv, "world"_sv, "!"_sv, "!"_sv, "!"_sv},
       {},
       {"hello"_sv, "world"_sv, std::nullopt, "!"_sv, "!"_sv},
       {"helloworldhelloworld"_sv,
        "helloworldhelloworld"_sv,
        std::nullopt,
        "!"_sv,
        "!"_sv}});

  // One expected map per input row; null elements contribute no key.
  const auto expectedCounts = makeMapVector<StringView, int>({
      {{"hello"_sv, 1}, {"world"_sv, 1}, {"!"_sv, 3}},
      {},
      {{"hello"_sv, 1}, {"world"_sv, 1}, {"!"_sv, 2}},
      {{"helloworldhelloworld"_sv, 2}, {"!"_sv, 2}},
  });

  testExpr(expectedCounts, input);
}

// Varchar input without any null elements.
TEST_F(ArrayFrequencyTest, varcharArrayWithoutNull) {
  // Built with makeArrayVector (not makeNullableArrayVector) so that this test
  // mirrors integerArrayWithoutNull and feeds the function an input whose
  // elements are declared non-nullable.
  auto array = makeArrayVector<StringView>({
      {"hello"_sv, "world"_sv, "!"_sv, "!"_sv, "!"_sv},
      {},
      {"helloworldhelloworld"_sv, "helloworldhelloworld"_sv, "!"_sv, "!"_sv},
  });

  // One expected map per input row.
  auto expected = makeMapVector<StringView, int>(
      {{{"hello"_sv, 1}, {"world"_sv, 1}, {"!"_sv, 3}},
       {},
       {{"helloworldhelloworld"_sv, 2}, {"!"_sv, 2}}});

  testExpr(expected, array);
}

0 comments on commit 14e9932

Please sign in to comment.