Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1410,6 +1410,86 @@ def timestamp_to_unix_micros(self) -> "Expression":
"""
return FunctionExpression("timestamp_to_unix_micros", [self])

@expose_as_static
def array_agg(self) -> "Expression":
"""Creates an aggregation that collects all values of an expression
across multiple stage inputs into an array.

If the expression resolves to an absent value, it is converted to
`None`. The order of elements in the output array is not stable and
shouldn't be relied upon.

This API is provided as a preview for developers and may change based
on feedback that we receive. Do not use this API in a production
environment.

Example:
>>> # Collect all values of field 'color' into an array
>>> Field.of("color").array_agg()

Returns:
A new `AggregateFunction` representing the array aggregation.
"""
return AggregateFunction("array_agg", [self])

@expose_as_static
def array_agg_distinct(self) -> "Expression":
"""Creates an aggregation that collects all distinct values of an
expression across multiple stage inputs into an array.

If the expression resolves to an absent value, it is converted to
`None`. The order of elements in the output array is not stable and
shouldn't be relied upon.

This API is provided as a preview for developers and may change based
on feedback that we receive. Do not use this API in a production
environment.

Example:
>>> # Collect distinct values of field 'color' into an array
>>> Field.of("color").array_agg_distinct()

Returns:
A new `AggregateFunction` representing the distinct array aggregation.
"""
return AggregateFunction("array_agg_distinct", [self])

@expose_as_static
def first(self) -> "Expression":
"""Creates an aggregation that finds the first value of an expression
across multiple stage inputs.

This API is provided as a preview for developers and may change based
on feedback that we receive. Do not use this API in a production
environment.

Example:
>>> # Select the first value of field 'color'
>>> Field.of("color").first()

Returns:
A new `AggregateFunction` representing the first aggregation.
"""
return AggregateFunction("first", [self])

@expose_as_static
def last(self) -> "Expression":
"""Creates an aggregation that finds the last value of an expression
across multiple stage inputs.

This API is provided as a preview for developers and may change based
on feedback that we receive. Do not use this API in a production
environment.

Example:
>>> # Select the last value of field 'color'
>>> Field.of("color").last()

Returns:
A new `AggregateFunction` representing the last aggregation.
"""
return AggregateFunction("last", [self])

@expose_as_static
def unix_micros_to_timestamp(self) -> "Expression":
"""Creates an expression that converts a number of microseconds since the epoch (1970-01-01
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -281,4 +281,165 @@ tests:
- "total_rating"
assert_results:
- total_rating: 8.8

- description: testArrayAgg
pipeline:
- Collection: books
- Sort:
- Ordering:
- Field: title
- ASCENDING
- Aggregate:
- AliasedExpression:
- FunctionExpression.array_agg:
- Field: title
- "all_titles"
assert_results:
- all_titles:
- "1984"
- "Crime and Punishment"
- "Dune"
- "One Hundred Years of Solitude"
- "Pride and Prejudice"
- "The Great Gatsby"
- "The Handmaid's Tale"
- "The Hitchhiker's Guide to the Galaxy"
- "The Lord of the Rings"
- "To Kill a Mockingbird"
assert_proto:
pipeline:
stages:
- args:
- referenceValue: /books
name: collection
- args:
- mapValue:
fields:
direction:
stringValue: ascending
expression:
fieldReferenceValue: title
name: sort
- args:
- mapValue:
fields:
all_titles:
functionValue:
name: array_agg
args:
- fieldReferenceValue: title
- mapValue: {}
name: aggregate
- description: testArrayAggDistinct
pipeline:
- Collection: books
- Where:
- FunctionExpression.equal:
- Field: genre
- Constant: Science Fiction
- Aggregate:
- AliasedExpression:
- FunctionExpression.array_agg_distinct:
- Field: genre
- "distinct_genres"
assert_results:
- distinct_genres:
- "Science Fiction"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a better test example that could be used? It looks like this is finding distinct values out of a fixed array with size 1

Could we make it read genres from the database, and return the distinct values or something?

assert_proto:
pipeline:
stages:
- args:
- referenceValue: /books
name: collection
- args:
- functionValue:
args:
- fieldReferenceValue: genre
- stringValue: Science Fiction
name: equal
name: where
- args:
- mapValue:
fields:
distinct_genres:
functionValue:
name: array_agg_distinct
args:
- fieldReferenceValue: genre
- mapValue: {}
name: aggregate
- description: testFirst
pipeline:
- Collection: books
- Sort:
- Ordering:
- Field: title
- ASCENDING
- Aggregate:
- AliasedExpression:
- FunctionExpression.first:
- Field: title
- "first_title"
assert_results:
- first_title: "1984"
assert_proto:
pipeline:
stages:
- args:
- referenceValue: /books
name: collection
- args:
- mapValue:
fields:
direction:
stringValue: ascending
expression:
fieldReferenceValue: title
name: sort
- args:
- mapValue:
fields:
first_title:
functionValue:
name: first
args:
- fieldReferenceValue: title
- mapValue: {}
name: aggregate
- description: testLast
pipeline:
- Collection: books
- Sort:
- Ordering:
- Field: title
- ASCENDING
- Aggregate:
- AliasedExpression:
- FunctionExpression.last:
- Field: title
- "last_title"
assert_results:
- last_title: "To Kill a Mockingbird"
assert_proto:
pipeline:
stages:
- args:
- referenceValue: /books
name: collection
- args:
- mapValue:
fields:
direction:
stringValue: ascending
expression:
fieldReferenceValue: title
name: sort
- args:
- mapValue:
fields:
last_title:
functionValue:
name: last
args:
- fieldReferenceValue: title
- mapValue: {}
name: aggregate
Original file line number Diff line number Diff line change
Expand Up @@ -1583,3 +1583,39 @@ def test_maximum(self):
assert repr(instance) == "Value.maximum()"
infix_instance = arg1.maximum()
assert infix_instance == instance

def test_array_agg(self):
arg1 = self._make_arg("Value")
instance = Expression.array_agg(arg1)
assert instance.name == "array_agg"
assert instance.params == [arg1]
assert repr(instance) == "Value.array_agg()"
infix_instance = arg1.array_agg()
assert infix_instance == instance

def test_array_agg_distinct(self):
arg1 = self._make_arg("Value")
instance = Expression.array_agg_distinct(arg1)
assert instance.name == "array_agg_distinct"
assert instance.params == [arg1]
assert repr(instance) == "Value.array_agg_distinct()"
infix_instance = arg1.array_agg_distinct()
assert infix_instance == instance

def test_first(self):
arg1 = self._make_arg("Value")
instance = Expression.first(arg1)
assert instance.name == "first"
assert instance.params == [arg1]
assert repr(instance) == "Value.first()"
infix_instance = arg1.first()
assert infix_instance == instance

def test_last(self):
arg1 = self._make_arg("Value")
instance = Expression.last(arg1)
assert instance.name == "last"
assert instance.params == [arg1]
assert repr(instance) == "Value.last()"
infix_instance = arg1.last()
assert infix_instance == instance
Loading