User GuidesAPI ReferenceRelease Notes
Doc HomeHelp CenterLog In
User Guides

General Functions

Transformation Functions

ABS

Returns the absolute value of the given input

SYNTAX

abs(input)

  • input
    • Type: number
    • Description: The attribute to return absolute value for

OUTPUT

number

EXAMPLE

SELECT abs(number_value) as absolute_value
number_valueabsolute_value
1.01.0
-1.01.0
-123.45123.45
00
InfinityInfinity
-InfinityInfinity
nullnull

ADD

Returns the sum of two integers.

SYNTAX

add(integer1, integer2)

  • integer1
    • Type: int
    • Description: The first integer to add
  • integer2
    • Type: int
    • Description: The second integer to add

OUTPUT

int

EXAMPLE

SELECT add(apples, oranges) AS total_fruit
applesorangestotal_fruit
13316
8210
-155-10
246
11516
4nullnull
null3null
nullnullnull

ARRAY

Returns an array with the given elements.

SYNTAX

array()

OUTPUT

{'type': 'array', 'elementType': 'any'}

EXAMPLES

SELECT array(opening_act, headliner) AS bands
opening_actheadlinerbands
Pink FloydJimi Hendrix Experience['Pink Floyd', 'Jimi Hendrix Experience']
nullBeyoncé[null, 'Beyoncé']
SELECT array(...some_array) AS identical
some_arrayidentical
[1, 2, 3][1, 2, 3]
[2][2]
[null, 2, null][null, 2, null]

ARRAY_CONCAT

Converts all values in an array into strings and then concatenates them together into a single string.

SYNTAX

array_concat(array)

  • array
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The array whose values will be concatenated into a single string

OUTPUT

string

EXAMPLE

SELECT array_concat(example_arrays) AS concatenated_arrays
example_arraysconcatenated_arrays
['meow']meow
['This', ' ', 'is', ' ', 'a', ' ', 'sentence.']This is a sentence.
['']
[null]
[null, 'ta', '', null, 'mr', '']tamr
nullnull

ATOF

Converts a string to a floating point value. (ASCII-to-Float)

SYNTAX

atof(string_to_convert)

  • string_to_convert
    • Type: string
    • Description: The string to convert to a floating point value.

OUTPUT

double

EXAMPLE

SELECT atof(example_strings) AS example_strings_as_double;
example_stringsexample_strings_as_double
4242.0
-42-42.0
-273.15-273.15
7.7.0
.70.7
-.7-0.7
42E54200000.0
42.E+424.2e+43
6.022E-236.022e-23
abc123null
null
nullnull

ATOI

Converts a string to an integer value. (ASCII-to-Integer)

SYNTAX

atoi(string_to_convert)

  • string_to_convert
    • Type: string
    • Description: The string to convert to an integer value.

OUTPUT

int

EXAMPLE

SELECT atoi(example_strings) AS example_strings_as_int;
example_stringsexample_strings_as_int
4242
42.0null
33
3.14null
11
1.null
-300-300
-300.00000null
abc123null
null
nullnull

CEIL

Returns the smallest integer that is greater than or equal to the given number.

SYNTAX

ceil(input)

  • input
    • Type: number
    • Description: The input to compute the ceiling for.

OUTPUT

long

EXAMPLES

SELECT ceil(input_value) AS ceiling
input_valueceiling
1.12
1.92
-1.1-1
-1.9-1
00
NaN0
-Infinity-9223372036854775808
Infinity9223372036854775807
5e-3241
1.7976931348623157e+3089223372036854775807
nullnull
SELECT ceil(input_value) AS ceiling
input_valueceiling
4242
-42-42
nullnull

COALESCE

Returns the first input that is not null, or null if all inputs are null.

SYNTAX

coalesce()

OUTPUT

any

EXAMPLES

SELECT coalesce(firstName1, firstName2) AS nonNullName
firstName2firstName1nonNullName
ThomasTomTom
nullTomTom
ThomasnullThomas
Thomas
null
nullnullnull
SELECT coalesce(...ranked_names) AS nonNullName
ranked_namesnonNullName
['Tom', 'Thomas', '', 'Tommy']Tom
['Tom', null, '', 'Tommy']Tom
[null, 'Thomas', '', 'Tommy']Thomas
[null, null, '', 'Tommy']
[null, null, null]null
[null]null
[]null
nullnull

CONCAT

Concatenates string values. If any of the arguments to concat is null, it returns null.

SYNTAX

concat()

OUTPUT

string

EXAMPLES

SELECT concat(first_name, last_name) AS combined
first_namelast_namecombined
davidbowiedavidbowie
princenullnull
SELECT concat(...string_array) AS combined
string_arraycombined
['a', 'b', 'c']abc
['a', null]null
[]

DATEDIFF

Gets the difference in days between the start and end date. The result will be negative if the end date occurs before the start date. More precisely, this function counts the number of 24-hour periods between the start and end dates.

SYNTAX

datediff(endDate, startDate)

  • endDate
    • Type: string
    • Description: The end date
  • startDate
    • Type: string
    • Description: The start date

OUTPUT

long

EXAMPLE

SELECT start, "end", datediff("end", start) AS diff
startendstartdiffend
1991199219913651992
2004-03-012004-02-282004-03-01-22004-02-28
2016-12-25T06:44+02:002016-12-24 23:44-05:002016-12-25T06:44+02:0002016-12-24 23:44-05:00
1877-05-03 11:18:34.399+14:001877-05-03T11:18:34.399-10:001877-05-03 11:18:34.399+14:0011877-05-03T11:18:34.399-10:00
2004-12-12null2004-12-12nullnull
1982-02-01:35:551980-15-21 06:301982-02-01:35:55null1980-15-21 06:30
7 March 17791779-03-117 March 1779null1779-03-11

DATETIME_ERROR

Tries to convert the input to a datetime, returning the conversion error, if any.

SYNTAX

datetime_error(input)

  • input
    • Type: string
    • Description: The input to try to convert.

OUTPUT

string

EXAMPLE

SELECT datetime_error(date) as error
dateerror
nullnull
1990-03-07null
1990-03-07T15:44null
1990-03-07T15:44:10.392+06:00null
Text '' could not be parsed at index 0
I am not a dateText 'I am not a date' could not be parsed at index 0
2018-03-oopsText '2018-03-oops' could not be parsed, unparsed text found at index 7
2018-03-95Text '2018-03-95' could not be parsed: Invalid value for DayOfMonth (valid values 1 - 28/31): 95

DATETIME_TO_ISO

Formats a datetime string to the ISO_LOCAL_DATE_TIME format using the specified patterns. Patterns are matched in sequence, and the function will return the first matched pattern. If no patterns are matched, this function returns null.

SYNTAX

datetime_to_iso(datetime_string, patterns)

  • datetime_string
    • Type: string
    • Description: A datetime string
  • patterns
    • Type: {'type': 'array', 'elementType': 'string'}
    • Description: An array of patterns for parsing the input

OUTPUT

string

EXAMPLES

SELECT datetime_to_iso(datetime, patterns_array) AS datetimes_as_iso
patterns_arraydatetimedatetimes_as_iso
['yyyy-MM-dd-HH:mm:ss']2013-08-23-08:45:002013-08-23T08:45:00
['dd MMM yyyy']01 Apr 20172017-04-01
['dd MMM yyyy', 'dd MM yyyy']24 05 19971997-05-24
['dd MMM yyyy', 'dd MM yyyy']24 May 19971997-05-24
SELECT datetime_to_iso(datetime, patterns_array) AS datetimes_as_iso
patterns_arraydatetimedatetimes_as_iso
['dd MMM yyyy']null
['dd MMM yyyy']nullnull
null01 Apr 2017null
[null]01 Apr 2017null
['']01 Apr 2017null
['dd MMM yyyy']nomatchnull
['%@#*$&']01 Apr 2017null

DATE_AND_TIME_TO_ISO

Formats a date and time string to ISO datetime format using the specified patterns. Patterns are matched in sequence, and the function will return the first matched pattern. If no patterns are matched, this function returns null.

SYNTAX

date_and_time_to_iso(date_string, time_string, patterns)

  • date_string
    • Type: string
    • Description: The string representing the date
  • time_string
    • Type: string
    • Description: The string representing the time
  • patterns
    • Type: {'type': 'array', 'elementType': 'string'}
    • Description: The patterns used for matching the date and time to the ISO format

OUTPUT

string

EXAMPLES

SELECT date_and_time_to_iso(dates, times, patterns_arrays) AS datetimes_as_iso
datestimespatterns_arraysdatetimes_as_iso
2013-08-2308:45:00['yyyy-MM-dd:HH:mm:ss']2013-08-23T08:45:00
26-Feb-200908 27['dd-MMM-yyyy:HH mm']2009-02-26T08:27:00
2017-08-1411:45:47.999['yyyy-MM-dd:HH:mm:ss.SSS']2017-08-14T11:45:47.999
Aug-23-2013null['MMM-dd-yyyy']2013-08-23
nullAug-23-2013['MMM-dd-yyyy']2013-08-23
SELECT date_and_time_to_iso(dates, times, patterns_arrays) AS datetimes_as_iso
datestimespatterns_arraysdatetimes_as_iso
08:45:00['yyyy-MM-dd:HH:mm:ss']null
2013-08-23['yyyy-MM-dd:HH:mm:ss']null
2013-08-2308:45:00['']null
null08:45:00['yyyy-MM-dd:HH:mm:ss']null
2013-08-23null['yyyy-MM-dd:HH:mm:ss']null
2013-08-2308:45:00[null]null
2013-08-2308:45:00nullnull

DAYOFMONTH

Gets the day-of-month field of a date.

SYNTAX

dayofmonth(datetime)

  • datetime
    • Type: string
    • Description: The timestamp to get the day of month from

OUTPUT

int

EXAMPLES

SELECT dayOfMonth(date) AS day
dateday
1997-10-033
2077-06-23+06:4523
2001-03-14T15:43:19.15514
1580-01-04 06:30-04:004
SELECT dayOfMonth(date) AS day, datetime_error(date) as error
descriptiondateerrorday
null valuenullnullnull
empty stringText '' could not be parsed at index 0null
not a dateI am not a dateText 'I am not a date' could not be parsed at index 0null
starts date-like2018-03-oopsText '2018-03-oops' could not be parsed, unparsed text found at index 7null
bad date2018-03-95Text '2018-03-95' could not be parsed: Invalid value for DayOfMonth (valid values 1 - 28/31): 95null

DECIMAL

Converts a numeric value to double type.

SYNTAX

decimal(input)

  • input
    • Type: number
    • Description: The value to convert

OUTPUT

double

EXAMPLE

SELECT decimal(integer_value) as double_value
integer_valuedouble_value
11.0
-1-1.0
00.0
nullnull

DISTINCT

Returns an array of all distinct items from the given array converted to strings.

SYNTAX

distinct(array)

  • array
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The array to remove duplicates from

OUTPUT

{'type': 'array', 'elementType': 'string'}

EXAMPLE

SELECT distinct(array) as distinctified
arraydistinctified
[1, 1, 2, 1, 2, 3, 1, 3]['1', '2', '3']
[1, 2, 3]['1', '2', '3']
[null, null][null]
[][]
nullnull

DURATION_TO_ISO

Converts to a given duration in the specified time unit to the ISO-8601 format. If minimal is true, the format is PnYnMnDTnHnMnS, which stands for 'Period: number of Years, number of Months, number of Days, Time: number of Hours, number of Minutes, number of Seconds.' If minimal is false, only the significant units are shown.

SYNTAX

duration_to_iso(duration, time_unit, minimal)

  • duration
    • Type: double
    • Description: The positive duration value to convert to iso
  • time_unit
    • Type: string
    • Description: A three letter string representing the unit of the duration. Valid options are: 'SEC', 'MIN', 'HRS', 'DAY', 'WKS'
  • minimal
    • Type: bool
    • Description: Determines whether the duration is returned with only the significant values

OUTPUT

string

EXAMPLES

SELECT duration_to_iso(duration, time_unit, minimal) as isofied_duration
durationtime_unitminimalisofied_duration
10.0SECFalseP0Y0M0DT0H0M10.000S
60.0SECFalseP0Y0M0DT0H1M0.000S
61.5MINFalseP0Y0M0DT1H1M30.000S
32HRSFalseP0Y0M1DT8H0M0.000S
2.483DAYFalseP0Y0M2DT11H35M31.200S
3.42WKSFalseP0Y0M23DT22H33M36.000S
10.5MINTruePT10M30S
2.483DAYTruePT59H35M31.2S
-3.45daYFalseP0Y0M-3DT-10H-48M0.000S
SELECT duration_to_iso(duration, time_unit, minimal) as isofied_duration
durationtime_unitminimalisofied_duration
1.0Truenull
nullSECFalsenull
60.0nullFalsenull
61.5MINnullnull

EQUALS_IGNORE_CASE

Case-insensitively checks whether two values are equal.

SYNTAX

equals_ignore_case(str1, str2)

  • str1
    • Type: string
    • Description: The first value to compare
  • str2
    • Type: string
    • Description: The second value to compare

OUTPUT

bool

EXAMPLE

SELECT equals_ignore_case(family_member, state) as is_equal
family_memberstateis_equal
maMATrue
PApaTrue
SisterCAFalse
Brothernullnull
nullWYnull
nullnullnull

EXTRACT

Matches a string against a Java regular expression, and returns the specified group. The group 0 refers to the entire pattern. Indexes of individual groups start at 1. Returns null if no match is found, the group number or regular expression is invalid, or any of the input, regular expression, or group is null, the result is null.

SYNTAX

extract(input, regex, group_index)

  • input
    • Type: string
    • Description: The string to be matched
  • regex
    • Type: string
    • Description: The Java regular expression specifying the pattern to extract
  • group_index
    • Type: int
    • Description: The index of the group in the regular expression to extract

OUTPUT

string

EXAMPLES

SELECT extract('2001: A Space Odyssey', '(\\d+):\\s?([\\w\\s]+)', group_idx) AS result
group_idxresult
02001: A Space Odyssey
12001
2A Space Odyssey
3null
-1null
nullnull
SELECT extract(input, regex, group_index) as result
inputregexgroup_indexresult
null valuesnullnullnull
null valuesnull1null
nullnull1null
nullnullnullnull
SELECT extract('invalid regex', regex, 1) as result
regexresult
(invalid regexnull
(wrong regex)null

EXTRACT_ALL

Matches a string against a Java regular expression, and returns results for all groups. Returns null if no match is found, the regular expression is invalid, or either of the input or regular expression is null.

SYNTAX

extract_all(input, regex)

  • input
    • Type: string
    • Description: The string to be matched
  • regex
    • Type: string
    • Description: The Java regular expression specifying the pattern to extract

OUTPUT

{'type': 'array', 'elementType': 'string'}

EXAMPLE

SELECT extract_all(input, regex) as result
inputregexresult
2001: A Space Odyssey(\d+):\s?([\w\s]+)['2001', 'A Space Odyssey']
Gone With The Wind^.The.$[]
The Good, The Bad and The Ugly(The)['The', 'The', 'The']
The given regex is invalid(null
barb(b*)['b', '', '', 'b', '']
null(The)null
The Third Mannullnull
nullnullnull

FLOOR

Returns the greatest integer that is smaller than or equal to the given number.

SYNTAX

floor(input)

  • input
    • Type: number
    • Description: The input to compute the floor for.

OUTPUT

long

EXAMPLE

SELECT floor(input_value) AS floored_value
input_valuefloored_value
1.11
1.91
-1.1-2
-1.9-2
0.00
nullnull
NaN0
-Infinity-9223372036854775808
Infinity9223372036854775807

GET

Return the value in the array at the specified index. If the index is invalid, return null. An index is invalid if it is negative, not an integer, or greater than or equal to the size of the array it is applied to.

SYNTAX

get(data, index)

  • data
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: An array of values that you can index into.
  • index
    • Type: int
    • Description: The index to apply to the data argument. Indexes are integers between 0 (inclusive) and the length of the array (exclusive).

OUTPUT

any

EXAMPLE

SELECT get(col, idx) AS new_col
colidxnew_col
['a', 'b', 'c']0a
['a', 'b', 'c']1b
['a', 'b', 'c']-1null
['a', 'b', 'c']4null
['a', 'b', 'c']nullnull
[]0null
null0null

GREATEST

Returns the greatest value of the list of values, skipping null values. It will return null if and only if all parameters are null. String characters are ranked from least to greatest in the following order:!"'#$%&()*+-,./:;<=>?_@[\]^{|}~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz

SYNTAX

greatest()

OUTPUT

any

EXAMPLES

SELECT branch, greatest(sales_jan, sales_feb, sales_mar) AS q1_highest_month_sales
sales_febbranchsales_marsales_janq1_highest_month_salesbranch
120Cambridge80100120Cambridge
70San Francisco90null90San Francisco
nullLondonnullnullnullLondon
SELECT greatest(...string_array) AS greatest_string
string_arraygreatest_string
['a', 'b']b
['ab', 'aa']ab
['a', 'aa']aa
[null, 'a', null]a
['b', 'a', null]b
[null, 'ab', 'az']az
[null]null
[null, null]null
SELECT greatest(...array_array) AS greatest_array
array_arraygreatest_array
[[1], [2]][2]
[[1, 2], [1, 3]][1, 3]
[[null, 3], [null, 5]][null, 5]
[[0, 3], [0, null]][0, 3]
[[3], [1, 2]][3]
[[3], [3, -1]][3, -1]
[null, [3]][3]
[null, [null]][null]
[null, [null], [null, null]][null, null]

HASH

Returns a number, called a hash code, based on the input data. The hash code is always the same given the same inputs and is practically guaranteed to be different given different inputs.

SYNTAX

hash()

OUTPUT

long

EXAMPLES

SELECT hash(A, B) AS result
ABresult
twenty[3.0, 8.0, 9.0]1830378446688894221
forty[18.9, 21.1]-5987411484308668877
forty[]4495871505993623155
fortynull353237852084435270
nullnull-7218393911220984134
SELECT hash(A, B) AS result
ABresult
[null][null]8068983417364430449
[null][[]]-4768040760762171177
[[]][null]865141773612688779
null[[]]5085241997507925413
[[]]null-2601053220775996364
SELECT hash(A, B) = hash(array(A, B)) AS "hash(elements) = hash(array)", hash(A, B) = hash({A, B}) AS "hash(elements) = hash(struct)"
ABhash(elements) = hash(array)hash(elements) = hash(struct)
onetwoFalseFalse

HEX

Returns the hexadecimal value of the argument.

SYNTAX

hex(input)

  • input
    • Type: int
    • Description: The input value.

OUTPUT

string

EXAMPLE

SELECT hex(input_value) AS hex_value
input_valuehex_value
11
-1FFFFFFFFFFFFFFFF
00
123453039
-54321FFFFFFFFFFFF2BCF
-2147483648FFFFFFFF80000000
21474836477FFFFFFF
nullnull

INTEGER

Converts a numeric value to int type.

SYNTAX

integer(input)

  • input
    • Type: number
    • Description: The value to convert

OUTPUT

int

EXAMPLE

SELECT integer(double_value) as integer_value
double_valueinteger_value
1.11
-1.1-1
1.91
-1.9-1
0.00
nullnull
NaN0
-Infinity-2147483648
Infinity2147483647

ISNULL

Returns true if and only if the argument is null.

SYNTAX

isnull(input)

  • input
    • Type: any
    • Description: The input value.

OUTPUT

bool

EXAMPLES

SELECT isnull(input_value) AS is_it_null
input_valueis_it_null
i am not nullFalse
False
nullTrue
SELECT isnull(input_value) AS is_it_null
input_valueis_it_null
TrueFalse
FalseFalse
nullTrue
SELECT isnull(input_value) AS is_it_null
input_valueis_it_null
[1]False
[null]False
[]False
nullTrue

ISO_DIFFERENCE_DAYS

Returns the number of days between two ISO date-time strings rounded down to the nearest integer. If the first day comes after the second day, the result will be negative.

SYNTAX

iso_difference_days(date1, date2)

  • date1
    • Type: string
    • Description: The first date in the difference
  • date2
    • Type: string
    • Description: The second date in the difference

OUTPUT

int

EXAMPLE

SELECT iso_difference_days(date1, date2) AS date_difference
date2date1date_difference
1987-10-061987-10-051
1987-10-041987-10-05-1
1987-10-051987-10-050
1987-10-061987-10-05T01:26:001
1987-10-06T01:26:001987-10-051
1987-10-06T01:25:001987-10-05T01:26:001
1987-10-06T01:25:001987-10-05-28T01:26:00null
1987-10-06T01:25:001987-10-34T01:26:00null
1987-10-05nullnull
null1987-10-05null
nullnullnull

ISO_DATETIME_ADD

Adds an amount of time to a datetime string in ISO format.

SYNTAX

iso_datetime_add(datetime, duration, time_unit)

  • datetime
    • Type: string
    • Description: A datetime string formatted to ISO-8601 duration format PnYnMnDTnHnMnS
  • duration
    • Type: double
    • Description: Time unit, must be whole number if unit is 'YRS' or 'MTH'
  • time_unit
    • Type: string
    • Description: A 3 letter string representing the unit of the duration. Valid options are: 'SEC', 'MIN', 'HRS', 'DAY', 'WKS', 'YRS', 'MTH'

OUTPUT

string

EXAMPLES

SELECT iso_datetime_add(datetime, duration, time_unit) AS new_datetime
durationtime_unitdatetimenew_datetime
3.1day2050-02-04T12:30:002050-02-07T14:54:00
2.0yRs2010-08-04T00:00:002012-08-04T00:00:00
-5.0MtH2000-08-04T00:00:002000-03-04T00:00:00
SELECT iso_datetime_add(datetime, duration, time_unit) AS new_datetime
durationtime_unitdatetimenew_datetime
-5.0mTh2000-08-04T00:00:002000-03-04T00:00:00
-5.1mTh2000-08-04T00:00:00null
-5.0yrs2000-08-04T00:00:001995-08-04T00:00:00
-5.1yrs2000-08-04T00:00:00null
-5.02000-08-04T00:00:00null
-5.0mThnullnull
-5.0mThnull
nulldAy2050-02-04T12:30:00null
1.0null2050-02-04T12:30:00null

IS_NOT_NULL

Returns false if the input is equal to null, and true if it is not equal to null.

SYNTAX

is_not_null(input)

  • input
    • Type: any
    • Description: The input to check

OUTPUT

bool

EXAMPLES

SELECT is_not_null(string_col) as string_not_null
string_colstring_not_null
True
nullTrue
content!True
nullFalse
SELECT is_not_null(string_array_col) as array_not_null
string_array_colarray_not_null
[]True
[null]True
['']True
['content!']True
nullFalse

IS_NOT_EMPTY

Returns true if the input is not null and not equal to the empty string. Returns false if the input is equal to the empty string. Returns null if the input is null.

SYNTAX

is_not_empty(input)

  • input
    • Type: string
    • Description: The input to check

OUTPUT

bool

EXAMPLE

SELECT is_not_empty(string_col) as not_empty
string_colnot_empty
True
nullTrue
contentTrue
False
nullnull

JACCARD_INDEX

Computes the Jaccard similarity index between two sets. It is calculated as the intersection of the two sets divided by the union of the two sets.

SYNTAX

jaccard_index(a, b)

  • a
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The first set
  • b
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The second set

OUTPUT

double

EXAMPLE

SELECT jaccard_index(set_a, set_b) AS similarity
set_aset_bsimilarity
[1, 2, 3][1, 2, 3]1.0
[1, 2, 3][2, 1, 3]1.0
[1, 2, 3][2, 2, 1, 3]1.0
[1, 2, 3, 4, 5][1, 3, 5, 7]0.5
[9, 8, 7, 6][5, 2, 1, 9]0.14285714285714285
[5, 2, 1, 9][9, 8, 7, 6]0.14285714285714285
[][1]0.0
[1][]0.0
[][]NaN
null[]null
[]nullnull
nullnullnull

LEAST

Returns the smallest value of the list of values, skipping null values. It will return null if and only if all parameters are null. String characters are ranked from least to greatest in the following order:!"'#$%&()*+-,./:;<=>?_@[\]^{|}~0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz

SYNTAX

least()

OUTPUT

any

EXAMPLES

SELECT branch, least(sales_jan, sales_feb, sales_mar) AS q1_lowest_month_sales
sales_febbranchsales_marsales_janq1_lowest_month_salesbranch
120Cambridge8010080Cambridge
70San Francisco90null70San Francisco
nullLondonnullnullnullLondon
SELECT least(...string_array) AS least_string
string_arrayleast_string
['a', 'b']a
['ab', 'aa']aa
['a', 'aa']a
[null, 'a', null]a
['b', 'a', null]a
[null, 'ab', 'az']ab
[null]null
[null, null]null
SELECT least(...array_array) AS least_array
array_arrayleast_array
[[1], [2]][1]
[[1, 2], [1, 3]][1, 2]
[[null, 3], [null, 5]][null, 3]
[[0, 3], [0, null]][0, 3]
[[3], [1, 2]][1, 2]
[[3], [3, -1]][3]
[null, [3]][3]
[null, [null]][null]
[null, [null], [null, null]][null]

LEN

Returns the number of non-null, non-empty, non-whitespace items in an array.

SYNTAX

len(array)

  • array
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The array to count items of

OUTPUT

int

EXAMPLE

SELECT len(array) AS array_length
arrayarray_length
['1', '2', '3']3
['whitespace', "doesn't", 'get', ' \n ', 'counted']4
['']0
[null]0
[]0
[null, 'non_null', '', 'non_empty']2
nullnull

LENGTH

Returns the length, in characters, of the given string value

SYNTAX

length(input)

  • input
    • Type: string
    • Description: The value to measure

OUTPUT

int

EXAMPLE

SELECT movie, length(movie) as num_characters
movienum_charactersmovie
Citizen Kane12Citizen Kane
Casablanca10Casablanca
3
0
nullnullnull

LOWER

Converts a string to lowercase. Applies to all characters.

SYNTAX

lower(input)

  • input
    • Type: string
    • Description: The string to convert to lowercase

OUTPUT

string

EXAMPLE

SELECT lower(string) as lowercase_string
stringlowercase_string
all_lowerall_lower
ALL UPPERall upper
mIxEdmixed
nullnull

MATCHES

Matches a string against a Java regular expression. Returns true if the regular expression matches the entire input string, false if it does not, and null if the regular expression is invalid or if either of the input or regular expression is null.

SYNTAX

matches(regex, input)

  • regex
    • Type: string
    • Description: The Java regular expression specifying the pattern to match
  • input
    • Type: string
    • Description: The string to be matched

OUTPUT

bool

EXAMPLE

SELECT matches(regex, input) as result
inputregexresult
2001: A Space Odyssey(\d+):\s?([\w\s]+)True
Gone With The Wind^.The.$True
Gone With The WindCasablancaFalse
The Good, The Bad and The Ugly(The)False
Citizen Kane(invalidnull
null(The)null
The Third Mannullnull
nullnullnull

MD5

Returns a hash value, according to the MD5 algorithm.

SYNTAX

md5(input)

  • input
    • Type: string
    • Description: The input value.

OUTPUT

string

EXAMPLE

SELECT md5(input_value) AS md5_hash
input_valuemd5_hash
I am a string.a6be8c1f86bffe9f7a9397e578f7a446
I will be hashed.6e76226f2116eea6e5cf0f4783762e19
Unless of courseda6cd9f98615af7b253a4d321e12c6a4
Your computer has crashed.d45aee724e0cfbe2a1f00613ce118f81
d41d8cd98f00b204e9800998ecf8427e
nullnull

MONTH

Gets the month field of a date.

SYNTAX

month(datetime)

  • datetime
    • Type: string
    • Description: The timestamp to get the month of

OUTPUT

int

EXAMPLES

SELECT month(date) AS month
datemonth
1997-1010
2077-06-23+06:456
2001-03-14T15:43:19.1553
1580-01-04 06:30-04:001
SELECT month(date) AS month, datetime_error(date) as error
descriptiondateerrormonth
null valuenullnullnull
empty stringText '' could not be parsed at index 0null
not a dateI am not a dateText 'I am not a date' could not be parsed at index 0null
starts date-like2018-03-oopsText '2018-03-oops' could not be parsed, unparsed text found at index 7null
bad date2018-03-95Text '2018-03-95' could not be parsed: Invalid value for DayOfMonth (valid values 1 - 28/31): 95null

MONOTONICALLY_INCREASING_ID

Returns a monotonically increasing identifier. The identifiers are guaranteed to be unique but not sequential. The results of this function are non-deterministic, different runs on the same dataset could produce different results.

SYNTAX

monotonically_increasing_id()

OUTPUT

long

EXAMPLE

SELECT monotonically_increasing_id() AS id, *
nameidname
Alice0Alice
Bob1Bob
Carol2Carol
Dave3Dave

NUM

Returns the first numeric element of an array parsed as a double.

SYNTAX

num(array)

  • array
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The array to find the first numeric element of

OUTPUT

double

EXAMPLE

SELECT num(array) AS first_num_from_array
arrayfirst_num_from_array
['1.2', null]1.2
[null, '23', null]23
[null, '3E7', '1.2']30000000.0
['abc', 'true']null
nullnull
[]null
[null]null

PARSE_JSON_ARRAY

Parses strings as JSON-encoded arrays of strings, returning the decoded values. All results are combined into a single multivalue, with no deduplication. Inputs that cannot be parsed as a JSON array are ignored. Values in arrays that are not strings are ignored.

SYNTAX

parse_json_array(json_array)

  • json_array
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: A multivalue containing JSON-encoded arrays of string

OUTPUT

{'type': 'array', 'elementType': 'string'}

EXAMPLE

SELECT parse_json_array(json_array) AS parsed
json_arrayparsed
['["a"]']['a']
['["a", "b", "c"]', '["d"]', '["e", "f"]']['a', 'b', 'c', 'd', 'e', 'f']
['1', '"x"', '["x"', '["a"]', null, '["b", 1, ["x"], {"x":"x"}, "c"]', '{"x":"x"}']['a', 'b', 'c']
['a', '1', '[', '["unterminated"', '{"un":"terminated"'][]
[][]
nullnull

POW

Returns the value of the first argument raised to the power of the second argument

SYNTAX

pow(base, exponent)

  • base
    • Type: number
    • Description: The base of the exponential expression
  • exponent
    • Type: number
    • Description: The exponent of the exponential expression

OUTPUT

double

EXAMPLE

SELECT pow(base, exponent) as result
exponentbaseresult
239
2.03.512.25
3-2-8
-320.125
2.525.656854249492381
-2.520.1767766952966369
-2.5-2NaN
001
2nullnull
null2null
nullnullnull

RAND

Returns a random value within the range 0.0 (inclusive) to 1.0 (exclusive).

SYNTAX

rand()

OUTPUT

double

EXAMPLE

SELECT row_number, rand() as random_double
row_numberrow_numberrandom_double
110.7056362008458043
220.8900315077291642
330.48696337565223247
440.4192577676064432

REPLACE

Replaces all occurrences of the target in the given string with the replacement.

SYNTAX

replace(string, target, replacement)

  • string
    • Type: string
    • Description: The string to replace parts of
  • target
    • Type: string
    • Description: The substring to replace in the string
  • replacement
    • Type: string
    • Description: The string to replace the target with

OUTPUT

string

EXAMPLES

SELECT replace(string, target, replacement) AS replaced
replacementstringtargetreplaced
xaabacadaxxbxcxd
xa
x123x1x2x3x
too much spacetoomuchspace
xnullanull
xaabacadnullnull
nullaabacadanull
SELECT replace('Hi my name is {name}!', '{name}', names) AS introductions
namesintroductions
SamHi my name is Sam!
AlexHi my name is Alex!
SkyHi my name is Sky!
TimothyHi my name is Timothy!
SharonHi my name is Sharon!
HarshHi my name is Harsh!

REPLACE_ALL

Replaces all matches of the target regex in the given string with the replacement.

SYNTAX

replace_all(string, target_regex, replacement)

  • string
    • Type: string
    • Description: The string to replace parts of
  • target_regex
    • Type: string
    • Description: The regex to replace in the string
  • replacement
    • Type: string
    • Description: The string to replace the target with

OUTPUT

string

EXAMPLE

SELECT replace_all(string, target_regex, replacement) AS replaced
replacementstringtarget_regexreplaced
xaaaaabaacad[a]xxxxxbxxcxd
xaaaaabaacad[a]+xbxcxd
UNKthis and that or this(this|that)UNK and UNK or UNK
xinvalid regex returns nullinvalid[regexnull
xnull[a]null
xaaaaabaacadnullnull
nullaaaaabaacad[a]null

RINT

Returns the double value that is closest in value to the argument and is equal to a mathematical integer.

SYNTAX

rint(input)

  • input
    • Type: number
    • Description: The input value

OUTPUT

double

EXAMPLE

SELECT rint(input_value) as rounded_value
input_valuerounded_value
1.11.0
-1.9-2.0
3.5000014.0
NaNNaN
InfinityInfinity
-Infinity-Infinity
nullnull

ROUND

Returns the closest Long value to the argument.

SYNTAX

round(input)

  • input
    • Type: number
    • Description: The input value

OUTPUT

long

EXAMPLE

SELECT round(input_value) as rounded_value
input_valuerounded_value
1.11
-1.9-2
3.5000014
NaN0
Infinity9223372036854775807
-Infinity-9223372036854775808
nullnull

RUNTIME_DATE

Returns a string representing the date in ISO Format at the time the function is executed for each value. Each row may have a different runtime date.

SYNTAX

runtime_date()

OUTPUT

string

EXAMPLE

SELECT row_number, runtime_date() as date
row_numberrow_numberdate
112019-09-09Z
222019-09-09Z
332019-09-09Z

RUNTIME_DATETIME

Returns a string representing the date and time in ISO Format at the time the function is executed for each value. Each row may have a different runtime date and time.

SYNTAX

runtime_datetime()

OUTPUT

string

EXAMPLE

SELECT row_number, runtime_datetime() as datetime
row_numberrow_numberdatetime
112019-09-09T02:49:05Z
222019-09-09T02:49:05Z
332019-09-09T02:49:05Z

SHA1

Returns a hash value, according to the SHA-1 algorithm.

SYNTAX

sha1(input)

  • input
    • Type: string
    • Description: The input value to be hashed.

OUTPUT

string

EXAMPLE

SELECT sha1(value) AS sha1_hashified
valuesha1_hashified
My birthday is October 197e79c798abe758b227e111c2ddc98b01a4cc607
My birthday is October 20294ac31298fd3e129380e770c22db998ac71e58
My borthday is October 10e7fa8dce7a3927f28413e0eef65a62e5a6c6dfa
da39a3ee5e6b4b0d3255bfef95601890afd80709
nullnull

SIZE

Returns the number of elements in a given array

SYNTAX

size(input)

  • input
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The array in which to count elements

OUTPUT

int

EXAMPLE

SELECT collection, size(members) as num_members
collectionmemberscollectionnum_members
Spices['Salt', 'Pepper']Spices2
Destiny's Child['Beyoncé Knowles', 'Kelly Rowland', 'Michelle Williams']Destiny's Child3
Array containing null['the next value is null', null]Array containing null2
Empty Array[]Empty Array0
Null ArraynullNull Arraynull

SORT_ARRAY

Sorts the input array in ascending order, according to the natural ordering of the array elements. This will filter out null elements.

SYNTAX

sort_array(input)

  • input
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: An array of values to be sorted.

OUTPUT

{'type': 'array', 'elementType': 'any'}

EXAMPLES

SELECT sort_array(unsorted_array) AS sorted_array
unsorted_arraysorted_array
['def', '', 'ghi', null, 'abc']['', 'abc', 'def', 'ghi']
['10', '11', '100', '101', '111']['10', '100', '101', '11', '111']
SELECT sort_array(unsorted_array) AS sorted_array
unsorted_arraysorted_array
[True, False, False, True, null][False, False, True, True]
SELECT sort_array(unsorted_array) AS sorted_array
unsorted_arraysorted_array
[3.2, -3.2, 0.0, 'NaN', '-Infinity', 'Infinity']['-Infinity', -3.2, 0.0, 3.2, 'Infinity', 'NaN']
SELECT sort_array(unsorted_array) AS sorted_array
unsorted_arraysorted_array
[[1, 2, 4], [1, 2, 3]][[1, 2, 3], [1, 2, 4]]

SPLIT

Splits a string into an array of strings based on where the provided regex matches.

SYNTAX

split(string, regex)

  • string
    • Type: string
    • Description: The string to split
  • regex
    • Type: string
    • Description: The regex to split by when matched

OUTPUT

{'type': 'array', 'elementType': 'string'}

EXAMPLE

SELECT split(string, regex) as result
regexstringresult
\sThis is a sentence.['This', 'is', 'a', 'sentence.']
212345['1', '345']
pineappleThere is no match!['There is no match!']
grape['g', 'r', 'a', 'p', 'e']
grape['']
['']
grapenullnull
nullgrapenull

STRIP_ACCENTS

Removes diacritics from a string.

SYNTAX

strip_accents(string)

  • string
    • Type: string
    • Description: The string to strip the accents from

OUTPUT

string

EXAMPLE

SELECT strip_accents(with_accents) as no_accents
with_accentsno_accents
Et ça sera sa moitié.Et ca sera sa moitie.
aàáâäãåāæaaaaaaaaæ
α
cçćčcccc
eèéêëēėęeeeeeeee
ἦἧᾚηηΗ
iîïíīįìiiiiiiiii
ll
nñńnnn
oôöòóōõøœoooooooøœ
sśšßsssß
uûüùúūuuuuuu
yÿyy
zžźżzzzz

STR

Converts any input into a string

SYNTAX

str(some_object)

  • some_object
    • Type: any
    • Description: The object to convert to a string

OUTPUT

string

EXAMPLES

SELECT str(booleans) AS stringified
booleansstringified
Truetrue
Falsefalse
SELECT str(int) AS stringified
intstringified
11
SELECT str(double) AS stringified
doublestringified
1.01.0
SELECT str(string) AS stringified
stringstringified
this is a stringthis is a string
nullnull

STR_FORMAT

Returns a string using the specified format template with the inserted string arguments

SYNTAX

str_format(format_template, objects_to_insert)

  • format_template
    • Type: string
    • Description: A string detailing how and where the string arguments should be formatted and inserted
  • objects_to_insert
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: An array of objects to format and insert into the template

OUTPUT

string

EXAMPLES

SELECT str_format(templates, integer_values) AS format_result
templatesinteger_valuesformat_result
Is it non-null?: %b, %b, %b[3, null, 7]Is it non-null?: true, false, true
Format integer: |%d|[1]Format integer: |1|
Pad integer with some number of zeros: |%010d|[1]Pad integer with some number of zeros: |0000000001|
Use thousands-separator: |%,d|[10000000]Use thousands-separator: |10,000,000|
Print positive integers with a plus: |%+d|[1]Print positive integers with a plus: |+1|
Add a space before positive numbers: |% d|[1]Add a space before positive numbers: | 1|
Enclose negative ints with parenthesis: |%(d|[-1]Enclose negative ints with parenthesis: |(1)|
Some combination of the above: |%+(010d|[1]Some combination of the above: |+000000001|
Some combination of the above: |%+(010d|[-1]Some combination of the above: |(00000001)|
Octal string: %o[142]Octal string: 216
Format number as string: %s[3]Format number as string: 3
Hex string: %x[142]Hex string: 8e
Uppercase hex string: %X[142]Uppercase hex string: 8E
SELECT str_format(templates, double_values) as format_result
templatesdouble_valuesformat_result
Double to hex: %a[120.382]Double to hex: 0x1.e1872b020c49cp6
Double in scientific notation: %e[15042.0]Double in scientific notation: 1.504200e+04
Double: %f[142.0]Double: 142.000000
Double, possibly in scientific notation if long enough: %g[100000.0]Double, possibly in scientific notation if long enough: 100000
Double, possibly in scientific notation if long enough: %g[1000000.0]Double, possibly in scientific notation if long enough: 1.00000e+06
Four decimal places: %.4f[1.234]Four decimal places: 1.2340
No decimal places: %.0f[1.234]No decimal places: 1
Given null value: %d[null]Given null value: null
Returns null given a null array: %dnullnull
SELECT str_format(templates, string_values) as format_result
templatesstring_valuesformat_result
Hex String of value from hashCode() method: %h['example']Hex String of value from hashCode() method: b125116a
Format as string: %s['i am string']Format as string: i am string
Maximum 7 characters: %.7s['i am string']Maximum 7 characters: i am st
Specify argument index: %2$s, %1$s['one', 'two']Specify argument index: two, one
Use literal %% sign[]Use literal % sign
SELECT str_format(time_format, array(1350933383321L)) AS time_formatted
time_formattime_formatted
%tB %<td, %<tY, %<tl:%<tM%<tpOctober 22, 2012, 7:16pm
%tH | Hour of the day for the 24-hour clock, with a leading zero if necessary19 | Hour of the day for the 24-hour clock, with a leading zero if necessary
%tI | Hour for the 12-hour clock, with a leading zero as necessary07 | Hour for the 12-hour clock, with a leading zero as necessary
%tk | Hour of the day for the 24-hour clock19 | Hour of the day for the 24-hour clock
%tl | Hour for the 12-hour clock7 | Hour for the 12-hour clock
%tM | Minute within the hour formatted as two digits with a leading zero as necessary16 | Minute within the hour formatted as two digits with a leading zero as necessary
%tS | Seconds within the minute, formatted as two digits with a leading zero as necessary23 | Seconds within the minute, formatted as two digits with a leading zero as necessary
%tL | Millisecond within the second formatted as three digits with leading zeros as necessary321 | Millisecond within the second formatted as three digits with leading zeros as necessary
%tN | Nanosecond within the second, as 9 digits with leading zeros as necessary321000000 | Nanosecond within the second, as 9 digits with leading zeros as necessary
%tp | Locale-specific morning or afternoon marker in lower casepm | Locale-specific morning or afternoon marker in lower case
%Tp | Locale-specific morning or afternoon marker in UPPER CASEPM | Locale-specific morning or afternoon marker in UPPER CASE
%tz | RFC 822 style numeric time zone offset from GMT+0000 | RFC 822 style numeric time zone offset from GMT
%tZ | A string representing the abbreviation for the time zoneUTC | A string representing the abbreviation for the time zone
%ts | Seconds since the beginning of the epoch starting at 1 January 1970 00:00:00 UTC1350933383 | Seconds since the beginning of the epoch starting at 1 January 1970 00:00:00 UTC
%tQ | Milliseconds since the beginning of the epoch starting at 1 January 1970 00:00:00 UTC1350933383321 | Milliseconds since the beginning of the epoch starting at 1 January 1970 00:00:00 UTC
%tB | Locale-specific full month nameOctober | Locale-specific full month name
%tb | Locale-specific abbreviated month nameOct | Locale-specific abbreviated month name
%th | Same as %%tbOct | Same as %tb
%tA | Locale-specific full name of the day of the weekMonday | Locale-specific full name of the day of the week
%ta | Locale-specific short name of the day of the weekMon | Locale-specific short name of the day of the week
%tC | Four-digit year divided by 100, formatted as two digits with leading zero as necessary20 | Four-digit year divided by 100, formatted as two digits with leading zero as necessary
%ty | Last two digits of the year, formatted with leading zeros as necessary12 | Last two digits of the year, formatted with leading zeros as necessary
%tY | Year, formatted as at least four digits with leading zeros as necessary2012 | Year, formatted as at least four digits with leading zeros as necessary
%tj | Day of year, formatted as three digits with leading zeros as necessary296 | Day of year, formatted as three digits with leading zeros as necessary
%tm | Month, formatted as two digits with leading zeros as necessary10 | Month, formatted as two digits with leading zeros as necessary
%td | Day of month, formatted as two digits with leading zeros as necessary22 | Day of month, formatted as two digits with leading zeros as necessary
%te | Day of month, formatted as two digits with no leading zeros22 | Day of month, formatted as two digits with no leading zeros
%tR | Time formatted for the 24-hour clock as "%%tH:%%<tM"19:16 | Time formatted for the 24-hour clock as "%tH:%<tM"
%tT | Time formatted for the 24-hour clock as "%%tH:%%<tM:%%<tS"19:16:23 | Time formatted for the 24-hour clock as "%tH:%<tM:%<tS"
%tr | Time formatted for the 12-hour clock as "%%tI:%%<tM:%%<tS %%<Tp"07:16:23 PM | Time formatted for the 12-hour clock as "%tI:%<tM:%<tS %<Tp"
%tD | Date formatted as "%%tm/%%<td/%%<ty"10/22/12 | Date formatted as "%tm/%<td/%<ty"
%tF | ISO 8601 complete date formatted as "%%tY-%%<tm-%%<td"2012-10-22 | ISO 8601 complete date formatted as "%tY-%<tm-%<td"
%tc | Date and time formatted as "%%ta %%<tb %%<td %%<tT %%<tZ %%<tY"Mon Oct 22 19:16:23 UTC 2012 | Date and time formatted as "%ta %<tb %<td %<tT %<tZ %<tY"

STR_JOIN

Concatenates all strings in an array into a single string, inserting the delimiter between each item.

SYNTAX

str_join(delimiter, string_array)

  • delimiter
    • Type: string
    • Description: The string to insert between each item in the array
  • string_array
    • Type: {'type': 'array', 'elementType': 'string'}
    • Description: The array of string to join into a single string

OUTPUT

string

EXAMPLE

SELECT str_join(delimiter, string_values) AS concatenated_array
delimiterstring_valuesconcatenated_array
-['1', '2', '3']1-2-3
,['1', '2', '3']1, 2, 3
['1', '2', '3']123
|['make', 'a', 'regular', 'expression']make|a|regular|expression
-[]
null['1', '2', '3']null
-nullnull
-[null]

SUBSTR

Removes the portion of the string before the start_index. If start_index is positive or zero, it indexes the string starting from the beginning; if negative, it indexes the string starting from the end (so -1 gives you the last character of the string, -2 the last two characters and so on). The function returns the substring starting from the indexed position and continuing to the end of the string, or null if the indexed position is past the end of the string

SYNTAX

substr(string, start_index)

  • string
    • Type: string
    • Description: The string to get the substring of
  • start_index
    • Type: int
    • Description: The index to start the substring at

OUTPUT

string

EXAMPLE

SELECT substr(string, start_index) as substrings
start_indexstringsubstrings
0piepie
1pieie
3pie
4pienull
-1piee
-4piepie
0nullnull
nullpienull

SUBSTR2

Returns the part of the given string between the start_index and end_index. The values start_index and end_index may be either zero or positive (indexing from the start of the string) or negative (indexing from the end). Together, start_index (inclusive) and end_index (exclusive) determine the range of the string. If the range is empty then null is returned.

SYNTAX

substr2(string, start_index, end_index)

  • string
    • Type: string
    • Description: The string to get the substring of
  • start_index
    • Type: int
    • Description: The index to start the substring at
  • end_index
    • Type: int
    • Description: The index to end the substring at

OUTPUT

string

EXAMPLE

SELECT substr2(string, start_index, end_index) as substrings
end_indexstart_indexstringsubstrings
7301234567893456
61"quote"quote
55start_index = end_index
45end_index < start_indexnull
0-1start_index < 0null
-10start index is negativestart index is negativ
-1-10both start_index and end_index are negativee negativ
-3-1negative start_index > negative end_indexnull
190end_index > lengthend_index > length
00nullnull
1nullstart_index is nullnull
null0end_index is nullnull

TAMR_ID

Computes a unique key from a string source ID and string entity ID. This key is identical to the one generated for the unified dataset based on origin_source_name and origin_entity_id.

SYNTAX

tamr_id(source_id, entity_id)

  • source_id
    • Type: string
    • Description: the source identifier
  • entity_id
    • Type: string
    • Description: the entity identifier

OUTPUT

string

EXAMPLE

SELECT *, tamr_id(source_id, entity_id) as tamr_id
source_identity_idsource_idtamr_identity_id
nullnullnullnullnull
source1.csvnullsource1.csvnullnull
nullentity-idnullnullentity-id
another_sourceentity-idanother_source9055106661574376142entity-id
source1.csvrec-653-orgsource1.csv-3639548815871415108rec-653-org
source1.csvrec-9996-orgsource1.csv713467184200753482rec-9996-org
source1.csvrec-6137-orgsource1.csv-5300772691460392676rec-6137-org
source1.csvrec-7100-orgsource1.csv6054007634817115275rec-7100-org
source1.csvrec-9526-orgsource1.csv-8395450447775343480rec-9526-org

TO_BOOLEAN

Attempts to convert a value of any type to a Boolean. If conversion fails, it returns null. Error messages for conversion failures can be accessed with to_boolean_error().

SYNTAX

to_boolean(input)

  • input
    • Type: any
    • Description: the input value

OUTPUT

bool

EXAMPLES

SELECT to_boolean(string_col) as converted
string_colconverted
trueTrue
TrueTrue
trueFalse
falseFalse
NOT TRUEFalse
nullnull
SELECT to_boolean(double_col) as converted
double_colconverted
1.0True
0.0False
0.5True
5e-324True
1.7976931348623157e+308True
InfinityTrue
-InfinityTrue
NaNFalse
SELECT to_boolean(string_arr_col) as converted
string_arr_colconverted
[]null
['true']True
['false']False
['false', 'true']null

TO_BOOLEAN_ERROR

If converting the given value to a Boolean using to_boolean() would result in an error, it returns the error; otherwise returns null.

SYNTAX

to_boolean_error(input)

  • input
    • Type: any
    • Description: the input value

OUTPUT

string

EXAMPLES

SELECT to_boolean(string_col) as converted, to_boolean_error(string_col) as error
string_colerrorconverted
truenullTrue
falsenullFalse
nullnullnull
SELECT to_boolean(string_arr_col) as converted, to_boolean_error(string_arr_col) as error
string_arr_colerrorconverted
[]ConversionException: can't convert empty arraynull
['true']nullTrue
['true', 'false']ConversionException: can't convert array of length: 2null
nullnullnull

TO_DOUBLE

Attempts to convert a value of any type to a Double. If conversion fails, it returns null. Error messages for conversion failures can be accessed with to_double_error().

SYNTAX

to_double(input)

  • input
    • Type: any
    • Description: The input value

OUTPUT

double

EXAMPLES

SELECT to_double(long_col) as converted
long_colconverted
92233720368547758079.223372036854776e+18
-9223372036854775808-9.223372036854776e+18
SELECT to_double(float_col) as converted
float_colconverted
1.01.0
0.250.25
1.4e-451.401298464324817e-45
3.4028235e+383.4028234663852886e+38
InfinityInfinity
NaNNaN
SELECT to_double(string_col) as converted
string_colconverted
onenull
11.0
1F1.0
1D1.0
1.7976931348623157E3081.7976931348623157e+308
SELECT to_double(bool_col) as converted
bool_colconverted
True1.0
False0.0
nullnull
SELECT to_double(string_arr_col) as converted
string_arr_colconverted
[]null
['1']1.0
['0']0.0
['NaN']NaN
['0', '1']null

TO_DOUBLE_ERROR

If converting the given value to a Double using to_double() would result in an error, it returns the error; otherwise returns null.

SYNTAX

to_double_error(input)

  • input
    • Type: any
    • Description: the input value

OUTPUT

string

EXAMPLES

SELECT to_double(string_col) as converted, to_double_error(string_col) as error
string_colerrorconverted
oneNumberFormatException: For input string: "one"null
1null1.0
SELECT to_double(string_arr_col) as converted, to_double_error(string_arr_col) as error
string_arr_colerrorconverted
[]ConversionException: can't convert empty arraynull
['1']null1.0
['one']NumberFormatException: For input string: "one"null
['1', '2']ConversionException: can't convert array of length: 2null
nullnullnull

TO_INT

Attempts to convert a value of any type to an Integer. If conversion fails, it returns null. Error messages for conversion failures can be accessed with to_int_error().

SYNTAX

to_int(value)

  • value
    • Type: any
    • Description: The input value

OUTPUT

int

EXAMPLES

SELECT to_int(double_col) as converted
double_colconverted
0.250
0.51
100.0100
1.7976931348623157e+308null
Infinitynull
NaNnull
SELECT to_int(long_col) as converted
long_colconverted
21474836472147483647
2147483648null
SELECT to_int(bool_col) as converted
bool_colconverted
True1
False0
nullnull
SELECT to_int(string_col) as converted
string_colconverted
11
1.0null
onenull
SELECT to_int(string_arr_col) as converted
string_arr_colconverted
[]null
['1']1
['0']0
['NaN']null
['0', '1']null

TO_INT_ERROR

If converting the given value to an Integer using to_int() would result in an error, it returns the error; otherwise returns null.

SYNTAX

to_int_error(input)

  • input
    • Type: any
    • Description: the input value

OUTPUT

string

EXAMPLES

SELECT to_int(string_col) as converted, to_int_error(string_col) as error
string_colerrorconverted
oneNumberFormatException: For input string: "one"null
1null1
SELECT to_int(string_arr_col) as converted, to_int_error(string_arr_col) as error
string_arr_colerrorconverted
[]ConversionException: can't convert empty arraynull
['1']null1
['one']NumberFormatException: For input string: "one"null
['1', '2']ConversionException: can't convert array of length: 2null
nullnullnull
SELECT to_int(double_col) as converted, to_int_error(double_col) as error
double_colerrorconverted
42.0null42
NaNConversionException: Integer cannot represent NaNnull
1.7976931348623157e+308MagnitudeUnsupportedException: Integer cannot represent a number of this magnitude: 1.7976931348623157E308null

TO_LONG

Attempts to convert a value of any type to a Long. If conversion fails, it returns null. Error messages for conversion failures can be accessed with to_long_error().

SYNTAX

to_long(value)

  • value
    • Type: any
    • Description: the input value

OUTPUT

long

EXAMPLES

SELECT to_long(double_col) as converted
double_colconverted
0.250
0.51
100.0100
1.7976931348623157e+308null
Infinitynull
NaNnull
SELECT to_long(bool_col) as converted
bool_colconverted
True1
False0
nullnull
SELECT to_long(string_col) as converted_str
string_colconverted_str
11
1Lnull
1.0null
onenull
SELECT to_long(string_arr_col) as converted
string_arr_colconverted
[]null
['1']1
['0']0
['NaN']null
['0', '1']null

TO_LONG_ERROR

If converting the given value to a Long using to_long() would result in an error, it returns the error; otherwise returns null.

SYNTAX

to_long_error(input)

  • input
    • Type: any
    • Description: the input value

OUTPUT

string

EXAMPLES

SELECT to_long(string_col) as converted, to_long_error(string_col) as error
string_colerrorconverted
oneNumberFormatException: For input string: "one"null
1null1
SELECT to_long(string_arr_col) as converted, to_long_error(string_arr_col) as error
string_arr_colerrorconverted
[]ConversionException: can't convert empty arraynull
['1']null1
['one']NumberFormatException: For input string: "one"null
['1', '2']ConversionException: can't convert array of length: 2null
nullnullnull
SELECT to_long(double_col) as converted, to_long_error(double_col) as error
double_colerrorconverted
42.0null42
NaNConversionException: Long cannot represent NaNnull
1.7976931348623157e+308MagnitudeUnsupportedException: Long cannot represent a number of this magnitude: 1.7976931348623157E308null

TO_STRING

Attempts to convert a value of any type to a String. If conversion fails, it returns null. Error messages for conversion failures can be accessed with to_string_error().

SYNTAX

to_string(value)

  • value
    • Type: any
    • Description: the input value

OUTPUT

string

EXAMPLES

SELECT to_string(long_col) as converted
long_colconverted
00
92233720368547758079223372036854775807
nullnull
SELECT to_string(double_col) as converted
double_colconverted
1.01.0
0.50.5
5e-3244.9E-324
1.7976931348623157e+3081.7976931348623157E308
InfinityInfinity
NaNNaN
SELECT to_string(boolean_col) as converted
boolean_colconverted
Truetrue
Falsefalse
nullnull
SELECT to_string(array_col) as converted
array_colconverted
[]null
[1]1
[1, 2, 3]null

TO_STRING_ERROR

If converting the given value to a String using to_string() would result in an error, it returns the error; otherwise returns null.

SYNTAX

to_string_error(input)

  • input
    • Type: any
    • Description: the input value

OUTPUT

string

EXAMPLE

SELECT to_string(int_arr_col) as converted, to_string_error(int_arr_col) as error
int_arr_colerrorconverted
[]ConversionException: can't convert empty arraynull
[1]null1
[1, 2]ConversionException: can't convert array of length: 2null
nullnullnull

TRIM

Trim the spaces from both ends for the specified string.

SYNTAX

trim(input)

  • input
    • Type: string
    • Description: The value to trim

OUTPUT

string

EXAMPLE

SELECT trimmable, trim(trimmable) as trimmed
trimmabletrimmedtrimmable
hairhairhair
beardbeardbeard
hedgeshedgeshedges
eyebrowseyebrowseyebrows
nullnullnull

TYPEOF

Returns the Spark type of the given object as a string.

SYNTAX

typeof(object)

  • object
    • Type: any
    • Description: The object to find the type of

OUTPUT

string

EXAMPLES

SELECT typeof(int) AS type
inttype
13Integer
-13Integer
SELECT typeof(double) AS type
doubletype
13.0Double
-13.0Double
SELECT typeof(boolean) AS type
booleantype
TrueBoolean
FalseBoolean
SELECT typeof(string) AS type
stringtype
this is a stringString
String
SELECT typeof(arrays) AS type
arraystype
['a', 'b', 'c']Object[]
[]Object[]
[null]Object[]
nullnull

UPPER

Converts a string to uppercase. Applies to all characters.

SYNTAX

upper(input)

  • input
    • Type: string
    • Description: The string to convert to uppercase

OUTPUT

string

EXAMPLE

SELECT upper(string) as uppercase_string
stringuppercase_string
all_lowerALL_LOWER
ALL UPPERALL UPPER
mIxEdMIXED
nullnull

VALID_EMAIL

Returns whether the supplied string is a valid email or not.

SYNTAX

valid_email(this_might_be_an_email)

  • this_might_be_an_email
    • Type: string
    • Description: The string to be checked for email validity

OUTPUT

bool

EXAMPLE

SELECT valid_email(emails) as is_valid_email
emailsis_valid_email
[email protected]True
nathanFalse
nullnull

YEAR

Gets the year field of a date.

SYNTAX

year(datetime)

  • datetime
    • Type: string
    • Description: The timestamp to get the year of

OUTPUT

int

EXAMPLES

SELECT year(date) AS year
dateyear
002020
1997-101997
2077-06-23+06:452077
2001-03-14T15:43:19.1552001
1580-01-04 06:30-04:001580
SELECT year(date) AS year, datetime_error(date) as error
descriptiondateerroryear
null valuenullnullnull
empty stringText '' could not be parsed at index 0null
not a dateI am not a dateText 'I am not a date' could not be parsed at index 0null
starts date-like2018-03-oopsText '2018-03-oops' could not be parsed, unparsed text found at index 7null
bad date2018-03-95Text '2018-03-95' could not be parsed: Invalid value for DayOfMonth (valid values 1 - 28/31): 95null

INT.RANGE

Returns an array containing all integers, in order, between start (inclusive) and stop (exclusive).

SYNTAX

int.range(start, stop)

  • start
    • Type: int
    • Description: The starting integer
  • stop
    • Type: int
    • Description: The integer to stop before

OUTPUT

{'type': 'array', 'elementType': 'int'}

EXAMPLE

SELECT int.range(start, stop) as range_of_integers;
stopstartrange_of_integers
30[0, 1, 2]
31[1, 2]
41[1, 2, 3]
55[]
-2-5[-5, -4, -3]
2-2[-2, -1, 0, 1]
-3-2null
56null
1nullnull
null1null

LEGACY.HASH

NOTE: this function is DEPRECATED because of incorrectness. This incorrectness is the result of nulls and arrays not being considered when calculating the hash value. Instead, we suggest using the correct hash() function.
Returns a number, called a hash code, based on the input data. The hash code is always the same given the same inputs and is practically guaranteed to be different given different inputs.

SYNTAX

legacy.hash()

OUTPUT

long

EXAMPLES

SELECT legacy.hash(A, B, C) AS result
ABCresult
20twenty[3.0, 8.0, 9.0]-3390176128415294659
40forty[18.9, 21.1]-6183879932931889889
nullnull[]0
SELECT legacy.hash(A) AS result
Aresult
null0
[]0
SELECT legacy.hash(array(array.concat(A, B))) == legacy.hash(array(A, B)) AS result
ABresult
['a', 'b']['10', '20']True
SELECT legacy.hash(A) == legacy.hash(B) AS result
ABresult
[1, 2][2, 1]False
[null][null, null]True
SELECT legacy.hash(A) == legacy.hash(B) AS result
ABresult
['true', 'false'][True, False]False
SELECT legacy.hash(A) == legacy.hash(B) AS result
ABresult
['1', '2'][1, 2]False