User GuidesAPI ReferenceRelease Notes
Doc HomeHelp CenterLog In
User Guides

General Functions

Transformation functions manipulate and process data values.

ABS

Returns the absolute value of the given input.

SYNTAX

abs(input)

  • input
    • Type: number
    • Description: The attribute to return absolute value for

OUTPUT

number

EXAMPLE

SELECT abs(number_value) as absolute_value
number_valueabsolute_value
1.01.0
-1.01.0
-123.45123.45
00
InfinityInfinity
-InfinityInfinity
nullnull

ADD

Returns the sum of two integers.

SYNTAX

add(integer1, integer2)

  • integer1
    • Type: int
    • Description: The first integer to add
  • integer2
    • Type: int
    • Description: The second integer to add

OUTPUT

int

EXAMPLE

SELECT add(apples, oranges) AS total_fruit
applesorangestotal_fruit
13316
8210
-155-10
246
11516
4nullnull
null3null
nullnullnull

ARRAY

Returns an array with the given elements.

SYNTAX

array(elements)

  • elements
    • Type: any
    • Description: The elements to include in the array

OUTPUT

{'type': 'array', 'elementType': 'any'}

EXAMPLES

SELECT array(opening_act, headliner) AS bands
opening_actheadlinerbands
'Pink Floyd''Jimi Hendrix Experience'['Pink Floyd', 'Jimi Hendrix Experience']
null'Beyoncé'[null, 'Beyoncé']
SELECT array(...some_array) AS identical
some_arrayidentical
[1, 2, 3][1, 2, 3]
[2][2]
[null, 2, null][null, 2, null]

ARRAY_CONCAT

Converts all values in an array into strings and then concatenates them together into a single string.

SYNTAX

array_concat(array)

  • array
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The array whose values will be concatenated into a single string

OUTPUT

string

EXAMPLE

SELECT array_concat(example_arrays) AS concatenated_arrays
example_arraysconcatenated_arrays
['meow']'meow'
['This', ' ', 'is', ' ', 'a', ' ', 'sentence.']'This is a sentence.'
['']''
[null]''
[null, 'ta', '', null, 'mr', '']'tamr'
nullnull

ATOF

Converts a string to a floating point value. (ASCII-to-Float)

SYNTAX

atof(string_to_convert)

  • string_to_convert
    • Type: string
    • Description: The string to convert to a floating point value.

OUTPUT

double

EXAMPLE

SELECT atof(example_strings) AS example_strings_as_double;
example_stringsexample_strings_as_double
'42'42.0
'-42'-42.0
'-273.15'-273.15
'7.'7.0
'.7'0.7
'-.7'-0.7
'42E5'4200000.0
'42.E+42'4.2e+43
'6.022E-23'6.022e-23
'abc123'null
''null
nullnull

ATOI

Converts a string to an integer value. (ASCII-to-Integer)

SYNTAX

atoi(string_to_convert)

  • string_to_convert
    • Type: string
    • Description: The string to convert to an integer value.

OUTPUT

int

EXAMPLE

SELECT atoi(example_strings) AS example_strings_as_int;
example_stringsexample_strings_as_int
'42'42
'42.0'null
'3'3
'3.14'null
'1'1
'1.'null
'-300'-300
'-300.00000'null
'abc123'null
''null
nullnull

CEIL

Returns the smallest integer that is greater than or equal to the given number.

SYNTAX

ceil(input)

  • input
    • Type: number
    • Description: The input to compute the ceiling for.

OUTPUT

long

EXAMPLES

SELECT ceil(input_value) AS ceiling
input_valueceiling
1.12
1.92
-1.1-1
-1.9-1
00
NaN0
-Infinity-9223372036854775808
Infinity9223372036854775807
5e-3241
1.7976931348623157e+3089223372036854775807
nullnull
SELECT ceil(input_value) AS ceiling
input_valueceiling
4242
-42-42
nullnull

COALESCE

Returns the first input that is not null, or null if all inputs are null. When using COALESCE on an array, note that arrays that contain only nulls or only empty values are not themselves null. COALESCE will return these as the first non-null value found. Similar functions: NON_NULL and NON_EMPTIES.

SYNTAX

coalesce(a)

  • a
    • Type: any
    • Description: the inputs to coalesce

OUTPUT

any

EXAMPLES

SELECT coalesce(Attr_1, Attr_2) AS nonNullNumber
Attr_1Attr_2nonNullNumber
null1010
10null10
nullnullnull
0100
SELECT coalesce(attr_1, attr_2) AS nonNullValue
attr_1attr_2nonNullValue
[null]['c', 'd'][null]
['', '']['c', 'd']['', '']
['a', 'b']['c', 'd']['a', 'b']
[null, 'b']['c', 'd'][null, 'b']
null['c', 'd']['c', 'd']
[]['c', 'd'][]

CONCAT

Concatenates string values. If any of the arguments to concat is null, it returns null.

SYNTAX

concat(a)

  • a
    • Type: string
    • Description: String values to concatenate

OUTPUT

string

EXAMPLES

SELECT concat(first_name, last_name) AS combined
first_namelast_namecombined
'david''bowie''davidbowie'
'prince'nullnull
SELECT concat(...string_array) AS combined
string_arraycombined
['a', 'b', 'c']'abc'
['a', null]null
[]''

DATEDIFF

Gets the difference in days between the start and end date. The result will be negative if the end date occurs before the start date. More precisely, this function counts the number of 24-hour periods between the start and end dates.

SYNTAX

datediff(endDate, startDate)

  • endDate
    • Type: string
    • Description: The end date
  • startDate
    • Type: string
    • Description: The start date

OUTPUT

long

EXAMPLE

SELECT start, "end", datediff("end", start) AS diff
startendstartdiffend
'1991''1992''1991'365'1992'
'2004-03-01''2004-02-28''2004-03-01'-2'2004-02-28'
'2016-12-25T06:44+02:00''2016-12-24 23:44-05:00''2016-12-25T06:44+02:00'0'2016-12-24 23:44-05:00'
'1877-05-03 11:18:34.399+14:00''1877-05-03T11:18:34.399-10:00''1877-05-03 11:18:34.399+14:00'1'1877-05-03T11:18:34.399-10:00'
'2004-12-12'null'2004-12-12'nullnull
'1982-02-01:35:55''1980-15-21 06:30''1982-02-01:35:55'null'1980-15-21 06:30'
'7 March 1779''1779-03-11''7 March 1779'null'1779-03-11'

DATETIME_ERROR

Tries to convert the input to a datetime, returning the conversion error, if any.

SYNTAX

datetime_error(input)

  • input
    • Type: string
    • Description: The input to try to convert.

OUTPUT

string

EXAMPLE

SELECT datetime_error(date) as error
dateerror
nullnull
'1990-03-07'null
'1990-03-07T15:44'null
'1990-03-07T15:44:10.392+06:00'null
'''Text '' could not be parsed at index 0'
'I am not a date''Text 'I am not a date' could not be parsed at index 0'
'2018-03-oops''Text '2018-03-oops' could not be parsed, unparsed text found at index 7'
'2018-03-95''Text '2018-03-95' could not be parsed: Invalid value for DayOfMonth (valid values 1 - 28/31): 95'

DATETIME_TO_ISO

Formats a datetime string to the ISO_LOCAL_DATE_TIME format using the specified patterns. Patterns are matched in sequence, and the function will return the first matched pattern. Will try out a preset list of patterns in addition to the patterns supplied by the user (see second example below for some details). If no patterns are matched, this function returns null.

SYNTAX

datetime_to_iso(datetime_string, patterns)

  • datetime_string
    • Type: string
    • Description: A datetime string
  • patterns
    • Type: {'type': 'array', 'elementType': 'string'}
    • Description: An array of patterns for parsing the input

OUTPUT

string

EXAMPLES

SELECT datetime_to_iso(datetime, patterns_array) AS datetimes_as_iso
datetimepatterns_arraydatetimes_as_iso
'2013-08-23-08:45:00'['yyyy-MM-dd-HH:mm:ss']'2013-08-23T08:45:00'
'01 Apr 2017'['dd MMM yyyy']'2017-04-01'
'24 05 1997'['dd MMM yyyy', 'dd MM yyyy']'1997-05-24'
'24 May 1997'['dd MMM yyyy', 'dd MM yyyy']'1997-05-24'
SELECT datetime_to_iso(datetime, patterns_array) AS datetimes_as_iso
datetimepatterns_arraydatetimes_as_iso
''['dd MMM yyyy']null
null['dd MMM yyyy']null
'01 Apr 2017'null'2017-04-01'
'01 Apr 2017'[null]'2017-04-01'
'01-Apr-2017'['']'2017-04-01'
'nomatch'['dd MMM yyyy']null
'01-Apr-2017'['%@#*$&']'2017-04-01'

DATE_AND_TIME_TO_ISO

Formats a date and time string to ISO datetime format using the specified patterns. Patterns are matched in sequence, and the function will return the first matched pattern. Will try out a preset list of patterns in addition to the patterns supplied by the user (see second example below for some details). If no patterns are matched, this function returns null.

SYNTAX

date_and_time_to_iso(date_string, time_string, patterns)

  • date_string
    • Type: string
    • Description: The string representing the date
  • time_string
    • Type: string
    • Description: The string representing the time
  • patterns
    • Type: {'type': 'array', 'elementType': 'string'}
    • Description: The patterns used for matching the date and time to the ISO format

OUTPUT

string

EXAMPLES

SELECT date_and_time_to_iso(dates, times, patterns_arrays) AS datetimes_as_iso
datestimespatterns_arraysdatetimes_as_iso
'2013-08-23''08:45:00'['yyyy-MM-dd:HH:mm:ss']'2013-08-23T08:45:00'
'26-Feb-2009''08 27'['dd-MMM-yyyy:HH mm']'2009-02-26T08:27:00'
'2017-08-14''11:45:47.999'['yyyy-MM-dd:HH:mm:ss.SSS']'2017-08-14T11:45:47.999'
'Aug-23-2013'null['MMM-dd-yyyy']'2013-08-23'
null'Aug-23-2013'['MMM-dd-yyyy']'2013-08-23'
SELECT date_and_time_to_iso(dates, times, patterns_arrays) AS datetimes_as_iso
datestimespatterns_arraysdatetimes_as_iso
'''08:45:00'['yyyy-MM-dd:HH:mm:ss']null
'2013-08-23'''['yyyy-MM-dd:HH:mm:ss']'2013-08-23'
'2013-08-23''08:45:00'['']'2013-08-23T08:45:00'
null'08:45:00'['yyyy-MM-dd:HH:mm:ss']null
'2013-08-23'null['yyyy-MM-dd:HH:mm:ss']'2013-08-23'
'2013-08-23''08:45:00'[null]'2013-08-23T08:45:00'
'2013-08-23''08:45:00'null'2013-08-23T08:45:00'

DAYOFMONTH

Gets the day-of-month field of a date.

SYNTAX

dayofmonth(datetime)

  • datetime
    • Type: string
    • Description: The timestamp to get the day of month from

OUTPUT

int

EXAMPLES

SELECT dayOfMonth(date) AS day
dateday
'1997-10-03'3
'2077-06-23+06:45'23
'2001-03-14T15:43:19.155'14
'1580-01-04 06:30-04:00'4
SELECT dayOfMonth(date) AS day, datetime_error(date) as error
descriptiondateerrorday
'null value'nullnullnull
'empty string''''Text '' could not be parsed at index 0'null
'not a date''I am not a date''Text 'I am not a date' could not be parsed at index 0'null
'starts date-like''2018-03-oops''Text '2018-03-oops' could not be parsed, unparsed text found at index 7'null
'bad date''2018-03-95''Text '2018-03-95' could not be parsed: Invalid value for DayOfMonth (valid values 1 - 28/31): 95'null

DECIMAL

Converts a numeric value to double type.

SYNTAX

decimal(input)

  • input
    • Type: number
    • Description: The value to convert

OUTPUT

double

EXAMPLE

SELECT decimal(integer_value) as double_value
integer_valuedouble_value
11.0
-1-1.0
00.0
nullnull

DISTINCT

Returns an array of all distinct items from the given array converted to strings.

SYNTAX

distinct(array)

  • array
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The array to remove duplicates from

OUTPUT

{'type': 'array', 'elementType': 'string'}

EXAMPLE

SELECT distinct(array) as distinctified
arraydistinctified
[1, 1, 2, 1, 2, 3, 1, 3]['1', '2', '3']
[1, 2, 3]['1', '2', '3']
[null, null][null]
[][]
nullnull

DURATION_TO_ISO

Converts to a given duration in the specified time unit to the ISO-8601 format. If minimal is true, the format is PnYnMnDTnHnMnS, which stands for 'Period: number of Years, number of Months, number of Days, Time: number of Hours, number of Minutes, number of Seconds.' If minimal is false, only the significant units are shown.

SYNTAX

duration_to_iso(duration, time_unit, minimal)

  • duration
    • Type: double
    • Description: The positive duration value to convert to iso
  • time_unit
    • Type: string
    • Description: A three letter string representing the unit of the duration. Valid options are: 'SEC', 'MIN', 'HRS', 'DAY', 'WKS'
  • minimal
    • Type: bool
    • Description: Determines whether the duration is returned with only the significant values

OUTPUT

string

EXAMPLES

SELECT duration_to_iso(duration, time_unit, minimal) as isofied_duration
durationtime_unitminimalisofied_duration
10.0'SEC'False'P0Y0M0DT0H0M10.000S'
60.0'SEC'False'P0Y0M0DT0H1M0.000S'
61.5'MIN'False'P0Y0M0DT1H1M30.000S'
32'HRS'False'P0Y0M1DT8H0M0.000S'
2.483'DAY'False'P0Y0M2DT11H35M31.200S'
3.42'WKS'False'P0Y0M23DT22H33M36.000S'
10.5'MIN'True'PT10M30S'
2.483'DAY'True'PT59H35M31.2S'
-3.45'daY'False'P0Y0M-3DT-10H-48M0.000S'
SELECT duration_to_iso(duration, time_unit, minimal) as isofied_duration
durationtime_unitminimalisofied_duration
1.0''Truenull
null'SEC'Falsenull
60.0nullFalsenull
61.5'MIN'nullnull

EQUALS_IGNORE_CASE

Case-insensitively checks whether two values are equal.

SYNTAX

equals_ignore_case(str1, str2)

  • str1
    • Type: string
    • Description: The first value to compare
  • str2
    • Type: string
    • Description: The second value to compare

OUTPUT

bool

EXAMPLE

SELECT equals_ignore_case(family_member, state) as is_equal
family_memberstateis_equal
'ma''MA'True
'PA''pa'True
'Sister''CA'False
'Brother'nullnull
null'WY'null
nullnullnull

EXTRACT

Matches a string against a Java regular expression, and returns the specified group. The group 0 refers to the entire pattern. Indexes of individual groups start at 1. Returns null if no match is found, the group number or regular expression is invalid, or any of the input, regular expression, or group is null, the result is null.

SYNTAX

extract(input, regex, group_index)

  • input
    • Type: string
    • Description: The string to be matched
  • regex
    • Type: string
    • Description: The Java regular expression specifying the pattern to extract
  • group_index
    • Type: int
    • Description: The index of the group in the regular expression to extract

OUTPUT

string

EXAMPLES

SELECT extract('2001: A Space Odyssey', '(\\d+):\\s?([\\w\\s]+)', group_idx) AS result
group_idxresult
0'2001: A Space Odyssey'
1'2001'
2'A Space Odyssey'
3null
-1null
nullnull
SELECT extract(input, regex, group_index) as result
inputregexgroup_indexresult
'null values''null'nullnull
'null values'null1null
null'null'1null
nullnullnullnull
SELECT extract('invalid regex', regex, 1) as result
regexresult
'(invalid regex'null
'(wrong regex)'null

EXTRACT_ALL

Matches a string against a Java regular expression, and returns results for all groups. Returns null if no match is found, the regular expression is invalid, or either of the input or regular expression is null.

SYNTAX

extract_all(input, regex)

  • input
    • Type: string
    • Description: The string to be matched
  • regex
    • Type: string
    • Description: The Java regular expression specifying the pattern to extract

OUTPUT

{'type': 'array', 'elementType': 'string'}

EXAMPLE

SELECT extract_all(input, regex) as result
inputregexresult
'2001: A Space Odyssey''(\d+):\s?([\w\s]+)'['2001', 'A Space Odyssey']
'Gone With The Wind''^.The.$'[]
'The Good, The Bad and The Ugly''(The)'['The', 'The', 'The']
'The given regex is invalid''('null
'barb''(b*)'['b', '', '', 'b', '']
null'(The)'null
'The Third Man'nullnull
nullnullnull

FLOOR

Returns the greatest integer that is smaller than or equal to the given number.

SYNTAX

floor(input)

  • input
    • Type: number
    • Description: The input to compute the floor for.

OUTPUT

long

EXAMPLE

SELECT floor(input_value) AS floored_value
input_valuefloored_value
1.11
1.91
-1.1-2
-1.9-2
0.00
nullnull
NaN0
-Infinity-9223372036854775808
Infinity9223372036854775807

GET

Return the value in the array at the specified index. If the index is invalid, return null. An index is invalid if it is negative, not an integer, or greater than or equal to the size of the array it is applied to.

SYNTAX

get(data, index)

  • data
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: An array of values that you can index into.
  • index
    • Type: int
    • Description: The index to apply to the data argument. Indexes are integers between 0 (inclusive) and the length of the array (exclusive).

OUTPUT

any

EXAMPLE

SELECT get(col, idx) AS new_col
colidxnew_col
['a', 'b', 'c']0'a'
['a', 'b', 'c']1'b'
['a', 'b', 'c']-1null
['a', 'b', 'c']4null
['a', 'b', 'c']nullnull
[]0null
null0null

GREATEST

Returns the greatest value of the list of values, skipping null values. It will return null if and only if all parameters are null. String characters are ranked from least to greatest in the order specified under sort_array().

SYNTAX

greatest(a)

  • a
    • Type: any
    • Description: Values to compare.

OUTPUT

any

EXAMPLES

SELECT branch, greatest(sales_jan, sales_feb, sales_mar) AS q1_highest_month_sales
branchsales_jansales_febsales_marq1_highest_month_salesbranch
'Cambridge'10012080120'Cambridge'
'San Francisco'null709090'San Francisco'
'London'nullnullnullnull'London'
SELECT greatest(...string_array) AS greatest_string
string_arraygreatest_string
['a', 'b']'b'
['ab', 'aa']'ab'
['a', 'aa']'aa'
[null, 'a', null]'a'
['b', 'a', null]'b'
[null, 'ab', 'az']'az'
[null]null
[null, null]null
SELECT greatest(...array_array) AS greatest_array
array_arraygreatest_array
[[1], [2]][2]
[[1, 2], [1, 3]][1, 3]
[[null, 3], [null, 5]][null, 5]
[[0, 3], [0, null]][0, 3]
[[3], [1, 2]][3]
[[3], [3, -1]][3, -1]
[null, [3]][3]
[null, [null]][null]
[null, [null], [null, null]][null, null]

HASH

Returns a number, called a hash code, based on the input data. The hash code is always the same given the same inputs and is practically guaranteed to be different given different inputs.

SYNTAX

hash(inputs)

  • inputs
    • Type: any
    • Description: The input values to hash

OUTPUT

long

EXAMPLES

SELECT hash(A, B) AS result
ABresult
'twenty'[3.0, 8.0, 9.0]1830378446688894221
'forty'[18.9, 21.1]-5987411484308668877
'forty'[]4495871505993623155
'forty'null353237852084435270
nullnull-7218393911220984134
SELECT hash(A, B) AS result
ABresult
[null][null]8068983417364430449
[null][[]]-4768040760762171177
[[]][null]865141773612688779
null[[]]5085241997507925413
[[]]null-2601053220775996364
SELECT hash(A, B) = hash(array(A, B)) AS "hash(elements) = hash(array)", hash(A, B) = hash({A, B}) AS "hash(elements) = hash(struct)"
ABhash(elements) = hash(array)hash(elements) = hash(struct)
'one''two'FalseFalse

HEX

Returns the hexadecimal value of the argument.

SYNTAX

hex(input)

  • input
    • Type: int
    • Description: The input value.

OUTPUT

string

EXAMPLE

SELECT hex(input_value) AS hex_value
input_valuehex_value
1'1'
-1'FFFFFFFFFFFFFFFF'
0'0'
12345'3039'
-54321'FFFFFFFFFFFF2BCF'
-2147483648'FFFFFFFF80000000'
2147483647'7FFFFFFF'
nullnull

ENCODE_BASE64

Returns the Base64 encoded value of the argument.

SYNTAX

encode_base64(input)

  • input
    • Type: long
    • Description: The input value.

OUTPUT

string

EXAMPLE

SELECT encode_base64(input_value) AS base64_value
input_valuebase64_value
1'AAAAAAAAAAE='
-1'//////////8='
0'AAAAAAAAAAA='
0'AAAAAAAAAAA='
8921367987289891273'e88Q6sD0ock='
-3344781287187723481'0ZTzJAXNSyc='
-9223372036854775808'gAAAAAAAAAA='
9223372036854775807'f/////////8='
-9223372036854775807'gAAAAAAAAAE='
nullnull

INTEGER

Converts a numeric value to int type.

SYNTAX

integer(input)

  • input
    • Type: number
    • Description: The value to convert

OUTPUT

int

EXAMPLE

SELECT integer(double_value) as integer_value
double_valueinteger_value
1.11
-1.1-1
1.91
-1.9-1
0.00
nullnull
NaN0
-Infinity-2147483648
Infinity2147483647

ISNULL

Returns true if and only if the argument is null.

SYNTAX

isnull(input)

  • input
    • Type: any
    • Description: The input value.

OUTPUT

bool

EXAMPLES

SELECT isnull(input_value) AS is_it_null
input_valueis_it_null
'i am not null'False
''False
nullTrue
SELECT isnull(input_value) AS is_it_null
input_valueis_it_null
TrueFalse
FalseFalse
nullTrue
SELECT isnull(input_value) AS is_it_null
input_valueis_it_null
[1]False
[null]False
[]False
nullTrue

ISO_DIFFERENCE_DAYS

Returns the number of days between two ISO date-time strings rounded down to the nearest integer. If the first day comes after the second day, the result will be negative.

SYNTAX

iso_difference_days(date1, date2)

  • date1
    • Type: string
    • Description: The first date in the difference
  • date2
    • Type: string
    • Description: The second date in the difference

OUTPUT

int

EXAMPLE

SELECT iso_difference_days(date1, date2) AS date_difference
date1date2date_difference
'1987-10-05''1987-10-06'1
'1987-10-05''1987-10-04'-1
'1987-10-05''1987-10-05'0
'1987-10-05T01:26:00''1987-10-06'1
'1987-10-05''1987-10-06T01:26:00'1
'1987-10-05T01:26:00''1987-10-06T01:25:00'1
'1987-10-05-28T01:26:00''1987-10-06T01:25:00'null
'1987-10-34T01:26:00''1987-10-06T01:25:00'null
null'1987-10-05'null
'1987-10-05'nullnull
nullnullnull

ISO_DATETIME_ADD

Adds an amount of time to a datetime string in ISO format.

SYNTAX

iso_datetime_add(datetime, duration, time_unit)

  • datetime
    • Type: string
    • Description: A datetime string formatted to ISO-8601 duration format PnYnMnDTnHnMnS
  • duration
    • Type: double
    • Description: Time unit, must be whole number if unit is 'YRS' or 'MTH'
  • time_unit
    • Type: string
    • Description: A 3 letter string representing the unit of the duration. Valid options are: 'SEC', 'MIN', 'HRS', 'DAY', 'WKS', 'YRS', 'MTH'

OUTPUT

string

EXAMPLES

SELECT iso_datetime_add(datetime, duration, time_unit) AS new_datetime
datetimedurationtime_unitnew_datetime
'2050-02-04T12:30:00'3.1'day''2050-02-07T14:54:00'
'2010-08-04T00:00:00'2.0'yRs''2012-08-04T00:00:00'
'2000-08-04T00:00:00'-5.0'MtH''2000-03-04T00:00:00'
SELECT iso_datetime_add(datetime, duration, time_unit) AS new_datetime
datetimedurationtime_unitnew_datetime
'2000-08-04T00:00:00'-5.0'mTh''2000-03-04T00:00:00'
'2000-08-04T00:00:00'-5.1'mTh'null
'2000-08-04T00:00:00'-5.0'yrs''1995-08-04T00:00:00'
'2000-08-04T00:00:00'-5.1'yrs'null
'2000-08-04T00:00:00'-5.0''null
null-5.0'mTh'null
''-5.0'mTh'null
'2050-02-04T12:30:00'null'dAy'null
'2050-02-04T12:30:00'1.0nullnull

IS_NOT_NULL

Returns false if the input is equal to null, and true if it is not equal to null.

SYNTAX

is_not_null(input)

  • input
    • Type: any
    • Description: The input to check

OUTPUT

bool

EXAMPLES

SELECT is_not_null(string_col) as string_not_null
string_colstring_not_null
''True
'null'True
'content!'True
nullFalse
SELECT is_not_null(string_array_col) as array_not_null
string_array_colarray_not_null
[]True
[null]True
['']True
['content!']True
nullFalse

IS_NOT_EMPTY

Returns true if the input is not null and not equal to the empty string. Returns false if the input is equal to the empty string. Returns null if the input is null. Tip: Tamr Core does not offer an IS_EMPTY function. For this logical comparator use an expression with IS EMPTY syntax.

SYNTAX

is_not_empty(input)

  • input
    • Type: string
    • Description: The input to check

OUTPUT

bool

EXAMPLE

SELECT is_not_empty(string_col) as not_empty
string_colnot_empty
' 'True
'null'True
'content'True
''False
nullnull

JACCARD_INDEX

Computes the Jaccard similarity index between two sets. It is calculated as the intersection of the two sets divided by the union of the two sets.

SYNTAX

jaccard_index(a, b)

  • a
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The first set
  • b
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The second set

OUTPUT

double

EXAMPLE

SELECT jaccard_index(set_a, set_b) AS similarity
set_aset_bsimilarity
[1, 2, 3][1, 2, 3]1.0
[1, 2, 3][2, 1, 3]1.0
[1, 2, 3][2, 2, 1, 3]1.0
[1, 2, 3, 4, 5][1, 3, 5, 7]0.5
[9, 8, 7, 6][5, 2, 1, 9]0.14285714285714285
[5, 2, 1, 9][9, 8, 7, 6]0.14285714285714285
[][1]0.0
[1][]0.0
[][]NaN
null[]null
[]nullnull
nullnullnull

LEAST

Returns the smallest value of the list of values, skipping null values. It will return null if and only if all parameters are null. String characters are ranked from least to greatest in the order specified under sort_array().

SYNTAX

least(a)

  • a
    • Type: any
    • Description: Values to compare.

OUTPUT

any

EXAMPLES

SELECT branch, least(sales_jan, sales_feb, sales_mar) AS q1_lowest_month_sales
branchsales_jansales_febsales_marq1_lowest_month_salesbranch
'Cambridge'1001208080'Cambridge'
'San Francisco'null709070'San Francisco'
'London'nullnullnullnull'London'
SELECT least(...string_array) AS least_string
string_arrayleast_string
['a', 'b']'a'
['ab', 'aa']'aa'
['a', 'aa']'a'
[null, 'a', null]'a'
['b', 'a', null]'a'
[null, 'ab', 'az']'ab'
[null]null
[null, null]null
SELECT least(...array_array) AS least_array
array_arrayleast_array
[[1], [2]][1]
[[1, 2], [1, 3]][1, 2]
[[null, 3], [null, 5]][null, 3]
[[0, 3], [0, null]][0, 3]
[[3], [1, 2]][1, 2]
[[3], [3, -1]][3]
[null, [3]][3]
[null, [null]][null]
[null, [null], [null, null]][null]

LEN

Returns the number of non-null, non-empty, non-whitespace items in an array.

SYNTAX

len(array)

  • array
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The array to count items of

OUTPUT

int

EXAMPLE

SELECT len(array) AS array_length
arrayarray_length
['1', '2', '3']3
['whitespace', "doesn't", 'get', ' \n ', 'counted']4
['']0
[null]0
[]0
[null, 'non_null', '', 'non_empty']2
nullnull

LENGTH

Returns the length, in characters, of the given string value.

SYNTAX

length(input)

  • input
    • Type: string
    • Description: The value to measure

OUTPUT

int

EXAMPLE

SELECT movie, length(movie) as num_characters
movienum_charactersmovie
'Citizen Kane'12'Citizen Kane'
'Casablanca'10'Casablanca'
' '3' '
''0''
nullnullnull

LOWER

Converts a string to lowercase. Applies to all characters.

SYNTAX

lower(input)

  • input
    • Type: string
    • Description: The string to convert to lowercase

OUTPUT

string

EXAMPLE

SELECT lower(string) as lowercase_string
stringlowercase_string
'all_lower''all_lower'
'ALL UPPER''all upper'
'mIxEd''mixed'
''''
nullnull

MATCHES

Matches a string against a Java regular expression. Returns true if the regular expression matches the entire input string, false if it does not, and null if the regular expression is invalid or if either of the input or regular expression is null.

SYNTAX

matches(regex, input)

  • regex
    • Type: string
    • Description: The Java regular expression specifying the pattern to match
  • input
    • Type: string
    • Description: The string to be matched

OUTPUT

bool

EXAMPLE

SELECT matches(regex, input) as result
inputregexresult
'2001: A Space Odyssey''(\d+):\s?([\w\s]+)'True
'Gone With The Wind''^.The.$'True
'Gone With The Wind''Casablanca'False
'The Good, The Bad and The Ugly''(The)'False
'Citizen Kane''(invalid'null
null'(The)'null
'The Third Man'nullnull
nullnullnull

MD5

Returns a hash value, according to the MD5 algorithm.

SYNTAX

md5(input)

  • input
    • Type: string
    • Description: The input value.

OUTPUT

string

EXAMPLE

SELECT md5(input_value) AS md5_hash
input_valuemd5_hash
'I am a string.''a6be8c1f86bffe9f7a9397e578f7a446'
'I will be hashed.''6e76226f2116eea6e5cf0f4783762e19'
'Unless of course''da6cd9f98615af7b253a4d321e12c6a4'
'Your computer has crashed.''d45aee724e0cfbe2a1f00613ce118f81'
'''d41d8cd98f00b204e9800998ecf8427e'
nullnull

MONTH

Gets the month field of a date.

SYNTAX

month(datetime)

  • datetime
    • Type: string
    • Description: The timestamp to get the month of

OUTPUT

int

EXAMPLES

SELECT month(date) AS month
datemonth
'1997-10'10
'2077-06-23+06:45'6
'2001-03-14T15:43:19.155'3
'1580-01-04 06:30-04:00'1
SELECT month(date) AS month, datetime_error(date) as error
descriptiondateerrormonth
'null value'nullnullnull
'empty string''''Text '' could not be parsed at index 0'null
'not a date''I am not a date''Text 'I am not a date' could not be parsed at index 0'null
'starts date-like''2018-03-oops''Text '2018-03-oops' could not be parsed, unparsed text found at index 7'null
'bad date''2018-03-95''Text '2018-03-95' could not be parsed: Invalid value for DayOfMonth (valid values 1 - 28/31): 95'null

MONOTONICALLY_INCREASING_ID

Returns a monotonically increasing identifier. The identifiers are guaranteed to be unique but not sequential. The results of this function are non-deterministic, different runs on the same dataset could produce different results.

SYNTAX

monotonically_increasing_id()

OUTPUT

long

EXAMPLE

SELECT monotonically_increasing_id() AS id, *
nameidname
'Alice'0'Alice'
'Bob'1'Bob'
'Carol'2'Carol'
'Dave'3'Dave'

NUM

Returns the first numeric element of an array parsed as a double.

SYNTAX

num(array)

  • array
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The array to find the first numeric element of

OUTPUT

double

EXAMPLE

SELECT num(array) AS first_num_from_array
arrayfirst_num_from_array
['1.2', null]1.2
[null, '23', null]23
[null, '3E7', '1.2']30000000.0
['abc', 'true']null
nullnull
[]null
[null]null

PARSE_JSON_ARRAY

Parses strings as JSON-encoded arrays of strings, returning the decoded values. All results are combined into a single multivalue, with no deduplication. Inputs that cannot be parsed as a JSON array are ignored. Values in arrays that are not strings are ignored.

SYNTAX

parse_json_array(json_array)

  • json_array
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: A multivalue containing JSON-encoded arrays of string

OUTPUT

{'type': 'array', 'elementType': 'string'}

EXAMPLE

SELECT parse_json_array(json_array) AS parsed
json_arrayparsed
['["a"]']['a']
['["a", "b", "c"]', '["d"]', '["e", "f"]']['a', 'b', 'c', 'd', 'e', 'f']
['1', '"x"', '["x"', '["a"]', null, '["b", 1, ["x"], {"x":"x"}, "c"]', '{"x":"x"}']['a', 'b', 'c']
['a', '1', '[', '["unterminated"', '{"un":"terminated"'][]
[][]
nullnull

PMOD

Computes the positive remainder after division.

SYNTAX

pmod(dividend, divisor)

  • dividend
    • Type: number
    • Description: The dividend number on which to calculate the remainder
  • divisor
    • Type: number
    • Description: The divisor number on which to calculate the remainder

OUTPUT

number

EXAMPLE

SELECT pmod(dividend, divisor) as result
dividenddivisorresult
5.03.02.0
-19.05.01.0
5.52.50.5
Infinity3.0NaN
NaN3.0NaN
null3.0null
5.00.0null

POW

Returns the value of the first argument raised to the power of the second argument.

SYNTAX

pow(base, exponent)

  • base
    • Type: number
    • Description: The base of the exponential expression
  • exponent
    • Type: number
    • Description: The exponent of the exponential expression

OUTPUT

double

EXAMPLE

SELECT pow(base, exponent) as result
baseexponentresult
329
3.52.012.25
-23-8
2-30.125
22.55.656854249492381
2-2.50.1767766952966369
-2-2.5NaN
001
null2null
2nullnull
nullnullnull

SQRT

Returns the square root.

SYNTAX

sqrt(input)

  • input
    • Type: number
    • Description: The input to get the square root of

OUTPUT

double

EXAMPLE

SELECT sqrt(input) as result
inputresult
93
5.762.4
11
00
-1NaN
nullnull

RAND

Returns a random value within the range 0.0 (inclusive) to 1.0 (exclusive).

SYNTAX

rand()

OUTPUT

double

EXAMPLE

SELECT row_number, rand() as random_double
row_numberrow_numberrandom_double
110.7056362008458043
220.8900315077291642
330.48696337565223247
440.4192577676064432

REPLACE

Replaces all occurrences of the target in the given string with the replacement.

SYNTAX

replace(string, target, replacement)

  • string
    • Type: string
    • Description: The string to replace parts of
  • target
    • Type: string
    • Description: The substring to replace in the string
  • replacement
    • Type: string
    • Description: The string to replace the target with

OUTPUT

string

EXAMPLES

SELECT replace(string, target, replacement) AS replaced
stringtargetreplacementreplaced
'aabacad''a''x''xxbxcxd'
'''a''x'''
'123''''x''x1x2x3x'
'too much space'' ''''toomuchspace'
null'a''x'null
'aabacad'null'x'null
'aabacad''a'nullnull
SELECT replace('Hi my name is {name}!', '{name}', names) AS introductions
namesintroductions
'Sam''Hi my name is Sam!'
'Alex''Hi my name is Alex!'
'Sky''Hi my name is Sky!'
'Timothy''Hi my name is Timothy!'
'Sharon''Hi my name is Sharon!'
'Harsh''Hi my name is Harsh!'

REPLACE_ALL

Replaces all matches of the target regex in the given string with the replacement.

SYNTAX

replace_all(string, target_regex, replacement)

  • string
    • Type: string
    • Description: The string to replace parts of
  • target_regex
    • Type: string
    • Description: The regex to replace in the string
  • replacement
    • Type: string
    • Description: The string to replace the target with

OUTPUT

string

EXAMPLE

SELECT replace_all(string, target_regex, replacement) AS replaced
stringtarget_regexreplacementreplaced
'aaaaabaacad''[a]''x''xxxxxbxxcxd'
'aaaaabaacad''[a]+''x''xbxcxd'
'this and that or this''(this|that)''UNK''UNK and UNK or UNK'
'invalid regex returns null''invalid[regex''x'null
null'[a]''x'null
'aaaaabaacad'null'x'null
'aaaaabaacad''[a]'nullnull
'aaaaaBaacad''[A-Z]'' $0''aaaaa Baacad'
'(617)333-3333''[0-9()-]+''+1$0''+1(617)333-3333'

RINT

Returns the double value that is closest in value to the argument and is equal to a mathematical integer.

SYNTAX

rint(input)

  • input
    • Type: number
    • Description: The input value

OUTPUT

double

EXAMPLE

SELECT rint(input_value) as rounded_value
input_valuerounded_value
1.11.0
-1.9-2.0
3.5000014.0
NaNNaN
InfinityInfinity
-Infinity-Infinity
nullnull

ROUND

Returns the closest Long value to the argument.

SYNTAX

round(input)

  • input
    • Type: number
    • Description: The input value

OUTPUT

long

EXAMPLE

SELECT round(input_value) as rounded_value
input_valuerounded_value
1.11
-1.9-2
3.5000014
NaN0
Infinity9223372036854775807
-Infinity-9223372036854775808
nullnull

RUNTIME_DATE

Returns a string representing the date in ISO Format at the time the function is executed for each value. Each row may have a different runtime date.

SYNTAX

runtime_date()

OUTPUT

string

EXAMPLE

SELECT row_number, runtime_date() as date
row_numberrow_numberdate
11'2022-09-20Z'
22'2022-09-20Z'
33'2022-09-20Z'

RUNTIME_DATETIME

Returns a string representing the date and time in ISO Format at the time the function is executed for each value. Each row may have a different runtime date and time.

SYNTAX

runtime_datetime()

OUTPUT

string

EXAMPLE

SELECT row_number, runtime_datetime() as datetime
row_numberrow_numberdatetime
11'2022-09-20T14:19:48Z'
22'2022-09-20T14:19:48Z'
33'2022-09-20T14:19:48Z'

SHA1

Returns a hash value, according to the SHA-1 algorithm.

SYNTAX

sha1(input)

  • input
    • Type: string
    • Description: The input value to be hashed.

OUTPUT

string

EXAMPLE

SELECT sha1(value) AS sha1_hashified
valuesha1_hashified
'My birthday is October 1''97e79c798abe758b227e111c2ddc98b01a4cc607'
'My birthday is October 2''0294ac31298fd3e129380e770c22db998ac71e58'
'My borthday is October 1''0e7fa8dce7a3927f28413e0eef65a62e5a6c6dfa'
'''da39a3ee5e6b4b0d3255bfef95601890afd80709'
nullnull

SIZE

Returns the number of elements in a given array.

SYNTAX

size(input)

  • input
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: The array in which to count elements

OUTPUT

int

EXAMPLE

SELECT collection, size(members) as num_members
collectionmemberscollectionnum_members
'Spices'['Salt', 'Pepper']'Spices'2
'Destiny's Child'['Beyoncé Knowles', 'Kelly Rowland', 'Michelle Williams']'Destiny's Child'3
'Array containing null'['the next value is null', null]'Array containing null'2
'Empty Array'[]'Empty Array'0
'Null Array'null'Null Array'null

SORT_ARRAY

Sorts the input array in ascending order, according to the natural ordering of the array elements. This will filter out null elements. String characters are ranked from least to greatest in the following order: !, ", #, $, %, &, ', (, ), *, +, ,, -, ., /, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, :, ;, <, =, >, ?, @, A, B, C, D, E, F, G, H, I, J, K, L, M, N, O, P, Q, R, S, T, U, V, W, X, Y, Z, [, \, ], ^, _, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o, p, q, r, s, t, u, v, w, x, y, z, {, |, }, ~

SYNTAX

sort_array(input)

  • input
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: An array of values to be sorted.

OUTPUT

{'type': 'array', 'elementType': 'any'}

EXAMPLES

SELECT sort_array(unsorted_array) AS sorted_array
unsorted_arraysorted_array
['def', '', 'ghi', null, 'abc']['', 'abc', 'def', 'ghi']
['10', '11', '100', '101', '111']['10', '100', '101', '11', '111']
['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\', ']', '^', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~']['!', '"', '#', '$', '%', '&', "'", '(', ')', '*', '+', ',', '-', '.', '/', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', ':', ';', '<', '=', '>', '?', '@', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '[', '\', ']', '^', '_', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '{', '|', '}', '~']
SELECT sort_array(unsorted_array) AS sorted_array
unsorted_arraysorted_array
[True, False, False, True, null][False, False, True, True]
SELECT sort_array(unsorted_array) AS sorted_array
unsorted_arraysorted_array
[3.2, -3.2, 0.0, 'NaN', '-Infinity', 'Infinity']['-Infinity', -3.2, 0.0, 3.2, 'Infinity', 'NaN']
SELECT sort_array(unsorted_array) AS sorted_array
unsorted_arraysorted_array
[[1, 2, 4], [1, 2, 3]][[1, 2, 3], [1, 2, 4]]

SPLIT

Splits a string into an array of strings based on where the provided regex matches.

SYNTAX

split(string, regex)

  • string
    • Type: string
    • Description: The string to split
  • regex
    • Type: string
    • Description: The regex to split by when matched

OUTPUT

{'type': 'array', 'elementType': 'string'}

EXAMPLE

SELECT split(string, regex) as result
stringregexresult
'This is a sentence.''\s'['This', 'is', 'a', 'sentence.']
'12345''2'['1', '345']
'There is no match!''pineapple'['There is no match!']
'grape'''['g', 'r', 'a', 'p', 'e']
'''grape'['']
''''['']
null'grape'null
'grape'nullnull

STRIP_ACCENTS

Removes diacritics from a string.

SYNTAX

strip_accents(string)

  • string
    • Type: string
    • Description: The string to strip the accents from

OUTPUT

string

EXAMPLE

SELECT strip_accents(with_accents) as no_accents
with_accentsno_accents
'Et ça sera sa moitié.''Et ca sera sa moitie.'
'aàáâäãåāæ''aaaaaaaaæ'
'ᾳ''α'
'cçćč''cccc'
'eèéêëēėę''eeeeeeee'
'ἦἧᾚ''ηηΗ'
'iîïíīįìi''iiiiiiii'
'lł''ll'
'nñń''nnn'
'oôöòóōõøœ''oooooooøœ'
'sśšß''sssß'
'uûüùúū''uuuuuu'
'yÿ''yy'
'zžźż''zzzz'

STR

Converts any input into a string.

SYNTAX

str(some_object)

  • some_object
    • Type: any
    • Description: The object to convert to a string

OUTPUT

string

EXAMPLES

SELECT str(booleans) AS stringified
booleansstringified
True'true'
False'false'
SELECT str(int) AS stringified
intstringified
1'1'
SELECT str(double) AS stringified
doublestringified
1.0'1.0'
SELECT str(string) AS stringified
stringstringified
'this is a string''this is a string'
nullnull

STR_FORMAT

Returns a string using the specified format template with the inserted string arguments.

SYNTAX

str_format(format_template, objects_to_insert)

  • format_template
    • Type: string
    • Description: A string detailing how and where the string arguments should be formatted and inserted. For a syntax reference, see Java Formatter.
  • objects_to_insert
    • Type: {'type': 'array', 'elementType': 'any'}
    • Description: An array of objects to format and insert into the template

OUTPUT

string

EXAMPLES

SELECT str_format(templates, integer_values) AS format_result
descriptiontemplatesinteger_valuesformat_result
'Is it non-null?''%b, %b, %b'[3, null, 7]'true, false, true'
'Format integer''%d'[1]'1'
'Pad integer with some number of zeros''%010d'[1]'0000000001'
'Use thousands-separator''%,d'[10000000]'10,000,000'
'Print positive integers with a plus''%+d'[1]'+1'
'Add a space before positive numbers''% d'[1]' 1'
'Enclose negative ints with parenthesis''%(d'[-1]'(1)'
'Some combination of the above''%+(010d'[1]'+000000001'
'Some combination of the above''%+(010d'[-1]'(00000001)'
'Octal string''%o'[142]'216'
'Format number as string''%s'[3]'3'
'Hex string''%x'[142]'8e'
'Uppercase hex string''%X'[142]'8E'
SELECT str_format(templates, double_values) as format_result
descriptiontemplatesdouble_valuesformat_result
'Double to hex''%a'[120.382]'0x1.e1872b020c49cp6'
'Double in scientific notation''%e'[15042.0]'1.504200e+04'
'Double''%f'[142.0]'142.000000'
'Double, possibly in scientific notation if long enough''%g'[100000.0]'100000'
'Double, possibly in scientific notation if long enough''%g'[1000000.0]'1.00000e+06'
'Four decimal places''%.4f'[1.234]'1.2340'
'No decimal places''%.0f'[1.234]'1'
'Given null value''%d'[null]'null'
'Returns null given a null array''%d'nullnull
SELECT str_format(templates, string_values) as format_result
descriptiontemplatesstring_valuesformat_result
'Hex String of value from hashCode() method''%h'['example']'b125116a'
'Format as string''%s'['i am string']'i am string'
'Maximum 7 characters''%.7s'['i am string']'i am st'
'Specify argument index''%2$s, %1$s'['one', 'two']'two, one'
'Use literal percent sign''%%'[]'%'
SELECT str_format(time_format, array(1350933383321L)) AS time_formatted
descriptiontime_formattime_formatted
'Complete date formatting example''%tB %<td, %<tY, %<tl:%<tM%<tp''October 22, 2012, 7:16pm'
'Hour of the day for the 24-hour clock, with a leading zero if necessary''%tH''19'
'Hour for the 12-hour clock, with a leading zero as necessary''%tI''07'
'Hour of the day for the 24-hour clock''%tk''19'
'Hour for the 12-hour clock''%tl''7'
'Minute within the hour formatted as two digits with a leading zero as necessary''%tM''16'
'Seconds within the minute, formatted as two digits with a leading zero as necessary''%tS''23'
'Millisecond within the second formatted as three digits with leading zeros as necessary''%tL''321'
Nanosecond within the second, as 9 digits with leading zeros as necessary'%tN''321000000'
'Locale-specific morning or afternoon marker in lower case''%tp''pm'
'Locale-specific morning or afternoon marker in UPPER CASE''%Tp''PM'
'RFC 822 style numeric time zone offset from GMT''%tz''+0000'
'A string representing the abbreviation for the time zone''%tZ''UTC'
'Seconds since the beginning of the epoch starting at 1 January 1970 00:00:00 UTC''%ts''1350933383'
'Milliseconds since the beginning of the epoch starting at 1 January 1970 00:00:00 UTC''%tQ''1350933383321'
'Locale-specific full month name''%tB''October'
'Locale-specific abbreviated month name''%tb''Oct'
'Same as %%tb''%th''Oct'
'Locale-specific full name of the day of the week''%tA''Monday'
'Locale-specific short name of the day of the week''%ta''Mon'
'Four-digit year divided by 100, formatted as two digits with leading zero as necessary''%tC''20'
'Last two digits of the year, formatted with leading zeros as necessary''%ty''12'
'Year, formatted as at least four digits with leading zeros as necessary''%tY''2012'
'Day of year, formatted as three digits with leading zeros as necessary''%tj''296'
'Month, formatted as two digits with leading zeros as necessary''%tm''10'
'Day of month, formatted as two digits with leading zeros as necessary''%td''22'
'Day of month, formatted as two digits with no leading zeros''%te''22'
'Time formatted for the 24-hour clock as "%%tH:%%<tM"''%tR''19:16'
'Time formatted for the 24-hour clock as "%%tH:%%<tM:%%<tS"''%tT''19:16:23'
'Time formatted for the 12-hour clock as "%%tI:%%<tM:%%<tS %%<Tp"''%tr''07:16:23 PM'
'Date formatted as "%%tm/%%<td/%%<ty"''%tD''10/22/12'
'ISO 8601 complete date formatted as "%%tY-%%<tm-%%<td"''%tF''2012-10-22'
'Date and time formatted as "%%ta %%<tb %%<td %%<tT %%<tZ %%<tY"''%tc''Mon Oct 22 19:16:23 UTC 2012'

STR_JOIN

Concatenates all strings in an array into a single string, inserting the delimiter between each item.

SYNTAX

str_join(delimiter, string_array)

  • delimiter
    • Type: string
    • Description: The string to insert between each item in the array
  • string_array
    • Type: {'type': 'array', 'elementType': 'string'}
    • Description: The array of string to join into a single string

OUTPUT

string

EXAMPLE

SELECT str_join(delimiter, string_values) AS concatenated_array
delimiterstring_valuesconcatenated_array
'-'['1', '2', '3']'1-2-3'
', '['1', '2', '3']'1, 2, 3'
''['1', '2', '3']'123'
'|'['make', 'a', 'regular', 'expression']'make|a|regular|expression'
'-'['foo', null, 'bar']'foo-bar'
'-'[]''
null['1', '2', '3']null
'-'nullnull
'-'[null]''

SUBSTR

Removes the portion of the string before the start_index. If start_index is positive or zero, it indexes the string starting from the beginning; if negative, it indexes the string starting from the end (so -1 gives you the last character of the string, -2 the last two characters and so on). The function returns the substring starting from the indexed position and continuing to the end of the string, or null if the indexed position is past the end of the string.

SYNTAX

substr(string, start_index)

  • string
    • Type: string
    • Description: The string to get the substring of
  • start_index
    • Type: int
    • Description: The index to start the substring at

OUTPUT

string

EXAMPLE

SELECT substr(string, start_index) as substrings
stringstart_indexsubstrings
'pie'0'pie'
'pie'1'ie'
'pie'3''
'pie'4null
'pie'-1'e'
'pie'-4'pie'
null0null
'pie'nullnull

SUBSTR2

Returns the part of the given string between the start_index and end_index. The values start_index and end_index may be either zero or positive (indexing from the start of the string) or negative (indexing from the end). Together, start_index (inclusive) and end_index (exclusive) determine the range of the string. If the range is empty then null is returned.

SYNTAX

substr2(string, start_index, end_index)

  • string
    • Type: string
    • Description: The string to get the substring of
  • start_index
    • Type: int
    • Description: The index to start the substring at
  • end_index
    • Type: int
    • Description: The index to end the substring at

OUTPUT

string

EXAMPLE

SELECT substr2(string, start_index, end_index) as substrings
stringstart_indexend_indexsubstrings
'0123456789'37'3456'
'"quote"'16'quote'
'start_index = end_index'55''
'end_index < start_index'54null
'start_index < 0'-10null
'start index is negative'0-1'start index is negativ'
'both start_index and end_index are negative'-10-1'e negativ'
'negative start_index > negative end_index'-1-3null
'end_index > length'019'end_index > length'
null00null
'start_index is null'null1null
'end_index is null'0nullnull

TAMR_ID

Computes a unique key from a string source ID and string entity ID. This key is identical to the one generated for the unified dataset based on origin_source_name and origin_entity_id.

SYNTAX

tamr_id(source_id, entity_id)

  • source_id
    • Type: string
    • Description: the source identifier
  • entity_id
    • Type: string
    • Description: the entity identifier

OUTPUT

string

EXAMPLE

SELECT *, tamr_id(source_id, entity_id) as tamr_id
source_identity_idsource_idtamr_identity_id
nullnullnullnullnull
'source1.csv'null'source1.csv'nullnull
null'entity-id'nullnull'entity-id'
'another_source''entity-id''another_source''9055106661574376142''entity-id'
'source1.csv''rec-653-org''source1.csv''-3639548815871415108''rec-653-org'
'source1.csv''rec-9996-org''source1.csv''713467184200753482''rec-9996-org'
'source1.csv''rec-6137-org''source1.csv''-5300772691460392676''rec-6137-org'
'source1.csv''rec-7100-org''source1.csv''6054007634817115275''rec-7100-org'
'source1.csv''rec-9526-org''source1.csv''-8395450447775343480''rec-9526-org'

TAMR_ID_128

Computes a unique key from a string source ID and string entity ID. This key is identical to the one generated for the unified dataset based on origin_source_name and origin_entity_id. This key is equal to the RFC-4648 URL-safe unpadded Base64 representation of the 128-bit hash value of the source_id and entity_id.

SYNTAX

tamr_id_128(source_id, entity_id)

  • source_id
    • Type: string
    • Description: the source identifier
  • entity_id
    • Type: string
    • Description: the entity identifier

OUTPUT

string

EXAMPLE

SELECT *, tamr_id_128(source_id, entity_id) as tamr_id_128
source_identity_idsource_idtamr_id_128entity_id
nullnullnullnullnull
'source1.csv'null'source1.csv'nullnull
null'entity-id'nullnull'entity-id'
'another_source''entity-id''another_source''WU1V6-8PgJ7SAowkhdVBJg''entity-id'
'source1.csv''rec-653-org''source1.csv''4amv8rljpvli3tpWDnt2SA''rec-653-org'
'source1.csv''rec-9996-org''source1.csv''mi0Qn8pZda5EQdUsLDxZRQ''rec-9996-org'
'source1.csv''rec-6137-org''source1.csv''TnrB0n4kcXKifOD6IMfpOQ''rec-6137-org'
'source1.csv''rec-7100-org''source1.csv''VAd7AXiqeAgjx5VIXRxlRA''rec-7100-org'
'source1.csv''rec-9526-org''source1.csv''wnq9hMxC05jdynTfQEAr3Q''rec-9526-org'

TO_BOOLEAN

Attempts to convert a value of any type to a Boolean. If conversion fails, it returns null. Error messages for conversion failures can be accessed with to_boolean_error().

SYNTAX

to_boolean(input)

  • input
    • Type: any
    • Description: the input value

OUTPUT

bool

EXAMPLES

SELECT to_boolean(string_col) as converted
string_colconverted
'true'True
'True'True
' true 'False
'false'False
'NOT TRUE'False
nullnull
SELECT to_boolean(double_col) as converted
double_colconverted
1.0True
0.0False
0.5True
5e-324True
1.7976931348623157e+308True
InfinityTrue
-InfinityTrue
NaNFalse
SELECT to_boolean(string_arr_col) as converted
string_arr_colconverted
[]null
['true']True
['false']False
['false', 'true']null

TO_BOOLEAN_ERROR

If converting the given value to a Boolean using to_boolean() would result in an error, it returns the error; otherwise returns null.

SYNTAX

to_boolean_error(input)

  • input
    • Type: any
    • Description: the input value

OUTPUT

string

EXAMPLES

SELECT to_boolean(string_col) as converted, to_boolean_error(string_col) as error
string_colerrorconverted
'true'nullTrue
'false'nullFalse
nullnullnull
SELECT to_boolean(string_arr_col) as converted, to_boolean_error(string_arr_col) as error
string_arr_colerrorconverted
[]'ConversionException: can't convert empty array'null
['true']nullTrue
['true', 'false']'ConversionException: can't convert array of length: 2'null
nullnullnull

TO_DOUBLE

Attempts to convert a value of any type to a Double. If conversion fails, it returns null. Error messages for conversion failures can be accessed with to_double_error().

SYNTAX

to_double(input)

  • input
    • Type: any
    • Description: The input value

OUTPUT

double

EXAMPLES

SELECT to_double(long_col) as converted
long_colconverted
92233720368547758079.223372036854776e+18
-9223372036854775808-9.223372036854776e+18
SELECT to_double(float_col) as converted
float_colconverted
1.01.0
0.250.25
1.4e-451.401298464324817e-45
3.4028235e+383.4028234663852886e+38
InfinityInfinity
NaNNaN
SELECT to_double(string_col) as converted
string_colconverted
'one'null
'1'1.0
'1F'1.0
'1D'1.0
'1.7976931348623157E308'1.7976931348623157e+308
SELECT to_double(bool_col) as converted
bool_colconverted
True1.0
False0.0
nullnull
SELECT to_double(string_arr_col) as converted
string_arr_colconverted
[]null
['1']1.0
['0']0.0
['NaN']NaN
['0', '1']null

TO_DOUBLE_ERROR

If converting the given value to a Double using to_double() would result in an error, it returns the error; otherwise returns null.

SYNTAX

to_double_error(input)

  • input
    • Type: any
    • Description: the input value

OUTPUT

string

EXAMPLES

SELECT to_double(string_col) as converted, to_double_error(string_col) as error
string_colerrorconverted
'one''NumberFormatException: For input string: "one"'null
'1'null1.0
SELECT to_double(string_arr_col) as converted, to_double_error(string_arr_col) as error
string_arr_colerrorconverted
[]'ConversionException: can't convert empty array'null
['1']null1.0
['one']'NumberFormatException: For input string: "one"'null
['1', '2']'ConversionException: can't convert array of length: 2'null
nullnullnull

TO_INT

Attempts to convert a value of any type to an Integer. If conversion fails, it returns null. Error messages for conversion failures can be accessed with to_int_error().

SYNTAX

to_int(value)

  • value
    • Type: any
    • Description: The input value

OUTPUT

int

EXAMPLES

SELECT to_int(double_col) as converted
double_colconverted
0.250
0.51
100.0100
1.7976931348623157e+308null
Infinitynull
NaNnull
SELECT to_int(long_col) as converted
long_colconverted
21474836472147483647
2147483648null
SELECT to_int(bool_col) as converted
bool_colconverted
True1
False0
nullnull
SELECT to_int(string_col) as converted
string_colconverted
'1'1
'1.0'null
'one'null
SELECT to_int(string_arr_col) as converted
string_arr_colconverted
[]null
['1']1
['0']0
['NaN']null
['0', '1']null

TO_INT_ERROR

If converting the given value to an Integer using to_int() would result in an error, it returns the error; otherwise returns null.

SYNTAX

to_int_error(input)

  • input
    • Type: any
    • Description: the input value

OUTPUT

string

EXAMPLES

SELECT to_int(string_col) as converted, to_int_error(string_col) as error
string_colerrorconverted
'one''NumberFormatException: For input string: "one"'null
'1'null1
SELECT to_int(string_arr_col) as converted, to_int_error(string_arr_col) as error
string_arr_colerrorconverted
[]'ConversionException: can't convert empty array'null
['1']null1
['one']'NumberFormatException: For input string: "one"'null
['1', '2']'ConversionException: can't convert array of length: 2'null
nullnullnull
SELECT to_int(double_col) as converted, to_int_error(double_col) as error
double_colerrorconverted
42.0null42
NaNConversionException: Integer cannot represent NaNnull
1.7976931348623157e+308'MagnitudeUnsupportedException: Integer cannot represent a number of this magnitude: 1.7976931348623157E308'null

TO_LONG

Attempts to convert a value of any type to a Long. If conversion fails, it returns null. Error messages for conversion failures can be accessed with to_long_error().

SYNTAX

to_long(value)

  • value
    • Type: any
    • Description: the input value

OUTPUT

long

EXAMPLES

SELECT to_long(double_col) as converted
double_colconverted
0.250
0.51
100.0100
1.7976931348623157e+308null
Infinitynull
NaNnull
SELECT to_long(bool_col) as converted
bool_colconverted
True1
False0
nullnull
SELECT to_long(string_col) as converted_str
string_colconverted_str
'1'1
'1L'null
'1.0'null
'one'null
SELECT to_long(string_arr_col) as converted
string_arr_colconverted
[]null
['1']1
['0']0
['NaN']null
['0', '1']null

TO_LONG_ERROR

If converting the given value to a Long using to_long() would result in an error, it returns the error; otherwise returns null.

SYNTAX

to_long_error(input)

  • input
    • Type: any
    • Description: the input value

OUTPUT

string

EXAMPLES

SELECT to_long(string_col) as converted, to_long_error(string_col) as error
string_colerrorconverted
'one''NumberFormatException: For input string: "one"'null
'1'null1
SELECT to_long(string_arr_col) as converted, to_long_error(string_arr_col) as error
string_arr_colerrorconverted
[]'ConversionException: can't convert empty array'null
['1']null1
['one']'NumberFormatException: For input string: "one"'null
['1', '2']'ConversionException: can't convert array of length: 2'null
nullnullnull
SELECT to_long(double_col) as converted, to_long_error(double_col) as error
double_colerrorconverted
42.0null42
NaNConversionException: Long cannot represent NaNnull
1.7976931348623157e+308'MagnitudeUnsupportedException: Long cannot represent a number of this magnitude: 1.7976931348623157E308'null

TO_STRING

Attempts to convert a value of any type to a String. If conversion fails, it returns null. Error messages for conversion failures can be accessed with to_string_error().

SYNTAX

to_string(value)

  • value
    • Type: any
    • Description: the input value

OUTPUT

string

EXAMPLES

SELECT to_string(long_col) as converted
long_colconverted
0'0'
9223372036854775807'9223372036854775807'
nullnull
SELECT to_string(double_col) as converted
double_colconverted
1.0'1.0'
0.5'0.5'
5e-324'4.9E-324'
1.7976931348623157e+308'1.7976931348623157E308'
InfinityInfinity
NaNNaN
SELECT to_string(boolean_col) as converted
boolean_colconverted
True'true'
False'false'
nullnull
SELECT to_string(array_col) as converted
array_colconverted
[]null
[1]'1'
[1, 2, 3]null

TO_STRING_ERROR

If converting the given value to a String using to_string() would result in an error, it returns the error; otherwise returns null.

SYNTAX

to_string_error(input)

  • input
    • Type: any
    • Description: the input value

OUTPUT

string

EXAMPLE

SELECT to_string(int_arr_col) as converted, to_string_error(int_arr_col) as error
int_arr_colerrorconverted
[]'ConversionException: can't convert empty array'null
[1]null'1'
[1, 2]'ConversionException: can't convert array of length: 2'null
nullnullnull

TRIM

Trim the spaces from both ends for the specified string.

SYNTAX

trim(input)

  • input
    • Type: string
    • Description: The value to trim

OUTPUT

string

EXAMPLE

SELECT trimmable, trim(trimmable) as trimmed
trimmabletrimmedtrimmable
'hair ''hair''hair '
' beard''beard'' beard'
' hedges ''hedges'' hedges '
'eyebrows''eyebrows''eyebrows'
nullnullnull

TYPEOF

Returns the Spark type of the given object as a string.

SYNTAX

typeof(object)

  • object
    • Type: any
    • Description: The object to find the type of

OUTPUT

string

EXAMPLES

SELECT typeof(int) AS type
inttype
13'Integer'
-13'Integer'
SELECT typeof(double) AS type
doubletype
13.0'Double'
-13.0'Double'
SELECT typeof(boolean) AS type
booleantype
True'Boolean'
False'Boolean'
SELECT typeof(string) AS type
stringtype
'this is a string''String'
'''String'
SELECT typeof(arrays) AS type
arraystype
['a', 'b', 'c']'Object[]'
[]'Object[]'
[null]'Object[]'
nullnull

UPPER

Converts a string to uppercase. Applies to all characters.

SYNTAX

upper(input)

  • input
    • Type: string
    • Description: The string to convert to uppercase

OUTPUT

string

EXAMPLE

SELECT upper(string) as uppercase_string
stringuppercase_string
'all_lower''ALL_LOWER'
'ALL UPPER''ALL UPPER'
'mIxEd''MIXED'
''''
nullnull

VALID_EMAIL

Returns whether the supplied string is a valid email or not.

SYNTAX

valid_email(this_might_be_an_email)

  • this_might_be_an_email
    • Type: string
    • Description: The string to be checked for email validity

OUTPUT

bool

EXAMPLE

SELECT valid_email(emails) as is_valid_email
emailsis_valid_email
'[email protected]'True
'nathan'False
nullnull

YEAR

Gets the year field of a date.

SYNTAX

year(datetime)

  • datetime
    • Type: string
    • Description: The timestamp to get the year of

OUTPUT

int

EXAMPLES

SELECT year(date) AS year
dateyear
'0020'20
'1997-10'1997
'2077-06-23+06:45'2077
'2001-03-14T15:43:19.155'2001
'1580-01-04 06:30-04:00'1580
SELECT year(date) AS year, datetime_error(date) as error
descriptiondateerroryear
'null value'nullnullnull
'empty string''''Text '' could not be parsed at index 0'null
'not a date''I am not a date''Text 'I am not a date' could not be parsed at index 0'null
'starts date-like''2018-03-oops''Text '2018-03-oops' could not be parsed, unparsed text found at index 7'null
'bad date''2018-03-95''Text '2018-03-95' could not be parsed: Invalid value for DayOfMonth (valid values 1 - 28/31): 95'null

INT.RANGE

Returns an array containing all integers, in order, between start (inclusive) and stop (exclusive).

SYNTAX

int.range(start, stop)

  • start
    • Type: int
    • Description: The starting integer
  • stop
    • Type: int
    • Description: The integer to stop before

OUTPUT

{'type': 'array', 'elementType': 'int'}

EXAMPLE

SELECT int.range(start, stop) as range_of_integers;
startstoprange_of_integers
03[0, 1, 2]
13[1, 2]
14[1, 2, 3]
55[]
-5-2[-5, -4, -3]
-22[-2, -1, 0, 1]
-2-3null
65null
null1null
1nullnull

LEGACY.HASH

NOTE: this function is DEPRECATED because of incorrectness. This incorrectness is the result of nulls and arrays not being considered when calculating the hash value. Instead, we suggest using the correct hash() function.
Returns a number, called a hash code, based on the input data. The hash code is always the same given the same inputs and is practically guaranteed to be different given different inputs.

SYNTAX

legacy.hash(inputs)

  • inputs
    • Type: any
    • Description: The input values to hash

OUTPUT

long

EXAMPLES

SELECT legacy.hash(A, B, C) AS result
ABCresult
20'twenty'[3.0, 8.0, 9.0]-3390176128415294659
40'forty'[18.9, 21.1]-6183879932931889889
nullnull[]0
SELECT legacy.hash(A) AS result
Aresult
null0
[]0
SELECT legacy.hash(array(array.concat(A, B))) == legacy.hash(array(A, B)) AS result
ABresult
['a', 'b']['10', '20']True
SELECT legacy.hash(A) == legacy.hash(B) AS result
ABresult
[1, 2][2, 1]False
[null][null, null]True
SELECT legacy.hash(A) == legacy.hash(B) AS result
ABresult
['true', 'false'][True, False]False
SELECT legacy.hash(A) == legacy.hash(B) AS result
ABresult
['1', '2'][1, 2]False

MATH.SIN

Returns the Sine of the input in radians.

SYNTAX

math.sin(input)

  • input
    • Type: double
    • Description: The attribute to return the sine of

OUTPUT

double

EXAMPLE

SELECT math.sin(radians) as sine_output
radianssine_output
1.57079632679489661.0
0.00.0
NaNNaN
InfinityNaN
-InfinityNaN
nullnull

MATH.COS

Returns the Cosine of the input in radians.

SYNTAX

math.cos(input)

  • input
    • Type: double
    • Description: The attribute to return the cosine of

OUTPUT

double

EXAMPLE

SELECT math.cos(radians) as cosine_output
radianscosine_output
3.141592653589793-1.0
0.01.0
NaNNaN
InfinityNaN
-InfinityNaN
nullnull

MATH.TAN

Returns the trigonometric Tangent of an angle.

SYNTAX

math.tan(input)

  • input
    • Type: double
    • Description: An angle, in radians

OUTPUT

double

EXAMPLE

SELECT math.tan(input) as tan_output
inputtan_output
0.00.0
InfinityNaN
-InfinityNaN
NaNNaN
nullnull

MATH.ASIN

Computes the Inverse Sine of the input in radians.

SYNTAX

math.asin(input)

  • input
    • Type: double
    • Description: The attribute to return the arcsine of

OUTPUT

double

EXAMPLE

SELECT math.asin(input) as arcsine_output
inputarcsine_output
1.01.5707963267948966
20.0NaN
0.00.0
InfinityNaN
-InfinityNaN
nullnull

MATH.ACOS

Computes the Inverse Cosine of the input in radians.

SYNTAX

math.acos(input)

  • input
    • Type: double
    • Description: The attribute to return the arccosine of

OUTPUT

double

EXAMPLE

SELECT math.acos(input) as arccosine_output
inputarccosine_output
1.00.0
0.01.5707963267948966
InfinityNaN
-InfinityNaN
nullnull

MATH.ATAN

Computes the Inverse Tangent of the input in radians.

SYNTAX

math.atan(input)

  • input
    • Type: double
    • Description: The attribute to return the arctangent of

OUTPUT

double

EXAMPLE

SELECT math.atan(input) as arctan_output
inputarctan_output
0.00.0
1.00.7853981633974483
NaNNaN
nullnull

MATH.ATAN2

Returns the angle from the conversion of rectangular coordinates to polar coordinates using the Two Argument Arctangent.

SYNTAX

math.atan2(y, x)

  • y
    • Type: double
    • Description: The y coordinate in the cartesian plane
  • x
    • Type: double
    • Description: The x coordinate in the cartesian plane

OUTPUT

double

EXAMPLE

SELECT math.atan2(y, x) as arctan2_output
yxarctan2_output
0.00.00.0
0.0Infinity0.0
0.0-Infinity3.141592653589793
NaN0.0NaN
null0.0null

MATH.DEGREES

Converts the radian input to degrees.

SYNTAX

math.degrees(input)

  • input
    • Type: double
    • Description: An angle, in radians

OUTPUT

double

EXAMPLE

SELECT math.degrees(radians) as degrees
radiansdegrees
0.00.0
3.141592653589793180.0
InfinityInfinity
-Infinity-Infinity
NaNNaN
nullnull

MATH.RADIANS

Converts the degree input to radians.

SYNTAX

math.radians(input)

  • input
    • Type: double
    • Description: An angle, in degrees

OUTPUT

double

EXAMPLE

SELECT math.radians(degrees) as radians
degreesradians
0.00.0
180.03.141592653589793
InfinityInfinity
-Infinity-Infinity
NaNNaN
nullnull

MATH.LN

Computes the natural logarithm.

SYNTAX

math.ln(input)

  • input
    • Type: double
    • Description: The input value

OUTPUT

double

EXAMPLE

SELECT math.ln(input) as naturalLog
inputnaturalLog
0.0-Infinity
1.00
2.7182818284590451
InfinityInfinity
-InfinityNaN
-1.0NaN
NaNNaN
nullnull

MATH.LOG10

Computes the logarithm with base 10.

SYNTAX

math.log10(input)

  • input
    • Type: double
    • Description: The input value

OUTPUT

double

EXAMPLE

SELECT math.log10(input) as log
inputlog
0.0-Infinity
1.00
101
10003
InfinityInfinity
-InfinityNaN
-1.0NaN
NaNNaN
nullnull

MATH.LOG

Computes the logarithm of a value for a specific base.

SYNTAX

math.log(input, base)

  • input
    • Type: double
    • Description: The input value
  • base
    • Type: double
    • Description: The base value

OUTPUT

double

EXAMPLE

SELECT math.log(input, base) as log
inputbaselog
0.00NaN
1.01NaN
10101
100001002
Infinity2Infinity
-Infinity4NaN
-1.010NaN
NaNNaNNaN
null10null
10nullnull
nullnullnull