Time Travel
DESCRIBE HISTORY customer_data_delta
SELECT COUNT(*) FROM customer_data_delta VERSION AS OF 1
SELECT SUM(total_orders) -
       (SELECT SUM(total_orders)
        FROM customer_counts VERSION AS OF 0
        WHERE Country = 'Sweden') AS new_entries
FROM customer_counts
WHERE Country = 'Sweden'
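Delta also supports querying a snapshot by timestamp instead of version number. A minimal sketch (the timestamp literal is a hypothetical value; pick one from the DESCRIBE HISTORY output):
SELECT COUNT(*) FROM customer_data_delta TIMESTAMP AS OF '2022-06-08 10:00:00'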
ZORDER
OPTIMIZE customer_counts ZORDER BY (CustomerID)
SELECT CustomerID, SUM(total_orders) AS total FROM customer_counts GROUP BY CustomerID ORDER BY total DESC
SELECT CustomerID, COUNT(Country) AS num_countries FROM customer_counts GROUP BY CustomerID ORDER BY num_countries DESC
VACUUM
VACUUM customer_data_delta RETAIN 0 HOURS;
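Running VACUUM with RETAIN 0 HOURS trips Delta's retention safety check (the default minimum retention is 7 days). A sketch of the override, assuming you accept that time travel to the vacuumed versions stops working:
SET spark.databricks.delta.retentionDurationCheck.enabled = false;
VACUUM customer_data_delta RETAIN 0 HOURS;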
uuid
Returns a universally unique identifier (UUID) string.
> SELECT uuid();
 46707d92-02f4-4817-8116-a4c3b23e6266
The uuid() function works fine in a SELECT. But if you call uuid() inside a MERGE by assigning its value directly in the INSERT clause, you get an error along the lines of:
AnalysisException: nondeterministic expressions are only allowed in Project, Filter, Aggregate, Window, or Generate
Instead, call uuid() in the SELECT inside the USING clause, then reference that column when assigning the value in INSERT, as shown below, and the error goes away.
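A minimal sketch of that pattern (the table and column names here are hypothetical, not from the original notes):
MERGE INTO customers AS t
USING (
  -- uuid() is allowed here because this SELECT is a Project
  SELECT uuid() AS new_id, s.CustomerID, s.Country
  FROM staging_customers AS s
) AS src
ON t.CustomerID = src.CustomerID
WHEN NOT MATCHED THEN
  INSERT (id, CustomerID, Country) VALUES (src.new_id, src.CustomerID, src.Country)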
cast
Casts the value expr to the target data type type.
> SELECT cast(NULL AS INT);
 NULL
> SELECT cast(5.6 AS INT);
 5
> SELECT cast(5.6 AS DECIMAL(2, 0));
 6
> SELECT cast(-5.6 AS INT);
 -5
> SELECT cast(-5.6 AS DECIMAL(2, 0));
 -6
> SELECT cast(128 AS TINYINT);
 Overflow
> SELECT cast(128 AS DECIMAL(2, 0));
 Overflow
> SELECT cast('123' AS INT);
 123
> SELECT cast('123.0' AS INT);
 Invalid format
> SELECT cast(TIMESTAMP'1970-01-01 00:00:01' AS LONG);
 1
> SELECT cast(TIMESTAMP'1970-01-01 00:00:00.000001' AS DOUBLE);
 1.0E-6
> SELECT cast(TIMESTAMP'2022-02-01 00:00:00' AS SMALLINT);
 error: overflow
> SELECT cast(true AS BOOLEAN);
 true
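When a failing cast should yield NULL instead of an error, Databricks SQL also provides try_cast with the same syntax; a quick sketch:
> SELECT try_cast('abc' AS INT);
 NULL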
Time functions
current_timestamp and now
Returns the current timestamp at the start of query evaluation.
- current_timestamp function (Databricks SQL) | Databricks on AWS
- now function (Databricks SQL) | Databricks on AWS
> SELECT current_timestamp();
 2022-06-08T10:22:43.561+0000
> SELECT current_timestamp;
 2022-06-08T10:23:00.961+0000
> SELECT now();
 2022-06-08T10:23:44.145+0000
current_timezone
Returns the current session local timezone.
> SELECT current_timezone();
 Etc/UTC
> SELECT current_timezone();
 Asia/Shanghai
from_utc_timestamp
Returns a timestamp in expr specified in UTC in the timezone timeZone.
> SELECT from_utc_timestamp(current_timestamp, 'Asia/Bangkok');
 2022-06-08T17:30:43.343+0000
> SELECT from_utc_timestamp('2022-06-08', 'Asia/Bangkok');
 2022-06-08T07:00:00.000+0000
> SELECT from_utc_timestamp('2022-06-08 00:30:00.0', 'GMT+7');
 2022-06-08T07:30:00.000+0000
to_utc_timestamp
Returns the timestamp in expr in a different timezone as UTC.
> SELECT to_utc_timestamp('2022-06-01', 'Asia/Bangkok');
 2022-05-31T17:00:00.000+0000
> SELECT to_utc_timestamp('2022-06-01', 'GMT+7');
 2022-05-31T17:00:00.000+0000
> SELECT to_utc_timestamp('2022-06-01 00:00:00 UTC+07:00', 'GMT+7');
 2022-05-31T10:00:00.000+0000
> SELECT to_utc_timestamp('2022-06-01 00:00:00 UTC+00:00', 'GMT+7');
 2022-05-31T17:00:00.000+0000
> SELECT to_utc_timestamp('2022-06-01 00:00:00 UTC-07:00', 'GMT+7');
 2022-06-01T00:00:00.000+0000
Prefer the 'GMT+7' form (an explicit time zone offset) over 'Asia/Bangkok', because some time zones observe daylight saving time (DST). See Dates and timestamps | Databricks on AWS:
As you can see from the preceding examples, the mapping of time zone names to offsets is ambiguous, and is not one to one. In the cases when it is possible, when constructing timestamps we recommend specifying exact time zone offsets, for example 2019-11-03 01:30:00 UTC-07:00.
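A quick illustration of that ambiguity (the zone America/Los_Angeles is an assumed example, not from the original notes): the same zone name maps to UTC-8 in winter but UTC-7 in summer, while a fixed offset never shifts.
> SELECT to_utc_timestamp('2019-01-15 00:00:00', 'America/Los_Angeles');
 2019-01-15T08:00:00.000+0000
> SELECT to_utc_timestamp('2019-07-15 00:00:00', 'America/Los_Angeles');
 2019-07-15T07:00:00.000+0000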
SELECT CURRENT_TIMESTAMP AS UTC_time
     , from_utc_timestamp(CURRENT_TIMESTAMP, 'GMT+7') AS local_time
     , to_utc_timestamp(from_utc_timestamp(CURRENT_TIMESTAMP, 'GMT+7'), 'GMT+7') AS UTC_time_again;
timestampadd
Adds value units to a timestamp expr.
timestampadd(unit, value, expr)
unit: { MICROSECOND | MILLISECOND | SECOND | MINUTE | HOUR | DAY | DAYOFYEAR | WEEK | MONTH | QUARTER | YEAR }
> SELECT timestampadd(HOUR, 7, TIMESTAMP'2022-02-28 00:00:00');
 2022-02-28T07:00:00.000+0000
> SELECT timestampadd(MONTH, -1, TIMESTAMP'2022-03-31 00:00:00');
 2022-02-28T00:00:00.000+0000
> SELECT TIMESTAMPADD(HOUR, +7, CURRENT_TIMESTAMP());
 2022-06-09T09:41:24.763+0000
to_date
Returns expr cast to a date using an optional formatting.
- to_date function (Databricks SQL) | Databricks on AWS
- Datetime patterns (Databricks SQL) | Databricks on AWS
to_date(expr [, fmt] )
> SELECT to_date('2009-07-30 04:17:52');
 2009-07-30
> SELECT to_date('2016-12-31', 'yyyy-MM-dd');
 2016-12-31
to_timestamp
Returns expr cast to a timestamp using an optional formatting.
> SELECT to_timestamp('2016-12-31 00:12:00');
 2016-12-31T00:12:00.000+0000
> SELECT to_timestamp('2016-12-31', 'yyyy-MM-dd');
 2016-12-31T00:00:00.000+0000
> SELECT to_timestamp('2022-06-09 10:41:48', 'yyyy-MM-dd HH:mm:ss');
 2022-06-09T10:41:48.000+0000
String functions
substr and substring
Returns the substring of expr that starts at pos and is of length len.
- substr function (Databricks SQL) | Databricks on AWS
- substring function (Databricks SQL) | Databricks on AWS
substr(expr, pos [, len] )
substr(expr FROM pos[ FOR len])
substring(expr, pos [, len])
substring(expr FROM pos [FOR len])
> SELECT substring('Spark SQL', 5);
 k SQL
> SELECT substring('Spark SQL', -3);
 SQL
> SELECT substring('Spark SQL', 5, 1);
 k
> SELECT substring('Spark SQL' FROM 5);
 k SQL
> SELECT substring('Spark SQL' FROM -3);
 SQL
> SELECT substring('Spark SQL' FROM 5 FOR 1);
 k
> SELECT substring('Spark SQL' FROM -10 FOR 5);
 Spar
> SELECT current_timestamp;
 2022-06-16T15:58:43.875+0000
> SELECT cast(current_timestamp() AS STRING);
 2022-06-16 15:58:43.875
> SELECT substring(cast(current_timestamp() AS STRING) FROM -23 FOR 19);
 2022-06-16 15:58:43
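As an aside (not in the original notes), date_format can produce the same truncated string directly, without counting character positions:
> SELECT date_format(current_timestamp, 'yyyy-MM-dd HH:mm:ss');
 2022-06-16 15:58:43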
md5
Returns an MD5 128-bit checksum of expr as a hex string.
> SELECT md5('Spark');
 8cde774d6f7333752ed72cacddb05126
Concatenation
concat
Returns the concatenation of the arguments.
> SELECT concat('Spark', 'SQL');
 SparkSQL
> SELECT concat(array(1, 2, 3), array(4, 5), array(6));
 [1,2,3,4,5,6]
concat_ws
Returns the concatenation of the strings separated by sep.
> SELECT concat_ws(' ', 'Spark', 'SQL');
 Spark SQL
> SELECT concat_ws('s');
 ''
> SELECT concat_ws(',', 'Spark', array('S', 'Q', NULL, 'L'), NULL);
 Spark,S,Q,L
array_join
Concatenates the elements of array.
> SELECT array_join(array('hello', 'world'), ' ');
 hello world
> SELECT array_join(array('hello', NULL, 'world'), ' ');
 hello world
> SELECT array_join(array('hello', NULL, 'world'), ' ', ',');
 hello , world
collect_list
Returns an array consisting of all values in expr within the group.
collect_list ( [ALL | DISTINCT] expr ) [FILTER ( WHERE cond ) ]
> SELECT collect_list(col) FROM VALUES (1), (2), (NULL), (1) AS tab(col);
 [1,2,1]
> SELECT collect_list(DISTINCT col) FROM VALUES (1), (2), (NULL), (1) AS tab(col);
 [1,2]
Combine the array from collect_list into a single string with array_join:
> SELECT array_join(collect_list(col), ',') FROM VALUES (1), (2), (NULL), (1) AS tab(col);
 1,2,1
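The same combination emulates a GROUP_CONCAT-style aggregation per group; a minimal sketch against the customer_counts table used earlier (assuming one row per CustomerID/Country pair):
SELECT CustomerID, array_join(collect_list(Country), ',') AS countries
FROM customer_counts
GROUP BY CustomerID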
Numeric functions
abs
Returns the absolute value of the numeric value in expr.
> SELECT abs(-1);
 1
> SELECT abs(cast(-32768 AS SMALLINT));
 Error: ARITHMETIC_OVERFLOW
mod
Returns the remainder after dividend / divisor.
> SELECT MOD(2, 1.8);
 0.2
> SELECT MOD(2, 0);
 Error: DIVIDE_BY_ZERO
hash
Returns a hash value of the arguments.
> SELECT hash('Spark', array(123), 2);
 -1321691492
> SELECT hash('Spark', array(123));
 1262264271
> SELECT hash('Spark');
 228093765
crc32
Returns a cyclic redundancy check value of expr.
> SELECT crc32('Spark');
 1557323817