Explore NYC taxi rides over a one month time frame.
- ตาราง nyctaxi.trips
- Counter – Total Trips
- Table – Route Revenues
- Chart – Pickup Hour Distribution
- Scatter – Daily Fare to Distance Analysis
1. ตาราง nyctaxi.trips
SHOW CREATE TABLE samples.nyctaxi.trips
CREATE TABLE samples.nyctaxi.trips ( tpep_pickup_datetime TIMESTAMP, tpep_dropoff_datetime TIMESTAMP, trip_distance DOUBLE, fare_amount DOUBLE, pickup_zip INT, dropoff_zip INT ) USING delta LOCATION 'dbfs:/databricks-datasets/nyctaxi-with-zipcodes/subsampled'
SELECT * FROM samples.nyctaxi.trips LIMIT 5
# | tpep_pickup_datetime | tpep_dropoff_datetime | trip_distance | fare_amount | pickup_zip | dropoff_zip | |
---|---|---|---|---|---|---|---|
1 | 2016-02-14 16:52:13.000 | 2016-02-14 17:16:04.000 | 4.94 | 19.00 | 10282 | 10171 | |
2 | 2016-02-04 18:44:19.000 | 2016-02-04 18:46:00.000 | 0.28 | 3.50 | 10110 | 10110 | |
3 | 2016-02-17 17:13:57.000 | 2016-02-17 17:17:55.000 | 0.70 | 5.00 | 10103 | 10023 | |
4 | 2016-02-18 10:36:07.000 | 2016-02-18 10:41:45.000 | 0.80 | 6.00 | 10022 | 10017 | |
5 | 2016-02-22 14:14:41.000 | 2016-02-22 14:31:52.000 | 4.51 | 17.00 | 10110 | 10282 |
2. Counter – Total Trips
USE CATALOG SAMPLES; SELECT count(*) as total_trips FROM `samples`.`nyctaxi`.`trips` WHERE tpep_pickup_datetime BETWEEN TIMESTAMP '{{ pickup_date.start }}' AND TIMESTAMP '{{ pickup_date.end }}' AND pickup_zip IN ({{ pickupzip }})


Counter



3. Table – Route Revenues
USE CATALOG SAMPLES; SELECT T.route as `Route`, T.frequency as `Route Frequency`, T.total_fare as `Total Fares` FROM ( SELECT concat(pickup_zip, '-', dropoff_zip) AS route, count(*) as frequency, SUM(fare_amount) as total_fare FROM `samples`.`nyctaxi`.`trips` WHERE tpep_pickup_datetime BETWEEN TIMESTAMP '{{ pickup_date.start }}' AND TIMESTAMP '{{ pickup_date.end }}' AND pickup_zip IN ({{ pickupzip }}) GROUP BY 1 ) T ORDER BY 1 ASC LIMIT 200

Table



4. Chart – Pickup Hour Distribution
USE CATALOG SAMPLES; SELECT CASE WHEN T.pickup_hour = 0 THEN '00:00' WHEN T.pickup_hour = 1 THEN '01:00' WHEN T.pickup_hour = 2 THEN '02:00' WHEN T.pickup_hour = 3 THEN '03:00' WHEN T.pickup_hour = 4 THEN '04:00' WHEN T.pickup_hour = 5 THEN '05:00' WHEN T.pickup_hour = 6 THEN '06:00' WHEN T.pickup_hour = 7 THEN '07:00' WHEN T.pickup_hour = 8 THEN '08:00' WHEN T.pickup_hour = 9 THEN '09:00' WHEN T.pickup_hour = 10 THEN '10:00' WHEN T.pickup_hour = 11 THEN '11:00' WHEN T.pickup_hour = 12 THEN '12:00' WHEN T.pickup_hour = 13 THEN '13:00' WHEN T.pickup_hour = 14 THEN '14:00' WHEN T.pickup_hour = 15 THEN '15:00' WHEN T.pickup_hour = 16 THEN '16:00' WHEN T.pickup_hour = 17 THEN '17:00' WHEN T.pickup_hour = 18 THEN '18:00' WHEN T.pickup_hour = 19 THEN '19:00' WHEN T.pickup_hour = 20 THEN '20:00' WHEN T.pickup_hour = 21 THEN '21:00' WHEN T.pickup_hour = 22 THEN '22:00' WHEN T.pickup_hour = 23 THEN '23:00' ELSE 'N/A' END AS `Pickup Hour`, T.num AS `Number of Rides` FROM ( SELECT hour(tpep_pickup_datetime) AS pickup_hour, COUNT(*) AS num FROM `samples`.`nyctaxi`.`trips` WHERE tpep_pickup_datetime BETWEEN TIMESTAMP '{{ pickup_date.start }}' AND TIMESTAMP '{{ pickup_date.end }}' AND pickup_zip IN ({{ pickupzip }}) GROUP BY 1 ) T

Chart


5. Scatter – Daily Fare to Distance Analysis
USE CATALOG SAMPLES; SELECT T.weekday, CASE WHEN T.weekday = 1 THEN 'Sunday' WHEN T.weekday = 2 THEN 'Monday' WHEN T.weekday = 3 THEN 'Tuesday' WHEN T.weekday = 4 THEN 'Wednesday' WHEN T.weekday = 5 THEN 'Thursday' WHEN T.weekday = 6 THEN 'Friday' WHEN T.weekday = 7 THEN 'Saturday' ELSE 'N/A' END AS day_of_week, T.fare_amount, T.trip_distance FROM ( SELECT dayofweek(tpep_pickup_datetime) as weekday, * FROM `samples`.`nyctaxi`.`trips` WHERE ( pickup_zip in ({{ pickupzip }}) OR pickup_zip in (10018) ) AND tpep_pickup_datetime BETWEEN TIMESTAMP '{{ pickup_date.start }}' AND TIMESTAMP '{{ pickup_date.end }}' AND trip_distance < 10 ) T ORDER BY T.weekday

Scatter






