GSP-340 : Build and Optimize Data Warehouses with BigQuery: Challenge Lab
Overview
Create a dataset
1bq mk <Dataset Name>
Task - 1 : Create a table partitioned by date
1CREATE OR REPLACE TABLE <dataset_name>.<table_name>
2PARTITION BY date
3OPTIONS(
4partition_expiration_days=360,
5description="oxford_policy_tracker table in the COVID 19 Government Response public dataset with an expiry time set to 90 days."
6) AS
7SELECT
8 *
9FROM
10 `bigquery-public-data.covid19_govt_response.oxford_policy_tracker`
11WHERE
12 alpha_3_code NOT IN ('GBR', 'BRA', 'CAN','USA')
Task - 2 : Add new columns to your table
1ALTER TABLE <dataset_name>.<table_name>
2ADD COLUMN population INT64,
3ADD COLUMN country_area FLOAT64,
4ADD COLUMN mobility STRUCT<
5 avg_retail FLOAT64,
6 avg_grocery FLOAT64,
7 avg_parks FLOAT64,
8 avg_transit FLOAT64,
9 avg_workplace FLOAT64,
10 avg_residential FLOAT64
11>
Task - 3 : Add country population data to the population column
1CREATE OR REPLACE TABLE <dataset_name>.pop_data_2019 AS
2SELECT
3 country_territory_code,
4 pop_data_2019
5FROM
6 `bigquery-public-data.covid19_ecdc.covid_19_geographic_distribution_worldwide`
7GROUP BY
8 country_territory_code,
9 pop_data_2019
10ORDER BY
11 country_territory_code
1UPDATE
2 `<dataset_name>.<table_name>` t0
3SET
4 population = t1.pop_data_2019
5FROM
6 `<dataset_name>.pop_data_2019` t1
7WHERE
8 CONCAT(t0.alpha_3_code) = CONCAT(t1.country_territory_code);
Task - 4 : Add country area data to the country_area column
1UPDATE
2 `<dataset_name>.<table_name>` t0
3SET
4 t0.country_area = t1.country_area
5FROM
6 `bigquery-public-data.census_bureau_international.country_names_area` t1
7WHERE
8 t0.country_name = t1.country_name
Task - 5 : Populate the mobility record data
1UPDATE
2 `<dataset_name>.<table_name>` t0
3SET
4 t0.mobility.avg_retail = t1.avg_retail,
5 t0.mobility.avg_grocery = t1.avg_grocery,
6 t0.mobility.avg_parks = t1.avg_parks,
7 t0.mobility.avg_transit = t1.avg_transit,
8 t0.mobility.avg_workplace = t1.avg_workplace,
9 t0.mobility.avg_residential = t1.avg_residential
10FROM
11 ( SELECT country_region, date,
12 AVG(retail_and_recreation_percent_change_from_baseline) as avg_retail,
13 AVG(grocery_and_pharmacy_percent_change_from_baseline) as avg_grocery,
14 AVG(parks_percent_change_from_baseline) as avg_parks,
15 AVG(transit_stations_percent_change_from_baseline) as avg_transit,
16 AVG(workplaces_percent_change_from_baseline) as avg_workplace,
17 AVG(residential_percent_change_from_baseline) as avg_residential
18 FROM `bigquery-public-data.covid19_google_mobility.mobility_report`
19 GROUP BY country_region, date
20 ) AS t1
21WHERE
22 CONCAT(t0.country_name, t0.date) = CONCAT(t1.country_region, t1.date)
23
Task - 6 : Query missing data in population & country_area columns
1SELECT country_name, population
2FROM `<dataset_name>.<table_name>`
3WHERE population is NULL
1SELECT country_name, country_area
2FROM `<dataset_name>.<table_name>`
3WHERE country_area IS NULL
1SELECT DISTINCT country_name
2FROM `<dataset_name>.<table_name>`
3WHERE population is NULL
4UNION ALL
5SELECT DISTINCT country_name
6FROM `<dataset_name>.<table_name>`
7WHERE country_area IS NULL
8ORDER BY country_name ASC
Congratulations, you're all done with the lab 😄