GSP-787: Insights From Data With Bigquery Challenge Lab

GSP-787: Insights From Data With Bigquery Challenge Lab

Overview

From Navigation Menu -> BigQuery.

Task - 1 : Total Confirmed Cases

1SELECT sum(cumulative_confirmed) as total_cases_worldwide
2FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
3WHERE date='2020-04-15'

Task - 2 : Worst Affected Areas

 1with deaths_by_states as (
 2
 3    SELECT subregion1_name as state, sum(cumulative_deceased) as death_count
 4
 5    FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
 6
 7    where country_name="United States of America" and date='2020-04-10' and subregion1_name is NOT NULL
 8
 9    group by subregion1_name
10)
11
12select count(*) as count_of_states
13
14from deaths_by_states
15
16where death_count > 100

Task - 3 : Identifying Hotspots

 1SELECT * FROM (
 2
 3    SELECT subregion1_name as state, sum(cumulative_confirmed) as total_confirmed_cases
 4
 5    FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
 6
 7    WHERE country_code="US" AND date='2020-04-10' AND subregion1_name is NOT NULL
 8
 9    GROUP BY subregion1_name
10
11    ORDER BY total_confirmed_cases DESC
12)
13WHERE total_confirmed_cases > 1000

Task - 4 : Fatality Ratio

1SELECT sum(cumulative_confirmed) as total_confirmed_cases, sum(cumulative_deceased) as total_deaths, (sum(cumulative_deceased)/sum(cumulative_confirmed))*100 as case_fatality_ratio
2
3FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
4
5where country_name="Italy" AND date BETWEEN '2020-04-01'and '2020-04-30'

Task - 5 : Identifying specific day

1SELECT date
2
3FROM `bigquery-public-data.covid19_open_data.covid19_open_data`
4
5where country_name="Italy" and cumulative_deceased>10000
6
7order by date asc
8
9limit 1

Task - 6 : Finding days with zero net new cases

 1WITH india_cases_by_date AS (
 2
 3  SELECT
 4
 5    date,
 6
 7    SUM( cumulative_confirmed ) AS cases
 8
 9  FROM
10
11    `bigquery-public-data.covid19_open_data.covid19_open_data`
12
13  WHERE
14
15    country_name ="India"
16
17    AND date between '2020-02-21' and '2020-03-15'
18
19  GROUP BY
20
21    date
22
23  ORDER BY
24
25    date ASC
26
27 )
28
29, india_previous_day_comparison AS
30
31(SELECT
32
33  date,
34
35  cases,
36
37  LAG(cases) OVER(ORDER BY date) AS previous_day,
38
39  cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases
40
41FROM india_cases_by_date
42
43)
44
45select count(*)
46
47from india_previous_day_comparison
48
49where net_new_cases=0

Task - 7 : Doubling rate

 1WITH us_cases_by_date AS (
 2
 3  SELECT
 4
 5    date,
 6
 7    SUM(cumulative_confirmed) AS cases
 8
 9  FROM
10
11    `bigquery-public-data.covid19_open_data.covid19_open_data`
12
13  WHERE
14
15    country_name="United States of America"
16
17    AND date between '2020-03-22' and '2020-04-20'
18
19  GROUP BY
20
21    date
22
23  ORDER BY
24
25    date ASC
26
27 )
28
29
30
31, us_previous_day_comparison AS
32
33(SELECT
34
35  date,
36
37  cases,
38
39  LAG(cases) OVER(ORDER BY date) AS previous_day,
40
41  cases - LAG(cases) OVER(ORDER BY date) AS net_new_cases,
42
43  (cases - LAG(cases) OVER(ORDER BY date))*100/LAG(cases) OVER(ORDER BY date) AS percentage_increase
44
45FROM us_cases_by_date
46
47)
48
49
50
51select Date, cases as Confirmed_Cases_On_Day, previous_day as Confirmed_Cases_Previous_Day, percentage_increase as Percentage_Increase_In_Cases
52
53from us_previous_day_comparison
54
55where percentage_increase > 10

Task - 8 : Recovery rate

 1WITH cases_by_country AS (
 2
 3  SELECT
 4
 5    country_name AS country,
 6
 7    sum(cumulative_confirmed) AS cases,
 8
 9    sum(cumulative_recovered) AS recovered_cases
10
11  FROM
12
13    bigquery-public-data.covid19_open_data.covid19_open_data
14
15  WHERE
16
17    date = '2020-05-10'
18
19  GROUP BY
20
21    country_name
22
23 )
24
25
26
27, recovered_rate AS
28
29(SELECT
30
31  country, cases, recovered_cases,
32
33  (recovered_cases * 100)/cases AS recovery_rate
34
35FROM cases_by_country
36
37)
38
39
40
41SELECT country, cases AS confirmed_cases, recovered_cases, recovery_rate
42
43FROM recovered_rate
44
45WHERE cases > 50000
46
47ORDER BY recovery_rate desc
48
49LIMIT 10

Task - 9 : CDGR - Cumulative Daily Growth Rate

 1WITH
 2
 3  france_cases AS (
 4
 5  SELECT
 6
 7    date,
 8
 9    SUM(cumulative_confirmed) AS total_cases
10
11  FROM
12
13    `bigquery-public-data.covid19_open_data.covid19_open_data`
14
15  WHERE
16
17    country_name="France"
18
19    AND date IN ('2020-01-24',
20
21      '2020-05-10')
22
23  GROUP BY
24
25    date
26
27  ORDER BY
28
29    date)
30
31, summary as (
32
33SELECT
34
35  total_cases AS first_day_cases,
36
37  LEAD(total_cases) OVER(ORDER BY date) AS last_day_cases,
38
39  DATE_DIFF(LEAD(date) OVER(ORDER BY date),date, day) AS days_diff
40
41FROM
42
43  france_cases
44
45LIMIT 1
46
47)
48
49select first_day_cases, last_day_cases, days_diff, POW((last_day_cases/first_day_cases),(1/days_diff))-1 as cdgr
50
51from summary

Task - 10 : Create a Datastudio report

 1SELECT
 2
 3  date, SUM(cumulative_confirmed) AS country_cases,
 4
 5  SUM(cumulative_deceased) AS country_deaths
 6
 7FROM
 8
 9  `bigquery-public-data.covid19_open_data.covid19_open_data`
10
11WHERE
12
13  date BETWEEN '2020-03-15'
14
15  AND '2020-04-30'
16
17  AND country_name ="United States of America"
18
19GROUP BY date

Congratulations, you're all done with the lab 😄