GSP-374 : Predict Soccer Match Outcomes with BigQuery ML
Overview
Create all the necessary table:
spls/bq-soccer-analytics/competitions.json -> competitions
spls/bq-soccer-analytics/matches.json -> matches
spls/bq-soccer-analytics/teams.json -> teams
spls/bq-soccer-analytics/players.json -> players
spls/bq-soccer-analytics/events.json -> events
These tables need to be created in this lab.
Task - 1: Data ingestion
Create a table as mentioned in the lab instructions page.
Task - 2: Analyze soccer data
1SELECT
2playerId,
3(Players.firstName || ' ' || Players.lastName) AS playerName,
4COUNT(id) AS numPKAtt,
5SUM(IF(101 IN UNNEST(tags.id), 1, 0)) AS numPKGoals,
6SAFE_DIVIDE(
7SUM(IF(101 IN UNNEST(tags.id), 1, 0)),
8COUNT(id)
9) AS PKSuccessRate
10FROM
11`soccer.<enter table name given in the lab>` Events
12LEFT JOIN
13`soccer.players` Players ON
14Events.playerId = Players.wyId
15WHERE
16eventName = 'Free Kick' AND
17subEventName = 'Penalty'
18GROUP BY
19playerId, playerName
20HAVING
21numPkAtt >= 5
22ORDER BY
23PKSuccessRate DESC, numPKAtt DESC
Task - 3: Gain insight by analyzing soccer data
1WITH
2Shots AS
3(
4SELECT
5*,
6/* 101 is known Tag for 'goals' from goals table */
7(101 IN UNNEST(tags.id)) AS isGoal,
8/* Translate 0-100 (x,y) coordinate-based distances to absolute positions
9using "average" field dimensions of 105x68 before combining in 2D dist calc */
10SQRT(
11POW(
12 (100 - positions[ORDINAL(1)].x) * 120/100, //Change values according to the lab
13 2) +
14POW(
15 (60 - positions[ORDINAL(1)].y) * 69/100, //Change values according to the lab
16 2)
17) AS shotDistance
18FROM
19`soccer.<enter table name given in the lab>`
20WHERE
21/* Includes both "open play" & free kick shots (including penalties) */
22eventName = 'Shot' OR
23(eventName = 'Free Kick' AND subEventName IN ('Free kick shot', 'Penalty'))
24)
25SELECT
26ROUND(shotDistance, 0) AS ShotDistRound0,
27COUNT(*) AS numShots,
28SUM(IF(isGoal, 1, 0)) AS numGoals,
29AVG(IF(isGoal, 1, 0)) AS goalPct
30FROM
31Shots
32WHERE
33shotDistance <= 50
34GROUP BY
35ShotDistRound0
36ORDER BY
37ShotDistRound0
Task - 4: Create a regression model using soccer data
1CREATE MODEL `soccer.xg_logistic_reg_model_238` //Change values according to the lab
2OPTIONS(
3model_type = 'LOGISTIC_REG',
4input_label_cols = ['isGoal']
5) AS
6SELECT
7Events.subEventName AS shotType,
8/* 101 is known Tag for 'goals' from goals table */
9(101 IN UNNEST(Events.tags.id)) AS isGoal,
10`soccer.GetShotDistanceToGoal238`(Events.positions[ORDINAL(1)].x, //Change values according to the lab
11Events.positions[ORDINAL(1)].y) AS shotDistance,
12`soccer.GetShotAngleToGoal238`(Events.positions[ORDINAL(1)].x, //Change values according to the lab
13Events.positions[ORDINAL(1)].y) AS shotAngle
14FROM
15`soccer.<enter table name given in the lab>` Events
16LEFT JOIN
17`soccer.matches` Matches ON
18Events.matchId = Matches.wyId
19LEFT JOIN
20`soccer.competitions` Competitions ON
21Matches.competitionId = Competitions.wyId
22WHERE
23/* Filter out World Cup matches for model fitting purposes */
24Competitions.name != 'World Cup' AND
25/* Includes both "open play" & free kick shots (including penalties) */
26(
27eventName = 'Shot' OR
28(eventName = 'Free Kick' AND subEventName IN ('Free kick shot', 'Penalty'))
29)
30;
Task - 5: Make predictions from new data with the BigQuery model
1SELECT
2predicted_isGoal_probs[ORDINAL(1)].prob AS predictedGoalProb,
3* EXCEPT (predicted_isGoal, predicted_isGoal_probs),
4FROM
5ML.PREDICT(
6MODEL `soccer.xg_logistic_reg_model_238`, //Change values according to the lab
7(
8SELECT
9 Events.playerId,
10 (Players.firstName || ' ' || Players.lastName) AS playerName,
11 Teams.name AS teamName,
12 CAST(Matches.dateutc AS DATE) AS matchDate,
13 Matches.label AS match,
14/* Convert match period and event seconds to minute of match */
15 CAST((CASE
16 WHEN Events.matchPeriod = '1H' THEN 0
17 WHEN Events.matchPeriod = '2H' THEN 45
18 WHEN Events.matchPeriod = 'E1' THEN 90
19 WHEN Events.matchPeriod = 'E2' THEN 105
20 ELSE 120
21 END) +
22 CEILING(Events.eventSec / 60) AS INT64)
23 AS matchMinute,
24 Events.subEventName AS shotType,
25 /* 101 is known Tag for 'goals' from goals table */
26 (101 IN UNNEST(Events.tags.id)) AS isGoal,
27
28 `soccer.GetShotDistanceToGoal238`(Events.positions[ORDINAL(1)].x, //Change values according to the lab
29 Events.positions[ORDINAL(1)].y) AS shotDistance,
30 `soccer.GetShotAngleToGoal238`(Events.positions[ORDINAL(1)].x, //Change values according to the lab
31 Events.positions[ORDINAL(1)].y) AS shotAngle
32FROM
33 `soccer.<enter table name given in the lab>` Events
34LEFT JOIN
35 `soccer.matches` Matches ON
36 Events.matchId = Matches.wyId
37LEFT JOIN
38 `soccer.competitions` Competitions ON
39 Matches.competitionId = Competitions.wyId
40LEFT JOIN
41 `soccer.players` Players ON
42 Events.playerId = Players.wyId
43LEFT JOIN
44 `soccer.teams` Teams ON
45 Events.teamId = Teams.wyId
46WHERE
47 /* Look only at World Cup matches to apply model */
48 Competitions.name = 'World Cup' AND
49 /* Includes both "open play" & free kick shots (but not penalties) */
50 (
51 eventName = 'Shot' OR
52 (eventName = 'Free Kick' AND subEventName IN ('Free kick shot'))
53 ) AND
54 /* Filter only to goals scored */
55 (101 IN UNNEST(Events.tags.id))
56)
57)
58ORDER BY
59predictedgoalProb
Congratulations, you're all done with the lab 😄