The page uses AJAX to fetch player details. The response is actually in JSON, so you could just replicate the behaviour in Python.
The table cell contains a link with a `data-playerid` attribute:
<td><a data-playerid="CD_I271072" href="javascript:void(0);">Daniel Rich</a></td>
The data for that player ID is then loaded with AJAX from:
http://www.afl.com.au/api/cfs/afl/playerProfile/CD_I271072
and
http://www.afl.com.au/api/cfs/afl/playerRatings?playerId=CD_I271072&pageSize=100
The responses contain JSON data:
{ "playerProfile" : { "id" : "CD_I271072", "position" : "Left Half Back", "surname" : "Rich", "jumperNumber" : 10, "milestones" : null, "careerAverages" : { "goals" : 0.7, "behinds" : 0.7, "superGoals" : null, "kicks" : 11.3, "handballs" : 7.8, "disposals" : 19.1, "marks" : 2.8, "bounces" : 0.1, "tackles" : 4.0, "contestedPossessions" : 8.6, "uncontestedPossessions" : 10.5, "totalPossessions" : 19.1, "inside50s" : 4.5, "marksInside50" : 0.1, "contestedMarks" : 0.2, "hitouts" : 0.1, "onePercenters" : 1.5, "disposalEfficiency" : null, "clangers" : 2.3, "freesFor" : 0.8, "freesAgainst" : 1.0, "dreamTeamPoints" : 76.4, "clearances" : { "centreClearances" : 1.4, "stoppageClearances" : 2.3, "totalClearances" : 3.7 }, "rebound50s" : 1.6, "goalAssists" : 0.6, "goalAccuracy" : null, "ratingPoints" : null, "ranking" : null, "interchangeCounts" : null }, "firstName" : "Daniel", "bio" : "<p>Daniel Rich is a high possession-winning in-and-under midfielder with a penetrating left foot and quality skills. 
The high-profile West Australian recruit received the AFL Rising Star Award in his debut season with the Lions and is now widely regarded as one of the most damaging midfielders in the AFL competition.</p>", "photoUrl" : "http://m.afl.com.au/staticfile/AFL Tenant/BrisbaneLions/Player Profiles/2014 - Profiles/RICH Daniel.png", "aflAwards" : null, "clubAwards" : null, "qa" : null, "sponsor" : null, "basicStats" : { "dateOfBirth" : "1990-06-07T02:00:00.000+0000", "draftYear" : "2008", "heightInCm" : 183, "weightInKg" : 84, "recruitedFrom" : "Subiaco (WA)", "debutYear" : "2009" }, "careerStats" : { "goals" : 67.0, "behinds" : 66.0, "superGoals" : null, "kicks" : 1139.0, "handballs" : 787.0, "disposals" : 1926.0, "marks" : 285.0, "bounces" : 8.0, "tackles" : 403.0, "contestedPossessions" : 867.0, "uncontestedPossessions" : 1060.0, "totalPossessions" : 1927.0, "inside50s" : 452.0, "marksInside50" : 14.0, "contestedMarks" : 24.0, "hitouts" : 8.0, "onePercenters" : 156.0, "disposalEfficiency" : 69.2, "clangers" : 237.0, "freesFor" : 85.0, "freesAgainst" : 101.0, "dreamTeamPoints" : 7716.0, "clearances" : { "centreClearances" : 141.0, "stoppageClearances" : 233.0, "totalClearances" : 374.0 }, "rebound50s" : 166.0, "goalAssists" : 59.0, "goalAccuracy" : 44.4, "ratingPoints" : null, "ranking" : null, "interchangeCounts" : null }, "yearlySeasonStats" : [ { "year" : "2014", "seasonId" : "CD_S2014014", "totalsAndAverages" : { "averages" : { "stats" : { "goals" : 0.0, "behinds" : 0.3, "superGoals" : null, "kicks" : 8.0, "handballs" : 7.7, "disposals" : 15.7, "marks" : 3.7, "bounces" : 0.0, "tackles" : 2.7, "contestedPossessions" : 9.7, "uncontestedPossessions" : 6.0, "totalPossessions" : 15.7, "inside50s" : 0.7, "marksInside50" : 0.0, "contestedMarks" : 0.3, "hitouts" : 0.0, "onePercenters" : 2.3, "disposalEfficiency" : null, "clangers" : 1.7, "freesFor" : 0.7, "freesAgainst" : 0.7, "dreamTeamPoints" : 60.0, "clearances" : { "centreClearances" : 0.7, "stoppageClearances" : 1.7, 
"totalClearances" : 2.3 }, "rebound50s" : 3.0, "goalAssists" : 0.0, "goalAccuracy" : null, "ratingPoints" : null, "ranking" : null, "interchangeCounts" : null }, "player" : { "playerId" : "CD_I271072", "playerName" : { "givenName" : "Daniel", "surname" : "Rich" }, "captain" : false, "playerJumperNumber" : null }, "teamId" : "CD_T20", "gamesPlayed" : 3.0, "timeOnGroundPercentage" : null }, "totals" : { "stats" : { "goals" : 0.0, "behinds" : 1.0, "superGoals" : null, "kicks" : 24.0, "handballs" : 23.0, "disposals" : 47.0, "marks" : 11.0, "bounces" : 0.0, "tackles" : 8.0, "contestedPossessions" : 29.0, "uncontestedPossessions" : 18.0, "totalPossessions" : 47.0, "inside50s" : 2.0, "marksInside50" : 0.0, "contestedMarks" : 1.0, "hitouts" : 0.0, "onePercenters" : 7.0, "disposalEfficiency" : 72.3, "clangers" : 5.0, "freesFor" : 2.0, "freesAgainst" : 2.0, "dreamTeamPoints" : 180.0, "clearances" : { "centreClearances" : 2.0, "stoppageClearances" : 5.0, "totalClearances" : 7.0 }, "rebound50s" : 9.0, "goalAssists" : 0.0, "goalAccuracy" : 0.0, "ratingPoints" : 495.3, "ranking" : 22.0, "interchangeCounts" : null }, "player" : { "playerId" : "CD_I271072", "playerName" : { "givenName" : "Daniel", "surname" : "Rich" }, "captain" : false, "playerJumperNumber" : null }, "teamId" : "CD_T20", "gamesPlayed" : 3.0, "timeOnGroundPercentage" : 63.3 } } }, // etc. 
], "seasonStats" : { "goals" : 0.0, "behinds" : 1.0, "superGoals" : null, "kicks" : 24.0, "handballs" : 23.0, "disposals" : 47.0, "marks" : 11.0, "bounces" : 0.0, "tackles" : 8.0, "contestedPossessions" : 29.0, "uncontestedPossessions" : 18.0, "totalPossessions" : 47.0, "inside50s" : 2.0, "marksInside50" : 0.0, "contestedMarks" : 1.0, "hitouts" : 0.0, "onePercenters" : 7.0, "disposalEfficiency" : 72.3, "clangers" : 5.0, "freesFor" : 2.0, "freesAgainst" : 2.0, "dreamTeamPoints" : 180.0, "clearances" : { "centreClearances" : 2.0, "stoppageClearances" : 5.0, "totalClearances" : 7.0 }, "rebound50s" : 9.0, "goalAssists" : 0.0, "goalAccuracy" : 0.0, "ratingPoints" : 495.3, "ranking" : 22.0, "interchangeCounts" : null }, "latestPlayerRating" : { "position" : "MIDFIELDER", "roundId" : "CD_R201401407", "player" : { "playerId" : "CD_I271072", "playerName" : { "givenName" : "Daniel", "surname" : "Rich" }, "captain" : false, "playerJumperNumber" : null }, "team" : { "teamId" : "CD_T20", "teamAbbr" : "BL", "teamName" : "Brisbane Lions", "teamNickname" : "Lions" }, "detailedRatings" : [ { "ratingPoints" : 478, "ranking" : 28, "ratingType" : "OVERALL", "trend" : "FALLING_FAST" }, { "ratingPoints" : 478, "ranking" : 1, "ratingType" : "TEAM", "trend" : "NO_CHANGE" }, { "ratingPoints" : 478, "ranking" : 24, "ratingType" : "POSITION", "trend" : "FALLING_FAST" } ] }, "careerGamesPlayed" : 101 } }
and
{ "playerRatings" : [ { "position": "MIDFIELDER", "roundId": "CD_R201401407", "player": { "playerId": "CD_I271072", "playerName": { "givenName": "Daniel", "surname": "Rich" }, "captain": false, "playerJumperNumber": null }, "team": { "teamId": "CD_T20", "teamAbbr": "BL", "teamName": "Brisbane Lions", "teamNickname": "Lions" }, "detailedRatings": [ { "ratingPoints": 478, "ranking": 28, "ratingType": "OVERALL", "trend": "FALLING_FAST" }, { "ratingPoints": 478, "ranking": 1, "ratingType": "TEAM", "trend": "NO_CHANGE" }, { "ratingPoints": 478, "ranking": 24, "ratingType": "POSITION", "trend": "FALLING_FAST" } ] }, // etc. ], "pageNum" : 1, "pageSize" : 100, "pagesTotal" : 1, "ratingsTotal" : 61 }
Use this to your advantage. The AJAX requests do require an `X-media-mis-token` header to be set;
the token is obtained by using a session (to track cookies) and POSTing to an API URL.
A sample script using the `requests` library with BeautifulSoup would look like:
import requests
from bs4 import BeautifulSoup
# Collect every player ID from the ratings page, then fetch each player's
# JSON profile from the API endpoint and print name and position.
page_url = 'http://www.afl.com.au/afl/stats/player-ratings/overall-standings'
token_url = 'http://www.afl.com.au/api/cfs/afl/WMCTok'
player_url = 'http://www.afl.com.au/api/cfs/afl/playerProfile/'

# A session is used so cookies are tracked across the page, token and
# profile requests.
session = requests.Session()
r = session.get(page_url)
# Name the parser explicitly so the parse does not depend on which optional
# parser libraries happen to be installed.
soup = BeautifulSoup(r.content, 'html.parser')

# The token endpoint is POSTed to; its JSON response holds the value under
# the 'token' key. The same header is reused for every profile request.
token = session.post(token_url).json()['token']
headers = {'X-media-mis-token': token}

# Only anchors that actually carry a data-playerid attribute are of interest.
for player in soup.find_all('a', {'data-playerid': True}):
    playerid = player['data-playerid']
    data_r = session.get(player_url + playerid, headers=headers)
    profile = data_r.json()['playerProfile']
    # A single pre-formatted unicode string prints the same way under both
    # the Python 2 print statement and the Python 3 print function.
    print(u'{} {} {}'.format(
        profile['firstName'], profile['surname'], profile['position']))
Last but not least, note that the token POST response contains a disclaimer:
>>> print session.post(token_url).json()['disclaimer']
All content and material contained within this site is protected by copyright owned by or licensed to Telstra. Unauthorised reproduction, publishing, transmission, distribution, copying or other use is prohibited.
Take that into account when you start using this data.