python - Access Data from Behind Java -
I'm trying to extract the goal times from the dropdown on each fixture here: http://www.bbc.co.uk/sport/football/league-one/results
I can't seem to find the data when searching - any ideas why?
# Fetch the BBC League One results page and dump what the static HTML contains.
import requests
from bs4 import BeautifulSoup

# Load the page data.
r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")
soup = BeautifulSoup(r.content)
# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(soup.prettify())

# Teams: print the text of every anchor on the page.
for link in soup.find_all("a"):
    print(link.text)

# Results: print the text of every <abbr> element on the page.
for link in soup.find_all("abbr"):
    print(link.text)
ff
So this is data heavy (not to mention slow because of loading separate pages), and they may block you for an excess amount of requests, but it is the only way I see of doing it. Go through and get the href
associated with each results
button, load that page, and parse the score information from it.
import requests
from bs4 import BeautifulSoup


def _team_summary(details, heading):
    """Return ``(team, score, scorers)`` for one team's match-details div.

    ``details`` is the ``div`` with class ``team-match-details`` for the team.
    ``heading`` becomes the first line of the returned goal-scorer text; each
    following line is the text of one ``span`` inside the next ``p`` element.
    """
    first_span = details.find_next('span')
    team = first_span.find_next('a').text
    score = first_span.find_next('span').text
    scorers = [heading]
    # The page marks minutes with U+2032 (prime); swap it for a plain
    # apostrophe so the text prints cleanly on any terminal.
    scorers.extend(
        goal.text.replace(u'\u2032', "'")
        for goal in details.find_next('p').find_all('span')
    )
    return team, score, "\n".join(scorers)


def parse_page(data):
    """Parse one match-report page and print the score and goal scorers.

    ``data`` is the HTML of the report page. The page is expected to contain
    a ``div`` with id ``match-overview``; if it (or any nested element looked
    up below) is missing, the attribute access on ``None`` raises
    ``AttributeError``.
    """
    subsoup = BeautifulSoup(data)
    matchoverview = subsoup.find('div', attrs={'id': 'match-overview'})
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print('--------------')

    # The first team-match-details div in the overview is the home team.
    home_details = matchoverview.find(
        'div', attrs={'class': 'team-match-details'})
    hometeam, homescore, homegoals = _team_summary(
        home_details, "home goal scorers:")

    # The away team's details live inside the div with id "away-team".
    away_details = matchoverview.find(
        'div', attrs={'id': 'away-team'}
    ).find('div', attrs={'class': 'team-match-details'})
    awayteam, awayscore, awaygoals = _team_summary(
        away_details, "away goal scorers:")

    print('{0} {1} - {2} {3}'.format(hometeam, homescore, awayteam, awayscore))
    print(homegoals)
    print(awaygoals)


def all_league_results():
    """Fetch the League One results page and parse every linked match report.

    Issues one extra HTTP request per ``a`` element with class ``report``.
    """
    r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")
    soup = BeautifulSoup(r.content)

    for link in soup.find_all("a", attrs={'class': 'report'}):
        # The href is concatenated directly onto the site root.
        fulllink = 'http://www.bbc.com' + link['href']
        subr = requests.get(fulllink)
        parse_page(subr.text)


def specific_game_results(url):
    """Fetch the match report at ``url`` and print its parsed result."""
    subr = requests.get(url)
    parse_page(subr.text)


# Get a specific game's results.
specific_game_results('http://www.bbc.co.uk/sport/0/football/32460049')

# Get the current league results.
all_league_results()
Comments
Post a Comment