python - Access Data from Behind Java -

- January 15, 2011

I'm trying to extract the goal times from the dropdown on each fixture here: http://www.bbc.co.uk/sport/football/league-one/results

I can't seem to find the data when searching - any ideas why?

# Fetch the League One results page and dump what the static HTML contains:
# the prettified document, the text of every <a> element and the text of
# every <abbr> element.
import requests
from bs4 import BeautifulSoup

# load page data
r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")
# Name the parser explicitly so the parse tree does not depend on which
# optional parsers happen to be installed.
soup = BeautifulSoup(r.content, "html.parser")
print(soup.prettify())

# save teams (prints the text of every link on the page)
for link in soup.find_all("a"):
    print(link.text)

# save results (prints the text of every <abbr> element)
for link in soup.find_all("abbr"):
    print(link.text)

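This is data-heavy (not to mention slow, because it loads separate pages), and the site may block an excessive number of requests, but it is the only way I can see of doing it: go through the results, take the href associated with each result's report button, load that page, and parse the score information from it.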

import requests
from bs4 import BeautifulSoup


def _team_summary(details, heading):
    """Return (team name, score, scorer text) for one team's details block.

    `details` is a 'team-match-details' div. The scorer text is `heading`
    followed by one line per <span> found in the next <p> element.
    """
    first_span = details.find_next('span')
    team = first_span.find_next('a').text
    score = first_span.find_next('span').text
    scorers = [heading]
    for goal in details.find_next('p').find_all('span'):
        # Swap the prime character (U+2032) for a plain apostrophe.
        scorers.append(goal.text.replace(u'\u2032', "'"))
    return team, score, "\n".join(scorers)


def parse_page(data):
    """Print the score line and goal scorers found in a match report page.

    `data` is the HTML of a single match page. The information is read from
    the element with id 'match-overview'; nothing is returned.
    """
    subsoup = BeautifulSoup(data, "html.parser")
    matchoverview = subsoup.find('div', attrs={'id': 'match-overview'})
    print('--------------')

    # The first 'team-match-details' div in the overview is used for the
    # home side; the away side is looked up inside the 'away-team' div.
    homedetails = matchoverview.find(
        'div', attrs={'class': 'team-match-details'})
    awaydetails = matchoverview.find(
        'div', attrs={'id': 'away-team'}
    ).find('div', attrs={'class': 'team-match-details'})

    hometeam, homescore, homegoals = _team_summary(
        homedetails, "home goal scorers:")
    awayteam, awayscore, awaygoals = _team_summary(
        awaydetails, "away goal scorers:")

    print('{0} {1} - {2} {3}'.format(hometeam, homescore, awayteam, awayscore))
    print(homegoals)
    print(awaygoals)


def all_league_results():
    """Fetch the league results page and parse every linked match report.

    Each match needs its own HTTP request, so this issues one request per
    'report' link on the results page.
    """
    r = requests.get("http://www.bbc.co.uk/sport/football/league-one/results")
    soup = BeautifulSoup(r.content, "html.parser")

    # Follow every link marked with the 'report' class.
    for link in soup.find_all("a", attrs={'class': 'report'}):
        fulllink = 'http://www.bbc.com' + link['href']
        subr = requests.get(fulllink)
        parse_page(subr.text)


def specific_game_results(url):
    """Fetch a single match page at `url` and print its parsed result."""
    subr = requests.get(url)
    parse_page(subr.text)


# get specific games results
specific_game_results('http://www.bbc.co.uk/sport/0/football/32460049')
# get current league results
all_league_results()

Search This Blog

Click Hand

python - Access Data from Behind Java -

Comments

Post a Comment

Popular posts from this blog

python - pip install -U PySide error -

apache - setting document root in antoher partition on ubuntu -

cytoscape.js - How to add nodes to Dagre layout with Cytoscape -