I have BeautifulSoup 3 and this seems to work correctly:
import BeautifulSoup as BS
import urllib
# Download the S&P 500 companies page. The finally clause closes the
# connection even when read() raises.
request = urllib.urlopen('http://www.stockmarketsreview.com/companies_sp500/')
try:
    html = request.read()
finally:
    request.close()
soup = BS.BeautifulSoup(html)

# Walk div.mainContent -> table -> tr -> td and print the text of every cell.
# Each level keeps its own handler so that a failure in one element is
# reported without aborting the rest of the walk. Handlers catch Exception
# rather than using a bare except so Ctrl-C and SystemExit still propagate.
# Single-argument print(...) prints the same text under Python 2.
try:
    tags = soup.findAll('div', attrs={'class': 'mainContent'})
    print('# tags = ' + str(len(tags)))
    for tag in tags:
        try:
            tables = tag.findAll('table')
            print('# tables = ' + str(len(tables)))
            for table in tables:
                try:
                    # Search the current table, not the enclosing div:
                    # searching the div would print every row once per
                    # table found inside it.
                    rows = table.findAll('tr')
                    for row in rows:
                        try:
                            columns = row.findAll('td')
                            for column in columns:
                                print(column.text)
                        except Exception:
                            # Deliberate best effort: skip a row whose cells
                            # cannot be read or printed. Header rows hold
                            # <th> instead of <td>; they simply yield no
                            # columns above.
                            continue
                except Exception:
                    print('Caught error getting tr tag under ' + str(table))
        except Exception:
            print('Caught error getting table tag under ' + str(tag))
except Exception:
    print('Caught error getting div tag')
If you are using BeautifulSoup 4 rather than version 3, I believe you'd need to replace 'findAll' with 'find_all'.
Output looks like this: