Try this code: (see here, here, and here)
import sys
import urllib

from bs4 import BeautifulSoup
# Copy twfile.txt to tw2file.txt, replacing every whitespace-separated token
# that contains "http" with the <title> text of the page it points to.
# Both files are opened in a single `with` clause, so they are closed
# automatically when the block exits.
with open('twfile.txt', 'r') as inputf, open('tw2file.txt', 'w') as output:
    for line in inputf:
        try:
            tokens = line.split(' ')
            for i, token in enumerate(tokens):
                # Strip first: the last token of a line still carries the
                # trailing newline, which must not be sent as part of a URL.
                token = token.strip()
                if "http" in token:
                    response = urllib.urlopen(token)
                    try:
                        html = response.read()
                    finally:
                        # Release the connection even if the read fails.
                        response.close()
                    # Name the parser explicitly so the result does not
                    # depend on which optional parsers are installed.
                    soup = BeautifulSoup(html, 'html.parser')
                    # Joining the children of the <title> tag yields its text.
                    tokens[i] = ''.join(soup.html.head.title).strip()  # here
                else:
                    tokens[i] = token  # here
            rewritten = ' '.join(tokens)
            print(rewritten)
            output.write('{}\n'.format(rewritten))  # here
        except Exception as exc:
            # Best effort: a line whose URL cannot be fetched or parsed is
            # left out of the output, but the failure is reported instead of
            # being silently swallowed. Catching Exception (not a bare
            # except) lets Ctrl-C and SystemExit still stop the script.
            sys.stderr.write('skipped line {!r}: {!r}\n'.format(line, exc))
BTW, since you are using Python 2.7.x or later, the two `open` calls can be expressed in the same `with` clause. Also, their `close` calls are unnecessary.