I made a few changes to your code. It looks to me like the function expunge
already loops through all the results on the page, so I'm not sure what other loop you need to make. At the end there's an example of how you could check whether results were found or not, but there are no loops to break out of.
Don't know how to change the proxy/ip.
By the way, you had 'reno' listed twice.
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import urllib
from twill.commands import go
# Craigslist area names; each one is used as the subdomain of
# <area>.craigslist.org when the search URL is built.
areas = [
    'sfbay',
    'chico',
    'fresno',
    'goldcountry',
    'humboldt',
    'mendocino',
    'modesto',
    'monterey',
    'redding',
    'reno',
    'sacramento',
    'siskiyou',
    'stockton',
    'yubasutter',
]

# Search strings, already URL-encoded ('+' for spaces, '%23' for '#') because
# they are pasted into the query string as-is.
queries = [
    'james+"916+821+0590"',
    '"DRE+%23+01902542"',
]
def expunge(url, area):
    """Flag every posting linked from a craigslist search-results page as spam.

    Downloads ``url``, takes the single line of HTML that starts at the first
    ``<hr>`` tag, extracts the posting links found on it and opens the
    craigslist flag URL (``flagCode=15``) for each posting ID with twill's
    ``go``.

    Args:
        url: Address of the search-results page to scan.
        area: Craigslist area name. Not used by this function; kept so that
            existing callers keep working.

    Returns:
        None. When the page has no ``<hr>`` marker nothing is flagged.
    """
    response = urllib.urlopen(url)
    try:
        html = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    marker = html.find('<hr>')
    if marker == -1:
        # Without the <hr> marker there is no listing line to parse.
        return
    # Only the first line starting at the marker is scanned for links.
    listing = html[marker:].split('\n')[0]

    # [1:-1] drops the text before the first href and the final href segment.
    # NOTE(review): the last link is skipped on purpose in the original code,
    # presumably because it is not a posting -- confirm against a real page.
    for segment in listing.split('href="')[1:-1]:
        if '<font size="-1">' not in segment:
            continue
        # The marker text checked above itself ends in '">', so index() cannot
        # fail here.
        link = segment[:segment.index('">')]

        id_end = link.find('.html')
        if id_end == -1:
            # Not a posting link; skip it instead of raising ValueError.
            continue
        # The posting ID is the file name between the last '/' and '.html'.
        posting_id = link[link.rfind('/') + 1:id_end]
        if not posting_id:
            continue

        # Visiting this URL flags the posting as spam.
        go('https://post.craigslist.org/flag?flagCode=15&postingID=' + posting_id)
# Driver: run every query against every area and flag whatever is found.
# print(<single expression>) prints the same text under Python 2's print
# statement and also parses on Python 3.
print('Checking ' + str(len(areas)) + ' areas...')
for area in areas:
    for query in queries:
        # The query strings are already URL-encoded, so they are concatenated
        # into the search URL unchanged.
        qurl = 'http://' + area + '.craigslist.org/search/?query=' + query + '+&catAbb=hhh'
        try:
            response = urllib.urlopen(qurl)
            try:
                q = response.read()
            finally:
                # Close the handle whether or not the read succeeded.
                response.close()
        except Exception:
            # Best-effort: report the failure and move on. Catching Exception
            # rather than using a bare except keeps Ctrl-C and sys.exit()
            # working.
            print('tl;dr error for {} in {}'.format(query, area))
            # NOTE(review): break abandons the remaining queries for this
            # area; switch to continue if every query should be attempted.
            break
        if 'Found: ' in q:
            print('Found results for {} in {}'.format(query, area))
            expunge(qurl, area)
            print('All {} listings marked as spam for area'.format(query))
        elif 'Nothing found for that search' in q:
            print('No results for {} in {}'.format(query, area))
            # NOTE(review): same as above -- later queries for this area are
            # skipped once one query comes back empty.
            break
        else:
            # Page matched neither marker; give up on this area.
            break