Python - make script loop until condition met and use a different proxy address for each loop

Question 1

You can create a loop that runs until a condition is met like this:

# Loop forever; the break below is the only way out.
while True:
    if condition :  # `condition` is a placeholder for whatever test means "done"
        break

The itertools module has a handful of tricks for iterating: http://docs.python.org/2/library/itertools.html

Notably, check out itertools.cycle, which keeps repeating the items of a sequence endlessly.

(These are meant as pointers in the right direction. You could craft a solution with one, the other, or even both.)

Question 2

I made a few changes to your code. It looks to me like the function expunge already loops through all the results on the page, so I'm not sure which loop you need to add. At the end there's an example of how you could check whether any results were found, although there are no loops to break out of.

I don't know how to change the proxy/IP.

btw, you had 'reno' twice.

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import urllib
from twill.commands import go

# Craigslist site names to search; each one becomes the host prefix of the
# search URL built in the loop further down.
areas = ['sfbay', 'chico', 'fresno', 'goldcountry', 'humboldt',
        'mendocino', 'modesto', 'monterey', 'redding', 'reno',
        'sacramento', 'siskiyou', 'stockton', 'yubasutter']
# Search terms, written already URL-encoded ('+' between words, '%23' for '#')
# because they are concatenated into the query string as-is.
queries = ['james+"916+821+0590"','"DRE+%23+01902542"']

def expunge(url, area):
    """Fetch the search-results page at *url* and flag each posting linked from it.

    *area* is accepted for the caller's convenience but is not used here.

    Raises ValueError if the page has no '<hr>' marker, or if a candidate
    link lacks the expected '">' or '.html' text. All links are extracted
    before any flag request is sent, so a parse failure sends nothing.
    """
    html = urllib.urlopen(url).read()

    # Work only on the first line of the markup from the first '<hr>' onward.
    results_line = html[html.index('<hr>'):].split('\n')[0]

    # Each piece after an 'href="' starts with a link target. The first and
    # last pieces are discarded, and only pieces containing the small-font
    # marker are kept.
    links = []
    for piece in results_line.split('href="')[1:-1]:
        if '<font size="-1">' in piece:
            links.append(piece[:piece.index('">')])

    for link in links:
        # The posting number sits between the last '/' and '.html'
        # (something like 34235235252).
        start = link.rfind('/') + 1
        stop = link.index('.html')
        posting_id = link[start:stop]
        # NOTE(review): '&amppostingID' looks like a garbled '&postingID'
        # separator; reproduced unchanged here -- confirm against the endpoint.
        go('https://post.craigslist.org/flag?flagCode=15&amppostingID=' + posting_id)

# Driver: run every query against every area and hand result pages to expunge().
# Each print() below takes one parenthesised string, which produces the same
# output whether `print` is the Python 2 statement or the Python 3 function.
print('Checking ' + str(len(areas)) + ' areas...')

for area in areas:
    for query in queries:
        # Search URL for this area/query pair (category filter catAbb=hhh).
        qurl = 'http://' + area + '.craigslist.org/search/?query=' + query + '+&catAbb=hhh'
        try:
            q = urllib.urlopen(qurl).read()
        except Exception:
            # Catch Exception rather than everything, so Ctrl-C and
            # SystemExit still stop the script instead of being reported
            # as a fetch error.
            print('tl;dr error for {} in {}'.format(query, area))
            break

        if 'Found: ' in q:
            print('Found results for {} in {}'.format(query, area))
            expunge(qurl, area)
            # Name the area in the message; it used to end in the literal
            # word "area".
            print('All {} listings marked as spam for {}'.format(query, area))
        elif 'Nothing found for that search' in q:
            print('No results for {} in {}'.format(query, area))
            # NOTE(review): this break skips the remaining queries for the
            # current area -- confirm that is intended rather than `continue`.
            break
        else:
            # Page matched neither marker: stop processing this area.
            break

Question 3

I've made some changes... I'm not sure how well they are working, but I'm not getting any errors. Please let me know if you find anything that is wrong or missing. Thanks!

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import urllib, urllib2
from twill.commands import go


# Build five ProxyHandler/opener pairs (HTTPS scheme only) and install each
# opener in turn as urllib2's default opener.
# NOTE(review): install_opener() sets one process-wide default, so every call
# below replaces the opener installed by the call before it. The installed
# opener is what urllib2.urlopen() uses; the urllib.urlopen() and twill go()
# calls elsewhere in this script presumably do not go through it -- confirm.
proxy = urllib2.ProxyHandler({'https': '108.60.219.136:8080'})
opener = urllib2.build_opener(proxy)
urllib2.install_opener(opener)
proxy2 = urllib2.ProxyHandler({'https': '198.144.186.98:3128'})
opener2 = urllib2.build_opener(proxy2)
urllib2.install_opener(opener2)
proxy3 = urllib2.ProxyHandler({'https': '66.55.153.226:8080'})
opener3 = urllib2.build_opener(proxy3)
urllib2.install_opener(opener3)
proxy4 = urllib2.ProxyHandler({'https': '173.213.113.111:8080'})
opener4 = urllib2.build_opener(proxy4)
urllib2.install_opener(opener4)
proxy5 = urllib2.ProxyHandler({'https': '198.154.114.118:3128'})
opener5 = urllib2.build_opener(proxy5)
urllib2.install_opener(opener5)


    # Craigslist site names to search; each becomes the host prefix of the search URL.
    # NOTE(review): the leading indent on this statement looks like paste
    # damage; as a top-level statement Python rejects it as an unexpected indent.
    areas = ['sfbay', 'chico', 'fresno', 'goldcountry', 'humboldt',
    'mendocino', 'modesto', 'monterey', 'redding', 'reno',
    'sacramento', 'siskiyou', 'stockton', 'yubasutter']
# Search terms, written already URL-encoded ('+' between words, '%23' for '#').
queries = ['james+"916+821+0590"','"DRE+%23+01902542"']

    # expunge(url, area): fetch the search-results page at `url`, pull out the
    # links to individual postings, and request three flag URLs per posting.
    # `area` is accepted but never used in the body.
    # NOTE(review): the indentation of this block is mangled (the def is
    # indented, its body is not, and the loop body sits level with its `for`),
    # so it will not parse as written; the intended nesting must be restored.
    def expunge(url, area):
page = urllib.urlopen(url).read() # this line and the next two extract the URLs of the individual postings
page = page[page.index('<hr>'):].split('\n')[0]  # first line of the markup from the first '<hr>' onward
page = [i[:i.index('">')] for i in page.split('href="')[1:-1] if '<font size="-1">' in i]  # link targets of pieces carrying the small-font marker

    for u in page:
    num = u[u.rfind('/')+1:u.index('.html')] # the number of the posting (like 34235235252)
    # Each urlopen() call below issues its request immediately and returns a
    # response object, so spam/spam2/spam3 are responses, not URL strings.
    # NOTE(review): '&amppostingID' looks like a garbled '&postingID' -- confirm.
    spam = urllib2.urlopen('https://post.craigslist.org/flag?flagCode=15&amppostingID='+num )
    spam2 = urllib2.urlopen('https://post.craigslist.org/flag?flagCode=28&amppostingID='+num )
    spam3 = urllib2.urlopen('https://post.craigslist.org/flag?flagCode=16&amppostingID='+num )
    # NOTE(review): twill's go() presumably expects a URL string, but it is
    # handed the response objects created above -- confirm this does what is intended.
    go(spam) # flag it
    go(spam2) # flag it
    go(spam3) # flag it

# Driver: build a search URL for each area/query pair, fetch it, and call
# expunge() when the page reports results.
# NOTE(review): the indentation below is mangled (the outer `for` is indented,
# the inner `for` is not, and the if/elif/else sits level with the try), so the
# intended nesting is ambiguous and the block will not parse as written.
print 'Checking ' + str(len(areas)) + ' areas...'

    for area in areas:
for query in queries:
    qurl = 'http://' + area + '.craigslist.org/search/?query=' + query + '+&catAbb=hhh'
    try:
        q = urllib.urlopen(qurl).read()
    except:  # bare except: also catches KeyboardInterrupt and SystemExit
        print 'tl;dr error for {} in {}'.format(query, area)
        break

    # Branch on marker text found in the fetched page.
    if 'Found: ' in q:
        print 'Found results for {} in {}'.format(query, area)
        expunge(qurl, area)
        print 'All {} listings marked as spam for {}'.format(query, area)
        print ''
        print ''
    elif 'Nothing found for that search' in q:
        print 'No results for {} in {}'.format(query, area)
        print ''
        print ''
        break  # NOTE(review): leaves the enclosing loop, skipping later queries -- confirm intended
    else:
        break  # page matched neither marker

Question 4

#!/usr/bin/env python
# -*- coding: utf-8 -*-

import urllib, urllib2
from twill.commands import go


# Create five HTTPS ProxyHandler objects, wrap each in an opener, and install
# every opener as urllib2's default, one after another.
# NOTE(review): urllib2 keeps a single default opener, so each
# install_opener() call overrides the previous one rather than adding to it.
proxy = urllib2.ProxyHandler({'https': '108.60.219.136:8080'})
opener = urllib2.build_opener(proxy)
urllib2.install_opener(opener)
proxy2 = urllib2.ProxyHandler({'https': '198.144.186.98:3128'})
opener2 = urllib2.build_opener(proxy2)
urllib2.install_opener(opener2)
proxy3 = urllib2.ProxyHandler({'https': '66.55.153.226:8080'})
opener3 = urllib2.build_opener(proxy3)
urllib2.install_opener(opener3)
proxy4 = urllib2.ProxyHandler({'https': '173.213.113.111:8080'})
opener4 = urllib2.build_opener(proxy4)
urllib2.install_opener(opener4)
proxy5 = urllib2.ProxyHandler({'https': '198.154.114.118:3128'})
opener5 = urllib2.build_opener(proxy5)
urllib2.install_opener(opener5)


# Single Craigslist site name to search; used as the host prefix of the search URL.
areas = ['capecod']
# Plain one-word search terms, each inserted into the query string as-is.
queries = ['rent','rental','home','year','falmouth','lease','credit','tenant','apartment','bedroom','bed','bath']

    # expunge(url, area): download the results page at `url`, extract the
    # posting links from it, and request three flag URLs for every posting.
    # The `area` argument is not referenced in the body.
    # NOTE(review): indentation here is mangled (indented def, unindented body,
    # loop body level with its `for`); it will not parse until the nesting is restored.
    def expunge(url, area):
page = urllib.urlopen(url).read() # this line and the next two extract the URLs of the individual postings
page = page[page.index('<hr>'):].split('\n')[0]  # keep the first line starting at the first '<hr>'
page = [i[:i.index('">')] for i in page.split('href="')[1:-1] if '<font size="-1">' in i]  # link target of each piece that has the small-font marker

    for u in page:
    num = u[u.rfind('/')+1:u.index('.html')] # the number of the posting (like 34235235252)
    # urlopen() performs the request right away; the three names below hold
    # the returned response objects rather than URL strings.
    # NOTE(review): '&amppostingID' looks like a garbled '&postingID' -- confirm.
    spam = urllib2.urlopen('https://post.craigslist.org/flag?flagCode=15&amppostingID='+num )
    spam2 = urllib2.urlopen('https://post.craigslist.org/flag?flagCode=28&amppostingID='+num )
    spam3 = urllib2.urlopen('https://post.craigslist.org/flag?flagCode=16&amppostingID='+num )
    # NOTE(review): go() is given response objects here; it presumably expects
    # a URL string -- confirm.
    go(spam) # flag it
    go(spam2) # flag it
    go(spam3) # flag it

# Driver: for each area/query pair, fetch the search page and call expunge()
# when the page text reports results.
# NOTE(review): indentation below is mangled (outer `for` indented, inner `for`
# at column 0, if/elif/else level with the try); the intended nesting is
# ambiguous and the block will not parse as written.
print 'Checking ' + str(len(areas)) + ' areas...'

    for area in areas:
for query in queries:
    qurl = 'http://' + area + '.craigslist.org/search/?query=' + query + '+&catAbb=hhh'
    try:
        q = urllib.urlopen(qurl).read()
    except:  # bare except: also catches KeyboardInterrupt and SystemExit
        print 'tl;dr error for {} in {}'.format(query, area)
        break

    # Decide what to do from marker text in the fetched page.
    if 'Found: ' in q:
        print 'Found results for {} in {}'.format(query, area)
        expunge(qurl, area)
        print 'All {} listings marked as spam for {}'.format(query, area)
        print ''
        print ''
    elif 'Nothing found for that search' in q:
        print 'No results for {} in {}'.format(query, area)
        print ''
        print ''
        break  # NOTE(review): exits the enclosing loop, so later queries are skipped -- confirm intended
    else:
        break  # page matched neither marker