Getting a syntax error on line 29, which would be the following: links = parsed_body.xpath('//div[contains(@class, "b-thumb-128px")]/a/@href')] . I've written valid XPaths for the target site, so I'm unsure why this particular error occurs or how to fix it.

# Standard library
import csv
import math
import os
import re
from pprint import pprint
from urlparse import urljoin  # Python 2; urllib.parse on Python 3

# Third-party
import requests
from lxml import html
from thready import threaded

CACHE_DIR = os.path.join(os.path.dirname(__file__), 'wanpy')

def get_links():
    """Collect every product-detail URL from the paginated search results
    and dispatch them to ``scrape_inventory`` via a thread pool.

    Fetches the first results page to read the total hit count, derives
    the number of result pages from it, then gathers all item links from
    each page before handing the full URL list to ``thready.threaded``.
    """
    STARTING_URL = 'http://example.com/en/search/?h=3&k=&p=1&sid=wan'
    results_per_page = 60
    response = requests.get(STARTING_URL)
    dive = html.fromstring(response.text)
    # Utility bar text ends with "... of <total> results", so the
    # second-to-last whitespace token is the total hit count.
    div = dive.xpath("//div[contains(@class, 'b-tabs-utility')]")[0].text
    # Use float division: in Python 2, int/int truncates, which made
    # math.ceil a no-op and silently dropped the final partial page.
    # math.ceil returns a float, so wrap in int() for xrange below.
    last_pg = int(math.ceil(float(div.split()[-2]) / results_per_page))
    BASE_URL = 'http://example.com/en/search/?h=3&k=&p=%d&sid=wanboo'
    urls = []
    # Result pages are 1-indexed (the search URL starts at p=1);
    # xrange(last_pg) would have requested p=0 and missed the last page.
    for i in xrange(1, last_pg + 1):
        response = requests.get(BASE_URL % i)
        parsed_body = html.fromstring(response.text)
        # The stray trailing ']' on this line was the reported SyntaxError.
        links = parsed_body.xpath('//div[contains(@class, "b-thumb-128px")]//a/@href')
        urls.extend(links)
    threaded(urls, scrape_inventory, num_threads=10)


def scrape_inventory():
    with open("data/wan.csv", "w") as f:
        fieldnames = ("model", "title", "description", "price", "image","additional_image", "scrape_url")
        output = csv.writer(f, delimiter="\t")
        output.writerow(fieldnames)
        print "scraping %s ..." % url
        response = requests.get(url)
        parsed_body = html.fromstring(response.text)
        name = re.sub(r'\D\W\S', "", parsed_body.xpath("//h1[contains(@class, 'b-ttl-main')]/text()"))
        #description = re.sub(r'\D\W\S', "", parsed_body.xpath("//div[contains(@class, 'b-container b-editable')]/text()"))
        price = re.sub(r'\D\W\S', "", round(float(parsed_body.xpath("//span[contains(@class, 'b-text-xxlarge b-text-prime')]/text()")) * 2 + 15), 2)

        output.writerow([name, price])


# Script entry point: collect all product links, which in turn spawns the
# scraper worker threads.
if __name__ == '__main__':
    get_links()
Was this helpful?

Solution

The `]` at the end of the line in question does not match up with any `[`. Remove that stray trailing `]` and the SyntaxError goes away.

Licensed under: CC-BY-SA with attribution
Not affiliated with Stack Overflow
scroll top