Question

I am a beginner and would like to know why I get an error from WebClient.DownloadString() when I call it inside Parallel.ForEach. I have no idea whether it is caused by my slow connection. Here is my code:

// Walks eBay search result pages 2 through 5. For every page it collects the distinct
// item links and then, in parallel, downloads each item page and appends a Product
// built from it to productList.

// Serializes productList.Add, which is invoked from several Parallel.ForEach workers.
// productList is declared outside this snippet; this assumes it is a collection that
// is not thread-safe (e.g. List<Product>) -- the lock is harmless otherwise.
object productListGate = new object();

for (int i = 2; i <= 5; i++)
{
    // Other category searches that can be swapped in:
    //   "http://www.ebay.de/sch/Studium-Wissen-/1105/i.html?LH_Auction=1&_sop=1&_nkw=&_pgn=" + i
    //   "http://www.ebay.de/sch/Schule-Ausbildung-/40434/i.html?LH_Auction=1&_sop=1&_nkw=&_pgn=" + i
    string ebayLink = "http://www.ebay.de/sch/i.html?LH_Auction=1&_sacat=0&_from=R40&_nkw=B%C3%BCcher&_sop=1&_pgn=" + i;

    HtmlWeb hw = new HtmlWeb();
    HtmlAgilityPack.HtmlDocument doc = hw.Load(ebayLink);

    // SelectNodes yields null rather than an empty collection when nothing matches,
    // so a page without anchors is skipped instead of throwing.
    var anchors = doc.DocumentNode.SelectNodes("//a[@href]");
    if (anchors == null)
    {
        continue;
    }

    // The list keeps first-seen order; the set makes the duplicate check O(1).
    List<string> links = new List<string>();
    HashSet<string> seenLinks = new HashSet<string>();

    foreach (var anchor in anchors)
    {
        string url = anchor.GetAttributeValue("href", "");

        // Keep item pages only, excluding the magazine and fiction product types.
        // seenLinks.Add comes last so that only accepted URLs are remembered.
        if (url.Contains(".de/itm")
            && !url.Contains("pt=Zeitschriften")
            && !url.Contains("pt=Belletristik")
            && seenLinks.Add(url))
        {
            links.Add(url);
        }
    }

    Parallel.ForEach(links, itemUrl =>
    {
        string html;

        // WebClient is IDisposable; release it as soon as the page has been fetched.
        using (WebClient wc = new WebClient())
        {
            // The number of simultaneous connections to one host is capped by
            // System.Net.ServicePointManager.DefaultConnectionLimit.
            html = wc.DownloadString(itemUrl);
        }

        EbayItem ebayItem = new EbayItem(html);
        string ean = ebayItem.ean;
        string amazonUsedPrice = getAmazonUsedPrice(ean);

        Product product = new Product
        {
            EbayUrl = itemUrl,
            Ean = ean,
            AmazonPriceString = amazonUsedPrice,
            ebayItem = ebayItem
        };

        lock (productListGate)
        {
            productList.Add(product);
        }
    });
}

The error occurs on the line string html = wc.DownloadString(link);. From the output I can see that it stops once it has reached at least 20 links.

Était-ce utile?

La solution

Your connections are waiting for previous connections to close, thus the timeout. The default limit for concurrent connections to the same host is 2. Try increasing that limit before entering your Parallel call:

// Lift the cap on concurrent connections per ServicePoint (the default is 2) so the
// parallel downloads do not wait on each other; set this before entering the Parallel call.
System.Net.ServicePointManager.DefaultConnectionLimit = int.MaxValue;

Read more about the DefaultConnectionLimit here.

Property Value

Type: System.Int32

The maximum number of concurrent connections allowed by a ServicePoint object. The default value is 2.

Licencié sous: CC-BY-SA avec attribution
Non affilié à StackOverflow
scroll top