Question

I'm trying to get the HTML from a certain website so I can parse out the info for our database. The problem is that the second and third responseFromServer values come back the same. However, when I follow the links inside a real web browser, I get the right information (the correct page).

I'm thinking that each WebRequest is basically opening a 'new' instance of the web and what I want it to do is use the same instance.

Is there a way to specify (using a WebClient?) that each request should be made in the 'same browser' session (for lack of a better term)?

/// <summary>
/// Fetches three pages in sequence — the assessor search results for
/// <paramref name="Address"/>, the treasurer parcel page for the parcel number scraped
/// from those results, and the 2013 tax-details page — reading each body into a local string.
/// Nothing is returned; the page contents stay in locals.
/// </summary>
/// <param name="Address">Street address to search for; spaces are replaced with '+' before it is put in the query string.</param>
// NOTE(review): no CookieContainer is attached to any request in this method, so a cookie
// set by one response is not sent with the next request — presumably why url2/url3 come
// back as the home page; verify against the site's session handling.
static void CountyInfo(string Address)
            {
        WebClient webClient = new WebClient();
        // These are added to webClient's outgoing *request* headers.
        // NOTE(review): Server, X-AspNet-Version and X-Powered-By look like response headers
        // copied from a server reply — confirm whether any of them are actually needed.
        webClient.Headers.Add("Cache-Control: private");
        webClient.Headers.Add("Content-Type: text/html; charset=utf-8");
        webClient.Headers.Add("Server: Microsoft-IIS/6.0");
        webClient.Headers.Add("X-AspNet-Version: 4.0.30319");
        webClient.Headers.Add("X-Powered-By: ASP.NET");
        webClient.Headers.Add("X-UA-Compatible: IE=8, IE=9, IE=10, IE=11");

                Address = Address.Replace(" ", "+");
                string url1 = "http://mcassessor.maricopa.gov/?s=" + Address;
                // This WebRequest is sent, but response1 is never read (the GetResponseStream
                // line is commented out) and never closed (the Close calls below are commented out).
                WebRequest request1 = WebRequest.Create(url1);
                WebResponse response1 = request1.GetResponse();
                //Stream dataStream1 = response1.GetResponseStream();
           // url1 is fetched a second time through webClient; this is the body that is actually read.
           Stream dataStream1 = webClient.OpenRead(url1);
                StreamReader reader1 = new StreamReader(dataStream1);

                string responseFromServer1 = reader1.ReadToEnd();
                // Take the text between the treasurer link prefix and "target=" and keep only its digits.
                // getBetween is defined outside this snippet.
                string ParcelNum = getBetween(responseFromServer1, "http://treasurer.maricopa.gov/parcels/default.asp?Parcel=", "target=");
                ParcelNum = new String(ParcelNum.Where(Char.IsDigit).ToArray());
                //reader1.Close();
                //response1.Close();           

                //NEW GET request
                string url2 = "http://treasurer.maricopa.gov/parcels/default.asp?Parcel=" + ParcelNum;
                // Same pattern as above: response2 is obtained but never read or closed,
                // and url2 is fetched again through webClient.
                WebRequest request2 = WebRequest.Create(url2);
                WebResponse response2 = request2.GetResponse();
                //Stream dataStream2 = response2.GetResponseStream();
           Stream dataStream2 = webClient.OpenRead(url2);
                StreamReader reader2 = new StreamReader(dataStream2);
                string responseFromServer2 = reader2.ReadToEnd();
                //reader2.Close();
                //response2.Close();

                //NEW GET request
                // url3 carries no parcel number of its own.
                // NOTE(review): presumably the server picks the parcel from session state set up by url2 — confirm.
                string url3 = "http://treasurer.maricopa.gov/Parcel/" + "TaxDetails.aspx?taxyear=2013";
                WebRequest request3 = WebRequest.Create(url3);
                WebResponse response3 = request3.GetResponse();
                //Stream dataStream3 = response3.GetResponseStream();
           Stream dataStream3 = webClient.OpenRead(url3);
                StreamReader reader3 = new StreamReader(dataStream3);
                string responseFromServer3 = reader3.ReadToEnd();
                // Only the third reader and response are closed explicitly.
                reader3.Close();
                response3.Close();
            }

EDIT: I just noticed that request1 gives me the correct page (the query results page), but requests 2 and 3 send me back to the "Home Page" of the website, even though I am passing url2 and url3 into requests 2 and 3 respectively.

Was it helpful?

Solution 2

Sorry about the long code post, but this is how I got it working. Let me know if you can condense it at all.

/// <summary>
/// Cookie container shared between requests: it is assigned by obtainCookies and
/// attached to each request made by MARICOPAcounty.
/// </summary>
public static CookieContainer cookieJar;

  /// <summary>
  /// Entry point: looks up the order, seeds the shared cookie jar, then runs the
  /// Maricopa county page sequence for the order's address.
  /// </summary>
  static void Main(string[] args)
    {
        // The order is resolved first; its address drives the county search below.
        var order = new GetOrderInfo("######");

        // Cookies must be in place before any page is requested.
        obtainCookies();

        MARICOPAcounty(order.OrderAddress);
    }

/// <summary>
/// Primes the shared <c>cookieJar</c>: sends one request to the assessor site with a
/// seeded <c>_ga</c> cookie and keeps every cookie the server sets in its reply, so that
/// later requests using <c>cookieJar</c> carry the same session.
/// Request/response headers and the resulting cookies are written to the console as diagnostics.
/// </summary>
/// <exception cref="WebException">The request to the assessor site failed.</exception>
static void obtainCookies()
        {
            const string siteUrl = "http://mcassessor.maricopa.gov/";
            Uri siteUri = new Uri(siteUrl);

            // A single container serves both the request and the shared jar. The original
            // gave the request its own throwaway container, so cookies set by the server
            // never reached cookieJar.
            CookieContainer jar = new CookieContainer();

            // Adding the cookie against the site Uri lets the container derive the domain
            // from the host; Cookie.Domain expects a host name, not a full URL.
            jar.Add(siteUri, new Cookie("_ga", "GA1.2.1813386723.1386802842"));

            // NOTE(review): the POST body is just the site URL sent as text/html — kept as in
            // the original; confirm whether a plain GET would prime the session equally well.
            byte[] byteData = new UTF8Encoding().GetBytes(siteUrl);

            HttpWebRequest postReq = (HttpWebRequest)WebRequest.Create(siteUri);
            postReq.Method = "POST";
            postReq.KeepAlive = true;
            postReq.CookieContainer = jar;
            postReq.ContentType = "text/html; charset=utf-8";
            postReq.Referer = siteUrl;
            postReq.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; MAM3; rv:11.0) like Gecko";
            postReq.ContentLength = byteData.Length;

            using (Stream postReqStream = postReq.GetRequestStream())
            {
                postReqStream.Write(byteData, 0, byteData.Length);
            }

            using (HttpWebResponse postResponse = (HttpWebResponse)postReq.GetResponse())
            using (StreamReader postReqReader = new StreamReader(postResponse.GetResponseStream()))
            {
                // Drain the body so the response is fully consumed before it is disposed.
                postReqReader.ReadToEnd();

                Console.WriteLine("\r\n\r\n postResponse COOKIES");
                // Print what the jar now holds for the site (seeded cookie plus anything the server set).
                foreach (Cookie cookie in jar.GetCookies(siteUri))
                {
                    Console.WriteLine(cookie);
                }

                Console.WriteLine("\r\n\r\n postReq Headers");
                Console.WriteLine(postReq.Headers.ToString());

                Console.WriteLine("\r\n\r\n postResponse Headers");
                Console.Write("\t" + postResponse.Headers);
            }

            // Publish the jar only after the request has succeeded.
            cookieJar = jar;
        }


    /// <summary>
    /// Walks the three-page Maricopa county sequence for an address, sending the shared
    /// <c>cookieJar</c> with every request:
    /// (1) the assessor search for <paramref name="Address"/>, from which the parcel number is scraped;
    /// (2) the treasurer parcel page for that parcel number;
    /// (3) the 2013 tax-details page.
    /// Request and response headers of each step are written to the console as diagnostics.
    /// </summary>
    /// <param name="Address">Street address to search for; spaces are replaced with '+' for the query string.</param>
    /// <returns>The HTML of the tax-details page (step 3).</returns>
    /// <exception cref="ArgumentNullException"><paramref name="Address"/> is null.</exception>
    /// <exception cref="WebException">Any of the three requests failed.</exception>
    public static string MARICOPAcounty(string Address)
    {
        if (Address == null)
        {
            throw new ArgumentNullException("Address");
        }

        // ---- Step 1: assessor search, scrape the parcel number ------------------------
        string url1 = "http://mcassessor.maricopa.gov/?s=" + Address.Replace(" ", "+");
        HttpWebRequest request1 = CreateGetRequest(url1, null);

        string parcelNum;
        using (HttpWebResponse response1 = (HttpWebResponse)request1.GetResponse())
        using (StreamReader sr1 = new StreamReader(response1.GetResponseStream()))
        {
            string sourceCode1 = sr1.ReadToEnd();

            // Take the text between the treasurer link prefix and "target=", then keep only
            // the digits. getBetween is defined elsewhere in the file.
            parcelNum = getBetween(sourceCode1, "http://treasurer.maricopa.gov/parcels/default.asp?Parcel=", "target=");
            parcelNum = new String(parcelNum.Where(Char.IsDigit).ToArray());

            Console.WriteLine("\r\n\r\n request1 Headers");
            Console.WriteLine(request1.Headers.ToString());

            Console.WriteLine("\r\n\r\nresponse1 Headers");
            Console.Write("\t" + response1.Headers);
        }

        // ---- Step 2: treasurer parcel page ---------------------------------------------
        // Uses the parcel number scraped in step 1; the original had a hard-coded parcel
        // number left over from debugging, so every address hit the same parcel.
        string url2 = "http://treasurer.maricopa.gov/parcels/default.asp?Parcel=" + parcelNum;
        HttpWebRequest request2 = CreateGetRequest(url2, url1);

        using (HttpWebResponse response2 = (HttpWebResponse)request2.GetResponse())
        using (StreamReader sr2 = new StreamReader(response2.GetResponseStream()))
        {
            // The body is not used, but it is read so the response is fully consumed.
            sr2.ReadToEnd();

            Console.WriteLine("\r\n\r\nrrequest2 Headers");
            Console.WriteLine(request2.Headers);
            Console.WriteLine(request2.CookieContainer);

            Console.WriteLine("\r\n\r\nresponse2 Headers");
            Console.Write("\t" + response2.Headers);
        }

        // ---- Step 3: tax details --------------------------------------------------------
        // NOTE(review): this URL carries no parcel number; presumably the server selects the
        // parcel from the session established in step 2 — confirm.
        string url3 = "http://treasurer.maricopa.gov/Parcel/TaxDetails.aspx?taxyear=2013";
        HttpWebRequest request3 = CreateGetRequest(url3, url2);

        using (HttpWebResponse response3 = (HttpWebResponse)request3.GetResponse())
        using (StreamReader sr3 = new StreamReader(response3.GetResponseStream()))
        {
            string sourceCode3 = sr3.ReadToEnd();

            Console.WriteLine("\r\n\r\nrrequest3 Headers");
            Console.WriteLine(request3.Headers);

            Console.WriteLine("\r\n\r\nresponse3 Headers");
            Console.Write("\t" + response3.Headers);

            return sourceCode3;
        }
    }

    // Builds a browser-like GET request for url that shares cookieJar; sets Referer when one is given.
    private static HttpWebRequest CreateGetRequest(string url, string referer)
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(url);

        request.CookieContainer = cookieJar;
        request.Method = "GET";
        request.Accept = "text/html, application/xhtml+xml, */*";
        request.Headers.Add("Accept-Language: en-US");
        request.UserAgent = "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; MAM3; rv:11.0) like Gecko";

        // Let the framework send Accept-Encoding and decode the body itself. Adding the
        // header by hand (as the original did) without this leaves a gzip/deflate body
        // compressed when it is read as text.
        request.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;

        if (referer != null)
        {
            request.Referer = referer;
        }

        return request;
    }

OTHER TIPS

You need to manage the cookies. Get the cookies served by the web server and pass them back on the next request.

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top