Question

I have a form that scrapes data from a website, using Awesomium as the browser and HTMLAgilityPack to pass the Html, and sends it to another process using WCF.

When I call the method ScraperForm.GetData() with the button on the form I get no error (the html is grabbed properly by the javascript).

When I call it from my other form (using the WCF client) I get the following error in my ScraperForm:

public partial class ScraperForm : Form
{
    public string Html;
    public HtmlAgilityPack.HtmlDocument HtmlDoc;

    ServiceHost Host;
    ModelDataServer DataServer;

    public ScraperForm()
    {
        InitializeComponent();

        #region Start Data Server
        DataServer = new ModelDataServer();
        DataServer._GetData = new ModelDataServer.GetData(this.GetData);

        Host = new ServiceHost(DataServer, new Uri[]
            {
                new Uri("http://localhost:8000")
            });

        Host.AddServiceEndpoint(typeof(IModelData),
            new BasicHttpBinding(),
            "ModelData");

        Host.Open();

        #endregion

        HtmlDoc = new HtmlAgilityPack.HtmlDocument();
        Html = "";
    }

    private void CloseSever()
    {
        Host.Close();
    }

    public ArrayList GetData()
    {
        #region HTMLScrape - HTML Agility Pack

        // navigate to website
        ArrayList ret = new ArrayList();
        Html = webControl.ExecuteJavascriptWithResult("document.documentElement.outerHTML").ToString();
        // *********** Breaks on the line above this! ***********

    // Grab data out from html, add to ret (ArrayList) 

    return ret;        
    }

    private void button1_Click(object sender, EventArgs e)
    {
        GetData();   // Causes No Errors!
    }
}

My WCF Interprocess communication code:

[ServiceContract]
public interface IModelData
{
    [OperationContract]
    ArrayList GetData();
}

[ServiceBehavior(InstanceContextMode = InstanceContextMode.Single)]
public class ModelDataClient
{
    ChannelFactory<IModelData> HttpFactory;
    IModelData HttpProxy;

    public ModelDataClient()
    {
        HttpFactory = new ChannelFactory<IModelData>(
            new BasicHttpBinding(),
            new EndpointAddress("http://localhost:8000/ModelData"));

        HttpProxy = HttpFactory.CreateChannel();
    }

    public ArrayList GetData()
    {
        return HttpProxy.GetData();
    }
}

[ServiceBehavior(UseSynchronizationContext = false, InstanceContextMode = InstanceContextMode.Single)]
public class ModelDataServer : IModelData
{
    public delegate ArrayList GetData();
    public GetData _GetData { get; set; }

    public ModelDataServer()
    {
    }

    public ArrayList GetData()
    {
        return _GetData();   // When called here (Using WCF) it causes the error!
    }
}

Error (points to Html = webControl.ExecuteJavascriptWithResult("document.documentElement.outerHTML").ToString();):

Attempted to read or write protected memory. This is often an indication that other memory is corrupt

I think this is something to do with setting my scraper form to run as administrator or something. Thanks for your help.

Update:

I followed the instructions here: http://jtstroup.net/post/Attempted-to-read-or-write-protected-memory-This-is-often-an-indication-that-other-memory-is-corrupt.aspx (Turning off Data Execution Prevention) and I'm still recieving the same error.

I believe the problem lies with a mis-match between Awesomium and the rest of my program. Something to do with a different version of .net or something.

Another suggestion I found was to build the whole project as x86 but this hasn't worked either.

Does anyone have any suggestions I can try? How can I change my target .net version to that of Awesomium? Will this work?

Was it helpful?

Solution

I fixed the problem by pulling the HTML from the webform on a timer, then my WCF just calls a method that grabs the data that was updated from the last tick of the timer.

So yes, the problem is likely to be that the WebCore component is not thread safe.

Thanks groverboy!

Licensed under: CC-BY-SA with attribution
Not affiliated with StackOverflow
scroll top