Microsoft says you shouldn't use Microsoft Office Interop to manipulate documents in an automated application.
You can use a free library such as Spire.Doc to convert the Word document to a TXT file and then read that text file. I think Spire can also save directly to a MemoryStream,
but I'm not sure. (I know Aspose.Words can, but that library isn't free.)
private void button1_Click(object sender, EventArgs e)
{
    // Folder that holds the source .docx and receives the exported .txt.
    string folder = @"C:\Users\<computer name>\Documents\TestItemHelpers";
    string wordFile = Path.Combine(folder, "TestWordDoc.docx");
    string textFile = Path.Combine(folder, "TestTxt.txt");

    // Load the Word document and export its content as plain text.
    Document wordDocument = new Document();
    wordDocument.LoadFromFile(wordFile);
    wordDocument.SaveToFile(textFile, FileFormat.Txt);

    // Read the exported text back into memory so it can be searched.
    string readText = File.ReadAllText(textFile);
    //do regex here
}
Edit: If you're going to use Interop because it is acceptable for user-run activities (as pointed out in the comments), you can save the document as a text file and then run the regex on it:
// Click handler: opens a Word document through Office Interop, saves it as
// plain text next to the original, then reads that text back for matching.
private void button1_Click(object sender, EventArgs e)
{
    string docPath = @"C:\Users\<computer name>\Documents\TestItemHelpers";
    string testFile = "TestWordDoc.docx";
    string textPath = Path.Combine(docPath, "TestTxt.txt");

    Microsoft.Office.Interop.Word.Application application = new Microsoft.Office.Interop.Word.Application();
    try
    {
        Document document = application.Documents.Open(Path.Combine(docPath, testFile));

        // Save through the document that was just opened rather than
        // ActiveDocument, so the call does not depend on which window is active.
        // The remaining optional COM parameters are left at their defaults.
        // NOTE(review): Word presumably writes the text in its default encoding,
        // which may not match what File.ReadAllText assumes - confirm for
        // documents containing non-ASCII characters.
        document.SaveAs(textPath, WdSaveFormat.wdFormatText);
    }
    finally
    {
        // Always shut Word down, even when opening or saving throws; otherwise
        // the Word process started above keeps running in the background.
        // The cast selects the Quit method on the _Application interface.
        ((_Application)application).Quit();
    }

    string readText = File.ReadAllText(textPath);
    //do regex here
}