Hi, try this one (I am using .NET 4.0). First you have to add a reference to the Microsoft Word 12.0 Object Library from the COM tab.
Then write the code below:
/// <summary>
/// Opens the Word document at the hard-coded path, reads its whole text and
/// extracts the e-mail addresses found in it.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
private void button1_Click(object sender, EventArgs e)
{
    string filePath = @"D:\Test.docx";
    object file = filePath;
    object nullobj = System.Reflection.Missing.Value;
    // The document is only read, so discard changes on close/quit; this also
    // keeps Word from showing a "save changes?" prompt.
    object doNotSave = Word.WdSaveOptions.wdDoNotSaveChanges;

    Word.ApplicationClass wordApp = new Word.ApplicationClass();
    Word.Document doc = null;
    try
    {
        doc = wordApp.Documents.Open(ref file,
            ref nullobj, ref nullobj, ref nullobj, ref nullobj, ref nullobj,
            ref nullobj, ref nullobj, ref nullobj, ref nullobj, ref nullobj,
            ref nullobj);

        // Comma-separated list of the addresses found in the document.
        var extractedEmails = Input(doc.Content.Text);
    }
    finally
    {
        // Always shut Word down, even when opening or parsing fails; otherwise
        // an invisible WINWORD.EXE process is left running.
        try
        {
            if (doc != null)
            {
                // Cast to _Document so the Close method is not confused with
                // the Close event declared on the document events interface.
                ((Word._Document)doc).Close(ref doNotSave, ref nullobj, ref nullobj);
                System.Runtime.InteropServices.Marshal.ReleaseComObject(doc);
            }
        }
        finally
        {
            // Same method/event ambiguity applies to Quit on the application.
            ((Word._Application)wordApp).Quit(ref doNotSave, ref nullobj, ref nullobj);
            System.Runtime.InteropServices.Marshal.ReleaseComObject(wordApp);
        }
    }
}
/// <summary>
/// Extracts every whitespace-delimited token of <paramref name="txt"/> that
/// <c>IsValidEmail</c> accepts.
/// </summary>
/// <param name="txt">Text to scan; may be null or empty.</param>
/// <returns>
/// The accepted tokens joined with commas, in order of appearance, or an
/// empty string when there are none.
/// </returns>
private string Input(string txt)
{
    if (string.IsNullOrEmpty(txt))
    {
        return string.Empty;
    }

    // A null separator array splits on all white-space characters, so
    // addresses that end a line or paragraph (Word separates paragraphs with
    // '\r') are isolated too, not only those followed by a space.
    string[] tokens = txt.Split((char[])null, StringSplitOptions.RemoveEmptyEntries);

    StringBuilder sb = new StringBuilder();
    foreach (string token in tokens)
    {
        if (!IsValidEmail(token))
        {
            continue;
        }

        // Add the separator only between entries; this avoids the leading
        // comma that previously had to be stripped with Substring(1), which
        // threw when no address was found.
        if (sb.Length > 0)
        {
            sb.Append(',');
        }
        sb.Append(token);
    }
    return sb.ToString();
}
// Built once and reused: constructing a Regex for every token is wasted work.
// The top-level domain accepts two or more letters so that longer TLDs
// (e.g. ".museum") are not rejected. CultureInvariant keeps the
// case-insensitive match independent of the current thread culture.
private static readonly Regex EmailRegex = new Regex(
    @"^[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,}$",
    RegexOptions.IgnoreCase | RegexOptions.CultureInvariant);

/// <summary>
/// Checks whether <paramref name="email"/> has the shape of an e-mail address
/// (local part, '@', domain, '.', alphabetic top-level domain).
/// </summary>
/// <param name="email">Candidate string; may be null.</param>
/// <returns>
/// <c>true</c> when the whole string matches the pattern; <c>false</c>
/// otherwise, including for null or empty input.
/// </returns>
private bool IsValidEmail(string email)
{
    // Regex.IsMatch throws on null, so treat missing input as "not an e-mail".
    if (string.IsNullOrEmpty(email))
    {
        return false;
    }
    return EmailRegex.IsMatch(email);
}
The content of Test.docx is as follows:
Hello
How r u
Email: mail3456@gg.com
Another email is aaa@zz.com
And not a valid email as invalid@.com
The output in the extractedEmails variable will be mail3456@gg.com,aaa@zz.com.
Hope this helps
Thanks
Best Regards,
Niladri Biswas
Parcha, if this helps please login to Mark As Answer. | Alert Moderator