I need to search the content of PDF documents, so I am using OCR in my application.
The PDF documents are saved in the D:/Books directory.
I have tried the following code, but it is not working:
/// <summary>
/// Runs MODI OCR on every PDF file found directly inside <paramref name="directoryPath"/>
/// and writes the recognized text of the document's first image to a ".txt" file that
/// has the same path and base name as the source file.
/// </summary>
/// <param name="directoryPath">Directory whose files are examined (sub-directories are not searched).</param>
/// <remarks>
/// Processing is best-effort: a file that fails is reported through
/// <see cref="System.Diagnostics.Trace"/> and skipped, and the remaining files are still processed.
/// An already existing text file is never overwritten.
/// NOTE(review): MODI's Document.Create is documented for MDI/TIFF images; PDF input
/// will probably be rejected, in which case the PDF pages must first be converted to
/// TIFF by another tool - confirm against the MODI reference.
/// </remarks>
public void CheckFileType(string directoryPath)
{
    foreach (string filePath in Directory.GetFiles(directoryPath))
    {
        // Check for PDF file format, ignoring case so ".Pdf" is handled as well.
        string fileExtension = Path.GetExtension(filePath);
        if (!string.Equals(fileExtension, ".pdf", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // Swap only the trailing extension; a plain string Replace would also strip
        // ".pdf" occurring anywhere else in the path.
        string textFilePath = Path.ChangeExtension(filePath, ".txt");

        // The text file is created with FileMode.CreateNew, so an existing file can
        // never be written; skip it before spending time on OCR.
        if (File.Exists(textFilePath))
        {
            continue;
        }

        try
        {
            // OCR operations
            MODI.Document md = new MODI.Document();
            md.Create(filePath);
            try
            {
                md.OCR(MODI.MiLANGUAGES.miLANG_ENGLISH, true, true);
                MODI.Image image = (MODI.Image)md.Images[0];

                // Save the recognized text; the using blocks release the file handle
                // even when the write fails.
                using (FileStream createFile = new FileStream(textFilePath, FileMode.CreateNew))
                using (StreamWriter writeFile = new StreamWriter(createFile))
                {
                    writeFile.Write(image.Layout.Text);
                }
            }
            finally
            {
                // Release the source document without saving the OCR results back into it.
                md.Close(false);
            }
        }
        catch (Exception e)
        {
            // Keep going with the next file, but leave a diagnostic instead of
            // silently discarding the failure.
            System.Diagnostics.Trace.TraceError("OCR failed for '" + filePath + "': " + e);
        }
    }
}