GetRecognizedCharacters Method (original) (raw)

Summary

Gets the most recently recognized character data for this IOcrPage.

Syntax

C#

Objective-C

C++/CLI

Java

Python

- (nullable LTOcrPageCharacters *)recognizedCharacters:(NSError **)error; 
public OcrPageCharacters getRecognizedCharacters() 
def GetRecognizedCharacters(self): 

Return Value

An instance of IOcrPageCharacters containing the most recently recognized character data for this IOcrPage.

Example

This example will get the recognized characters of a page, modify them and set them back before saving the final document.

using System;
using System.Collections.Generic;
using System.Drawing;
using System.IO;

using Leadtools;
using Leadtools.Codecs;
using Leadtools.Ocr;
using Leadtools.Forms.Common;
using Leadtools.Document.Writer;
using Leadtools.WinForms;
using Leadtools.Drawing;
using Leadtools.ImageProcessing;
using Leadtools.ImageProcessing.Color;

/// <summary>
/// Recognizes a generated test image, dumps every recognized character to a text file,
/// modifies the characters (capitalizes the first letter of each word, adds italic and
/// underline to bold characters), sets them back on the page and saves the result as PDF/A.
/// </summary>
public void RecognizedCharactersExample()
{
    // Create an image with some text in it
    RasterImage image = new RasterImage(RasterMemoryFlags.Conventional, 640, 200, 24, RasterByteOrder.Bgr, RasterViewPerspective.TopLeft, null, IntPtr.Zero, 0);
    Rectangle imageRect = new Rectangle(0, 0, image.ImageWidth, image.ImageHeight);

    IntPtr hdc = RasterImagePainter.CreateLeadDC(image);
    try
    {
        using (Graphics g = Graphics.FromHdc(hdc))
        {
            g.SmoothingMode = System.Drawing.Drawing2D.SmoothingMode.HighQuality;
            g.FillRectangle(Brushes.White, imageRect);

            using (Font f = new Font("Arial", 20, FontStyle.Regular))
            {
                g.DrawString("Normal line", f, Brushes.Black, 0, 0);
            }

            using (Font f = new Font("Arial", 20, FontStyle.Bold))
            {
                g.DrawString("Bold, italic and underline", f, Brushes.Black, 0, 40);
            }

            using (Font f = new Font("Courier New", 20, FontStyle.Regular))
            {
                g.DrawString("Monospaced line", f, Brushes.Black, 0, 80);
            }
        }
    }
    finally
    {
        // Always release the device context, even if drawing fails
        RasterImagePainter.DeleteLeadDC(hdc);
    }

    string textFileName = Path.Combine(LEAD_VARS.ImagesDir, "MyImageWithTest.txt");
    string pdfFileName = Path.Combine(LEAD_VARS.ImagesDir, "MyImageWithTest.pdf");

    // Create an instance of the engine
    using (IOcrEngine ocrEngine = OcrEngineManager.CreateEngine(OcrEngineType.LEAD))
    {
        // Start the engine using default parameters
        ocrEngine.Startup(null, null, null, LEAD_VARS.OcrLEADRuntimeDir);

        // Create an OCR page
        IOcrPage ocrPage = ocrEngine.CreatePage(image, OcrImageSharingMode.AutoDispose);
        try
        {
            // Recognize this page
            ocrPage.Recognize(null);

            // Dump the characters into a text file
            using (StreamWriter writer = File.CreateText(textFileName))
            {
                IOcrPageCharacters ocrPageCharacters = ocrPage.GetRecognizedCharacters();

                foreach (IOcrZoneCharacters ocrZoneCharacters in ocrPageCharacters)
                {
                    // Show the words found in this zone
                    ICollection<OcrWord> words = ocrZoneCharacters.GetWords();
                    Console.WriteLine("Words:");
                    foreach (OcrWord word in words)
                    {
                        Console.WriteLine("Word: {0}, at {1}, characters index from {2} to {3}", word.Value, word.Bounds, word.FirstCharacterIndex, word.LastCharacterIndex);
                    }

                    // The first character of every zone starts a new word
                    bool nextCharacterIsNewWord = true;

                    for (int i = 0; i < ocrZoneCharacters.Count; i++)
                    {
                        OcrCharacter ocrCharacter = ocrZoneCharacters[i];

                        // Capitalize the first letter if this is a new word
                        if (nextCharacterIsNewWord)
                        {
                            ocrCharacter.Code = Char.ToUpper(ocrCharacter.Code);
                        }

                        writer.WriteLine("Code: {0}, Confidence: {1}, WordIsCertain: {2}, Bounds: {3}, Position: {4}, FontSize: {5}, FontStyle: {6}",
                            ocrCharacter.Code,
                            ocrCharacter.Confidence,
                            ocrCharacter.WordIsCertain,
                            ocrCharacter.Bounds,
                            ocrCharacter.Position,
                            ocrCharacter.FontSize,
                            ocrCharacter.FontStyle);

                        // If the character is bold, make it italic and underlined as well
                        if ((ocrCharacter.FontStyle & OcrCharacterFontStyle.Bold) == OcrCharacterFontStyle.Bold)
                        {
                            ocrCharacter.FontStyle |= OcrCharacterFontStyle.Italic;
                            ocrCharacter.FontStyle |= OcrCharacterFontStyle.Underline;
                        }

                        // The next character starts a new word if this one ends a word or a line
                        nextCharacterIsNewWord =
                            (ocrCharacter.Position & OcrCharacterPosition.EndOfWord) == OcrCharacterPosition.EndOfWord ||
                            (ocrCharacter.Position & OcrCharacterPosition.EndOfLine) == OcrCharacterPosition.EndOfLine;

                        // The character was modified through a local copy, so store it back in the zone
                        ocrZoneCharacters[i] = ocrCharacter;
                    }
                }

                // Replace the characters with the modified ones before we save
                ocrPage.SetRecognizedCharacters(ocrPageCharacters);
            }

            // Create an OCR document so we can save the results
            using (IOcrDocument ocrDocument = ocrEngine.DocumentManager.CreateDocument(null, OcrCreateDocumentOptions.AutoDeleteFile))
            {
                // Add the page; it is disposed in the finally block below
                ocrDocument.Pages.Add(ocrPage);

                // Set the PDF options to save as PDF/A text only
                PdfDocumentOptions pdfOptions = ocrEngine.DocumentWriterInstance.GetOptions(DocumentFormat.Pdf) as PdfDocumentOptions;
                if (pdfOptions == null)
                {
                    throw new InvalidOperationException("The document writer did not return PDF options for DocumentFormat.Pdf.");
                }

                pdfOptions.DocumentType = PdfDocumentType.PdfA;
                pdfOptions.ImageOverText = false;
                ocrEngine.DocumentWriterInstance.SetOptions(DocumentFormat.Pdf, pdfOptions);

                ocrDocument.Save(pdfFileName, DocumentFormat.Pdf, null);

                // Open and check the result file, it should contain the following text
                // "Normal Line"
                // "Bold, Italic And Underline"
                // "Monospaced Line"
                // With the second line bold, italic and underlined now
            }
        }
        finally
        {
            // Dispose the page on both the success and the failure path
            ocrPage.Dispose();
        }

        // Shutdown the engine
        // Note: calling Dispose will also automatically shutdown the engine if it has been started
        ocrEngine.Shutdown();
    }
}

/// <summary>
/// Installation-specific paths used by the example.
/// </summary>
static class LEAD_VARS
{
    public const string ImagesDir = @"C:\LEADTOOLS22\Resources\Images";
    public const string OcrLEADRuntimeDir = @"C:\LEADTOOLS22\Bin\Common\OcrLEADRuntime";
}