DocumentPageText Class

Summary

Contains the text characters and words found in a document page.

Syntax

C#: [SerializableAttribute()] [DataContractAttribute()] public class DocumentPageText

C++/CLI: public [SerializableAttribute, DataContractAttribute] ref class DocumentPageText

Java: public class DocumentPageText implements Serializable

Example

using System;
using System.IO;
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Document.Writer;
using Leadtools.Document;
using Leadtools.Caching;
using Leadtools.Annotations.Engine;
using Leadtools.Ocr;
using Leadtools.Barcode;
using Leadtools.Document.Converter;

/// <summary>
/// Loads "Leadtools.doc" from <see cref="LEAD_VARS.ImagesDir"/>, lists every
/// <c>DocumentTextExtractionMode</c> value, then retrieves the text of the first
/// page and prints its text, character count, word count and per-word details
/// to the console.
/// </summary>
public void DocumentPageTextExample()
{
   var options = new LoadDocumentOptions();
   using (var document = DocumentFactory.LoadFromFile(Path.Combine(LEAD_VARS.ImagesDir, "Leadtools.doc"), options))
   {
      // First page of the document; its text is fetched further below
      var page = document.Pages[0];

      // Enumerate and print every DocumentTextExtractionMode value
      DocumentTextExtractionMode[] textExtractionModes =
         (DocumentTextExtractionMode[])Enum.GetValues(typeof(DocumentTextExtractionMode));
      foreach (var mode in textExtractionModes)
      {
         Console.WriteLine($"Text extraction mode: {mode}");
      }

      // Text extraction mode. Auto is default
      document.Text.TextExtractionMode = DocumentTextExtractionMode.Auto;

      // DocumentPageText reference
      var pageText = page.GetText();
      if (pageText != null)
      {
         // BuildText() is called before the Text property is read
         pageText.BuildText();
         var characters = pageText.Characters;
         var text = pageText.Text;
         Console.WriteLine(text);
         Console.WriteLine($"Total number of characters: {characters.Count}");

         // BuildWords() is called before the Words collection is read
         pageText.BuildWords();
         Console.WriteLine($"Total number of words: {pageText.Words.Count}");

         // Get each word
         foreach (DocumentWord word in pageText.Words)
         {
            Console.WriteLine($"Bounds: {word.Bounds} | First character index: {word.FirstCharacterIndex} " +
               $"| Last character index: {word.LastCharacterIndex} | Value: {word.Value}");
         }
      }
      else
      {
         Console.WriteLine("Failed!");
      }
   }
}

/// <summary>
/// Holds the location of the sample images used by the example.
/// </summary>
static class LEAD_VARS
{
   public const string ImagesDir = @"C:\LEADTOOLS22\Resources\Images";
}