DocumentTextExtractionMode Enumeration (original) (raw)

Summary

Mode to use when extracting text from this document.

Syntax

[SerializableAttribute()] [DataContractAttribute()] public enum DocumentTextExtractionMode

public [SerializableAttribute, DataContractAttribute] enum class DocumentTextExtractionMode sealed

public enum DocumentTextExtractionMode 

class DocumentTextExtractionMode(Enum):
    """Mode to use when extracting text from a document."""

    # Use SVG if the document type supports it; otherwise use OCR if available.
    Auto = 0
    # Use SVG only if the document supports it; otherwise do not extract text.
    SvgOnly = 1
    # Always use OCR, even if the document supports SVG.
    OcrOnly = 2

Members

0 Auto Automatic. The behavior depends on the document type: if the document supports SVG, SVG is used; otherwise, OCR is used if it is available.
1 SvgOnly Use SVG only if the document supports it. Otherwise, do not extract text.
2 OcrOnly Always use OCR, even if the document supports SVG.

Example

using System;
using System.IO;
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Document.Writer;
using Leadtools.Document;
using Leadtools.Caching;
using Leadtools.Annotations.Engine;
using Leadtools.Ocr;
using Leadtools.Barcode;
using Leadtools.Document.Converter;

/// <summary>
/// Loads "Leadtools.doc" from <see cref="LEAD_VARS.ImagesDir"/>, prints every
/// <see cref="DocumentTextExtractionMode"/> value, sets the document's text
/// extraction mode to <see cref="DocumentTextExtractionMode.Auto"/>, and then
/// prints the text, character count, word count and per-word details of the
/// first page. Prints "Failed!" when no page text is returned.
/// </summary>
public void DocumentPageTextExample()
{
    var options = new LoadDocumentOptions();
    using (var document = DocumentFactory.LoadFromFile(Path.Combine(LEAD_VARS.ImagesDir, "Leadtools.doc"), options))
    {
        // Work with the first page of the document
        var page = document.Pages[0];

        // List every value defined by the DocumentTextExtractionMode enumeration
        var textExtractionModes = (DocumentTextExtractionMode[])Enum.GetValues(typeof(DocumentTextExtractionMode));
        foreach (var mode in textExtractionModes)
        {
            Console.WriteLine($"Text extraction mode: {mode}");
        }

        // Text extraction mode. Auto is default
        document.Text.TextExtractionMode = DocumentTextExtractionMode.Auto;

        // Retrieve the DocumentPageText for the page
        var pageText = page.GetText();
        if (pageText == null)
        {
            Console.WriteLine("Failed!");
            return;
        }

        // Build the text before reading the characters and the text string
        pageText.BuildText();
        var characters = pageText.Characters;
        var text = pageText.Text;
        Console.WriteLine(text);
        Console.WriteLine($"Total number of characters: {characters.Count}");

        // Build the words before reading the word list
        pageText.BuildWords();
        Console.WriteLine($"Total number of words: {pageText.Words.Count}");

        // Print the details of each word
        foreach (DocumentWord word in pageText.Words)
        {
            Console.WriteLine($"Bounds: {word.Bounds} | First character index: {word.FirstCharacterIndex} " +
                $"| Last character index: {word.LastCharacterIndex} | Value: {word.Value}");
        }
    }
}

/// <summary>
/// Constants shared by the example code.
/// </summary>
static class LEAD_VARS
{
    /// <summary>Directory that contains the sample files used by the example.</summary>
    public const string ImagesDir = @"C:\LEADTOOLS22\Resources\Images";
}