DocumentTextExtractionMode Enumeration (original) (raw)
Summary
Mode to use when extracting text from this document.
Syntax
[SerializableAttribute()]
[DataContractAttribute()]
public enum DocumentTextExtractionMode
public [SerializableAttribute,
DataContractAttribute]
enum class DocumentTextExtractionMode sealed
public enum DocumentTextExtractionMode
class DocumentTextExtractionMode(Enum):
Auto = 0
SvgOnly = 1
OcrOnly = 2
Members
Value | Member | Description |
---|---|---|
0 | Auto | Automatic. The behavior depends on the document type: if the document supports SVG, then SVG is used; otherwise, OCR is used if it is available. |
1 | SvgOnly | Use SVG only if the document supports it. Otherwise, do not extract text. |
2 | OcrOnly | Use OCR always even if the document supports SVG. |
Example
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Document.Writer;
using Leadtools.Document;
using Leadtools.Caching;
using Leadtools.Annotations.Engine;
using Leadtools.Ocr;
using Leadtools.Barcode;
using Leadtools.Document.Converter;
/// <summary>
/// Loads "Leadtools.doc" from <c>LEAD_VARS.ImagesDir</c>, prints every
/// <c>DocumentTextExtractionMode</c> value, then extracts the text of the first
/// page and writes its text, character count, word count and per-word details
/// to the console.
/// </summary>
/// <remarks>
/// Besides the LEADTOOLS namespaces, this method uses <c>Path</c>, <c>Enum</c>
/// and <c>Console</c> from <c>System.IO</c> / <c>System</c>.
/// </remarks>
public void DocumentPageTextExample()
{
    var loadOptions = new LoadDocumentOptions();
    string sourceFile = Path.Combine(LEAD_VARS.ImagesDir, "Leadtools.doc");

    using (var document = DocumentFactory.LoadFromFile(sourceFile, loadOptions))
    {
        // The first page is the one whose text gets extracted below
        var firstPage = document.Pages[0];

        // List every value declared by the DocumentTextExtractionMode enumeration
        foreach (DocumentTextExtractionMode mode in Enum.GetValues(typeof(DocumentTextExtractionMode)))
        {
            Console.WriteLine($"Text extraction mode: {mode}");
        }

        // Select the extraction mode explicitly. Auto is default
        document.Text.TextExtractionMode = DocumentTextExtractionMode.Auto;

        // Obtain the DocumentPageText for the page; bail out early when none is returned
        var extracted = firstPage.GetText();
        if (extracted == null)
        {
            Console.WriteLine("Failed!");
            return;
        }

        // Build the text string first, then report it with the character count
        extracted.BuildText();
        var pageCharacters = extracted.Characters;
        var fullText = extracted.Text;
        Console.WriteLine(fullText);
        Console.WriteLine($"Total number of characters: {pageCharacters.Count}");

        // Build the word list and report each word
        extracted.BuildWords();
        Console.WriteLine($"Total number of words: {extracted.Words.Count}");
        foreach (DocumentWord entry in extracted.Words)
        {
            Console.WriteLine(
                $"Bounds: {entry.Bounds} | First character index: {entry.FirstCharacterIndex} | Last character index: {entry.LastCharacterIndex} | Value: {entry.Value}");
        }
    }
}
// Holds fixed values shared by the example code.
static class LEAD_VARS
{
// Directory that DocumentPageTextExample combines with "Leadtools.doc" to locate its input file.
// NOTE(review): hard-coded to a local installation path — adjust to the machine running the example.
public const string ImagesDir = @"C:\LEADTOOLS22\Resources\Images";
}