TextExtractionMode Property
Summary
Mode to use when extracting text from this document.
Syntax
public DocumentTextExtractionMode getTextExtractionMode()
public void setTextExtractionMode(DocumentTextExtractionMode value)
Property Value
The mode to use when extracting text from this document. Default value is DocumentTextExtractionMode.Auto.
Example
using Leadtools;
using Leadtools.Codecs;
using Leadtools.Document.Writer;
using Leadtools.Document;
using Leadtools.Caching;
using Leadtools.Annotations.Engine;
using Leadtools.Ocr;
using Leadtools.Barcode;
using Leadtools.Document.Converter;
/// <summary>
/// Loads the sample PDF, prints the links of its first page, then enables automatic
/// link parsing on the document text, retrieves the page text and prints the links again.
/// </summary>
public void DocumentPageGetLinksExample()
{
    var cache = GetCache();
    var loadOptions = new LoadDocumentOptions { Cache = cache };
    string pdfPath = Path.Combine(LEAD_VARS.ImagesDir, "Leadtools.pdf");

    using (var document = DocumentFactory.LoadFromFile(pdfPath, loadOptions))
    {
        // Clear the read-only flag before the page's links are set below
        document.IsReadOnly = false;

        // First pass: list the links as they are before the page text is retrieved
        Console.WriteLine("Before get text");
        Console.WriteLine("---------");
        var page = document.Pages[0];
        // Store the page's current links back onto the page
        page.SetLinks(page.GetLinks());
        ShowLinks(page);

        // List every value defined by the DocumentPageFitType enumeration
        foreach (DocumentPageFitType type in Enum.GetValues(typeof(DocumentPageFitType)))
        {
            Console.WriteLine($"Page fit type: {type}");
        }

        // Turn on hyperlink parsing and pick the text extraction mode (both live on DocumentText)
        document.Text.AutoParseLinks = true;
        document.Text.TextExtractionMode = DocumentTextExtractionMode.Auto;

        // List the regular expressions exposed through DocumentText.LinkPatterns
        Console.WriteLine("Parsing links from the text using these regular expressions:");
        foreach (var pattern in DocumentText.LinkPatterns)
        {
            Console.WriteLine(pattern.ToString());
        }

        // Retrieve the page text so links can be parsed from it, then reset the modified flag
        page.GetText();
        page.IsLinksModified = false;

        // Second pass: list the links after the text was retrieved. The output is expected to
        // contain the original links plus any URLs parsed from the text
        Console.WriteLine("After get text");
        Console.WriteLine("---------");
        ShowLinks(page);
    }
}
/// <summary>
/// Writes every link of <paramref name="page"/> to the console: its running index, bounds
/// and link type, followed by either the link value or the details of the link target.
/// Writes nothing when the page returns no link collection.
/// </summary>
/// <param name="page">The document page whose links are printed.</param>
private static void ShowLinks(Leadtools.Document.DocumentPage page)
{
    var pageLinks = page.GetLinks();
    if (pageLinks == null)
    {
        // No link collection available for this page: nothing to print
        return;
    }

    Console.WriteLine("Page " + page.PageNumber);

    int ordinal = 0;
    foreach (var current in pageLinks)
    {
        // Zero-based position of the link in the collection
        Console.WriteLine(ordinal);
        ordinal++;

        Console.WriteLine(" Bounds:" + current.Bounds);
        Console.WriteLine(" LinkType:" + current.LinkType);

        if (current.LinkType != DocumentLinkType.Value)
        {
            // Any other link type: print the DocumentLinkTarget details
            Console.WriteLine(" Target.PageFitType:" + current.Target.PageFitType);
            Console.WriteLine(" Target.PageNumber:" + current.Target.PageNumber);
            Console.WriteLine(" Target.Position:" + current.Target.Position);
            Console.WriteLine(" Target.ZoomPercent:" + current.Target.ZoomPercent);
        }
        else
        {
            // Value links carry their payload in the Value property
            Console.WriteLine(" Value:" + current.Value);
        }

        // Blank line between consecutive links
        Console.WriteLine();
    }
}
/// <summary>
/// Shared constants for the example code.
/// </summary>
static class LEAD_VARS
{
    /// <summary>
    /// Directory the example combines with "Leadtools.pdf" to locate the sample document.
    /// NOTE(review): presumably the default LEADTOOLS 22 install location; adjust if yours differs.
    /// </summary>
    public const string ImagesDir = @"C:\LEADTOOLS22\Resources\Images";
}