Read PDF tags
Read PDF tags
This code sample enumerates the tags in an existing PDF document.
/// <summary>
/// Writes the type of <paramref name="tag"/> to the console, prefixed with two
/// spaces per nesting level, and then recurses into every child that is a
/// <c>Tag</c>.
/// </summary>
/// <param name="tag">The tag whose subtree is printed.</param>
/// <param name="depth">Nesting level of <paramref name="tag"/>; determines the indentation.</param>
private static void EnumTagStructure(Tag tag, int depth)
{
    string indent = new string(' ', 2 * depth);
    Console.WriteLine(indent + tag.Type);

    foreach (var node in tag.Childs)
    {
        // Children that are not Tag instances are skipped.
        if (!(node is Tag))
        {
            continue;
        }

        EnumTagStructure((Tag)node, depth + 1);
    }
}
/// <summary>
/// Opens the sample tagged PDF and prints its tag tree to the console,
/// starting at the root tag of the document's logical structure.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    // Request read access only: the (path, mode) FileStream constructor asks for
    // read/write access, which fails on read-only files even though the
    // document is only read here.
    using (FileStream fs = new FileStream(@"..\..\..\InputDocuments\TaggedPDF.pdf", FileMode.Open, FileAccess.Read))
    {
        Document document = new Document(fs);

        // Depth 0 prints the root tag without indentation.
        EnumTagStructure(document.LogicalStructure.RootTag, 0);
    }
}
This will output the following (in the actual console output, each nesting level is indented by two additional spaces):
Document
Part
P
Span
bold
Span
Span
Span
Span
Span
Span
Span
Span
bold
Span
Span
Document
Figure
Caption
P
H1
Span
Div
Span
Span
Div