Read PDF tags

Read PDF tags

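This code sample recursively enumerates the tags in the logical structure of an existing PDF document and prints the type of each tag, indented according to its nesting depth.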

/// <summary>
/// Writes the type of <paramref name="tag"/> to the console, indented by two
/// spaces per nesting level, then repeats this for every child that is itself a tag.
/// </summary>
/// <param name="tag">The tag whose subtree is printed.</param>
/// <param name="depth">Nesting level of <paramref name="tag"/>; each level adds two spaces of indentation.</param>
static private void EnumTagStructure(Tag tag, int depth)
{
  string indent = new string(' ', 2 * depth);
  Console.WriteLine(indent + tag.Type);

  foreach (var node in tag.Childs)
  {
    // Children that are not tags are skipped.
    if (!(node is Tag))
    {
      continue;
    }

    EnumTagStructure((Tag)node, depth + 1);
  }
}

/// <summary>
/// Opens the sample tagged PDF and prints its tag tree to the console,
/// starting at the root tag of the document's logical structure.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
  // Request read-only access explicitly: this sample never writes to the file,
  // and the two-argument FileStream constructor asks for read/write access,
  // which fails when the input file is read-only.
  using (FileStream fs = new FileStream(@"..\..\..\InputDocuments\TaggedPDF.pdf", FileMode.Open, FileAccess.Read))
  {
    Document document = new Document(fs);
    EnumTagStructure(document.LogicalStructure.RootTag, 0);
  }
}

For the sample input document, this prints the following tag tree:

Document
    Part
      P
        Span
        bold
          Span
        Span
        Span
          Span
        Span
        Span
          Span
        Span
        bold
          Span
        Span
    Document
      Figure
        Caption
      P
        H1
        Span
        Div
        Span
        Span
        Div