Extract graphics from PDF
This C# code sample shows how to extract text, images and curves as shapes from a PDF document.
Shapes
The Shape class is an abstract class with concrete specializations such as TextShape, ImageShape and LineShape. Shapes represent graphics of all types and were originally introduced to draw on a new or existing PDF page. The reverse is also possible: existing graphics on a PDF page can be extracted as shapes. The central method for this is Page.CreateShapes.
The following C# code sample enumerates all shapes on each page of a PDF document and dumps their properties to the console.
// Running totals per kind of shape. They are incremented by the DumpShapeInfo
// overloads and printed by Main once all pages have been processed.
static int shapeCollections = 0;
static int textShapes = 0;
static int imageShapes = 0;
static int freehandShapes = 0;
static int layerShapes = 0;
static int clipShapes = 0;
// Shapes that match none of the types handled explicitly by DumpShapeInfo.
static int otherShapes = 0;
static void Main(string[] args)
{
    // Relative location of the sample input document.
    const string inputPath = @"..\..\..\inputDocuments/vectorgraphics.pdf";

    // The stream stays open only while the shapes are being enumerated.
    using (FileStream input = new FileStream(inputPath, FileMode.Open, FileAccess.Read))
    {
        IterateShapes(new Document(input));
    }

    // Summary: one line per counter, in a fixed order.
    Console.WriteLine("Nr of shape collections = {0}", shapeCollections);
    Console.WriteLine("Nr of text shapes = {0}", textShapes);
    Console.WriteLine("Nr of image shapes = {0}", imageShapes);
    Console.WriteLine("Nr of freehand shapes = {0}", freehandShapes);
    Console.WriteLine("Nr of layer shapes = {0}", layerShapes);
    Console.WriteLine("Nr of clip shapes = {0}", clipShapes);
    Console.WriteLine("Nr of other shapes = {0}", otherShapes);
}
//
// iterate through all pages in a PDF document
//
static void IterateShapes(Document pdf)
{
    // Hand every page of the document to the per-page overload.
    foreach (Page currentPage in pdf.Pages)
    {
        IterateShapes(currentPage);
    }
}
//
// Get all shapes in a PDF page (this will be a shape collection)
//
static void IterateShapes(Page page)
{
    // The page content is returned as one root collection; it is dumped
    // starting with an empty indent.
    ShapeCollection pageShapes = page.CreateShapes();
    IterateShapes(pageShapes, string.Empty);
}
//
// iterate through each shape in a shape collection (this may recurse)
//
static void IterateShapes(ShapeCollection shapes, string indent)
{
    // Report the collection itself exactly once, before its children.
    DumpShapeInfo(shapes, indent);

    string childIndent = indent + " ";
    foreach (Shape shape in shapes)
    {
        // Test for a layer first so that a layer is always handled as a layer,
        // and handled only once.
        LayerShape layer = shape as LayerShape;
        if (layer != null)
        {
            DumpShapeInfo(layer, indent);
            IterateShapes(layer, childIndent);
            continue;
        }

        ShapeCollection nested = shape as ShapeCollection;
        if (nested != null)
        {
            // The recursive call dumps the nested collection's header itself.
            // Dumping it here as well would report and count it twice.
            IterateShapes(nested, childIndent);
            continue;
        }

        // Any other shape: dump its properties at the current level.
        DumpShapeInfo(shape, indent);
    }
}
//
// iterate through all shapes in a layer shape
//
static void IterateShapes(LayerShape shapes, string indent)
{
    // Dump each direct child of the layer; this overload does not recurse.
    foreach (Shape child in shapes)
    {
        DumpShapeInfo(child, indent);
    }
}
//
// Dump information on any kind of shape
//
internal static void DumpShapeInfo(Shape shape, string indent)
{
    // One single if / else-if chain: every shape is dispatched to exactly one
    // typed overload, so it is printed once and counted once. The fallback
    // branch is reached only when none of the known types match.
    if (shape is LayerShape)
    {
        // Tested before ShapeCollection so a layer is always reported as a layer.
        DumpShapeInfo((LayerShape)shape, indent);
    }
    else if (shape is ShapeCollection)
    {
        DumpShapeInfo((ShapeCollection)shape, indent);
    }
    else if (shape is TextShape)
    {
        DumpShapeInfo((TextShape)shape, indent);
    }
    else if (shape is ImageShape)
    {
        DumpShapeInfo((ImageShape)shape, indent);
    }
    else if (shape is FreeHandShape)
    {
        DumpShapeInfo((FreeHandShape)shape, indent);
    }
    else if (shape is ClipShape)
    {
        DumpShapeInfo((ClipShape)shape, indent);
    }
    else
    {
        Console.WriteLine("{0}Shape = some other type", indent);
        otherShapes++;
    }
}
//
// Dump information on a shape collection
//
internal static void DumpShapeInfo(ShapeCollection shape, string indent)
{
    // Header line followed by the element count, then update the tally.
    var output = Console.Out;
    output.WriteLine("{0}Shape = shape collection", indent);
    output.WriteLine("{0} : N elements = {1}", indent, shape.Count);
    shapeCollections += 1;
}
//
// Dump information on a layer shape
//
internal static void DumpShapeInfo(LayerShape shape, string indent)
{
    // Print the layer's blend mode, opacity and position, then update the tally.
    var output = Console.Out;
    output.WriteLine("{0}Shape = Layer shape", indent);
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y);
    layerShapes += 1;
}
//
// Dump information on a text shape
//
internal static void DumpShapeInfo(TextShape shape, string indent)
{
    var output = Console.Out;
    output.WriteLine("{0}Shape = Text shape", indent);

    // Font details.
    var font = shape.Font;
    output.WriteLine("{0} : Font name = {1}", indent, font.FamilyName);
    output.WriteLine("{0} : Font weight = {1}", indent, font.Weight);
    output.WriteLine("{0} : Font size = {1}", indent, shape.FontSize);
    output.WriteLine("{0} : embed mode = {1}", indent, font.EmbedMode);

    // Bounding box.
    var box = shape.BoundingBox;
    output.WriteLine("{0} : bounding box left = {1}", indent, box.Left);
    output.WriteLine("{0} : bounding box top = {1}", indent, box.Top);
    output.WriteLine("{0} : bounding box Width = {1}", indent, box.Width);
    output.WriteLine("{0} : bounding box Height = {1}", indent, box.Height);

    // Appearance and position.
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : X,Y = {1}, {2}", indent, shape.X, shape.Y);

    // Text decorations.
    output.WriteLine("{0} : Bold = {1}", indent, shape.Bold);
    output.WriteLine("{0} : Italic = {1}", indent, shape.Italic);
    output.WriteLine("{0} : Underline = {1}", indent, shape.Underline);
    output.WriteLine("{0} : StrikeOut = {1}", indent, shape.StrikeOut);

    // Measured size as reported by the shape.
    output.WriteLine("{0} : width = {1}", indent, shape.MeasuredWidth);
    output.WriteLine("{0} : height = {1}", indent, shape.MeasuredHeight);

    textShapes += 1;
}
//
// Dump information on an image shape
//
internal static void DumpShapeInfo(ImageShape shape, string indent)
{
    // Print the image's appearance, position and size, then update the tally.
    var output = Console.Out;
    output.WriteLine("{0}Shape = image shape", indent);
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : mask color = {1}", indent, shape.MaskColor);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y);
    output.WriteLine("{0} : width = {1}", indent, shape.Width);
    output.WriteLine("{0} : height = {1}", indent, shape.Height);
    imageShapes += 1;
}
//
// Dump information on a freehand shape
//
internal static void DumpShapeInfo(FreeHandShape shape, string indent)
{
    // Print the path count, fill rule, brush, pen and position, then update the tally.
    var output = Console.Out;
    output.WriteLine("{0}Shape = freehand shape", indent);
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : N paths = {1}", indent, shape.Paths.Count);
    output.WriteLine("{0} : fillrule = {1}", indent, shape.FillRule);
    output.WriteLine("{0} : Brush = {1}", indent, shape.Brush);
    output.WriteLine("{0} : Pen = {1}", indent, shape.Pen);
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y);
    freehandShapes += 1;
}
//
// Dump information on a clip shape
//
internal static void DumpShapeInfo(ClipShape shape, string indent)
{
    // Print the clip's fill rule, path count and position, then update the tally.
    var output = Console.Out;
    output.WriteLine("{0}Shape = Clip shape", indent);
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : fillrule = {1}", indent, shape.FillRule);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : N paths = {1}", indent, shape.Paths.Count);
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y);
    clipShapes += 1;
}
' Running totals per kind of shape. They are incremented by the DumpShapeInfo
' overloads and printed by Main once all pages have been processed.
Dim shapeCollections As Integer = 0
Dim textShapes As Integer = 0
Dim imageShapes As Integer = 0
Dim freehandShapes As Integer = 0
Dim layerShapes As Integer = 0
Dim clipShapes As Integer = 0
' Shapes that match none of the types handled explicitly by DumpShapeInfo.
Dim otherShapes As Integer = 0
Private Sub Main(args As String())
    ' Relative location of the sample input document.
    Const inputPath As String = "..\..\..\inputDocuments/vectorgraphics.pdf"

    ' The stream stays open only while the shapes are being enumerated.
    Using input As New FileStream(inputPath, FileMode.Open, FileAccess.Read)
        IterateShapes(New Document(input))
    End Using

    ' Summary: one line per counter, in a fixed order.
    Console.WriteLine("Nr of shape collections = {0}", shapeCollections)
    Console.WriteLine("Nr of text shapes = {0}", textShapes)
    Console.WriteLine("Nr of image shapes = {0}", imageShapes)
    Console.WriteLine("Nr of freehand shapes = {0}", freehandShapes)
    Console.WriteLine("Nr of layer shapes = {0}", layerShapes)
    Console.WriteLine("Nr of clip shapes = {0}", clipShapes)
    Console.WriteLine("Nr of other shapes = {0}", otherShapes)
End Sub
'
' iterate through all pages in a PDF document
'
Private Sub IterateShapes(pdf As Document)
    ' Hand every page of the document to the per-page overload.
    For Each currentPage As Page In pdf.Pages
        IterateShapes(currentPage)
    Next
End Sub
'
' Get all shapes in a PDF page (this will be a shape collection)
'
Private Sub IterateShapes(page As Page)
    ' The page content is returned as one root collection; it is dumped
    ' starting with an empty indent.
    Dim pageShapes As ShapeCollection = page.CreateShapes()
    IterateShapes(pageShapes, String.Empty)
End Sub
'
' iterate through each shape in a shape collection (this may recurse)
'
Private Sub IterateShapes(shapes As ShapeCollection, indent As String)
    ' Report the collection itself exactly once, before its children.
    DumpShapeInfo(shapes, indent)

    Dim childIndent As String = indent & " "
    For Each shape As Shape In shapes
        Dim layer As LayerShape = TryCast(shape, LayerShape)
        Dim nested As ShapeCollection = TryCast(shape, ShapeCollection)

        If layer IsNot Nothing Then
            ' Tested first so that a layer is always handled as a layer,
            ' and handled only once.
            DumpShapeInfo(layer, indent)
            IterateShapes(layer, childIndent)
        ElseIf nested IsNot Nothing Then
            ' The recursive call dumps the nested collection's header itself.
            ' Dumping it here as well would report and count it twice.
            IterateShapes(nested, childIndent)
        Else
            ' Any other shape: dump its properties at the current level.
            DumpShapeInfo(shape, indent)
        End If
    Next
End Sub
'
' iterate through all shapes in a layer shape
'
Private Sub IterateShapes(shapes As LayerShape, indent As String)
    ' Dump each direct child of the layer; this overload does not recurse.
    For Each child As Shape In shapes
        DumpShapeInfo(child, indent)
    Next
End Sub
'
' Dump information on any kind of shape
'
Friend Sub DumpShapeInfo(shape As Shape, indent As String)
    ' One single If / ElseIf chain: every shape is dispatched to exactly one
    ' typed overload, so it is printed once and counted once. The fallback
    ' branch is reached only when none of the known types match.
    If TypeOf shape Is LayerShape Then
        ' Tested before ShapeCollection so a layer is always reported as a layer.
        DumpShapeInfo(DirectCast(shape, LayerShape), indent)
    ElseIf TypeOf shape Is ShapeCollection Then
        DumpShapeInfo(DirectCast(shape, ShapeCollection), indent)
    ElseIf TypeOf shape Is TextShape Then
        DumpShapeInfo(DirectCast(shape, TextShape), indent)
    ElseIf TypeOf shape Is ImageShape Then
        DumpShapeInfo(DirectCast(shape, ImageShape), indent)
    ElseIf TypeOf shape Is FreeHandShape Then
        DumpShapeInfo(DirectCast(shape, FreeHandShape), indent)
    ElseIf TypeOf shape Is ClipShape Then
        DumpShapeInfo(DirectCast(shape, ClipShape), indent)
    Else
        Console.WriteLine("{0}Shape = some other type", indent)
        otherShapes += 1
    End If
End Sub
'
' Dump information on a shape collection
'
Friend Sub DumpShapeInfo(shape As ShapeCollection, indent As String)
    ' Header line followed by the element count, then update the tally.
    Dim output As System.IO.TextWriter = Console.Out
    output.WriteLine("{0}Shape = shape collection", indent)
    output.WriteLine("{0} : N elements = {1}", indent, shape.Count)
    shapeCollections = shapeCollections + 1
End Sub
'
' Dump information on a layer shape
'
Friend Sub DumpShapeInfo(shape As LayerShape, indent As String)
    ' Print the layer's blend mode, opacity and position, then update the tally.
    Dim output As System.IO.TextWriter = Console.Out
    output.WriteLine("{0}Shape = Layer shape", indent)
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y)
    layerShapes = layerShapes + 1
End Sub
'
' Dump information on a text shape
'
Friend Sub DumpShapeInfo(shape As TextShape, indent As String)
    Dim output As System.IO.TextWriter = Console.Out
    output.WriteLine("{0}Shape = Text shape", indent)

    ' Font details.
    output.WriteLine("{0} : Font name = {1}", indent, shape.Font.FamilyName)
    output.WriteLine("{0} : Font weight = {1}", indent, shape.Font.Weight)
    output.WriteLine("{0} : Font size = {1}", indent, shape.FontSize)
    output.WriteLine("{0} : embed mode = {1}", indent, shape.Font.EmbedMode)

    ' Bounding box.
    output.WriteLine("{0} : bounding box left = {1}", indent, shape.BoundingBox.Left)
    output.WriteLine("{0} : bounding box top = {1}", indent, shape.BoundingBox.Top)
    output.WriteLine("{0} : bounding box Width = {1}", indent, shape.BoundingBox.Width)
    output.WriteLine("{0} : bounding box Height = {1}", indent, shape.BoundingBox.Height)

    ' Appearance and position.
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
    output.WriteLine("{0} : X,Y = {1}, {2}", indent, shape.X, shape.Y)

    ' Text decorations.
    output.WriteLine("{0} : Bold = {1}", indent, shape.Bold)
    output.WriteLine("{0} : Italic = {1}", indent, shape.Italic)
    output.WriteLine("{0} : Underline = {1}", indent, shape.Underline)
    output.WriteLine("{0} : StrikeOut = {1}", indent, shape.StrikeOut)

    ' Measured size as reported by the shape.
    output.WriteLine("{0} : width = {1}", indent, shape.MeasuredWidth)
    output.WriteLine("{0} : height = {1}", indent, shape.MeasuredHeight)

    textShapes = textShapes + 1
End Sub
'
' Dump information on an image shape
'
Friend Sub DumpShapeInfo(shape As ImageShape, indent As String)
    ' Print the image's appearance, position and size, then update the tally.
    Dim output As System.IO.TextWriter = Console.Out
    output.WriteLine("{0}Shape = image shape", indent)
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
    output.WriteLine("{0} : mask color = {1}", indent, shape.MaskColor)
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y)
    output.WriteLine("{0} : width = {1}", indent, shape.Width)
    output.WriteLine("{0} : height = {1}", indent, shape.Height)
    imageShapes = imageShapes + 1
End Sub
'
' Dump information on a freehand shape
'
Friend Sub DumpShapeInfo(shape As FreeHandShape, indent As String)
    ' Print the path count, fill rule, brush, pen and position, then update the tally.
    Dim output As System.IO.TextWriter = Console.Out
    output.WriteLine("{0}Shape = freehand shape", indent)
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
    output.WriteLine("{0} : N paths = {1}", indent, shape.Paths.Count)
    output.WriteLine("{0} : fillrule = {1}", indent, shape.FillRule)
    output.WriteLine("{0} : Brush = {1}", indent, shape.Brush)
    output.WriteLine("{0} : Pen = {1}", indent, shape.Pen)
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y)
    freehandShapes = freehandShapes + 1
End Sub
'
' Dump information on a clip shape
'
Friend Sub DumpShapeInfo(shape As ClipShape, indent As String)
    ' Print the clip's fill rule, path count and position, then update the tally.
    Dim output As System.IO.TextWriter = Console.Out
    output.WriteLine("{0}Shape = Clip shape", indent)
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
    output.WriteLine("{0} : fillrule = {1}", indent, shape.FillRule)
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
    output.WriteLine("{0} : N paths = {1}", indent, shape.Paths.Count)
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y)
    clipShapes = clipShapes + 1
End Sub
No results
Add Long Term Validation (LTV) data to an existing signature
Render PDF to multipage color TIFF
Render PDF page to Skia surface
Render PDF page as PNG
How to downscale all images in a PDF
How to generate and export certificates
How to downscale all images in a PDF
Add Stamp to PDF
How to use a system font for rendering text
Customize the GUI interaction of a radio button
Customize the UI interaction of a check box
PDF to grayscale TIFF
How to reduce PDF file size
How do I create graphics with Icc based colors
Highlight fields in PDF
Add a note to PDF
Display PDF in a WPF app and stay responsive – the code
Draw interactively on a PDF page
Resize PDF pages
Verify a custom digital PDF signature
C# Print PDF documents from a WPF application
Extract glyph boxes from PDF
Use TrueType font collections
Layout text with MultilineTextShape
Calculate the height of a paragraph in PDF
Merge PDF files in C# .NET
How do I extract page destinations from bookmarks?
Clip PDF page content in C#
How do I use PDFControls.NET in a WPF application
Fill PDF form
Extract glyphs and sort by reading order
Add bookmarks to PDF
How to scale content of PDF
Create rectangles with rounded corners
Create text with decorations
Create layers in PDF and draw on each layer
Multipage TIFF to PDF
TIFF to PDF C#
Crop content on a PDF page
How to embed files in a PDF document
Remove graphics from PDF
Change the color inside a PDF
Create PDF in C#
Text formatting
Import FDF into PDF
Flatten PDF form
Digitally sign a PDF form in C# or VB.NET
Vector graphics in PDF
Translate PDF page content
Extract graphics from PDF
Determine the content bounding box
How to add page numbers to your PDF
Create / impose PDF 2-up
Search text in PDF
Append multiple PDF documents
Convert PDF to plain text
Flatten Markup Annotation
Add text field to PDF
Extract embedded files from PDF
Extract images from PDF
Add a Diagonal Watermark to PDF in C#
Fit image to PDF page
Add simple html text to PDF
Add multiline text to a PDF document
Add single-line text to PDF
Create a new digitally signed PDF document
PDF Viewer Preferences
Change page orientation PDF
Split PDF pages in C# and VB.NET
Append two or more existing PDF files
Change colors of black-and-white TIFF after converting from PDF
Determine if a PDF only contains images
Add footer to PDF
Convert SVG to PDF
Fill in a PDF form using MVC
C# render pdf in browser using MVC
Convert XHTML to PDF
Add hyperlink to PDF
Rotate a PDF page
Change the formatting of a numeric field
How to mirror PDF pages and other shapes
Fill in a template PDF document
How to add autosized text to PDF
Create formfields in PDF documents
Export FDF from PDF form
Add a link with an internal destination to PDF
Remove PDF security settings
Add a link to PDF with an external destination
How to sign and verify updates to a PDF document
Convert PDF to PNG using WPF
Embed TrueType font
Override MouseWheel event
Convert PDF to an image using a dither matrix
Font mapping
Convert PDF with layers to image
C# Print PDF Document
Render PDF with ResolveFont event handler
Render PDF to EMF
Convert PDF to XPS
How to create a thumbnail viewer
How to create a tiling for shapes in PDF
Add footer with left and right aligned text on same line
Convert PDF to JPG in C#
EMF to PDF as vector image
EMF to PDF as raster image
Replace field with image
Add a rubber stamp annotation with a custom icon
Create a text annotation in PDF with rich text
XhtmlParagraph and TrueType fonts
What is the resulting fontsize in PDF for rich text used in a SimpleXhtmlShape
Read and write meta data from PDF
Create a custom signature handler to sign and verify PDF documents
Merge PDF
Stitch PDF documents
Download and convert image to PDF
Convert TXT to PDF
Add barcodes to PDF
Convert PDF to multipage TIFF in C# .NET
Convert multiple PDF pages to bitmap
Render a PDF to bitmap
Bulleted list from XML and XSL
Tagged PDF
PDFKit.NET 5.0 – detailed changes to the API
Create tagged PDF
PDFKit.NET 5.0 and .NET Core
PDFKit.NET 5.0 and Xamarin
Dynamic XFA
PDFKit.NET 5.0 .NET Standard API
.NET Core console app on MacOS
Add tags to existing PDF
Read PDF tags
Merge XDP data with dynamic XFA form
Fill XFA form and export XDP data
Fill and save dynamic XFA form
Use PDFKit.NET 5.0 with a Xamarin.Forms app
Use multiple licenses
Licensing and .NET Standard
Reduce PDF size
Generate PDF form from XML
Generate PDF with local images from XML with Xamarin.iOS
Disable submit button after submitting
Write Document to HttpResponse