- Add a link to PDF with an external destination
- Add a link with an internal destination to PDF
- Add a note to PDF
- Add barcodes to PDF
- Add bookmarks to PDF
- Add footer to PDF
- Add simple html text to PDF
- Add hyperlink to PDF
- Add multiline text to a PDF document
- Add a rubber stamp annotation with a custom icon
- Add single-line text to PDF
- Add Stamp to PDF
- Add text field to PDF
- Add a Diagonal Watermark to PDF in C# - TallComponents - PDF Library
- Append two or more existing PDF files
- Change the color inside a PDF
- Change the formatting of a numeric field
- Change page orientation PDF
- Clip PDF page content in C#
- Convert PDF to plain text
- Convert SVG to PDF
- Create a text annotation in PDF with rich text
- Create formfields in PDF documents
- Create layers in PDF and draw on each layer
- Create a new digitally signed PDF document
- Create rectangles with rounded corners
- Create text with decorations
- How to create a tiling for shapes in PDF
- Crop content on a PDF page
- Determine the content bounding box
- Determine if a PDF only contains images
- Digitally sign a PDF form in C# or VB.NET
- Disable submit button after submitting
- How to downscale all images in a PDF
- Download and convert image to PDF
- How to downscale all images in a PDF
- Vector graphics in PDF
- How to embed files in a PDF document
- Embed TrueType font in PDF
- EMF to PDF as raster image - Code Sample C#
- EMF to PDF as vector image
- Export FDF from PDF form
- Extract embedded files from PDF
- Extract glyph boxes from PDF
- Extract glyphs and sort by reading order
- Extract graphics from PDF
- Extract images from PDF
- Fill in a PDF form using MVC
- Fill in a template PDF document
- Fill PDF form
- Fit image to PDF page
- Flatten Markup Annotation
- Flatten PDF form
- How to generate and export certificates
- How do I extract page destinations from bookmarks?
- Highlight fields in PDF
- How do I create graphics with Icc based colors
- How to add autosized text to PDF
- How to sign and verify updates to a PDF document
- Import FDF into PDF
- Merge PDF files in C# .NET
- How to mirror PDF pages and other shapes
- Layout text with MultilineTextShape
- Read and write meta data from PDF
- How to reduce PDF file size
- Remove graphics from PDF
- Remove PDF security settings
- Replace field with image
- Resize PDF pages
- Rotate a PDF page
- How to scale content of PDF
- Search text in PDF
- PDF Viewer Preferences
- Create a custom signature handler to sign and verify PDF documents
- Split PDF pages in C# and VB.NET
- TIFF to PDF C#
- Translate PDF page content
- Use multiple licenses
- Use TrueType font collections
- Verify a custom digital PDF signature
Extract graphics from PDF
This C# code sample shows how to extract text, images and curves as shapes from a PDF document.
Shapes
The Shape class is an abstract class with concrete specializations such as TextShape, ImageShape and LineShape. Shapes represent graphics of all types and were originally introduced to draw on a new or existing PDF page. The reverse is also possible: existing graphics on a PDF page can be extracted as shapes. The central method for this is Page.CreateShapes.
The following C# code sample enumerates all shapes on each page of a PDF document and dumps their properties to the console.
// Running totals, one per shape category. Each counter is incremented by a
// DumpShapeInfo overload while the document is walked and is printed by Main
// once all pages have been processed.
static int shapeCollections = 0;
static int textShapes = 0;
static int imageShapes = 0;
static int freehandShapes = 0;
static int layerShapes = 0;
static int clipShapes = 0;
// Shapes that matched none of the categories above.
static int otherShapes = 0;
//
// Entry point: open the sample PDF, dump every shape it contains and then
// print how many shapes of each category were seen.
//
static void Main(string[] args)
{
    const string inputPath = @"..\..\..\inputDocuments/vectorgraphics.pdf";

    // The input stream is closed as soon as the shapes have been walked.
    using (var input = new FileStream(inputPath, FileMode.Open, FileAccess.Read))
    {
        IterateShapes(new Document(input));
    }

    // Summary of the counters filled in by the DumpShapeInfo overloads.
    var report = Console.Out;
    report.WriteLine("Nr of shape collections = {0}", shapeCollections);
    report.WriteLine("Nr of text shapes = {0}", textShapes);
    report.WriteLine("Nr of image shapes = {0}", imageShapes);
    report.WriteLine("Nr of freehand shapes = {0}", freehandShapes);
    report.WriteLine("Nr of layer shapes = {0}", layerShapes);
    report.WriteLine("Nr of clip shapes = {0}", clipShapes);
    report.WriteLine("Nr of other shapes = {0}", otherShapes);
}
//
// Walk the document page by page and dump the shapes found on each page.
//
static void IterateShapes(Document pdf)
{
    foreach (Page currentPage in pdf.Pages)
    {
        // Resolves to the Page overload, which extracts that page's shapes.
        IterateShapes(currentPage);
    }
}
//
// Extract the graphics of a single page as a shape collection and dump that
// collection, starting with an empty indent.
//
static void IterateShapes(Page page)
{
    const string rootIndent = "";

    ShapeCollection pageContent = page.CreateShapes();
    IterateShapes(pageContent, rootIndent);
}
//
// Dump a shape collection and everything nested inside it (this may recurse).
//
// The collection itself is reported first, then each of its elements. Nested
// collections and layers are descended into with one extra space of indent.
// Every shape is reported exactly once.
//
static void IterateShapes(ShapeCollection shapes, string indent)
{
    DumpShapeInfo(shapes, indent);
    foreach (Shape shape in shapes)
    {
        if (shape is LayerShape)
        {
            // Tested before ShapeCollection so that a layer is handled by one
            // branch only, whatever the relationship between the two types.
            LayerShape layer = (LayerShape)shape;
            DumpShapeInfo(layer, indent);
            IterateShapes(layer, indent + " ");
        }
        else if (shape is ShapeCollection)
        {
            // The recursive call reports the nested collection itself, so it
            // must not be dumped here as well; doing both counted it twice.
            IterateShapes((ShapeCollection)shape, indent + " ");
        }
        else
        {
            DumpShapeInfo(shape, indent);
        }
    }
}
//
// Dump all shapes in a layer shape.
//
// The layer itself is expected to have been reported by the caller; this
// method reports its children. Collections and layers nested inside the layer
// are descended into, so their children are reported as well.
//
static void IterateShapes(LayerShape shapes, string indent)
{
    foreach (Shape shape in shapes)
    {
        if (shape is LayerShape)
        {
            LayerShape nestedLayer = (LayerShape)shape;
            DumpShapeInfo(nestedLayer, indent);
            IterateShapes(nestedLayer, indent + " ");
        }
        else if (shape is ShapeCollection)
        {
            // The ShapeCollection overload reports the collection itself
            // before walking its elements.
            IterateShapes((ShapeCollection)shape, indent + " ");
        }
        else
        {
            DumpShapeInfo(shape, indent);
        }
    }
}
//
// Dump information on any kind of shape.
//
// Forwards to the DumpShapeInfo overload that matches the run-time type of
// the shape. Exactly one branch runs per call, so a shape is reported and
// counted once; a shape of an unrecognised type is tallied in otherShapes.
//
internal static void DumpShapeInfo(Shape shape, string indent)
{
    if (shape is LayerShape)
    {
        // NOTE(review): tested before ShapeCollection in case LayerShape
        // derives from it - confirm against the library's class hierarchy.
        DumpShapeInfo((LayerShape)shape, indent);
    }
    else if (shape is ShapeCollection)
    {
        DumpShapeInfo((ShapeCollection)shape, indent);
    }
    else if (shape is TextShape)
    {
        DumpShapeInfo((TextShape)shape, indent);
    }
    else if (shape is ImageShape)
    {
        DumpShapeInfo((ImageShape)shape, indent);
    }
    else if (shape is FreeHandShape)
    {
        DumpShapeInfo((FreeHandShape)shape, indent);
    }
    else if (shape is ClipShape)
    {
        DumpShapeInfo((ClipShape)shape, indent);
    }
    else
    {
        Console.WriteLine("{0}Shape = some other type", indent);
        otherShapes++;
    }
}
//
// Report a shape collection (its element count) and add it to the
// shapeCollections total.
//
internal static void DumpShapeInfo(ShapeCollection shape, string indent)
{
    var output = Console.Out;

    output.WriteLine("{0}Shape = shape collection", indent);
    output.WriteLine("{0} : N elements = {1}", indent, shape.Count);

    shapeCollections++;
}
//
// Report a layer shape (blend mode, opacity and position) and add it to the
// layerShapes total.
//
internal static void DumpShapeInfo(LayerShape shape, string indent)
{
    var output = Console.Out;

    output.WriteLine("{0}Shape = Layer shape", indent);
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y);

    layerShapes++;
}
//
// Report a text shape (font, bounding box, rendering attributes, text
// decorations and measured size) and add it to the textShapes total.
//
internal static void DumpShapeInfo(TextShape shape, string indent)
{
    var output = Console.Out;
    output.WriteLine("{0}Shape = Text shape", indent);

    // Font
    var font = shape.Font;
    output.WriteLine("{0} : Font name = {1}", indent, font.FamilyName);
    output.WriteLine("{0} : Font weight = {1}", indent, font.Weight);
    output.WriteLine("{0} : Font size = {1}", indent, shape.FontSize);
    output.WriteLine("{0} : embed mode = {1}", indent, font.EmbedMode);

    // Bounding box
    var box = shape.BoundingBox;
    output.WriteLine("{0} : bounding box left = {1}", indent, box.Left);
    output.WriteLine("{0} : bounding box top = {1}", indent, box.Top);
    output.WriteLine("{0} : bounding box Width = {1}", indent, box.Width);
    output.WriteLine("{0} : bounding box Height = {1}", indent, box.Height);

    // Rendering attributes and position
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : X,Y = {1}, {2}", indent, shape.X, shape.Y);

    // Text decorations
    output.WriteLine("{0} : Bold = {1}", indent, shape.Bold);
    output.WriteLine("{0} : Italic = {1}", indent, shape.Italic);
    output.WriteLine("{0} : Underline = {1}", indent, shape.Underline);
    output.WriteLine("{0} : StrikeOut = {1}", indent, shape.StrikeOut);

    // Measured size
    output.WriteLine("{0} : width = {1}", indent, shape.MeasuredWidth);
    output.WriteLine("{0} : height = {1}", indent, shape.MeasuredHeight);

    textShapes++;
}
//
// Report an image shape (blend mode, mask color, opacity, position and size)
// and add it to the imageShapes total.
//
internal static void DumpShapeInfo(ImageShape shape, string indent)
{
    var output = Console.Out;

    output.WriteLine("{0}Shape = image shape", indent);
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : mask color = {1}", indent, shape.MaskColor);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y);
    output.WriteLine("{0} : width = {1}", indent, shape.Width);
    output.WriteLine("{0} : height = {1}", indent, shape.Height);

    imageShapes++;
}
//
// Report a freehand shape (blend mode, opacity, path count, fill rule, brush,
// pen and position) and add it to the freehandShapes total.
//
internal static void DumpShapeInfo(FreeHandShape shape, string indent)
{
    var output = Console.Out;

    output.WriteLine("{0}Shape = freehand shape", indent);
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : N paths = {1}", indent, shape.Paths.Count);
    output.WriteLine("{0} : fillrule = {1}", indent, shape.FillRule);
    output.WriteLine("{0} : Brush = {1}", indent, shape.Brush);
    output.WriteLine("{0} : Pen = {1}", indent, shape.Pen);
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y);

    freehandShapes++;
}
//
// Report a clip shape (blend mode, fill rule, opacity, path count and
// position) and add it to the clipShapes total.
//
internal static void DumpShapeInfo(ClipShape shape, string indent)
{
    var output = Console.Out;

    output.WriteLine("{0}Shape = Clip shape", indent);
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : fillrule = {1}", indent, shape.FillRule);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : N paths = {1}", indent, shape.Paths.Count);
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y);

    clipShapes++;
}
' Running totals, one per shape category. Each counter is incremented by a
' DumpShapeInfo overload while the document is walked and is printed by Main
' once all pages have been processed.
Dim shapeCollections As Integer = 0
Dim textShapes As Integer = 0
Dim imageShapes As Integer = 0
Dim freehandShapes As Integer = 0
Dim layerShapes As Integer = 0
Dim clipShapes As Integer = 0
' Shapes that matched none of the categories above.
Dim otherShapes As Integer = 0
'
' Entry point: open the sample PDF, dump every shape it contains and then
' print how many shapes of each category were seen.
'
Private Sub Main(args As String())
    Const inputPath As String = "..\..\..\inputDocuments/vectorgraphics.pdf"

    ' The input stream is closed as soon as the shapes have been walked.
    Using input As FileStream = New FileStream(inputPath, FileMode.Open, FileAccess.Read)
        IterateShapes(New Document(input))
    End Using

    ' Summary of the counters filled in by the DumpShapeInfo overloads.
    With Console.Out
        .WriteLine("Nr of shape collections = {0}", shapeCollections)
        .WriteLine("Nr of text shapes = {0}", textShapes)
        .WriteLine("Nr of image shapes = {0}", imageShapes)
        .WriteLine("Nr of freehand shapes = {0}", freehandShapes)
        .WriteLine("Nr of layer shapes = {0}", layerShapes)
        .WriteLine("Nr of clip shapes = {0}", clipShapes)
        .WriteLine("Nr of other shapes = {0}", otherShapes)
    End With
End Sub
'
' Walk the document page by page and dump the shapes found on each page.
'
Private Sub IterateShapes(pdf As Document)
    For Each currentPage As Page In pdf.Pages
        ' Resolves to the Page overload, which extracts that page's shapes.
        IterateShapes(currentPage)
    Next currentPage
End Sub
'
' Extract the graphics of a single page as a shape collection and dump that
' collection, starting with an empty indent.
'
Private Sub IterateShapes(page As Page)
    Const rootIndent As String = ""

    Dim pageContent As ShapeCollection = page.CreateShapes()
    IterateShapes(pageContent, rootIndent)
End Sub
'
' Dump a shape collection and everything nested inside it (this may recurse).
'
' The collection itself is reported first, then each of its elements. Nested
' collections and layers are descended into with one extra space of indent.
' Every shape is reported exactly once.
'
Private Sub IterateShapes(shapes As ShapeCollection, indent As String)
    DumpShapeInfo(shapes, indent)
    For Each shape As Shape In shapes
        If TypeOf shape Is LayerShape Then
            ' Tested before ShapeCollection so that a layer is handled by one
            ' branch only, whatever the relationship between the two types.
            Dim layer As LayerShape = DirectCast(shape, LayerShape)
            DumpShapeInfo(layer, indent)
            IterateShapes(layer, indent & " ")
        ElseIf TypeOf shape Is ShapeCollection Then
            ' The recursive call reports the nested collection itself, so it
            ' must not be dumped here as well; doing both counted it twice.
            IterateShapes(DirectCast(shape, ShapeCollection), indent & " ")
        Else
            DumpShapeInfo(shape, indent)
        End If
    Next
End Sub
'
' Dump all shapes in a layer shape.
'
' The layer itself is expected to have been reported by the caller; this
' method reports its children. Collections and layers nested inside the layer
' are descended into, so their children are reported as well.
'
Private Sub IterateShapes(shapes As LayerShape, indent As String)
    For Each shape As Shape In shapes
        If TypeOf shape Is LayerShape Then
            Dim nestedLayer As LayerShape = DirectCast(shape, LayerShape)
            DumpShapeInfo(nestedLayer, indent)
            IterateShapes(nestedLayer, indent & " ")
        ElseIf TypeOf shape Is ShapeCollection Then
            ' The ShapeCollection overload reports the collection itself
            ' before walking its elements.
            IterateShapes(DirectCast(shape, ShapeCollection), indent & " ")
        Else
            DumpShapeInfo(shape, indent)
        End If
    Next
End Sub
'
' Dump information on any kind of shape.
'
' Forwards to the DumpShapeInfo overload that matches the run-time type of
' the shape. Exactly one branch runs per call, so a shape is reported and
' counted once; a shape of an unrecognised type is tallied in otherShapes.
'
Friend Sub DumpShapeInfo(shape As Shape, indent As String)
    If TypeOf shape Is LayerShape Then
        ' NOTE(review): tested before ShapeCollection in case LayerShape
        ' derives from it - confirm against the library's class hierarchy.
        DumpShapeInfo(DirectCast(shape, LayerShape), indent)
    ElseIf TypeOf shape Is ShapeCollection Then
        DumpShapeInfo(DirectCast(shape, ShapeCollection), indent)
    ElseIf TypeOf shape Is TextShape Then
        DumpShapeInfo(DirectCast(shape, TextShape), indent)
    ElseIf TypeOf shape Is ImageShape Then
        DumpShapeInfo(DirectCast(shape, ImageShape), indent)
    ElseIf TypeOf shape Is FreeHandShape Then
        DumpShapeInfo(DirectCast(shape, FreeHandShape), indent)
    ElseIf TypeOf shape Is ClipShape Then
        DumpShapeInfo(DirectCast(shape, ClipShape), indent)
    Else
        Console.WriteLine("{0}Shape = some other type", indent)
        otherShapes += 1
    End If
End Sub
'
' Report a shape collection (its element count) and add it to the
' shapeCollections total.
'
Friend Sub DumpShapeInfo(shape As ShapeCollection, indent As String)
    With Console.Out
        .WriteLine("{0}Shape = shape collection", indent)
        .WriteLine("{0} : N elements = {1}", indent, shape.Count)
    End With

    shapeCollections += 1
End Sub
'
' Report a layer shape (blend mode, opacity and position) and add it to the
' layerShapes total.
'
Friend Sub DumpShapeInfo(shape As LayerShape, indent As String)
    With Console.Out
        .WriteLine("{0}Shape = Layer shape", indent)
        .WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
        .WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
        .WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y)
    End With

    layerShapes += 1
End Sub
'
' Report a text shape (font, bounding box, rendering attributes, text
' decorations and measured size) and add it to the textShapes total.
'
Friend Sub DumpShapeInfo(shape As TextShape, indent As String)
    With Console.Out
        .WriteLine("{0}Shape = Text shape", indent)

        ' Font
        .WriteLine("{0} : Font name = {1}", indent, shape.Font.FamilyName)
        .WriteLine("{0} : Font weight = {1}", indent, shape.Font.Weight)
        .WriteLine("{0} : Font size = {1}", indent, shape.FontSize)
        .WriteLine("{0} : embed mode = {1}", indent, shape.Font.EmbedMode)

        ' Bounding box
        .WriteLine("{0} : bounding box left = {1}", indent, shape.BoundingBox.Left)
        .WriteLine("{0} : bounding box top = {1}", indent, shape.BoundingBox.Top)
        .WriteLine("{0} : bounding box Width = {1}", indent, shape.BoundingBox.Width)
        .WriteLine("{0} : bounding box Height = {1}", indent, shape.BoundingBox.Height)

        ' Rendering attributes and position
        .WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
        .WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
        .WriteLine("{0} : X,Y = {1}, {2}", indent, shape.X, shape.Y)

        ' Text decorations
        .WriteLine("{0} : Bold = {1}", indent, shape.Bold)
        .WriteLine("{0} : Italic = {1}", indent, shape.Italic)
        .WriteLine("{0} : Underline = {1}", indent, shape.Underline)
        .WriteLine("{0} : StrikeOut = {1}", indent, shape.StrikeOut)

        ' Measured size
        .WriteLine("{0} : width = {1}", indent, shape.MeasuredWidth)
        .WriteLine("{0} : height = {1}", indent, shape.MeasuredHeight)
    End With

    textShapes += 1
End Sub
'
' Report an image shape (blend mode, mask color, opacity, position and size)
' and add it to the imageShapes total.
'
Friend Sub DumpShapeInfo(shape As ImageShape, indent As String)
    With Console.Out
        .WriteLine("{0}Shape = image shape", indent)
        .WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
        .WriteLine("{0} : mask color = {1}", indent, shape.MaskColor)
        .WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
        .WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y)
        .WriteLine("{0} : width = {1}", indent, shape.Width)
        .WriteLine("{0} : height = {1}", indent, shape.Height)
    End With

    imageShapes += 1
End Sub
'
' Report a freehand shape (blend mode, opacity, path count, fill rule, brush,
' pen and position) and add it to the freehandShapes total.
'
Friend Sub DumpShapeInfo(shape As FreeHandShape, indent As String)
    With Console.Out
        .WriteLine("{0}Shape = freehand shape", indent)
        .WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
        .WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
        .WriteLine("{0} : N paths = {1}", indent, shape.Paths.Count)
        .WriteLine("{0} : fillrule = {1}", indent, shape.FillRule)
        .WriteLine("{0} : Brush = {1}", indent, shape.Brush)
        .WriteLine("{0} : Pen = {1}", indent, shape.Pen)
        .WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y)
    End With

    freehandShapes += 1
End Sub
'
' Report a clip shape (blend mode, fill rule, opacity, path count and
' position) and add it to the clipShapes total.
'
Friend Sub DumpShapeInfo(shape As ClipShape, indent As String)
    With Console.Out
        .WriteLine("{0}Shape = Clip shape", indent)
        .WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
        .WriteLine("{0} : fillrule = {1}", indent, shape.FillRule)
        .WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
        .WriteLine("{0} : N paths = {1}", indent, shape.Paths.Count)
        .WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y)
    End With

    clipShapes += 1
End Sub