- Use multiple licenses
- Replace field with image
- Import FDF into PDF
- Embed TrueType font in PDF
- Determine if a PDF only contains images
- Download and convert image to PDF
- Use TrueType font collections
- Determine the content bounding box
- Highlight fields in PDF
- Add hyperlink to PDF
- How do I create graphics with Icc based colors
- Add Stamp to PDF
- How do I extract page destinations from bookmarks?
- Convert SVG to PDF
- Extract glyph boxes from PDF
- Fill in a template PDF document
- Extract graphics from PDF
- Flatten Markup Annotation
- Clip PDF page content in C#
- How to scale content of PDF
- Add single-line text to PDF
- Change the formatting of a numeric field
- Add bookmarks to PDF
- Convert PDF to plain text
- TIFF to PDF C#
- Add a note to PDF
- Add a link with an internal destination to PDF
- Fill in a PDF form using MVC
- Create formfields in PDF documents
- Extract embedded files from PDF
- Remove graphics from PDF
- Fit image to PDF page
- Split PDF pages in C# and VB.NET
- Add a link to PDF with an external destination
- Translate PDF page content
- How to sign and verify updates to a PDF document
- Create a new digitally signed PDF document
- EMF to PDF as vector image
- Layout text with MultilineTextShape
- Add multiline text to a PDF document
- How to add autosized text to PDF
- How to generate and export certificates
- Create rectangles with rounded corners
- Add barcodes to PDF
- Append two or more existing PDF files
- Create a text annotation in PDF with rich text
- Rotate a PDF page
- Add text field to PDF
- Change the color inside a PDF
- How to embed files in a PDF document
- How to mirror PDF pages and other shapes
- Add a Diagonal Watermark to PDF in C# - TallComponents - PDF Library
- How to reduce PDF file size
- Read and write meta data from PDF
- Add footer to PDF
- Create a custom signature handler to sign and verify PDF documents
- Export FDF from PDF form
- Create text with decorations
- Vector graphics in PDF
- Resize PDF pages
- Change page orientation PDF
- Crop content on a PDF page
- Extract glyphs and sort by reading order
- PDF Viewer Preferences
- Extract images from PDF
- EMF to PDF as raster image - Code Sample C#
- Disable submit button after submitting
- How to downscale all images in a PDF
- Remove PDF security settings
- Merge PDF files in C# .NET
- Flatten PDF form
- How to downscale all images in a PDF
- Fill PDF form
- Verify a custom digital PDF signature
- Add a rubber stamp annotation with a custom icon
- How to create a tiling for shapes in PDF
- Create layers in PDF and draw on each layer
- Search text in PDF
- Digitally sign a PDF form in C# or VB.NET
- Add simple html text to PDF
Extract graphics from PDF
This C# code sample shows how to extract text, images and curves as shapes from a PDF document.
Shapes
The Shape class is an abstract class with concrete specializations such as TextShape, ImageShape and LineShape. Shapes represent graphics of all types and were originally introduced to draw on a new or existing PDF page. The reverse is also possible: existing graphics on a PDF page can be extracted as shapes. The central method is Page.CreateShapes.
The following C# code sample enumerates all shapes on each page of a PDF document and dumps their properties to the console.
// Running totals, one per kind of shape; each DumpShapeInfo overload
// increments its own counter and Main prints them all at the end.
private static int shapeCollections;
private static int textShapes;
private static int imageShapes;
private static int freehandShapes;
private static int layerShapes;
private static int clipShapes;
private static int otherShapes;
// Opens the sample PDF, walks every shape on every page and then prints
// how many shapes of each kind were encountered.
static void Main(string[] args)
{
    const string inputPath = @"..\..\..\inputDocuments/vectorgraphics.pdf";

    // The stream is only needed while the shapes are being enumerated.
    using (var input = new FileStream(inputPath, FileMode.Open, FileAccess.Read))
    {
        IterateShapes(new Document(input));
    }

    // Summary of the counters filled in by the DumpShapeInfo overloads.
    var output = Console.Out;
    output.WriteLine("Nr of shape collections = {0}", shapeCollections);
    output.WriteLine("Nr of text shapes = {0}", textShapes);
    output.WriteLine("Nr of image shapes = {0}", imageShapes);
    output.WriteLine("Nr of freehand shapes = {0}", freehandShapes);
    output.WriteLine("Nr of layer shapes = {0}", layerShapes);
    output.WriteLine("Nr of clip shapes = {0}", clipShapes);
    output.WriteLine("Nr of other shapes = {0}", otherShapes);
}
//
// Visit every page of the PDF document and dump the shapes found on it.
//
static void IterateShapes(Document pdf)
{
    foreach (Page currentPage in pdf.Pages)
    {
        IterateShapes(currentPage);
    }
}
//
// Extract the graphics of a single page as a shape collection and walk it,
// starting without any indentation.
//
static void IterateShapes(Page page)
{
    ShapeCollection pageShapes = page.CreateShapes();
    IterateShapes(pageShapes, string.Empty);
}
//
// Dump a shape collection and then everything it contains (this may recurse).
//
// shapes: the collection to dump; indent: prefix written before each line.
//
static void IterateShapes(ShapeCollection shapes, string indent)
{
    // The collection itself is reported exactly once, here.
    DumpShapeInfo(shapes, indent);
    IterateChildShapes(shapes, indent);
}

//
// Dump every child of a collection, descending into nested collections and
// layers with one extra space of indentation.
//
private static void IterateChildShapes(ShapeCollection shapes, string indent)
{
    foreach (Shape shape in shapes)
    {
        // Every child is dumped once, based on its run-time type.
        DumpShapeInfo(shape, indent);

        ShapeCollection nested = shape as ShapeCollection;
        if (nested != null)
        {
            // Recurse into the children only: the nested collection was
            // already dumped (and counted) by the call above.
            IterateChildShapes(nested, indent + " ");
        }
        else if (shape is LayerShape)
        {
            // A LayerShape is also a collection. It is only handled here when
            // it was not already walked as a ShapeCollection, so its children
            // are never dumped twice.
            IterateShapes((LayerShape)shape, indent + " ");
        }
    }
}
//
// Dump each shape held directly by a layer shape (no further recursion).
//
static void IterateShapes(LayerShape shapes, string indent)
{
    foreach (Shape member in shapes)
    {
        DumpShapeInfo(member, indent);
    }
}
//
// Dump information on any kind of shape by dispatching on its run-time type
// to the matching typed overload.
//
// shape: the shape to describe; indent: prefix written before each line.
//
internal static void DumpShapeInfo(Shape shape, string indent)
{
    // Container shapes: a shape may be a collection, a layer, or both.
    // Each aspect is reported at most once.
    bool isContainer = false;
    if (shape is ShapeCollection)
    {
        DumpShapeInfo((ShapeCollection)shape, indent);
        isContainer = true;
    }
    if (shape is LayerShape)
    {
        DumpShapeInfo((LayerShape)shape, indent);
        isContainer = true;
    }
    if (isContainer)
    {
        // Containers must not fall through to the "some other type" branch.
        return;
    }

    if (shape is TextShape)
    {
        DumpShapeInfo((TextShape)shape, indent);
    }
    else if (shape is ImageShape)
    {
        DumpShapeInfo((ImageShape)shape, indent);
    }
    else if (shape is FreeHandShape)
    {
        DumpShapeInfo((FreeHandShape)shape, indent);
    }
    else if (shape is ClipShape)
    {
        DumpShapeInfo((ClipShape)shape, indent);
    }
    else
    {
        // Anything without a dedicated overload is only counted.
        Console.WriteLine("{0}Shape = some other type", indent);
        otherShapes++;
    }
}
//
// Write the element count of a shape collection and count the collection.
//
internal static void DumpShapeInfo(ShapeCollection shape, string indent)
{
    var output = Console.Out;
    output.WriteLine("{0}Shape = shape collection", indent);
    output.WriteLine("{0} : N elements = {1}", indent, shape.Count);
    shapeCollections++;
}
//
// Write blend mode, opacity and position of a layer shape and count it.
//
internal static void DumpShapeInfo(LayerShape shape, string indent)
{
    var output = Console.Out;
    output.WriteLine("{0}Shape = Layer shape", indent);
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y);
    layerShapes++;
}
//
// Write font, bounding box, appearance and measured size of a text shape
// and count it.
//
internal static void DumpShapeInfo(TextShape shape, string indent)
{
    var output = Console.Out;
    output.WriteLine("{0}Shape = Text shape", indent);

    // Font details
    output.WriteLine("{0} : Font name = {1}", indent, shape.Font.FamilyName);
    output.WriteLine("{0} : Font weight = {1}", indent, shape.Font.Weight);
    output.WriteLine("{0} : Font size = {1}", indent, shape.FontSize);
    output.WriteLine("{0} : embed mode = {1}", indent, shape.Font.EmbedMode);

    // Bounding box
    output.WriteLine("{0} : bounding box left = {1}", indent, shape.BoundingBox.Left);
    output.WriteLine("{0} : bounding box top = {1}", indent, shape.BoundingBox.Top);
    output.WriteLine("{0} : bounding box Width = {1}", indent, shape.BoundingBox.Width);
    output.WriteLine("{0} : bounding box Height = {1}", indent, shape.BoundingBox.Height);

    // Appearance and position
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : X,Y = {1}, {2}", indent, shape.X, shape.Y);

    // Text decorations
    output.WriteLine("{0} : Bold = {1}", indent, shape.Bold);
    output.WriteLine("{0} : Italic = {1}", indent, shape.Italic);
    output.WriteLine("{0} : Underline = {1}", indent, shape.Underline);
    output.WriteLine("{0} : StrikeOut = {1}", indent, shape.StrikeOut);

    // Measured size
    output.WriteLine("{0} : width = {1}", indent, shape.MeasuredWidth);
    output.WriteLine("{0} : height = {1}", indent, shape.MeasuredHeight);
    textShapes++;
}
//
// Write blend mode, mask color, opacity, position and size of an image
// shape and count it.
//
internal static void DumpShapeInfo(ImageShape shape, string indent)
{
    var output = Console.Out;
    output.WriteLine("{0}Shape = image shape", indent);
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : mask color = {1}", indent, shape.MaskColor);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y);
    output.WriteLine("{0} : width = {1}", indent, shape.Width);
    output.WriteLine("{0} : height = {1}", indent, shape.Height);
    imageShapes++;
}
//
// Write blend mode, opacity, path count, fill rule, brush, pen and position
// of a freehand shape and count it.
//
internal static void DumpShapeInfo(FreeHandShape shape, string indent)
{
    var output = Console.Out;
    output.WriteLine("{0}Shape = freehand shape", indent);
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : N paths = {1}", indent, shape.Paths.Count);
    output.WriteLine("{0} : fillrule = {1}", indent, shape.FillRule);
    output.WriteLine("{0} : Brush = {1}", indent, shape.Brush);
    output.WriteLine("{0} : Pen = {1}", indent, shape.Pen);
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y);
    freehandShapes++;
}
//
// Write blend mode, fill rule, opacity, path count and position of a clip
// shape and count it.
//
internal static void DumpShapeInfo(ClipShape shape, string indent)
{
    var output = Console.Out;
    output.WriteLine("{0}Shape = Clip shape", indent);
    output.WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode);
    output.WriteLine("{0} : fillrule = {1}", indent, shape.FillRule);
    output.WriteLine("{0} : opacity = {1}", indent, shape.Opacity);
    output.WriteLine("{0} : N paths = {1}", indent, shape.Paths.Count);
    output.WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y);
    clipShapes++;
}
' Running totals, one per kind of shape; each DumpShapeInfo overload
' increments its own counter and Main prints them all at the end.
Private shapeCollections As Integer = 0
Private textShapes As Integer = 0
Private imageShapes As Integer = 0
Private freehandShapes As Integer = 0
Private layerShapes As Integer = 0
Private clipShapes As Integer = 0
Private otherShapes As Integer = 0
' Opens the sample PDF, walks every shape on every page and then prints
' how many shapes of each kind were encountered.
Private Sub Main(args As String())
    Const inputPath As String = "..\..\..\inputDocuments/vectorgraphics.pdf"

    ' The stream is only needed while the shapes are being enumerated.
    Using source As New FileStream(inputPath, FileMode.Open, FileAccess.Read)
        IterateShapes(New Document(source))
    End Using

    ' Summary of the counters filled in by the DumpShapeInfo overloads.
    With Console.Out
        .WriteLine("Nr of shape collections = {0}", shapeCollections)
        .WriteLine("Nr of text shapes = {0}", textShapes)
        .WriteLine("Nr of image shapes = {0}", imageShapes)
        .WriteLine("Nr of freehand shapes = {0}", freehandShapes)
        .WriteLine("Nr of layer shapes = {0}", layerShapes)
        .WriteLine("Nr of clip shapes = {0}", clipShapes)
        .WriteLine("Nr of other shapes = {0}", otherShapes)
    End With
End Sub
'
' Visit every page of the PDF document and dump the shapes found on it.
'
Private Sub IterateShapes(pdf As Document)
    For Each currentPage As Page In pdf.Pages
        IterateShapes(currentPage)
    Next
End Sub
'
' Extract the graphics of a single page as a shape collection and walk it,
' starting without any indentation.
'
Private Sub IterateShapes(page As Page)
    Dim pageShapes As ShapeCollection = page.CreateShapes()
    IterateShapes(pageShapes, String.Empty)
End Sub
'
' Dump a shape collection and then everything it contains (this may recurse).
'
' shapes: the collection to dump; indent: prefix written before each line.
'
Private Sub IterateShapes(shapes As ShapeCollection, indent As String)
    ' The collection itself is reported exactly once, here.
    DumpShapeInfo(shapes, indent)
    IterateChildShapes(shapes, indent)
End Sub

'
' Dump every child of a collection, descending into nested collections and
' layers with one extra space of indentation.
'
Private Sub IterateChildShapes(shapes As ShapeCollection, indent As String)
    For Each shape As Shape In shapes
        ' Every child is dumped once, based on its run-time type.
        DumpShapeInfo(shape, indent)

        Dim nested As ShapeCollection = TryCast(shape, ShapeCollection)
        If nested IsNot Nothing Then
            ' Recurse into the children only: the nested collection was
            ' already dumped (and counted) by the call above.
            IterateChildShapes(nested, indent & " ")
        ElseIf TypeOf shape Is LayerShape Then
            ' A LayerShape is also a collection. It is only handled here when
            ' it was not already walked as a ShapeCollection, so its children
            ' are never dumped twice.
            IterateShapes(DirectCast(shape, LayerShape), indent & " ")
        End If
    Next
End Sub
'
' Dump each shape held directly by a layer shape (no further recursion).
'
Private Sub IterateShapes(shapes As LayerShape, indent As String)
    For Each member As Shape In shapes
        DumpShapeInfo(member, indent)
    Next
End Sub
'
' Dump information on any kind of shape by dispatching on its run-time type
' to the matching typed overload.
'
' shape: the shape to describe; indent: prefix written before each line.
'
Friend Sub DumpShapeInfo(shape As Shape, indent As String)
    ' Container shapes: a shape may be a collection, a layer, or both.
    ' Each aspect is reported at most once.
    Dim isContainer As Boolean = False
    If TypeOf shape Is ShapeCollection Then
        DumpShapeInfo(DirectCast(shape, ShapeCollection), indent)
        isContainer = True
    End If
    If TypeOf shape Is LayerShape Then
        DumpShapeInfo(DirectCast(shape, LayerShape), indent)
        isContainer = True
    End If
    If isContainer Then
        ' Containers must not fall through to the "some other type" branch.
        Return
    End If

    If TypeOf shape Is TextShape Then
        DumpShapeInfo(DirectCast(shape, TextShape), indent)
    ElseIf TypeOf shape Is ImageShape Then
        DumpShapeInfo(DirectCast(shape, ImageShape), indent)
    ElseIf TypeOf shape Is FreeHandShape Then
        DumpShapeInfo(DirectCast(shape, FreeHandShape), indent)
    ElseIf TypeOf shape Is ClipShape Then
        DumpShapeInfo(DirectCast(shape, ClipShape), indent)
    Else
        ' Anything without a dedicated overload is only counted.
        Console.WriteLine("{0}Shape = some other type", indent)
        otherShapes += 1
    End If
End Sub
'
' Write the element count of a shape collection and count the collection.
'
Friend Sub DumpShapeInfo(shape As ShapeCollection, indent As String)
    With Console.Out
        .WriteLine("{0}Shape = shape collection", indent)
        .WriteLine("{0} : N elements = {1}", indent, shape.Count)
    End With
    shapeCollections += 1
End Sub
'
' Write blend mode, opacity and position of a layer shape and count it.
'
Friend Sub DumpShapeInfo(shape As LayerShape, indent As String)
    With Console.Out
        .WriteLine("{0}Shape = Layer shape", indent)
        .WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
        .WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
        .WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y)
    End With
    layerShapes += 1
End Sub
'
' Write font, bounding box, appearance and measured size of a text shape
' and count it.
'
Friend Sub DumpShapeInfo(shape As TextShape, indent As String)
    With Console.Out
        .WriteLine("{0}Shape = Text shape", indent)

        ' Font details
        .WriteLine("{0} : Font name = {1}", indent, shape.Font.FamilyName)
        .WriteLine("{0} : Font weight = {1}", indent, shape.Font.Weight)
        .WriteLine("{0} : Font size = {1}", indent, shape.FontSize)
        .WriteLine("{0} : embed mode = {1}", indent, shape.Font.EmbedMode)

        ' Bounding box
        .WriteLine("{0} : bounding box left = {1}", indent, shape.BoundingBox.Left)
        .WriteLine("{0} : bounding box top = {1}", indent, shape.BoundingBox.Top)
        .WriteLine("{0} : bounding box Width = {1}", indent, shape.BoundingBox.Width)
        .WriteLine("{0} : bounding box Height = {1}", indent, shape.BoundingBox.Height)

        ' Appearance and position
        .WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
        .WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
        .WriteLine("{0} : X,Y = {1}, {2}", indent, shape.X, shape.Y)

        ' Text decorations
        .WriteLine("{0} : Bold = {1}", indent, shape.Bold)
        .WriteLine("{0} : Italic = {1}", indent, shape.Italic)
        .WriteLine("{0} : Underline = {1}", indent, shape.Underline)
        .WriteLine("{0} : StrikeOut = {1}", indent, shape.StrikeOut)

        ' Measured size
        .WriteLine("{0} : width = {1}", indent, shape.MeasuredWidth)
        .WriteLine("{0} : height = {1}", indent, shape.MeasuredHeight)
    End With
    textShapes += 1
End Sub
'
' Write blend mode, mask color, opacity, position and size of an image
' shape and count it.
'
Friend Sub DumpShapeInfo(shape As ImageShape, indent As String)
    With Console.Out
        .WriteLine("{0}Shape = image shape", indent)
        .WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
        .WriteLine("{0} : mask color = {1}", indent, shape.MaskColor)
        .WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
        .WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y)
        .WriteLine("{0} : width = {1}", indent, shape.Width)
        .WriteLine("{0} : height = {1}", indent, shape.Height)
    End With
    imageShapes += 1
End Sub
'
' Write blend mode, opacity, path count, fill rule, brush, pen and position
' of a freehand shape and count it.
'
Friend Sub DumpShapeInfo(shape As FreeHandShape, indent As String)
    With Console.Out
        .WriteLine("{0}Shape = freehand shape", indent)
        .WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
        .WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
        .WriteLine("{0} : N paths = {1}", indent, shape.Paths.Count)
        .WriteLine("{0} : fillrule = {1}", indent, shape.FillRule)
        .WriteLine("{0} : Brush = {1}", indent, shape.Brush)
        .WriteLine("{0} : Pen = {1}", indent, shape.Pen)
        .WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y)
    End With
    freehandShapes += 1
End Sub
'
' Write blend mode, fill rule, opacity, path count and position of a clip
' shape and count it.
'
Friend Sub DumpShapeInfo(shape As ClipShape, indent As String)
    With Console.Out
        .WriteLine("{0}Shape = Clip shape", indent)
        .WriteLine("{0} : blendmode = {1}", indent, shape.BlendMode)
        .WriteLine("{0} : fillrule = {1}", indent, shape.FillRule)
        .WriteLine("{0} : opacity = {1}", indent, shape.Opacity)
        .WriteLine("{0} : N paths = {1}", indent, shape.Paths.Count)
        .WriteLine("{0} : X, Y = {1} {2}", indent, shape.X, shape.Y)
    End With
    clipShapes += 1
End Sub