Extract glyph boxes from PDF
Extract glyph boxes from PDF
This sample demonstrates how to extract glyph boxes.
This sample creates a bitmap for each page and draws a box around each glyph. It takes into account the orientation of the page, as well as its CropBox and MediaBox, so that the bitmap resembles the page as shown by a PDF viewer. The main routine is CreateBoxesBitmap, which takes a page as an argument and returns a Bitmap with the boxes drawn on it.
For the following PDF page:
We get the following result:
C# code sample
/// <summary>
/// Opens the sample PDF and writes one PNG per page, each showing a box around every glyph.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    using (FileStream fileIn = new FileStream(@"..\..\..\inputdocuments\R0.pdf", FileMode.Open, FileAccess.Read))
    {
        // create document
        Document document = new Document(fileIn);

        // 1-based page counter used to give every page its own output file;
        // with a single fixed file name each page would overwrite the previous one.
        int pageNumber = 0;
        foreach (Page page in document.Pages)
        {
            pageNumber++;
            string outputPath = @"..\..\out"
                + pageNumber.ToString(System.Globalization.CultureInfo.InvariantCulture)
                + ".png";

            // The bitmap is IDisposable, so release it as soon as it has been saved
            // instead of leaving one undisposed image behind per page.
            using (System.Drawing.Bitmap bitmap = CreateBoxesBitmap(page))
            {
                bitmap.Save(outputPath, System.Drawing.Imaging.ImageFormat.Png);
            }
        }
    }
}
/// <summary>
/// Draws the outline of every glyph on <paramref name="page"/> in red on a white bitmap
/// sized after the visible part of the page.
/// </summary>
/// <param name="page">The page whose glyphs are outlined.</param>
/// <returns>A newly created bitmap; the caller owns it and is responsible for disposing it.</returns>
private static System.Drawing.Bitmap CreateBoxesBitmap(Page page)
{
    // Compute the part of the page that is visible in a viewer.
    Rectangle visibleRectangle = GetVisibleRectangle(page);

    // Determine the bitmap size, taking the orientation into account.
    int width = (int)Math.Round(visibleRectangle.Width);
    int height = (int)Math.Round(visibleRectangle.Height);
    Orientation orientation = page.Orientation;
    if (orientation == Orientation.Rotate90 || orientation == Orientation.Rotate270)
    {
        // A quarter turn exchanges the horizontal and vertical extent.
        int temp = width;
        width = height;
        height = temp;
    }

    System.Drawing.Bitmap bitmap = new System.Drawing.Bitmap(width, height);
    try
    {
        using (System.Drawing.Graphics graphics = System.Drawing.Graphics.FromImage(bitmap))
        using (System.Drawing.Pen pen = new System.Drawing.Pen(System.Drawing.Color.Red))
        {
            graphics.Clear(System.Drawing.Color.White);

            // Retrieve all glyphs on the current page and draw a quadrilateral for each.
            foreach (Glyph glyph in page.Glyphs)
            {
                // Convert each corner from PDF coordinates into GDI coordinates.
                System.Drawing.PointF[] points = new[]
                {
                    PDFPointToGDI(glyph.BottomLeft, visibleRectangle, orientation),
                    PDFPointToGDI(glyph.BottomRight, visibleRectangle, orientation),
                    PDFPointToGDI(glyph.TopRight, visibleRectangle, orientation),
                    PDFPointToGDI(glyph.TopLeft, visibleRectangle, orientation)
                };

                // draw glyph box
                graphics.DrawPolygon(pen, points);
            }
        }
    }
    catch
    {
        // Ownership only passes to the caller on success; if drawing fails,
        // dispose the bitmap here so it does not leak, then let the error propagate.
        bitmap.Dispose();
        throw;
    }

    return bitmap;
}
''' <summary>
''' Opens the sample PDF and writes one PNG per page, each showing a box around every glyph.
''' </summary>
''' <param name="args">Command-line arguments; not used.</param>
Private Sub Main(args As String())
    Using fileIn As New FileStream("..\..\..\inputdocuments\R0.pdf", FileMode.Open, FileAccess.Read)
        'create document
        Dim document As New Document(fileIn)

        ' 1-based page counter used to give every page its own output file;
        ' with a single fixed file name each page would overwrite the previous one.
        Dim pageNumber As Integer = 0
        For Each page As Page In document.Pages
            pageNumber += 1
            Dim outputPath As String = "..\..\out" &
                pageNumber.ToString(System.Globalization.CultureInfo.InvariantCulture) &
                ".png"

            ' The bitmap is IDisposable, so release it as soon as it has been saved
            ' instead of leaving one undisposed image behind per page.
            Using bitmap As System.Drawing.Bitmap = CreateBoxesBitmap(page)
                bitmap.Save(outputPath, System.Drawing.Imaging.ImageFormat.Png)
            End Using
        Next
    End Using
End Sub
''' <summary>
''' Draws the outline of every glyph on <paramref name="page"/> in red on a white bitmap
''' sized after the visible part of the page.
''' </summary>
''' <param name="page">The page whose glyphs are outlined.</param>
''' <returns>A newly created bitmap; the caller owns it and is responsible for disposing it.</returns>
Private Function CreateBoxesBitmap(page As Page) As System.Drawing.Bitmap
    ' Compute the part of the page that is visible in a viewer.
    Dim visibleRectangle As Rectangle = GetVisibleRectangle(page)

    ' Determine the bitmap size, taking the orientation into account.
    Dim width As Integer = CInt(Math.Round(visibleRectangle.Width))
    Dim height As Integer = CInt(Math.Round(visibleRectangle.Height))
    Dim orientation__1 As Orientation = page.Orientation
    If orientation__1 = Orientation.Rotate90 OrElse orientation__1 = Orientation.Rotate270 Then
        ' A quarter turn exchanges the horizontal and vertical extent.
        Dim temp As Integer = width
        width = height
        height = temp
    End If

    Dim bitmap As New System.Drawing.Bitmap(width, height)
    Try
        Using graphics As System.Drawing.Graphics = System.Drawing.Graphics.FromImage(bitmap)
            Using pen As New System.Drawing.Pen(System.Drawing.Color.Red)
                graphics.Clear(System.Drawing.Color.White)

                ' Retrieve all glyphs on the current page and draw a quadrilateral for each.
                For Each glyph As Glyph In page.Glyphs
                    ' Convert each corner from PDF coordinates into GDI coordinates.
                    Dim points As System.Drawing.PointF() = {
                        PDFPointToGDI(glyph.BottomLeft, visibleRectangle, orientation__1),
                        PDFPointToGDI(glyph.BottomRight, visibleRectangle, orientation__1),
                        PDFPointToGDI(glyph.TopRight, visibleRectangle, orientation__1),
                        PDFPointToGDI(glyph.TopLeft, visibleRectangle, orientation__1)
                    }

                    ' draw glyph box
                    graphics.DrawPolygon(pen, points)
                Next
            End Using
        End Using
    Catch
        ' Ownership only passes to the caller on success; if drawing fails,
        ' dispose the bitmap here so it does not leak, then let the error propagate.
        bitmap.Dispose()
        Throw
    End Try

    Return bitmap
End Function
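Note that we need to convert each coordinate into a GDI coordinate, because PDF has its origin at the bottom left of the page whereas GDI has its origin at the top left, and the page may be rotated as well. Below is the code of the PDFPointToGDI routine, together with the GetVisibleRectangle and Intersection helpers.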
C# code sample
/// <summary>
/// Computes the part of <paramref name="page"/> that remains after clipping the page
/// area against its media box and crop box, where those are present.
/// </summary>
/// <param name="page">The page to inspect.</param>
/// <returns>The page area intersected with every box that is not null.</returns>
static Rectangle GetVisibleRectangle(Page page)
{
    // Start from the full page area anchored at the origin.
    Rectangle visible = new Rectangle(0, 0, page.Width, page.Height);

    // Clip against the media box when the page has one.
    Rectangle media = page.MediaBox;
    visible = (media != null) ? Intersection(visible, media) : visible;

    // Clip against the crop box when the page has one.
    Rectangle crop = page.CropBox;
    visible = (crop != null) ? Intersection(visible, crop) : visible;

    return visible;
}
/// <summary>
/// Maps a point given in PDF page coordinates to a point in the coordinate space of
/// the bitmap that represents <paramref name="rectangle"/>.
/// </summary>
/// <param name="point">The point in PDF coordinates.</param>
/// <param name="rectangle">The visible rectangle of the page; its left/bottom corner becomes the local origin.</param>
/// <param name="orientation">The page orientation that selects the mapping.</param>
/// <returns>
/// The converted point, or <paramref name="point"/> unchanged when the orientation is
/// not one of the four handled values.
/// </returns>
static System.Drawing.PointF PDFPointToGDI(System.Drawing.PointF point, Rectangle rectangle, Orientation orientation)
{
    // Make the point relative to the visible rectangle, which may not start at (0,0).
    double offsetX = point.X - rectangle.Left;
    double offsetY = point.Y - rectangle.Bottom;

    // NOTE(review): whether Rotate90/Rotate270 mean clockwise or counter-clockwise is
    // defined by the Orientation enum, which is not visible here - confirm the two
    // quarter-turn mappings against a rotated sample page.
    double gdiX;
    double gdiY;
    if (orientation == Orientation.Rotate0)
    {
        // Keep x and mirror y vertically within the rectangle's height.
        gdiX = offsetX;
        gdiY = rectangle.Height - offsetY;
    }
    else if (orientation == Orientation.Rotate90)
    {
        // Exchange x and y, mirroring both within the rectangle.
        gdiX = rectangle.Height - offsetY;
        gdiY = rectangle.Width - offsetX;
    }
    else if (orientation == Orientation.Rotate180)
    {
        // Point mirror of Rotate0: mirror x, keep y.
        gdiX = rectangle.Width - offsetX;
        gdiY = offsetY;
    }
    else if (orientation == Orientation.Rotate270)
    {
        // Point mirror of Rotate90: plain exchange of x and y.
        gdiX = offsetY;
        gdiY = offsetX;
    }
    else
    {
        // Unknown orientation: hand the input back untouched.
        return point;
    }

    return new System.Drawing.PointF((float)gdiX, (float)gdiY);
}
/// <summary>
/// Builds the rectangle spanned by the innermost edges of two rectangles.
/// </summary>
/// <param name="rect1">The first rectangle.</param>
/// <param name="rect2">The second rectangle.</param>
/// <returns>
/// A new rectangle from the larger left/bottom and the smaller right/top edges.
/// No clamping is applied, so the width or height is negative when the inputs do not overlap.
/// </returns>
static Rectangle Intersection(Rectangle rect1, Rectangle rect2)
{
    // Lower-left corner: the larger of the two left edges and of the two bottom edges.
    double left = Math.Max(rect1.Left, rect2.Left);
    double bottom = Math.Max(rect1.Bottom, rect2.Bottom);

    // Upper-right corner: the smaller of the two right edges and of the two top edges.
    double right = Math.Min(rect1.Left + rect1.Width, rect2.Left + rect2.Width);
    double top = Math.Min(rect1.Bottom + rect1.Height, rect2.Bottom + rect2.Height);

    double width = right - left;
    double height = top - bottom;
    return new Rectangle(left, bottom, width, height);
}
''' <summary>
''' Computes the part of <paramref name="page"/> that remains after clipping the page
''' area against its media box and crop box, where those are present.
''' </summary>
''' <param name="page">The page to inspect.</param>
''' <returns>The page area intersected with every box that is not Nothing.</returns>
Private Function GetVisibleRectangle(page As Page) As Rectangle
    ' Start from the full page area anchored at the origin.
    Dim visible As New Rectangle(0, 0, page.Width, page.Height)

    ' Clip against the media box when the page has one.
    Dim media As Rectangle = page.MediaBox
    visible = If(media IsNot Nothing, Intersection(visible, media), visible)

    ' Clip against the crop box when the page has one.
    Dim crop As Rectangle = page.CropBox
    visible = If(crop IsNot Nothing, Intersection(visible, crop), visible)

    Return visible
End Function
''' <summary>
''' Maps a point given in PDF page coordinates to a point in the coordinate space of
''' the bitmap that represents <paramref name="rectangle"/>.
''' </summary>
''' <param name="point">The point in PDF coordinates.</param>
''' <param name="rectangle">The visible rectangle of the page; its left/bottom corner becomes the local origin.</param>
''' <param name="orientation__1">The page orientation that selects the mapping.</param>
''' <returns>
''' The converted point, or <paramref name="point"/> unchanged when the orientation is
''' not one of the four handled values.
''' </returns>
Private Function PDFPointToGDI(point As System.Drawing.PointF, rectangle As Rectangle, orientation__1 As Orientation) As System.Drawing.PointF
    ' Make the point relative to the visible rectangle, which may not start at (0,0).
    Dim offsetX As Double = point.X - rectangle.Left
    Dim offsetY As Double = point.Y - rectangle.Bottom

    ' NOTE(review): whether Rotate90/Rotate270 mean clockwise or counter-clockwise is
    ' defined by the Orientation enum, which is not visible here - confirm the two
    ' quarter-turn mappings against a rotated sample page.
    Dim gdiX As Double
    Dim gdiY As Double
    If orientation__1 = Orientation.Rotate0 Then
        ' Keep x and mirror y vertically within the rectangle's height.
        gdiX = offsetX
        gdiY = rectangle.Height - offsetY
    ElseIf orientation__1 = Orientation.Rotate90 Then
        ' Exchange x and y, mirroring both within the rectangle.
        gdiX = rectangle.Height - offsetY
        gdiY = rectangle.Width - offsetX
    ElseIf orientation__1 = Orientation.Rotate180 Then
        ' Point mirror of Rotate0: mirror x, keep y.
        gdiX = rectangle.Width - offsetX
        gdiY = offsetY
    ElseIf orientation__1 = Orientation.Rotate270 Then
        ' Point mirror of Rotate90: plain exchange of x and y.
        gdiX = offsetY
        gdiY = offsetX
    Else
        ' Unknown orientation: hand the input back untouched.
        Return point
    End If

    Return New System.Drawing.PointF(CSng(gdiX), CSng(gdiY))
End Function
''' <summary>
''' Builds the rectangle spanned by the innermost edges of two rectangles.
''' </summary>
''' <param name="rect1">The first rectangle.</param>
''' <param name="rect2">The second rectangle.</param>
''' <returns>
''' A new rectangle from the larger left/bottom and the smaller right/top edges.
''' No clamping is applied, so the width or height is negative when the inputs do not overlap.
''' </returns>
Private Function Intersection(rect1 As Rectangle, rect2 As Rectangle) As Rectangle
    ' Lower-left corner: the larger of the two left edges and of the two bottom edges.
    Dim left As Double = Math.Max(rect1.Left, rect2.Left)
    Dim bottom As Double = Math.Max(rect1.Bottom, rect2.Bottom)

    ' Upper-right corner: the smaller of the two right edges and of the two top edges.
    Dim right As Double = Math.Min(rect1.Left + rect1.Width, rect2.Left + rect2.Width)
    Dim top As Double = Math.Min(rect1.Bottom + rect1.Height, rect2.Bottom + rect2.Height)

    Dim width As Double = right - left
    Dim height As Double = top - bottom
    Return New Rectangle(left, bottom, width, height)
End Function