How to reduce PDF file size
How to reduce PDF file size
Adobe Acrobat has an option to save a document with a reduced size. This article shows how to reduce PDF file size with PDFKit.NET.
Note: For small PDF documents, the size may actually increase.
Adobe Acrobat
Adobe Acrobat has an option to save PDF files with a reduced size. This option can be found in a submenu under File – Save As… For some PDF files this results in a remarkable reduction in size. See the PDF file included in the downloads of this article. Close inspection of the result reveals that Adobe Acrobat does 2 things for this document:
- Large images get subsampled at a much lower resolution.
- Fully embedded fonts get replaced by a subsetted font.
PDFKit.NET
PDFKit.NET can do the same, via the Page.CreateShapes() method. The most interesting part of the sample that you will find attached is the code below. It inspects each shape on a page, and replaces some of them with a modified shape:
- Image shapes get replaced by a new image shape with a lower resolution.
- Text shapes that use an Arial font get modified to refer to a subsetted Arial font.
/// <summary>
/// Walks <paramref name="shapes"/> and swaps size-heavy shapes for lighter ones:
/// nested collections are processed recursively, image shapes are replaced by the
/// result of <c>downScale</c>, and text shapes whose font is Arial are switched to
/// a subset-embedded Arial font.
/// </summary>
/// <param name="shapes">The collection to process; it is modified in place.</param>
/// <param name="dpi">Target resolution passed through to <c>downScale</c>.</param>
static void reduceFileSize(ShapeCollection shapes, int dpi)
{
    for (int i = 0; i < shapes.Count; i++)
    {
        Shape shape = shapes[i];

        ShapeCollection nested = shape as ShapeCollection;
        if (nested != null)
        {
            // recurse into the nested collection
            reduceFileSize(nested, dpi);
            continue;
        }

        ImageShape imageShape = shape as ImageShape;
        if (imageShape != null)
        {
            // Every removal is followed by an insert at the same index, so
            // shapes.Count and the loop index stay valid.
            shapes.RemoveAt(i);
            ImageShape downScaled = downScale(imageShape, dpi);
            shapes.Insert(i, downScaled);
            continue;
        }

        TextShape textShape = shape as TextShape;
        if (textShape == null)
        {
            continue;
        }

        // Prefer the family name; fall back to the font name when it is missing.
        string fontName = textShape.Font.FamilyName;
        if (string.IsNullOrEmpty(fontName))
        {
            fontName = textShape.Font.Name;
        }

        // Ordinal, case-insensitive match: a culture-sensitive ToLower() would
        // turn "ARIAL" into "arıal" under a Turkish culture and miss the font.
        if (string.Equals(fontName, "arial", System.StringComparison.OrdinalIgnoreCase))
        {
            shapes.RemoveAt(i);
            TallComponents.PDF.Fonts.Font subsetted =
                TallComponents.PDF.Fonts.Font.Create("Arial", false, false);
            subsetted.EmbedMode = TallComponents.PDF.Fonts.EmbedMode.Subset;
            textShape.Font = subsetted;
            shapes.Insert(i, textShape);
        }
    }
}
``` vb
''' <summary>
''' Walks <paramref name="shapes"/> and swaps size-heavy shapes for lighter ones:
''' nested collections are processed recursively, image shapes are replaced by the
''' result of downScale, and text shapes whose font is Arial are switched to a
''' subset-embedded Arial font.
''' </summary>
''' <param name="shapes">The collection to process; it is modified in place.</param>
''' <param name="dpi">Target resolution passed through to downScale.</param>
Private Sub reduceFileSize(shapes As ShapeCollection, dpi As Integer)
    ' Every removal below is followed by an insert at the same index, so the
    ' upper bound computed here stays valid for the whole loop.
    For i As Integer = 0 To shapes.Count - 1
        Dim shape As Shape = shapes(i)
        If TypeOf shape Is ShapeCollection Then
            ' recurse into the nested collection
            reduceFileSize(DirectCast(shape, ShapeCollection), dpi)
        ElseIf TypeOf shape Is ImageShape Then
            shapes.RemoveAt(i)
            Dim downScaled As ImageShape = downScale(DirectCast(shape, ImageShape), dpi)
            shapes.Insert(i, downScaled)
        ElseIf TypeOf shape Is TextShape Then
            Dim textShape As TextShape = DirectCast(shape, TextShape)

            ' Prefer the family name; fall back to the font name when it is missing.
            Dim fontName As String = textShape.Font.FamilyName
            If String.IsNullOrEmpty(fontName) Then
                fontName = textShape.Font.Name
            End If

            ' Ordinal, case-insensitive match: a culture-sensitive ToLower() would
            ' turn "ARIAL" into "arıal" under a Turkish culture and miss the font.
            If String.Equals(fontName, "arial", System.StringComparison.OrdinalIgnoreCase) Then
                shapes.RemoveAt(i)
                Dim subsetted As TallComponents.PDF.Fonts.Font = TallComponents.PDF.Fonts.Font.Create("Arial", False, False)
                subsetted.EmbedMode = TallComponents.PDF.Fonts.EmbedMode.Subset
                textShape.Font = subsetted
                shapes.Insert(i, textShape)
            End If
        End If
    Next
End Sub
The code for creating a downsampled image can be found below.
/// <summary>
/// Returns a JPEG-compressed copy of <paramref name="image"/> resampled to roughly
/// <paramref name="dpi"/> dots per inch at the size it is drawn on the page.
/// The original shape is returned unchanged when the target would be smaller
/// than 5 pixels in either direction or would require upscaling.
/// </summary>
/// <param name="image">The image shape to resample.</param>
/// <param name="dpi">Desired resolution; the calculation uses 72 units per inch.</param>
/// <returns>The resampled shape, or <paramref name="image"/> itself when no resampling is done.</returns>
static ImageShape downScale(ImageShape image, int dpi)
{
    Matrix matrix = image.Transform.CreateGdiMatrix();
    PointF[] points = new PointF[] {
        new PointF(0, 0),
        new PointF((float)image.Width, 0),
        new PointF(0, (float)image.Height)
    };
    matrix.TransformPoints(points);

    // real dimensions of the image in points as it appears on the page
    float realWidth = distance(points[0], points[1]);
    float realHeight = distance(points[0], points[2]);

    // given the desired resolution, these are the desired number of cols/rows of the optimized image
    int desiredColumns = (int)(realWidth * ((float)dpi / 72f));
    int desiredRows = (int)(realHeight * ((float)dpi / 72f));
    if (desiredColumns < 5 || desiredRows < 5)
    {
        return image;
    }

    // create the new image and copy the source image to it (resampling happens here)
    using (Bitmap bitmap = image.CreateBitmap())
    {
        // prevent upscale: columns are checked against the source width and
        // rows against the source height (the rows were previously compared
        // with the width, which gave wrong results for non-square images)
        if (desiredColumns > bitmap.Width || desiredRows > bitmap.Height)
        {
            return image;
        }

        Bitmap optimizedBitmap = new Bitmap(desiredColumns, desiredRows, PixelFormat.Format32bppArgb);
        using (Graphics graphics = Graphics.FromImage(optimizedBitmap))
        {
            graphics.DrawImage(bitmap, 0, 0, desiredColumns, desiredRows);
        }

        // Carry over the placement and appearance of the source shape so the
        // replacement occupies the same area on the page.
        ImageShape optimized = new ImageShape(optimizedBitmap, true);
        optimized.Compression = Compression.Jpeg;
        optimized.Width = image.Width;
        optimized.Height = image.Height;
        optimized.Transform = image.Transform;
        optimized.Opacity = image.Opacity;
        optimized.BlendMode = image.BlendMode;
        return optimized;
    }
}
/// <summary>
/// Computes the straight-line (Euclidean) distance between two points.
/// </summary>
/// <param name="a">First point.</param>
/// <param name="b">Second point.</param>
/// <returns>The length of the segment from <paramref name="a"/> to <paramref name="b"/>.</returns>
static float distance(PointF a, PointF b)
{
    float deltaX = a.X - b.X;
    float deltaY = a.Y - b.Y;
    return (float)Math.Sqrt(deltaX * deltaX + deltaY * deltaY);
}
''' <summary>
''' Returns a JPEG-compressed copy of <paramref name="image"/> resampled to roughly
''' <paramref name="dpi"/> dots per inch at the size it is drawn on the page.
''' The original shape is returned unchanged when the target would be smaller
''' than 5 pixels in either direction or would require upscaling.
''' </summary>
''' <param name="image">The image shape to resample.</param>
''' <param name="dpi">Desired resolution; the calculation uses 72 units per inch.</param>
''' <returns>The resampled shape, or <paramref name="image"/> itself when no resampling is done.</returns>
Private Function downScale(image As ImageShape, dpi As Integer) As ImageShape
    Dim matrix As Matrix = image.Transform.CreateGdiMatrix()
    Dim points As PointF() = New PointF() {New PointF(0, 0), New PointF(CSng(image.Width), 0), New PointF(0, CSng(image.Height))}
    matrix.TransformPoints(points)

    ' real dimensions of the image in points as it appears on the page
    Dim realWidth As Single = distance(points(0), points(1))
    Dim realHeight As Single = distance(points(0), points(2))

    ' given the desired resolution, these are the desired number of cols/rows of the optimized image
    ' (note: CInt rounds to the nearest integer rather than truncating)
    Dim desiredColumns As Integer = CInt(realWidth * (CSng(dpi) / 72.0F))
    Dim desiredRows As Integer = CInt(realHeight * (CSng(dpi) / 72.0F))
    If desiredColumns < 5 OrElse desiredRows < 5 Then
        Return image
    End If

    ' create the new image and copy the source image to it (resampling happens here)
    Using bitmap As Bitmap = image.CreateBitmap()
        ' prevent upscale: columns are checked against the source width and
        ' rows against the source height (the rows were previously compared
        ' with the width, which gave wrong results for non-square images)
        If desiredColumns > bitmap.Width OrElse desiredRows > bitmap.Height Then
            Return image
        End If

        Dim optimizedBitmap As New Bitmap(desiredColumns, desiredRows, PixelFormat.Format32bppArgb)
        Using graphics__1 As Graphics = Graphics.FromImage(optimizedBitmap)
            graphics__1.DrawImage(bitmap, 0, 0, desiredColumns, desiredRows)
        End Using

        ' Carry over the placement and appearance of the source shape so the
        ' replacement occupies the same area on the page.
        Dim optimized As New ImageShape(optimizedBitmap, True)
        optimized.Compression = Compression.Jpeg
        optimized.Width = image.Width
        optimized.Height = image.Height
        optimized.Transform = image.Transform
        optimized.Opacity = image.Opacity
        optimized.BlendMode = image.BlendMode
        Return optimized
    End Using
End Function
''' <summary>
''' Computes the straight-line (Euclidean) distance between two points.
''' </summary>
''' <param name="a">First point.</param>
''' <param name="b">Second point.</param>
''' <returns>The length of the segment from a to b.</returns>
Private Function distance(a As PointF, b As PointF) As Single
    Dim deltaX As Single = a.X - b.X
    Dim deltaY As Single = a.Y - b.Y
    Return CSng(Math.Sqrt(deltaX * deltaX + deltaY * deltaY))
End Function
The resulting document looks as follows. Its size is only 120 KB, as compared to the original 4,583 KB.
If you look closely, you will see a red cross through the image. This is normal: we add this deliberately in unlicensed versions of our software for images that get extracted via Page.CreateShapes().