How to reduce PDF file size

How to reduce PDF file size

Adobe Acrobat has an option to save a document with a reduced size. This article shows how to reduce PDF file size with PDFKit.NET.

Note: For small PDF documents, the size may actually increase.

Adobe Acrobat

Adobe Acrobat has an option to save PDF files with a reduced size. This option can be found in a submenu under File – Save As… For some PDF files this results in a remarkable reduction in size. See the PDF file included in the downloads of this article. Close inspection of the result reveals that Adobe Acrobat does two things for this document:

  • Large images get subsampled at a much lower resolution.
  • Fully embedded fonts get replaced by a subsetted font.

PDFKit.NET

PDFKit.NET can do the same, via the Page.CreateShapes() method. The most interesting part of the sample that you will find attached is the code below. It inspects each shape on a page, and replaces some of them with a modified shape:

  • Image shapes get replaced by a new image shape with a lower resolution.
  • Text shapes that use an Arial font get modified to refer to a subsetted Arial font.
/// <summary>
/// Walks a shape collection (recursing into nested collections) and replaces
/// size-heavy shapes in place: image shapes are swapped for the result of
/// <c>downScale</c>, and text shapes whose font is Arial are switched to a
/// subsetted Arial font.
/// </summary>
/// <param name="shapes">The collection to process; it is modified in place.</param>
/// <param name="dpi">Target image resolution, passed through to <c>downScale</c>.</param>
static void reduceFileSize(ShapeCollection shapes, int dpi)
{
  for (int i = 0; i < shapes.Count; i++)
  {
    Shape shape = shapes[i];

    if (shape is ShapeCollection)
    {
      // recurse into the nested collection
      reduceFileSize((ShapeCollection)shape, dpi);
    }
    else if (shape is ImageShape)
    {
      // remove + insert at the same index keeps the shape order (and Count) unchanged
      shapes.RemoveAt(i);
      ImageShape downScaled = downScale((ImageShape)shape, dpi);
      shapes.Insert(i, downScaled);
    }
    else if (shape is TextShape)
    {
      TextShape textShape = (TextShape)shape;

      // prefer the family name; fall back to the font name when the family name is missing
      string fontName = !string.IsNullOrEmpty(textShape.Font.FamilyName)
                        ? textShape.Font.FamilyName
                        : textShape.Font.Name;

      // Ordinal, case-insensitive match: a culture-sensitive ToLower() would turn
      // "ARIAL" into "arıal" under a Turkish culture and silently skip the font.
      // A null or empty name never matches.
      if (string.Equals(fontName, "arial", System.StringComparison.OrdinalIgnoreCase))
      {
        // the shape is taken out, given the subsetted font, and re-inserted at the same index
        shapes.RemoveAt(i);

        TallComponents.PDF.Fonts.Font subsetted =
          TallComponents.PDF.Fonts.Font.Create("Arial", false, false);
        subsetted.EmbedMode = TallComponents.PDF.Fonts.EmbedMode.Subset;
        textShape.Font = subsetted;
        shapes.Insert(i, textShape);
      }
    }
  }
}


``` vb
''' <summary>
''' Walks a shape collection (recursing into nested collections) and replaces
''' size-heavy shapes in place: image shapes are swapped for the result of
''' downScale, and text shapes whose font is Arial are switched to a
''' subsetted Arial font.
''' </summary>
''' <param name="shapes">The collection to process; it is modified in place.</param>
''' <param name="dpi">Target image resolution, passed through to downScale.</param>
Private Sub reduceFileSize(shapes As ShapeCollection, dpi As Integer)
    For i As Integer = 0 To shapes.Count - 1
        Dim shape As Shape = shapes(i)

        If TypeOf shape Is ShapeCollection Then
            ' recurse into the nested collection
            reduceFileSize(DirectCast(shape, ShapeCollection), dpi)
        ElseIf TypeOf shape Is ImageShape Then
            ' remove + insert at the same index keeps the shape order (and Count) unchanged
            shapes.RemoveAt(i)
            Dim downScaled As ImageShape = downScale(DirectCast(shape, ImageShape), dpi)
            shapes.Insert(i, downScaled)
        ElseIf TypeOf shape Is TextShape Then
            Dim textShape As TextShape = DirectCast(shape, TextShape)

            ' prefer the family name; fall back to the font name when the family name is missing
            Dim fontName As String = If(Not String.IsNullOrEmpty(textShape.Font.FamilyName), textShape.Font.FamilyName, textShape.Font.Name)

            ' Ordinal, case-insensitive match: a culture-sensitive ToLower() would turn
            ' "ARIAL" into "arıal" under a Turkish culture and silently skip the font.
            ' A null or empty name never matches.
            If String.Equals(fontName, "arial", System.StringComparison.OrdinalIgnoreCase) Then
                ' the shape is taken out, given the subsetted font, and re-inserted at the same index
                shapes.RemoveAt(i)

                Dim subsetted As TallComponents.PDF.Fonts.Font = TallComponents.PDF.Fonts.Font.Create("Arial", False, False)
                subsetted.EmbedMode = TallComponents.PDF.Fonts.EmbedMode.Subset
                textShape.Font = subsetted
                shapes.Insert(i, textShape)
            End If
        End If
    Next
End Sub

The code for creating a downsampled image can be found below.

/// <summary>
/// Returns a JPEG-compressed copy of <paramref name="image"/> resampled to roughly
/// <paramref name="dpi"/> dots per inch at the size the image occupies on the page.
/// The original shape is returned unchanged when the resampled image would be
/// smaller than 5 pixels in either direction or would need more pixels than the
/// source bitmap has (no upscaling).
/// </summary>
/// <param name="image">The image shape to resample.</param>
/// <param name="dpi">Desired resolution in dots per inch (72 points per inch is assumed).</param>
/// <returns>The resampled image shape, or <paramref name="image"/> itself when no resampling is done.</returns>
static ImageShape downScale(ImageShape image, int dpi)
{
  // origin plus the end points of the image's width and height edges, before transformation
  PointF[] points = new PointF[] {
                new PointF(0, 0),
                new PointF((float)image.Width, 0),
                new PointF(0, (float)image.Height)
            };

  // the GDI+ matrix is only needed to transform the three points, so release it right away
  using (Matrix matrix = image.Transform.CreateGdiMatrix())
  {
    matrix.TransformPoints(points);
  }

  // real dimensions of the image in points as it appears on the page
  float realWidth = distance(points[0], points[1]);
  float realHeight = distance(points[0], points[2]);

  // given the desired resolution, these are the desired number of cols/rows of the optimized image
  int desiredColumns = (int)(realWidth * ((float)dpi / 72f));
  int desiredRows = (int)(realHeight * ((float)dpi / 72f));

  // too small to be worth resampling
  if (desiredColumns < 5 || desiredRows < 5)
  {
    return image;
  }

  // create the new image and copy the source image to it (resampling happens here)
  using (Bitmap bitmap = image.CreateBitmap())
  {
    // prevent upscale: columns are checked against the width, rows against the height
    if (desiredColumns > bitmap.Width || desiredRows > bitmap.Height)
    {
      return image;
    }

    Bitmap optimizedBitmap = new Bitmap(desiredColumns, desiredRows, PixelFormat.Format32bppArgb);
    using (Graphics graphics = Graphics.FromImage(optimizedBitmap))
    {
      graphics.DrawImage(bitmap, 0, 0, desiredColumns, desiredRows);
    }

    // NOTE(review): the second constructor argument is kept exactly as in the original
    // sample; confirm its meaning against the PDFKit.NET ImageShape documentation.
    ImageShape optimized = new ImageShape(optimizedBitmap, true);
    optimized.Compression = Compression.Jpeg;

    // keep the placement and appearance of the source shape
    optimized.Width = image.Width;
    optimized.Height = image.Height;
    optimized.Transform = image.Transform;
    optimized.Opacity = image.Opacity;
    optimized.BlendMode = image.BlendMode;

    return optimized;
  }
}

/// <summary>
/// Returns the straight-line (Euclidean) distance between two points.
/// </summary>
/// <param name="a">First point.</param>
/// <param name="b">Second point.</param>
/// <returns>The length of the segment from <paramref name="a"/> to <paramref name="b"/>.</returns>
static float distance(PointF a, PointF b)
{
  float deltaX = a.X - b.X;
  float deltaY = a.Y - b.Y;
  float squaredLength = deltaX * deltaX + deltaY * deltaY;
  return (float)Math.Sqrt(squaredLength);
}
''' <summary>
''' Returns a JPEG-compressed copy of the image resampled to roughly the given
''' resolution at the size the image occupies on the page. The original shape is
''' returned unchanged when the resampled image would be smaller than 5 pixels in
''' either direction or would need more pixels than the source bitmap has
''' (no upscaling).
''' </summary>
''' <param name="image">The image shape to resample.</param>
''' <param name="dpi">Desired resolution in dots per inch (72 points per inch is assumed).</param>
''' <returns>The resampled image shape, or the original shape when no resampling is done.</returns>
Private Function downScale(image As ImageShape, dpi As Integer) As ImageShape
    ' origin plus the end points of the image's width and height edges, before transformation
    Dim points As PointF() = New PointF() {New PointF(0, 0), New PointF(CSng(image.Width), 0), New PointF(0, CSng(image.Height))}

    ' the GDI+ matrix is only needed to transform the three points, so release it right away
    Using matrix As Matrix = image.Transform.CreateGdiMatrix()
        matrix.TransformPoints(points)
    End Using

    ' real dimensions of the image in points as it appears on the page
    Dim realWidth As Single = distance(points(0), points(1))
    Dim realHeight As Single = distance(points(0), points(2))

    ' given the desired resolution, these are the desired number of cols/rows of the optimized image
    Dim desiredColumns As Integer = CInt(realWidth * (CSng(dpi) / 72.0F))
    Dim desiredRows As Integer = CInt(realHeight * (CSng(dpi) / 72.0F))

    ' too small to be worth resampling
    If desiredColumns < 5 OrElse desiredRows < 5 Then
        Return image
    End If

    ' create the new image and copy the source image to it (resampling happens here)
    Using bitmap As Bitmap = image.CreateBitmap()
        ' prevent upscale: columns are checked against the width, rows against the height
        If desiredColumns > bitmap.Width OrElse desiredRows > bitmap.Height Then
            Return image
        End If

        Dim optimizedBitmap As New Bitmap(desiredColumns, desiredRows, PixelFormat.Format32bppArgb)
        Using graphics__1 As Graphics = Graphics.FromImage(optimizedBitmap)
            graphics__1.DrawImage(bitmap, 0, 0, desiredColumns, desiredRows)
        End Using

        ' NOTE(review): the second constructor argument is kept exactly as in the original
        ' sample; confirm its meaning against the PDFKit.NET ImageShape documentation.
        Dim optimized As New ImageShape(optimizedBitmap, True)
        optimized.Compression = Compression.Jpeg

        ' keep the placement and appearance of the source shape
        optimized.Width = image.Width
        optimized.Height = image.Height
        optimized.Transform = image.Transform
        optimized.Opacity = image.Opacity
        optimized.BlendMode = image.BlendMode

        Return optimized
    End Using
End Function

    ''' <summary>
    ''' Returns the straight-line (Euclidean) distance between two points.
    ''' </summary>
    ''' <param name="a">First point.</param>
    ''' <param name="b">Second point.</param>
    ''' <returns>The length of the segment from a to b.</returns>
    Private Function distance(a As PointF, b As PointF) As Single
        Dim deltaX As Single = a.X - b.X
        Dim deltaY As Single = a.Y - b.Y
        Dim squaredLength As Single = deltaX * deltaX + deltaY * deltaY
        Return CSng(Math.Sqrt(squaredLength))
    End Function

The resulting document looks as follows. Its size is only 120 KB, as compared to the original 4,583 KB.

If you look closely, you will see a red cross through the image. This is normal: we add this deliberately in unlicensed versions of our software for images that get extracted via Page.CreateShapes().

Reduce Size PDF C Sharp And Vbnet