Determine the content bounding box

This article shows how to determine the content bounding box of a page.

/// <summary>
/// Opens f1040a.pdf and, for every page, computes the top, bottom, left and
/// right boundaries of the page content from the shapes on that page.
/// </summary>
public static void Main()
{
  // open PDF
  using (FileStream fileIn = new FileStream(@"f1040a.pdf", FileMode.Open, FileAccess.Read))
  {
    Document document = new Document(fileIn);

    foreach (Page page in document.Pages)
    {
      // extract all graphics as shapes
      ShapeCollection shapes = page.CreateShapes();

      // get the boundaries, taking the current transformation into account;
      // initially the current transformation matrix is Identity
      double top = GetTop(shapes, Transform.Identity);
      double bottom = GetBottom(shapes, Transform.Identity);
      double left = GetLeft(shapes, Transform.Identity);
      double right = GetRight(shapes, Transform.Identity);
    }
  }
}

Below is the GetTop() method that is called recursively. GetBottom, GetLeft and GetRight are comparable. GetTop takes into account the current transformation matrix and clipping.

/// <summary>
/// Finds the max Y value among given shapes, recursing into nested
/// shape collections and layers.
/// </summary>
/// <param name="shapes">The shapes to inspect.</param>
/// <param name="currentTransform">The transformation in effect for <paramref name="shapes"/>.</param>
/// <returns>
/// The largest top coordinate found, or <see cref="double.MinValue"/> when
/// no visible, non-clip content shape contributed a value.
/// </returns>
private static double GetTop(IEnumerable<Shape> shapes, Transform currentTransform)
{
  // The PDF origin is the lower-left corner of the page, so "top" is the
  // largest Y. Start from the smallest possible value: nothing seen yet.
  double highest = double.MinValue;

  // Upper limit imposed by clip shapes seen so far in this call. It starts
  // unbounded and is local to this call: it is neither passed into nor
  // returned from the recursive calls below.
  double clipLimit = double.MaxValue;

  foreach (Shape shape in shapes)
  {
    bool isContainer = shape is ShapeCollection || shape is LayerShape;
    if (isContainer)
    {
      // Descend into the container using the combined transformation.
      var nestedTransform = multiply(shape as ContentShape, currentTransform);
      var nestedTop = GetTop(shape as IEnumerable<Shape>, nestedTransform);
      highest = Math.Max(highest, nestedTop);
      continue;
    }

    // Only visible content shapes contribute to the boundary.
    ContentShape leaf = shape as ContentShape;
    if (null == leaf || !isVisible(leaf))
    {
      continue;
    }

    // The shape may be rotated, scaled, etc., so its top point has to be
    // mapped through the accumulated transformation.
    Transform leafTransform = multiply(leaf, currentTransform);
    using (System.Drawing.Drawing2D.Matrix gdiMatrix = leafTransform.CreateGdiMatrix())
    {
      // Untransformed top point: the shape's origin raised by its height.
      float localTop = (float)getAdditionalTopOffset(leaf);
      System.Drawing.PointF[] mapped = new System.Drawing.PointF[1]
      {
        new System.Drawing.PointF(0, localTop)
      };

      // Map it into the page coordinate system; negative values become 0.
      gdiMatrix.TransformPoints(mapped);
      double leafTop = Math.Max(mapped[0].Y, 0);

      if (leaf is ClipShape)
      {
        // A clip shape can only tighten the clip limit.
        clipLimit = Math.Min(clipLimit, leafTop);
      }
      else
      {
        // Content cannot extend above the current clip limit.
        double clippedTop = Math.Min(leafTop, clipLimit);
        highest = Math.Max(highest, clippedTop);
      }
    }
  }

  return highest;
}

This code shows how to get the additional top offset.

/// <summary>
/// Returns the vertical offset, in the shape's own untransformed
/// coordinates, from the shape's origin to its top.
/// </summary>
/// <param name="contentShape">The shape to measure.</param>
/// <returns>
/// The image height for an image shape, font height times font size for a
/// text shape, the largest Y coordinate among the path segments for a
/// free-hand or clip shape, and 0 when none of these yields a value.
/// </returns>
/// <exception cref="NotSupportedException">
/// A path contains a segment that is not a start, line or Bezier segment.
/// </exception>
private static double getAdditionalTopOffset(ContentShape contentShape)
{
  // double.MinValue doubles as the "nothing found yet" marker.
  double result = double.MinValue;

  ImageShape image = contentShape as ImageShape;
  if (image != null)
  {
    result = image.Height;
  }

  TextShape text = contentShape as TextShape;
  if (text != null)
  {
    result = text.Font.Height * text.FontSize;
  }

  // Free-hand shapes and clip shapes both expose paths; the clip shape's
  // paths take precedence if both casts succeed.
  FreeHandShape freeHand = contentShape as FreeHandShape;
  FreeHandPathCollection paths = (freeHand != null) ? freeHand.Paths : null;

  ClipShape clip = contentShape as ClipShape;
  if (clip != null)
  {
    paths = clip.Paths;
  }

  if (paths != null)
  {
    foreach (FreeHandPath path in paths)
    {
      foreach (FreeHandSegment segment in path.Segments)
      {
        FreeHandStartSegment start = segment as FreeHandStartSegment;
        FreeHandLineSegment line = segment as FreeHandLineSegment;
        FreeHandBezierSegment bezier = segment as FreeHandBezierSegment;

        if (start != null)
        {
          result = Math.Max(result, start.Y);
        }
        else if (line != null)
        {
          result = Math.Max(result, line.Y1);
        }
        else if (bezier != null)
        {
          // All three Y coordinates of a Bezier segment are candidates.
          result = Math.Max(result, bezier.Y1);
          result = Math.Max(result, bezier.Y2);
          result = Math.Max(result, bezier.Y3);
        }
        else
        {
          throw new NotSupportedException("not expected this segment type.");
        }
      }
    }
  }

  // Nothing contributed a value: report no additional offset.
  return (double.MinValue == result) ? 0 : result;
}
Download PDFKit.NET 4.0
We will send you a download link
Why do we ask for your email address?
We send tips that speed up your evaluation
We let you know about bug fixes
You can always unsubscribe with one click
We never share your address with a 3rd party
Thank you for your download

We have sent an email with a download link.