Determine the content bounding box

This article shows how to determine the content bounding box of a page.

/// <summary>
/// Opens the sample PDF and computes the content boundaries of every page.
/// </summary>
public static void Main()
{
  // open PDF
  using (FileStream fileIn = new FileStream(@"f1040a.pdf", FileMode.Open, FileAccess.Read))
  {
    Document document = new Document(fileIn);

    foreach (Page page in document.Pages)
    {
      // extract all graphics as shapes
      ShapeCollection shapes = page.CreateShapes();

      // get the boundaries, taking the transformation into account;
      // initially the current transformation matrix is Identity
      double top = GetTop(shapes, Transform.Identity);
      double bottom = GetBottom(shapes, Transform.Identity);
      double left = GetLeft(shapes, Transform.Identity);
      double right = GetRight(shapes, Transform.Identity);
    }
  }
}

Below is the GetTop() method, which calls itself recursively for nested shape collections. GetBottom, GetLeft and GetRight are implemented analogously. GetTop takes the current transformation matrix and clipping into account.

/// <summary>
/// Finds the max Y value among given shapes.
/// </summary>
/// <param name="shapes">
/// Shapes to inspect. Items that are a ShapeCollection or LayerShape are processed recursively.
/// </param>
/// <param name="currentTransform">
/// Transformation accumulated so far; callers pass Transform.Identity for the page-level collection.
/// </param>
/// <returns>
/// The largest top Y value found, or double.MinValue when no shape contributed a value.
/// </returns>
private static double GetTop(IEnumerable<Shape> shapes, Transform currentTransform)
{
  // According to the PDF coordinate system the origin is in the left lower corner of a page.
  // We assume that there is no visible content, so the top value is minimal.
  double top = double.MinValue;

  // take the clipping area into account, since it affects shapes boundaries.
  // initially, we assume that the clipping area is quite big and does not clip anything
  double clipTop = double.MaxValue;

  foreach (Shape shape in shapes)
  {
    if (shape is ShapeCollection || shape is LayerShape)
    {
      // recurse
      var transform = multiply(shape as ContentShape, currentTransform);
      var childShapes = shape as IEnumerable<Shape>;
      var childShapesTop = GetTop(childShapes, transform);

      // A clip shape met earlier in this collection restricts the nested shapes as well,
      // so the nested result is limited to the current clip boundary before it is merged.
      // (A clip inside the nested collection stays local to that recursive call.)
      top = Math.Max(top, Math.Min(childShapesTop, clipTop));
    }
    else
    {
      // otherwise, we determine the top bound of the shape.
      ContentShape contentShape = shape as ContentShape;
      if (null != contentShape)
      {
        // check whether shape is visible
        if (isVisible(contentShape))
        {
          // keep in mind that the shape can be rotated for instance,
          // the rotation affects the coordinates of the shape. So we need
          // to take all the transformations into account.
          Transform transform = multiply(contentShape, currentTransform);
          using (System.Drawing.Drawing2D.Matrix matrix = transform.CreateGdiMatrix())
          {
            // the top point of the shape is the bottom point + height
            double shapeHeight = getAdditionalTopOffset(contentShape);
            System.Drawing.PointF point = new System.Drawing.PointF(0, (float)shapeHeight);

            // now we have the untransformed coordinate of the top point of the shape.
            // transform it to get the actual coordinate in the page
            // coordinate system.
            System.Drawing.PointF[] points = new System.Drawing.PointF[1] { point };
            matrix.TransformPoints(points);
            point = points[0];

            // values below the page origin are clamped to 0
            double shapeTop = Math.Max(point.Y, 0);

            if (contentShape is ClipShape)
            {
              // if it is a clip shape, update the actual top clip boundary
              // (successive clips can only lower the boundary)
              clipTop = Math.Min(clipTop, shapeTop);
            }
            else
            {
              // take the clipping area into account
              shapeTop = Math.Min(shapeTop, clipTop);

              // update the result value
              top = Math.Max(top, shapeTop);
            }
          }
        }
      }
    }
  }
  return top;
}

The following code shows how to get the additional top offset.

/// <summary>
/// Returns the untransformed Y offset of the top of the given shape:
/// the height for images, font height times font size for text, and the
/// largest Y coordinate among the path segments for freehand and clip shapes.
/// Returns 0 when nothing could be measured.
/// </summary>
private static double getAdditionalTopOffset(ContentShape contentShape)
{
  // double.MinValue marks "nothing measured yet"
  double result = double.MinValue;

  if (contentShape is ImageShape)
  {
    result = ((ImageShape)contentShape).Height;
  }

  if (contentShape is TextShape)
  {
    TextShape text = (TextShape)contentShape;
    result = text.Font.Height * text.FontSize;
  }

  // freehand shapes and clip shapes both expose their outline as paths
  FreeHandPathCollection outline = null;
  if (contentShape is FreeHandShape)
  {
    outline = ((FreeHandShape)contentShape).Paths;
  }
  if (contentShape is ClipShape)
  {
    outline = ((ClipShape)contentShape).Paths;
  }

  if (outline != null)
  {
    foreach (FreeHandPath path in outline)
    {
      foreach (FreeHandSegment segment in path.Segments)
      {
        FreeHandStartSegment start = segment as FreeHandStartSegment;
        if (start != null)
        {
          result = Math.Max(result, start.Y);
          continue;
        }

        FreeHandLineSegment line = segment as FreeHandLineSegment;
        if (line != null)
        {
          result = Math.Max(result, line.Y1);
          continue;
        }

        FreeHandBezierSegment bezier = segment as FreeHandBezierSegment;
        if (bezier == null)
        {
          throw new NotSupportedException("not expected this segment type.");
        }

        // all three Y coordinates of the bezier segment are candidates
        result = Math.Max(result, bezier.Y1);
        result = Math.Max(result, bezier.Y2);
        result = Math.Max(result, bezier.Y3);
      }
    }
  }

  // nothing measured: fall back to a zero offset
  return result == double.MinValue ? 0 : result;
}