Determine if a PDF only contains images
Determine if a PDF only contains images
The following sample code determines whether a PDF document contains only images (and no other shapes, such as text fragments).
/// <summary>
/// Entry point of the sample: checks the PDF at a fixed relative path and
/// writes to the console whether it contains only images.
/// </summary>
/// <param name="args">Command-line arguments; not used.</param>
static void Main(string[] args)
{
    string path = @"..\..\input.pdf";

    // The surrounding literals carry the spaces, so the verdict itself has none;
    // this avoids the doubled space ("File  contains ...") in the printed message.
    string verdict = OnlyImages(path) ? "contains" : "does not contain";
    System.Console.WriteLine("File " + verdict + " only images");
}
/// <summary>
/// Opens the PDF file at <paramref name="path"/> read-only and checks every page
/// for shapes that are not images.
/// </summary>
/// <param name="path">Path of the PDF file to inspect.</param>
/// <returns>
/// <c>true</c> when the shapes of every page consist solely of images;
/// <c>false</c> as soon as one page contains any other kind of shape.
/// </returns>
public static bool OnlyImages(string path)
{
    bool imagesOnly = true;

    using (FileStream pdfStream = new FileStream(path, FileMode.Open, FileAccess.Read))
    {
        Document pdf = new Document(pdfStream);
        foreach (Page current in pdf.Pages)
        {
            // Stop at the first page that holds something other than images.
            if (!OnlyImages(current.CreateShapes()))
            {
                imagesOnly = false;
                break;
            }
        }
    }

    return imagesOnly;
}
/// <summary>
/// Checks whether a shape collection, including any nested shape collections,
/// consists solely of image shapes.
/// </summary>
/// <param name="shapes">The shapes to inspect.</param>
/// <returns>
/// <c>true</c> when every shape is an <c>ImageShape</c> or a nested
/// <c>ShapeCollection</c> that itself contains only images; otherwise <c>false</c>.
/// </returns>
public static bool OnlyImages(ShapeCollection shapes)
{
    foreach (Shape candidate in shapes)
    {
        if (candidate is ImageShape)
        {
            continue;
        }

        // Anything that is not an image must be a nested collection made up
        // of images only; the nested collection is checked recursively.
        bool isGroup = candidate is ShapeCollection;
        if (!isGroup || !OnlyImages((ShapeCollection)candidate))
        {
            return false;
        }
    }

    return true;
}
''' <summary>
''' Entry point of the sample: checks the PDF at a fixed relative path and
''' writes to the console whether it contains only images.
''' </summary>
Sub Main()
    Dim path As String = "..\..\input.pdf"

    ' The surrounding literals carry the spaces, so the verdict itself has none;
    ' this avoids the doubled space ("File  contains ...") in the printed message.
    Dim verdict As String = If(OnlyImages(path), "contains", "does not contain")
    System.Console.WriteLine("File " & verdict & " only images")
End Sub
''' <summary>
''' Opens the PDF file at <paramref name="path"/> read-only and checks every page
''' for shapes that are not images.
''' </summary>
''' <param name="path">Path of the PDF file to inspect.</param>
''' <returns>
''' True when the shapes of every page consist solely of images;
''' False as soon as one page contains any other kind of shape.
''' </returns>
Public Function OnlyImages(path As String) As Boolean
    Dim imagesOnly As Boolean = True

    Using pdfStream As New FileStream(path, FileMode.Open, FileAccess.Read)
        Dim pdf As New Document(pdfStream)
        For Each current As Page In pdf.Pages
            ' Stop at the first page that holds something other than images.
            If Not OnlyImages(current.CreateShapes()) Then
                imagesOnly = False
                Exit For
            End If
        Next
    End Using

    Return imagesOnly
End Function
''' <summary>
''' Checks whether a shape collection, including any nested shape collections,
''' consists solely of image shapes.
''' </summary>
''' <param name="shapes">The shapes to inspect.</param>
''' <returns>
''' True when every shape is an ImageShape or a nested ShapeCollection that
''' itself contains only images; otherwise False.
''' </returns>
Public Function OnlyImages(shapes As ShapeCollection) As Boolean
    For Each candidate As Shape In shapes
        If TypeOf candidate Is ImageShape Then
            Continue For
        End If

        ' Anything that is not an image must be a nested collection made up
        ' of images only; the nested collection is checked recursively.
        Dim isGroup As Boolean = TypeOf candidate Is ShapeCollection
        If Not isGroup OrElse Not OnlyImages(DirectCast(candidate, ShapeCollection)) Then
            Return False
        End If
    Next

    Return True
End Function