Check For Text
This demo shows how to check for text on the pages of a PDF document. We use a helper class that analyses the content for us:
PHP
<?php
use setasign\SetaPDF2\Core\Canvas\Canvas;
use setasign\SetaPDF2\Core\Parser\Content;
use setasign\SetaPDF2\Core\Resource\ResourceInterface;
use setasign\SetaPDF2\Core\XObject\XObject;
/**
 * Helper that checks whether the content stream of a canvas shows any text.
 *
 * The stream is scanned for the text showing operators (Tj, TJ, " and ').
 * Form XObjects painted through the "Do" operator are scanned recursively
 * with a new instance of this class.
 */
class TextProcessor
{
    /**
     * The canvas whose content stream is inspected.
     *
     * @var Canvas
     */
    protected $_canvas;

    /**
     * Result flag of the running/last scan. It is only ever raised to true
     * while a scan is in progress and reset at the start of hasText().
     *
     * @var bool
     */
    protected $_hasText = false;

    /**
     * The constructor
     *
     * @param Canvas $canvas The canvas instance to analyse.
     */
    public function __construct(Canvas $canvas)
    {
        $this->_canvas = $canvas;
    }

    /**
     * Checks for text on the initially passed canvas instance.
     *
     * @return bool True if a text showing operator was found in the stream
     *              (or in a nested form XObject), otherwise false.
     */
    public function hasText()
    {
        // without resources no font can be defined, so no text can be output
        if ($this->_canvas->getResources() === false) {
            return false;
        }

        // start every scan from a clean state so the method can be called repeatedly
        $this->_hasText = false;

        $parser = $this->_createContentParser();
        $parser->process();
        $parser->cleanUp();

        return $this->_hasText;
    }

    /**
     * Create a content parser instance with the callbacks needed for the text check.
     *
     * @return Content
     */
    protected function _createContentParser()
    {
        try {
            $stream = $this->_canvas->getStream();
        } catch (\setasign\SetaPDF2\Core\Filter\Exception $e) {
            // if a stream cannot be unfiltered, we ignore it and scan an empty stream
            $stream = '';
        }

        $contentParser = new Content($stream);

        // any text showing operator is enough to answer the question
        $contentParser->registerOperator(
            ['Tj', 'TJ', '"', "'"],
            function () {
                $this->_hasText = true;
                // NOTE(review): false is used to ask the parser to stop early - confirm in Content::process()
                return false;
            }
        );

        // form XObjects carry their own content stream, so they are checked recursively
        $contentParser->registerOperator(
            'Do',
            function ($arguments) {
                // a "Do" without an operand is malformed content: nothing to resolve
                if (!isset($arguments[0])) {
                    return;
                }

                $xObjects = $this->_canvas->getResources(true, false, ResourceInterface::TYPE_X_OBJECT);
                if ($xObjects === false) {
                    return;
                }

                $reference = $xObjects->getValue($arguments[0]->getValue());
                // NOTE(review): assumes an unknown name yields a non-object (e.g. null) - confirm.
                // Such a dangling name is skipped instead of being passed on to XObject::get().
                if (!is_object($reference)) {
                    return;
                }

                $xObject = XObject::get($reference);
                if (!($xObject instanceof \setasign\SetaPDF2\Core\XObject\Form)) {
                    // only form XObjects are scanned for text
                    return;
                }

                $processor = new self($xObject->getCanvas());
                if ($processor->hasText()) {
                    // only raise the flag; a textless form must never reset an earlier positive result
                    $this->_hasText = true;
                    return false;
                }
            }
        );

        return $contentParser;
    }
}
