A PHP library for low-level access to PDF files

SetaPDF-Core

Access PDF documents at their lowest level with PHP

Extract Comments

This demo shows how to extract comments in a threaded structure similar to the one displayed by PDF reader/viewer applications. We use a helper class that dumps the comment outline for us:

PHP
<?php

/**
 * Dumps the comments (markup annotations) of a PDF document as an indented
 * text outline: root comments first, followed by their state changes and
 * their replies (recursively).
 */
class CommentsDumper
{
    /**
     * The document whose comments are dumped.
     *
     * @var \SetaPDF_Core_Document
     */
    protected $_document;

    /**
     * The constructor
     *
     * @param \SetaPDF_Core_Document $document
     */
    public function __construct(\SetaPDF_Core_Document $document)
    {
        $this->_document = $document;
    }

    /**
     * Dump all comments of all pages to the output (via echo).
     *
     * For every page the root markup annotations (those which are not a reply
     * to another annotation) are collected, ordered by date and dumped
     * together with their replies.
     */
    public function dump()
    {
        $pages = $this->_document->getCatalog()->getPages();

        // iterate over all available pages (page numbers are 1-based)
        for ($pageNo = 1, $pageCount = $pages->count(); $pageNo <= $pageCount; $pageNo++) {
            $annotations = $pages->getPage($pageNo)->getAnnotations();

            // extract all root annotations: markup annotations that are no replies
            $rootAnnotations = array();
            foreach ($annotations->getAll() as $annotation) {
                if (!$annotation instanceof \SetaPDF_Core_Document_Page_Annotation_Markup
                    || $annotation->isReplyTo()
                ) {
                    continue;
                }

                $rootAnnotations[] = $annotation;
            }

            usort($rootAnnotations, array($this, '_orderByDate'));
            foreach ($rootAnnotations as $rootAnnotation) {
                $this->_dumpReplies($annotations, $rootAnnotation);
            }
        }
    }

    /**
     * Get the date of an annotation: the modification date if available,
     * otherwise the creation date.
     *
     * @param \SetaPDF_Core_Document_Page_Annotation_Markup $annotation
     * @return \SetaPDF_Core_DataStructure_Date|null Null if neither date is available.
     */
    protected function _getDate($annotation)
    {
        $date = $annotation->getModificationDate(false);
        if (!$date) {
            $date = $annotation->getCreationDate(false);
        }

        return $date instanceof \SetaPDF_Core_DataStructure_Date ? $date : null;
    }

    /**
     * Get the printable date of an annotation.
     *
     * @param \SetaPDF_Core_Document_Page_Annotation_Markup $annotation
     * @return string The date formatted as "Y-m-d H:i:s" or "Date Indeterminate"
     *                if the annotation carries no date.
     */
    protected function _formatDate($annotation)
    {
        $date = $this->_getDate($annotation);
        if ($date === null) {
            return 'Date Indeterminate';
        }

        return $date->getAsDateTime()->format('Y-m-d H:i:s');
    }

    /**
     * Compare the dates of two annotation objects.
     *
     * @param \SetaPDF_Core_Document_Page_Annotation_Markup $a
     * @param \SetaPDF_Core_Document_Page_Annotation_Markup $b
     * @return integer -1, 0 or 1. Annotations are treated as equal if their
     *                 timestamps match or if at least one of them has no date.
     */
    protected function _orderByDate($a, $b)
    {
        $dateA = $this->_getDate($a);
        $dateB = $this->_getDate($b);

        if ($dateA === null || $dateB === null) {
            return 0;
        }

        $_a = (int)$dateA->getAsDateTime()->format('U');
        $_b = (int)$dateB->getAsDateTime()->format('U');

        // a comparison callback has to report equality, otherwise it is inconsistent
        if ($_a === $_b) {
            return 0;
        }

        return $_a < $_b ? -1 : 1;
    }

    /**
     * Dumps an annotation followed by its state changes and replies (recursively).
     *
     * @param \SetaPDF_Core_Document_Page_Annotations $annotations The annotations helper of the page
     * @param \SetaPDF_Core_Document_Page_Annotation $annotation The annotation to dump
     * @param int $level The nesting level (each level is indented by 4 spaces)
     */
    protected function _dumpReplies(
        \SetaPDF_Core_Document_Page_Annotations $annotations,
        \SetaPDF_Core_Document_Page_Annotation $annotation,
        $level = 0
    ) {
        echo str_repeat(' ', $level * 4);
        echo $this->_formatDate($annotation) . ': ';
        echo $annotation->getTextLabel() . ' (' . $annotation->getSubject() . '): ' . $annotation->getContents() . "\n";

        // nothing more to do if this annotation has no replies
        if (!$annotation->hasReplies($annotations)) {
            return;
        }

        // states and replies are created in the same structure
        $repliesByStateModel = array();
        $realReplies = array();

        foreach ($annotation->getReplies($annotations) as $reply) {
            $isState = $reply instanceof \SetaPDF_Core_Document_Page_Annotation_Text
                && $reply->getStateModel();

            if (!$isState) {
                $realReplies[] = $reply;
                continue;
            }

            // follow the chain of state changes (each one is the first reply of its predecessor)
            $stateReply = $reply;
            while (true) {
                $repliesByStateModel[$stateReply->getStateModel()][] = $stateReply;

                $nestedReplies = $stateReply->getReplies($annotations);
                if (count($nestedReplies) === 0) {
                    break;
                }

                // only state annotations offer getStateModel()/getState(): stop at anything else
                $next = reset($nestedReplies);
                if (!$next instanceof \SetaPDF_Core_Document_Page_Annotation_Text || !$next->getStateModel()) {
                    break;
                }

                $stateReply = $next;
            }
        }

        // headlines are indented half a level deeper than the annotation itself
        $headlineIndent = str_repeat(' ', $level * 4 + 2);
        $childIndent = str_repeat(' ', ($level + 1) * 4);

        /* list the state changes by their state model:
         * Foxit sorts this by Creation/Modification date.
         * Acrobat simply takes the last one.
         * We simply show all changes.
         */
        foreach ($repliesByStateModel as $stateModel => $stateReplies) {
            echo $headlineIndent;
            echo $stateModel . ":\n";
            foreach ($stateReplies as $stateReply) {
                echo $childIndent;
                // a state annotation may lack a date: do not fail on it
                echo $this->_formatDate($stateReply) . ': ';
                // the text label of the state annotation identifies the user who set the state
                echo $stateReply->getState() . ' - ' . $stateReply->getTextLabel() . "\n";
            }
        }

        if (count($realReplies) === 0) {
            return;
        }

        // recursively dump further replies
        echo $headlineIndent;
        echo "Replies:\n";
        usort($realReplies, array($this, '_orderByDate'));
        foreach ($realReplies as $realReply) {
            $this->_dumpReplies($annotations, $realReply, $level + 1);
        }
    }
}

Try it!

Select or upload a file

The uploaded files are bound to your browser session and are not accessible by any other user. They are deleted automatically after 24 hours.

Loading...