SetaPDF-Core shows how to extract comments ▷ setasign.com

Scale and Pad

Extract Comments

This demo shows how to extract comments in a structure similar to the one you know from PDF reader/viewer applications. We use a helper class that dumps the comment outline for us:

copy

PHP

<?php

use setasign\SetaPDF2\Core\DataStructure\Date;
use setasign\SetaPDF2\Core\Document;
use setasign\SetaPDF2\Core\Document\Page\Annotation\Annotation;
use setasign\SetaPDF2\Core\Document\Page\Annotation\MarkupAnnotation;
use setasign\SetaPDF2\Core\Document\Page\Annotation\TextAnnotation;
use setasign\SetaPDF2\Core\Document\Page\Annotations;

/**
 * Helper that echoes the comments (markup annotations) of a document as an
 * indented text outline: root comments per page, followed by their state
 * changes and their replies (recursively).
 */
class CommentsDumper
{
    /**
     * The document whose comments are dumped.
     *
     * @var Document
     */
    protected $_document;

    /**
     * The constructor
     *
     * @param Document $document The document to read the annotations from.
     */
    public function __construct(Document $document)
    {
        $this->_document = $document;
    }

    /**
     * Dump all comments
     *
     * Walks all pages, collects the markup annotations that are not a reply
     * to another annotation, orders them by date and echoes each of them
     * together with its reply tree.
     */
    public function dump()
    {
        $pages = $this->_document->getCatalog()->getPages();

        // iterate over all available pages
        for ($pageNo = 1, $pageCount = $pages->count(); $pageNo <= $pageCount; $pageNo++) {
            $annotations = $pages->getPage($pageNo)->getAnnotations();

            // extract all root annotations (markup annotations that are no replies)
            $rootAnnotations = [];
            foreach ($annotations->getAll() as $annotation) {
                if (!$annotation instanceof MarkupAnnotation || $annotation->isReplyTo()) {
                    continue;
                }

                $rootAnnotations[] = $annotation;
            }

            usort($rootAnnotations, [$this, '_orderByDate']);
            foreach ($rootAnnotations as $annotation) {
                $this->_dumpReplies($annotations, $annotation);
            }
        }
    }

    /**
     * Compare the dates of two annotation objects.
     *
     * The modification date is preferred, the creation date is the fallback.
     * If one of the annotations has no usable date the pair is treated as equal.
     *
     * @param MarkupAnnotation $a
     * @param MarkupAnnotation $b
     * @return integer -1, 0 or 1 as expected from a usort() callback
     */
    protected function _orderByDate($a, $b)
    {
        $dateA = $this->_getDate($a);
        $dateB = $this->_getDate($b);

        if (!$dateA instanceof Date || !$dateB instanceof Date) {
            return 0;
        }

        $_a = (int)$dateA->getAsDateTime()->format('U');
        $_b = (int)$dateB->getAsDateTime()->format('U');

        // the spaceship operator also yields 0 for identical timestamps, which
        // keeps the comparator symmetric (cmp(a, b) === -cmp(b, a))
        return $_a <=> $_b;
    }

    /**
     * Dumps the replies (recursively).
     *
     * Echoes one line for the given annotation, then its state changes
     * grouped by state model and finally its "real" replies, each one level
     * deeper.
     *
     * @param Annotations $annotations The annotations helper of the page, used to resolve replies.
     * @param Annotation $annotation The annotation to dump. Markup annotation methods are called
     *                               on it, so callers are expected to pass a MarkupAnnotation.
     * @param int $level The nesting level; each level is indented by 4 spaces.
     */
    protected function _dumpReplies(
        Annotations $annotations,
        Annotation $annotation,
        int $level = 0
    ) {
        echo str_repeat(' ', $level * 4);
        echo $this->_formatDate($this->_getDate($annotation)) . ': ';
        echo $annotation->getTextLabel() . ' (' . $annotation->getSubject() . '): ' . $annotation->getContents() . "\n";

        // nothing more to do if this annotation has no replies
        if (!$annotation->hasReplies($annotations)) {
            return;
        }

        // states and replies are created in the same structure
        $repliesByStateModel = [];
        $realReplies = [];

        foreach ($annotation->getReplies($annotations) as $reply) {
            if (!($reply instanceof TextAnnotation && $reply->getStateModel())) {
                $realReplies[] = $reply;
                continue;
            }

            // follow the chain of state changes: every entry is recorded and
            // the first reply of each entry is taken as the next state change
            while (true) {
                $repliesByStateModel[$reply->getStateModel()][] = $reply;
                $_replies = $reply->getReplies($annotations);
                if (count($_replies) === 0) {
                    break;
                }
                $reply = $_replies[0];
            }
        }

        // headings are indented half a level (2 spaces) deeper than the annotation
        $headingIndent = str_repeat(' ', $level * 4 + 2);
        $entryIndent = str_repeat(' ', ($level + 1) * 4);

        /* list the state changes by their state model:
         * Foxit sorts this by Creation/Modification date.
         * Acrobat simply takes the last one.
         * We simply show all changes.
         */
        foreach ($repliesByStateModel as $stateModel => $stateReplies) {
            echo $headingIndent;
            echo $stateModel . ":\n";
            foreach ($stateReplies as $stateReply) {
                echo $entryIndent;
                // a state entry without any date must not end in a fatal error
                echo $this->_formatDate($this->_getDate($stateReply)) . ': ';
                // NOTE(review): this prints the text label of the parent annotation, not
                // the one of the state entry itself - confirm whether this is intended.
                echo $stateReply->getState() . ' - ' . $annotation->getTextLabel() . "\n";
            }
        }

        if (count($realReplies) === 0) {
            return;
        }

        // recursively dump further replies
        echo $headingIndent;
        echo "Replies:\n";
        usort($realReplies, [$this, '_orderByDate']);
        foreach ($realReplies as $reply) {
            $this->_dumpReplies($annotations, $reply, $level + 1);
        }
    }

    /**
     * Get the date of an annotation: the modification date if it is set,
     * otherwise the creation date.
     *
     * @param Annotation $annotation
     * @return mixed Whatever the date getters return; callers check for a Date instance.
     */
    private function _getDate($annotation)
    {
        return $annotation->getModificationDate(false) ?: $annotation->getCreationDate(false);
    }

    /**
     * Format a resolved date for the output.
     *
     * @param mixed $date A Date instance or any other value if no date is available.
     * @return string The formatted date or a placeholder text.
     */
    private function _formatDate($date)
    {
        if ($date instanceof Date) {
            return $date->getAsDateTime()->format('Y-m-d H:i:s');
        }

        return 'Date Indeterminate';
    }
}

Try it!

 Settings
 Run
 Code

 Settings
 Run
 Code

Get image sizes and resolutions

Scale and Pad

SetaPDF-Core Access PDF documents at their lowest level with PHP

Extract Comments

Try it!

Select or upload a file

The uploaded files are bound to your browser session and are not accessible by any other user. They will get deleted after 24 hours automatically.

Extract Comments

Try it!

Select or upload a file The uploaded files are bound to your browser session and are not accessible by any other user. They will get deleted after 24 hours automatically.

Select or upload a file

The uploaded files are bound to your browser session and are not accessible by any other user. They will get deleted after 24 hours automatically.