Extract Comments
This demo shows how to extract comments in the same threaded structure that you know from PDF reader/viewer applications. We use a helper class that dumps the comment outline for us:
PHP
<?php

/**
 * Dumps the comments (markup annotations) of a PDF document to the output
 * as an indented outline: root comments first, followed by their state
 * changes and their replies (recursively).
 */
class CommentsDumper
{
    /**
     * The document whose comments are dumped.
     *
     * @var \SetaPDF_Core_Document
     */
    protected $_document;

    /**
     * The constructor.
     *
     * @param \SetaPDF_Core_Document $document
     */
    public function __construct(\SetaPDF_Core_Document $document)
    {
        $this->_document = $document;
    }

    /**
     * Dump all comments of all pages.
     *
     * For every page the root comments (markup annotations that are not a
     * reply to another annotation) are collected, ordered by date and then
     * echoed together with their replies.
     */
    public function dump()
    {
        $pages = $this->_document->getCatalog()->getPages();

        // iterate over all available pages (page numbers are 1-based)
        for ($pageNo = 1, $pageCount = $pages->count(); $pageNo <= $pageCount; $pageNo++) {
            $annotations = $pages->getPage($pageNo)->getAnnotations();

            // extract all root annotations
            $rootAnnotations = array();
            foreach ($annotations->getAll() as $annotation) {
                if (!$annotation instanceof \SetaPDF_Core_Document_Page_Annotation_Markup) {
                    continue;
                }

                // replies are dumped below their parent, not on the root level
                if ($annotation->isReplyTo()) {
                    continue;
                }

                $rootAnnotations[] = $annotation;
            }

            usort($rootAnnotations, array($this, '_orderByDate'));

            foreach ($rootAnnotations as $annotation) {
                $this->_dumpReplies($annotations, $annotation);
            }
        }
    }

    /**
     * Compare the dates of two annotation objects (usort() callback).
     *
     * The modification date is used if available, otherwise the creation
     * date. Annotations without any date compare as timestamp 0.
     *
     * @param \SetaPDF_Core_Document_Page_Annotation_Markup $a
     * @param \SetaPDF_Core_Document_Page_Annotation_Markup $b
     * @return integer -1 if $a is older, 1 if $a is newer, 0 if both dates are equal
     */
    protected function _orderByDate($a, $b)
    {
        $timeA = $this->_getTimestamp($a);
        $timeB = $this->_getTimestamp($b);

        // a comparator has to report equality, otherwise cmp(a, b) and
        // cmp(b, a) contradict each other for identical dates
        if ($timeA === $timeB) {
            return 0;
        }

        return $timeA < $timeB ? -1 : 1;
    }

    /**
     * Get the date object of an annotation: the modification date if it is
     * set, otherwise the creation date.
     *
     * @param object $annotation An object offering getModificationDate() and getCreationDate()
     * @return object|null The date object or null if neither date is available
     */
    private function _getDate($annotation)
    {
        $date = $annotation->getModificationDate(false);
        if (!$date) {
            $date = $annotation->getCreationDate(false);
        }

        return $date ? $date : null;
    }

    /**
     * Get the unix timestamp of an annotation's date.
     *
     * @param object $annotation
     * @return integer The timestamp or 0 if the annotation has no date
     */
    private function _getTimestamp($annotation)
    {
        $date = $this->_getDate($annotation);

        return $date ? (int)$date->getAsDateTime()->format('U') : 0;
    }

    /**
     * Get the printable date of an annotation.
     *
     * @param object $annotation
     * @return string The date as "Y-m-d H:i:s" or "unknown date" if the annotation has no date
     */
    private function _formatDate($annotation)
    {
        $date = $this->_getDate($annotation);

        return $date ? $date->getAsDateTime()->format('Y-m-d H:i:s') : 'unknown date';
    }

    /**
     * Dumps an annotation followed by its state changes and its replies (recursively).
     *
     * Each nesting level is indented by 4 spaces; the group headings of the
     * state models and the "Replies:" heading are indented by 2 additional spaces.
     *
     * @param \SetaPDF_Core_Document_Page_Annotations $annotations The annotations helper of the page
     * @param \SetaPDF_Core_Document_Page_Annotation $annotation The annotation to dump
     * @param int $level The current nesting level
     */
    protected function _dumpReplies(
        \SetaPDF_Core_Document_Page_Annotations $annotations,
        \SetaPDF_Core_Document_Page_Annotation $annotation,
        $level = 0
    ) {
        echo str_repeat(' ', $level * 4);
        echo $this->_formatDate($annotation) . ': ';
        echo $annotation->getTextLabel() . ' (' . $annotation->getSubject() . '): '
            . $annotation->getContents() . "\n";

        // check if this annotation has replies
        if (!$annotation->hasReplies($annotations)) {
            return;
        }

        // states and replies are created in the same structure
        $repliesByStateModel = array();
        $realReplies = array();

        foreach ($annotation->getReplies($annotations) as $reply) {
            $isState = $reply instanceof \SetaPDF_Core_Document_Page_Annotation_Text
                && $reply->getStateModel();

            if (!$isState) {
                $realReplies[] = $reply;
                continue;
            }

            // follow the chain of state changes (each one is the first reply
            // of its predecessor) and collect them by their state model
            $stateReply = $reply;
            while (true) {
                $repliesByStateModel[$stateReply->getStateModel()][] = $stateReply;

                $followUps = $stateReply->getReplies($annotations);
                if (count($followUps) === 0) {
                    break;
                }

                $stateReply = $followUps[0];
            }
        }

        // integer arithmetic only: str_repeat() expects an integer count
        $headingIndent = str_repeat(' ', $level * 4 + 2);
        $entryIndent = str_repeat(' ', ($level + 1) * 4);

        /* list the state changes by their state model:
         * Foxit sorts this by Creation/Modification date.
         * Acrobat simply takes the last one.
         * We simply show all changes.
         */
        foreach ($repliesByStateModel as $stateModel => $stateReplies) {
            echo $headingIndent;
            echo $stateModel . ":\n";

            foreach ($stateReplies as $stateReply) {
                echo $entryIndent;
                echo $this->_formatDate($stateReply) . ': ';
                // print the label of the state annotation itself (per the PDF
                // specification its T entry names the user who set the state),
                // not the label of the commented annotation
                echo $stateReply->getState() . ' - ' . $stateReply->getTextLabel() . "\n";
            }
        }

        if (count($realReplies) == 0) {
            return;
        }

        // recursively dump further replies
        echo $headingIndent;
        echo "Replies:\n";

        usort($realReplies, array($this, '_orderByDate'));
        foreach ($realReplies as $reply) {
            $this->_dumpReplies($annotations, $reply, $level + 1);
        }
    }
}