// URL of the file, relative to the nextcloud instance
//     'originalUrls' => original URLs, usually just one.
//     'datetime' => snapshot datetime as a unix timestamp
// ]
//
// Each mementoUrl is hardcoded to /apps/raw/..., thus relying on the 'raw' app to serve the files.

// Memento lookup methods. The class using this trait has to provide
// $this->serverContainer and $this->loggedInUserId.
trait MementoFinder
{
    // List the mementos of $url that belong to user $userId: the publicly shared ones, plus
    // the private ones if $userId is the logged-in user. Deduplicated and sorted by date.
    public function findSingleUserMementosForUrl($userId, $url)
    {
        // Get the user's public mementos.
        $foundMementos = findPublicMementos($this->serverContainer->getShareManager(), $userId);

        // If logged in, and asking for one's own mementos, get private mementos too.
        if ($this->loggedInUserId === $userId) {
            $userFolder = $this->serverContainer->getUserFolder($this->loggedInUserId);
            $foundMementos = array_merge($foundMementos, findPrivateMementos($userFolder));
        }

        return listMementosMatchingUrl($foundMementos, $url);
    }

    // List the mementos of $url across all users: everybody's publicly shared ones, plus the
    // private ones of the logged-in user (if any). Deduplicated and sorted by date.
    public function findAllUsersMementosForUrl($url)
    {
        // Collect the IDs of all users first; their shares are queried afterwards, outside the callback.
        $allUserIds = [];
        $this->serverContainer->getUserManager()->callForAllUsers(
            function ($user) use (&$allUserIds) {
                $allUserIds[] = $user->getUID();
            }
        );

        // Get the public mementos of every user. The per-user lists are gathered and merged
        // once at the end, rather than re-copying the growing result array for every user.
        $shareManager = $this->serverContainer->getShareManager();
        $mementoLists = [];
        foreach ($allUserIds as $userId) {
            $mementoLists[] = findPublicMementos($shareManager, $userId);
        }

        // If logged in, get current user's private mementos too.
        if ($this->loggedInUserId) {
            $userFolder = $this->serverContainer->getUserFolder($this->loggedInUserId);
            $mementoLists[] = findPrivateMementos($userFolder);
        }

        // The leading [] keeps array_merge valid even when there are no lists to unpack.
        $foundMementos = array_merge([], ...$mementoLists);

        return listMementosMatchingUrl($foundMementos, $url);
    }
}

// Find the mementos among the HTML files inside $folder.
// Each returned memento gets a 'mementoUrl' below /apps/raw/files, using the file's path relative to $folder.
function findPrivateMementos($folder)
{
    $urlForFile = function ($file) use ($folder) {
        $relativeFilePath = $folder->getRelativePath($file->getPath());
        return joinPaths("/apps/raw/files", $relativeFilePath); // XXX hardcoded dependency
    };

    // Peek into each HTML file the user owns, and return those that are mementos.
    $foundMementos = [];
    foreach ($folder->searchByMime('text/html') as $file) {
        $mementoInfo = extractMementoInfo($file);
        if (!$mementoInfo) {
            continue;
        }
        $mementoInfo['mementoUrl'] = $urlForFile($file);
        $foundMementos[] = $mementoInfo;
    }
    return $foundMementos;
}

// Find the mementos among the files that user $userId has shared by link.
// A shared file is checked itself; a shared folder is searched for HTML files.
// Each returned memento gets a 'mementoUrl' below /apps/raw/s/<share token>.
function findPublicMementos($shareManager, $userId)
{
    $shares = $shareManager->getSharesBy(
        $userId,
        Share::SHARE_TYPE_LINK,
        null, /* path */
        true, /* include reshares */
        -1 /* no limit */
    );

    $urlForSharedFile = function ($share) {
        return "/apps/raw/s/" . $share->getToken(); // XXX hardcoded dependency
    };

    $urlForFileInsideSharedFolder = function ($share, $folder, $file) {
        $relativeFilePath = $folder->getRelativePath($file->getPath());
        return joinPaths("/apps/raw/s/{$share->getToken()}", $relativeFilePath);
    };

    // Look into every shared file to see if it is a memento.
    $foundMementos = [];
    foreach ($shares as $share) {
        $node = $share->getNode();

        if ($node->getType() === FileInfo::TYPE_FILE) {
            $mementoInfo = extractMementoInfo($node);
            if ($mementoInfo) {
                $mementoInfo['mementoUrl'] = $urlForSharedFile($share);
                $foundMementos[] = $mementoInfo;
            }
            continue;
        }

        // Share is a folder: Go through all html files inside the shared folder.
        $folder = $node;
        foreach ($folder->searchByMime('text/html') as $file) {
            $mementoInfo = extractMementoInfo($file);
            if ($mementoInfo) {
                $mementoInfo['mementoUrl'] = $urlForFileInsideSharedFolder($share, $folder, $file);
                $foundMementos[] = $mementoInfo;
            }
        }
    }
    return $foundMementos;
}

// Reduce a list of found mementos to those that are snapshots of $url,
// without duplicates, sorted by date.
function listMementosMatchingUrl($foundMementos, $url)
{
    // Filter those that match the requested URL
    $matchingMementos = filterMementosByUrl($foundMementos, $url);

    // Deduplicate (as a file may be accessible both through a public and a private URL)
    $matchingMementos = deduplicateMementos($matchingMementos);

    // Sort them by date.
    return sortMementos($matchingMementos);
}

// Keep only the mementos that list $url among their original URLs.
// Note that array_filter preserves the keys of $mementos, so the result may have gaps in its keys.
function filterMementosByUrl($mementos, $url)
{
    return array_filter($mementos, function ($mementoInfo) use ($url) {
        return matchesUrl($mementoInfo, $url);
    });
}

// Tell whether any of the memento's original URLs equals $url (after normalisation).
function matchesUrl($mementoInfo, $url)
{
    // The requested URL is the same for every comparison, so normalise it only once.
    $wantedUrl = normaliseUrl($url);
    foreach ($mementoInfo['originalUrls'] as $originalUrl) {
        if (normaliseUrl($originalUrl) === $wantedUrl) {
            return true;
        }
    }
    return false;
}

// Bring a URL into the form used for comparing URLs.
function normaliseUrl($url)
{
    // Ignore trailing slashes. Because everybody does.
    return rtrim($url, '/');
}

// Drop mementos whose 'id' was already seen; of each set of duplicates the first one is kept.
// The result is re-indexed from zero.
function deduplicateMementos($mementos)
{
    $deduped = [];
    $seenIds = [];
    foreach ($mementos as $memento) {
        $id = $memento['id'];
        if (isset($seenIds[$id])) {
            continue;
        }
        $seenIds[$id] = true;
        $deduped[] = $memento;
    }
    return $deduped;
}

// Sort an array of mementos by their datetime. Oldest first.
// Returns the sorted array; the array passed in is left untouched (it is passed by value).
// A memento without a datetime carries null, which PHP orders before any non-zero timestamp.
function sortMementos($mementos)
{
    usort($mementos, function ($m1, $m2) {
        return $m1['datetime'] <=> $m2['datetime'];
    });
    return $mementos;
}

// Join two path pieces with exactly one slash between them,
// whether or not the pieces end/start with slashes themselves.
function joinPaths($piece1, $piece2)
{
    $left = rtrim($piece1, '/');
    $right = ltrim($piece2, '/');
    return "$left/$right";
}

// Read a file's content as HTML and extract the memento metadata from its <head>.
// Returns null if the content is empty or has no <head>; otherwise an array with the keys
// 'originalUrls', 'datetime' and 'id'. (the 'mementoUrl' is left for the caller to add)
function extractMementoInfo($file)
{
    $content = $file->getContent();
    // loadHTML cannot deal with empty input (a warning in PHP 7, a ValueError in PHP 8).
    if (!is_string($content) || $content === '') {
        return null;
    }

    $DOM = new DOMDocument;
    // Real-world pages are rarely valid HTML. Have libxml collect its complaints
    // instead of emitting a warning for each, then restore the previous setting.
    $previousSetting = libxml_use_internal_errors(true);
    $loaded = $DOM->loadHTML($content);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);
    if (!$loaded || !$DOM->documentElement) {
        return null; // possibly $content was not HTML at all.
    }

    $headElement = $DOM->documentElement->getElementsByTagName('head')->item(0);
    if (!$headElement) {
        return null; // possibly $content was not HTML at all.
    }

    return [
        'originalUrls' => getOriginalUrls($headElement),
        'datetime' => getDatetime($headElement),
        'id' => $file->getFileInfo()->getId() // for deduplication
    ];
}

// Reads hrefs from any <link> with relation type "original".
// (note the plural: we also accept pages that claim to correspond to multiple original URLs)
// Hrefs that are not valid URLs are skipped.
function getOriginalUrls($headElement)
{
    $originalUrls = [];
    foreach ($headElement->getElementsByTagName('link') as $link) {
        // A rel attribute holds whitespace-separated tokens; match them case-insensitively,
        // like we do for the http-equiv attribute.
        $rels = preg_split('/\s+/', strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
        if (!in_array('original', $rels, true)) {
            continue;
        }

        // FILTER_VALIDATE_URL insists on a scheme by itself. The explicit
        // FILTER_FLAG_SCHEME_REQUIRED flag no longer exists as of PHP 8.
        $href = filter_var($link->getAttribute('href'), FILTER_VALIDATE_URL);
        if ($href) {
            $originalUrls[] = $href;
        }
    }
    return $originalUrls;
}

// Read the content of the first <meta http-equiv="Memento-Datetime">, if any.
// Returns a unix timestamp, or null if there is no such element or its content is not an RFC 1123 date.
function getDatetime($headElement)
{
    foreach ($headElement->getElementsByTagName('meta') as $meta) {
        // Let's match case-insensitively, I guess?
        if (strtolower($meta->getAttribute('http-equiv')) !== 'memento-datetime') {
            continue;
        }

        // Return directly at the first match. createFromFormat gives false (not an object)
        // if the value does not fit the format, so check before asking for the timestamp.
        $parsed = DateTime::createFromFormat(DateTime::RFC1123, trim($meta->getAttribute('content')));
        return ($parsed === false) ? null : $parsed->getTimestamp();
    }
    return null;
}