<?php

namespace OCA\Memento\Controller;

use DateTime;
use DOMDocument;
use OCP\Files\FileInfo;
use OCP\Share;

// Finds HTML files that claim to be a snapshot of the given URL;
// Returns an array of mementos, sorted by datetime, with each memento represented by an array:
// [
//     'mementoUrl' => URL of the file, relative to the nextcloud instance
//     'originalUrls' => original URLs, usually just one.
//     'datetime' => snapshot datetime as a unix timestamp
// ]
//
// Each mementoUrl is hardcoded to /apps/raw/..., thus relying on the 'raw' app to serve the files.
//
// The using class is expected to provide $this->serverContainer and $this->loggedInUserId.
trait MementoFinder
{
    // Finds the mementos of $url that belong to the single user $userId.
    // Public (link-shared) mementos are always considered; private ones only
    // when the logged-in user asks for their own mementos.
    // Returns the matching mementos sorted by datetime, oldest first.
    public function findSingleUserMementosForUrl($userId, $url)
    {
        // Get the user's public mementos.
        $foundMementos = findPublicMementos($this->serverContainer->getShareManager(), $userId);

        // If logged in, and asking for one's own mementos, get private mementos too.
        if ($this->loggedInUserId === $userId) {
            $userFolder = $this->serverContainer->getUserFolder($this->loggedInUserId);
            $moreMementos = findPrivateMementos($userFolder);
            $foundMementos = mergeMementos($foundMementos, $moreMementos);
        }

        // Filter those that match the requested URL, and sort them.
        // sortMementos() works on a copy, so its return value must be kept.
        $matchingMementos = filterMementosByUrl($foundMementos, $url);
        $matchingMementos = sortMementos($matchingMementos);
        return $matchingMementos;
    }

    // Finds the mementos of $url across all users: every user's public
    // mementos, plus the private ones of the logged-in user (if any).
    // Returns the matching mementos sorted by datetime, oldest first.
    public function findAllUsersMementosForUrl($url)
    {
        $foundMementos = [];

        // Get the public mementos of every user.
        $allUserIds = [];
        $this->serverContainer->getUserManager()->callForAllUsers(
            function ($user) use (&$allUserIds) {
                $allUserIds[] = $user->getUID();
            }
        );
        $shareManager = $this->serverContainer->getShareManager();
        foreach ($allUserIds as $userId) {
            $moreMementos = findPublicMementos($shareManager, $userId);
            $foundMementos = mergeMementos($foundMementos, $moreMementos);
        }

        // If logged in, get current user's private mementos too.
        if ($this->loggedInUserId) {
            $userFolder = $this->serverContainer->getUserFolder($this->loggedInUserId);
            $moreMementos = findPrivateMementos($userFolder);
            $foundMementos = mergeMementos($foundMementos, $moreMementos);
        }

        // Filter those that match the requested URL, and sort them.
        $matchingMementos = filterMementosByUrl($foundMementos, $url);
        $matchingMementos = sortMementos($matchingMementos);
        return $matchingMementos;
    }
}

// Collects the mementos among the HTML files found in $folder.
// Each returned memento gets a 'mementoUrl' below /apps/raw/files/.
function findPrivateMementos($folder)
{
    $urlForFile = function ($file) use ($folder) {
        $absoluteFilePath = $file->getPath();
        $relativeFilePath = $folder->getRelativePath($absoluteFilePath);
        $rawFileUrl = joinPaths("/apps/raw/files", $relativeFilePath); // XXX hardcoded dependency
        return $rawFileUrl;
    };

    // Peek into each HTML file the user owns, and return those that are mementos.
    $files = $folder->searchByMime('text/html');
    $foundMementos = [];
    foreach ($files as $file) {
        $mementoInfo = extractMementoInfo($file);
        if ($mementoInfo) {
            $mementoInfo['mementoUrl'] = $urlForFile($file);
            $foundMementos[] = $mementoInfo;
        }
    }
    return $foundMementos;
}

// Collects the mementos among the files that $userId has shared by link.
// Each returned memento gets a 'mementoUrl' of the form /apps/raw/s/<share token>.
function findPublicMementos($shareManager, $userId)
{
    $shares = $shareManager->getSharesBy(
        $userId,
        Share::SHARE_TYPE_LINK,
        null, /* path */
        true, /* include reshares */
        -1 /* no limit */
    );

    $urlForShare = function ($share) {
        return "/apps/raw/s/" . $share->getToken(); // XXX hardcoded dependency
    };

    // Look into every shared file to see if it is a memento.
    $foundMementos = [];
    foreach ($shares as $share) {
        $node = $share->getNode();
        if ($node->getType() === FileInfo::TYPE_FILE) {
            $mementoInfo = extractMementoInfo($node);
            if ($mementoInfo) {
                $mementoInfo['mementoUrl'] = $urlForShare($share);
                $foundMementos[] = $mementoInfo;
            }
        } else {
            // TODO add files inside shared folders? How to make URLs for those?
        }
    }
    return $foundMementos;
}

// Concatenates two lists of mementos.
function mergeMementos($mementos1, $mementos2)
{
    // TODO deduplicate (we'll get public & private URLs for the same files)
    return array_merge($mementos1, $mementos2);
}

// Keeps only the mementos that claim $url as one of their original URLs.
// Array keys of the input are preserved (array_filter semantics).
function filterMementosByUrl($mementos, $url)
{
    $matchingMementos = array_filter($mementos, function ($mementoInfo) use ($url) {
        return matchesUrl($mementoInfo, $url);
    });
    return $matchingMementos;
}

// Tells whether any of the memento's original URLs equals $url after normalisation.
function matchesUrl($mementoInfo, $url)
{
    // Normalise the requested URL once rather than on every iteration.
    $wantedUrl = normaliseUrl($url);
    foreach ($mementoInfo['originalUrls'] as $originalUrl) {
        if (normaliseUrl($originalUrl) === $wantedUrl) {
            return true;
        }
    }
    return false;
}

// Reduces a URL to the form used for comparisons.
function normaliseUrl($url)
{
    // Ignore trailing slashes. Because everybody does.
    $url = rtrim($url, '/');
    return $url;
}

// Sort an array of mementos by their datetime. Oldest first.
// The array is received by value: the sorted, reindexed copy is returned and
// the caller's array is left untouched.
function sortMementos($mementos)
{
    usort($mementos, function ($m1, $m2) {
        return $m1['datetime'] <=> $m2['datetime'];
    });
    return $mementos;
}

// Joins two path pieces with exactly one slash between them.
function joinPaths($piece1, $piece2)
{
    $left = rtrim($piece1, '/');
    $right = ltrim($piece2, '/');
    return "$left/$right";
}

// Parses the file's content as HTML and reads the memento metadata from its <head>.
// Returns ['originalUrls' => ..., 'datetime' => ...], or null when the content
// is empty, cannot be parsed, or has no <head> element.
function extractMementoInfo($file)
{
    $content = $file->getContent();
    // DOMDocument::loadHTML() refuses an empty string (a warning on PHP 7, a
    // ValueError on PHP 8), and an empty file cannot be a memento anyway.
    if (!is_string($content) || $content === '') {
        return null;
    }

    // Real-world HTML routinely trips libxml's parser; collect those complaints
    // internally instead of emitting a PHP warning for each one, and restore
    // the previous setting afterwards.
    $previousSetting = libxml_use_internal_errors(true);
    $DOM = new DOMDocument;
    $loaded = $DOM->loadHTML($content);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    // documentElement is null when nothing usable was parsed.
    if (!$loaded || $DOM->documentElement === null) {
        return null;
    }

    $headElement = $DOM->documentElement->getElementsByTagName('head')->item(0);
    if (!$headElement) {
        return null; // possibly $content was not HTML at all.
    }

    $originalUrls = getOriginalUrls($headElement);
    $datetime = getDatetime($headElement);
    return [
        'originalUrls' => $originalUrls,
        'datetime' => $datetime
    ];
}

// Reads hrefs from any <link> with relation type "original".
// (note the plural: we also accept pages that claim to correspond to multiple original URLs)
//
// $headElement is the <head> element of a parsed HTML document.
// Returns the list of valid URLs found, in document order; empty if there are none.
function getOriginalUrls($headElement)
{
    $originalUrls = [];
    foreach ($headElement->getElementsByTagName('link') as $link) {
        // rel holds a whitespace-separated set of link types, compared case-insensitively.
        $rels = preg_split('/\s+/', strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
        if (!in_array('original', $rels, true)) {
            continue;
        }

        // FILTER_VALIDATE_URL already insists on a scheme. The explicit
        // FILTER_FLAG_SCHEME_REQUIRED flag was deprecated in PHP 7.3 and no
        // longer exists in PHP 8, so it must not be passed.
        $href = filter_var($link->getAttribute('href'), FILTER_VALIDATE_URL);
        if ($href !== false) {
            $originalUrls[] = $href;
        }
    }
    return $originalUrls;
}

// Read the content of the first <meta http-equiv="Memento-Datetime">, if any.
//
// $headElement is the <head> element of a parsed HTML document.
// Returns the datetime as a unix timestamp, or null when there is no such
// <meta> element or its content is not a valid RFC 1123 date.
function getDatetime($headElement)
{
    foreach ($headElement->getElementsByTagName('meta') as $meta) {
        // Let's match case-insensitively, I guess?
        if (strtolower($meta->getAttribute('http-equiv')) !== 'memento-datetime') {
            continue;
        }

        // createFromFormat() returns false on a malformed date; calling
        // getTimestamp() on that would be a fatal error.
        $parsed = DateTime::createFromFormat(DateTime::RFC1123, trim($meta->getAttribute('content')));

        // Return directly at the first match, whether or not it parsed.
        return $parsed === false ? null : $parsed->getTimestamp();
    }
    return null;
}