URL of the file, relative to the nextcloud instance
// 'originalUrls' => original URLs, usually just one.
// 'datetime' => snapshot datetime as a unix timestamp
// ]
//
// Each mementoUrl is hardcoded to /apps/raw/..., thus relying on the 'raw' app to serve the files.
trait MementoFinder {
    // Lists the mementos of $url that belong to user $userId: those found among
    // that user's public mementos, plus that user's private ones when the request
    // comes from that very user (i.e. $userId is the logged-in user).
    // Expects the using class to provide $this->serverContainer and
    // $this->loggedInUserId.
    function findSingleUserMementosForUrl($userId, $url) {
        $shareManager = $this->serverContainer->getShareManager();
        $candidates = findPublicMementos($shareManager, $userId);

        // Private mementos are only listed for their owner.
        $isAskingForOwnMementos = ($this->loggedInUserId === $userId);
        if ($isAskingForOwnMementos) {
            $ownFolder = $this->serverContainer->getUserFolder($this->loggedInUserId);
            $candidates = array_merge(
                $candidates,
                findPrivateMementos($ownFolder, $this->loggedInUserId)
            );
        }

        return listMementosMatchingUrl($candidates, $url);
    }

    // Lists the mementos of $url across the whole instance: the public mementos
    // of every user, plus the private ones of the logged-in user (if any).
    function findAllUsersMementosForUrl($url) {
        // Gather the ids of all users before querying their shares.
        $userIds = [];
        $collectUserId = function ($user) use (&$userIds) {
            $userIds[] = $user->getUID();
        };
        $this->serverContainer->getUserManager()->callForAllUsers($collectUserId);

        // Public mementos of each user, in the order the users were reported.
        $shareManager = $this->serverContainer->getShareManager();
        $candidates = [];
        foreach ($userIds as $userId) {
            $candidates = array_merge($candidates, findPublicMementos($shareManager, $userId));
        }

        // A logged-in user additionally gets to see their own private mementos.
        if ($this->loggedInUserId) {
            $ownFolder = $this->serverContainer->getUserFolder($this->loggedInUserId);
            $candidates = array_merge(
                $candidates,
                findPrivateMementos($ownFolder, $this->loggedInUserId)
            );
        }

        return listMementosMatchingUrl($candidates, $url);
    }
}
// Peeks into every HTML file inside $userFolder and returns the info of those
// that are mementos. Each returned entry gets a 'mementoUrl' that points at the
// file below /apps/raw/u/$userId, using the file's path relative to $userFolder.
function findPrivateMementos($userFolder, $userId) {
    $mementos = [];
    foreach ($userFolder->searchByMime('text/html') as $htmlFile) {
        $info = extractMementoInfo($htmlFile);
        if (!$info) {
            continue; // not a memento
        }

        $pathInUserFolder = $userFolder->getRelativePath($htmlFile->getPath());
        $info['mementoUrl'] = joinPaths("/apps/raw/u/$userId", $pathInUserFolder); // XXX hardcoded dependency
        $mementos[] = $info;
    }
    return $mementos;
}
// Goes through everything $userId has shared by link and returns the info of
// each memento found. A shared file is inspected directly; a shared folder is
// searched for HTML files. Each returned entry gets a 'mementoUrl' below
// /apps/raw/s/<share token>.
function findPublicMementos($shareManager, $userId) {
    $linkShares = $shareManager->getSharesBy(
        $userId,
        Share::SHARE_TYPE_LINK,
        null, // path
        true, // include reshares
        -1    // no limit
    );

    $mementos = [];
    foreach ($linkShares as $share) {
        $sharedNode = $share->getNode();

        // Share is a single file: the share URL itself serves the memento.
        if ($sharedNode->getType() === FileInfo::TYPE_FILE) {
            $info = extractMementoInfo($sharedNode);
            if ($info) {
                $info['mementoUrl'] = "/apps/raw/s/" . $share->getToken(); // XXX hardcoded dependency
                $mementos[] = $info;
            }
            continue;
        }

        // Share is a folder: look at every HTML file inside it, and address each
        // one by its path relative to the shared folder.
        foreach ($sharedNode->searchByMime('text/html') as $htmlFile) {
            $info = extractMementoInfo($htmlFile);
            if (!$info) {
                continue;
            }
            $pathInShare = $sharedNode->getRelativePath($htmlFile->getPath());
            $info['mementoUrl'] = joinPaths("/apps/raw/s/{$share->getToken()}", $pathInShare);
            $mementos[] = $info;
        }
    }
    return $mementos;
}
// Narrows $foundMementos down to the ones that are snapshots of $url, and
// returns them without duplicates, sorted by date.
function listMementosMatchingUrl($foundMementos, $url) {
    $relevant = filterMementosByUrl($foundMementos, $url);
    // The same file may have been found both through a public and a private URL.
    $unique = deduplicateMementos($relevant);
    return sortMementos($unique);
}
// Returns the entries of $mementos that match $url (see matchesUrl).
// The array keys of the kept entries are preserved.
function filterMementosByUrl($mementos, $url) {
    $kept = [];
    foreach ($mementos as $key => $mementoInfo) {
        if (matchesUrl($mementoInfo, $url)) {
            $kept[$key] = $mementoInfo;
        }
    }
    return $kept;
}
// Tells whether any of the memento's 'originalUrls' equals $url, comparing
// both sides after normalisation (see normaliseUrl).
function matchesUrl($mementoInfo, $url) {
    foreach ($mementoInfo['originalUrls'] as $candidate) {
        if (normaliseUrl($candidate) !== normaliseUrl($url)) {
            continue;
        }
        return true;
    }
    return false;
}
// Brings a URL into the form used for comparisons: any trailing slashes are
// dropped, so that 'http://example.com/' and 'http://example.com' are equal.
function normaliseUrl($url) {
    return rtrim($url, '/');
}
// Returns $mementos with only the first entry kept for each distinct 'id'.
// The order of the kept entries is unchanged; the result is reindexed from 0.
function deduplicateMementos($mementos) {
    $unique = [];
    $alreadySeen = []; // id => true
    foreach ($mementos as $memento) {
        $id = $memento['id'];
        if (isset($alreadySeen[$id])) {
            continue;
        }
        $alreadySeen[$id] = true;
        $unique[] = $memento;
    }
    return $unique;
}
// Returns the given mementos ordered by their 'datetime', oldest first.
// The result is reindexed from 0.
function sortMementos($mementos) {
    $byDatetimeAscending = function ($left, $right) {
        return $left['datetime'] <=> $right['datetime'];
    };
    usort($mementos, $byDatetimeAscending);
    return $mementos;
}
// Glues two path pieces together with exactly one slash in between, whatever
// slashes $piece1 ends with or $piece2 starts with.
function joinPaths($piece1, $piece2) {
    return rtrim($piece1, '/') . '/' . ltrim($piece2, '/');
}
// Parses the content of $file as HTML and reads the memento metadata from its
// <head>. Returns an array with the keys 'originalUrls', 'datetime' and 'id'
// (the file's id, used for deduplication), or null when the content is empty
// or yields no <head> element.
function extractMementoInfo($file) {
    $content = $file->getContent();
    // DOMDocument::loadHTML() rejects an empty string (with a warning before
    // PHP 8, with a ValueError since), so bail out before parsing.
    if (!is_string($content) || $content === '') {
        return null;
    }

    $document = new DOMDocument;
    // Malformed markup makes libxml report parse errors; collect them internally
    // instead of emitting a PHP warning for each one, then restore the setting.
    $previousErrorSetting = libxml_use_internal_errors(true);
    $document->loadHTML($content);
    if (!$previousErrorSetting) {
        // Only discard the error buffer if nobody else asked for it to be kept.
        libxml_clear_errors();
    }
    libxml_use_internal_errors($previousErrorSetting);

    // No root element means the parser could not build a document at all.
    $rootElement = $document->documentElement;
    if (!$rootElement) {
        return null;
    }

    $headElement = $rootElement->getElementsByTagName('head')->item(0);
    if (!$headElement) {
        return null; // possibly $content was not HTML at all.
    }

    return [
        'originalUrls' => getOriginalUrls($headElement),
        'datetime' => getDatetime($headElement),
        'id' => $file->getFileInfo()->getId() // for deduplication
    ];
}
// Reads the hrefs from any <link> with relation type "original".
// (note the plural: we also accept pages that claim to correspond to multiple original URLs)
// Returns the list of hrefs that are valid URLs; invalid ones are skipped.
function getOriginalUrls($headElement) {
    $originalUrls = [];
    foreach ($headElement->getElementsByTagName('link') as $link) {
        // The rel attribute holds a whitespace-separated list of link types,
        // which are matched case-insensitively.
        $rels = preg_split('/\s+/', strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
        if (!in_array('original', $rels, true)) {
            continue;
        }

        // FILTER_VALIDATE_URL requires a scheme by itself. The former
        // FILTER_FLAG_SCHEME_REQUIRED flag was deprecated in PHP 7.3 and removed
        // in PHP 8.0, where referencing it is a fatal error.
        $href = filter_var($link->getAttribute('href'), FILTER_VALIDATE_URL);
        if ($href) {
            $originalUrls[] = $href;
        }
    }
    return $originalUrls;
}
// Reads the content of the first <meta http-equiv="Memento-Datetime">, if any,
// and returns it as a unix timestamp. Returns null when there is no such
// element, or when its content is not a date in RFC 1123 format.
function getDatetime($headElement) {
    foreach ($headElement->getElementsByTagName('meta') as $meta) {
        // Let's match case-insensitively, I guess?
        if (strtolower($meta->getAttribute('http-equiv')) !== 'memento-datetime') {
            continue;
        }

        $parsed = DateTime::createFromFormat(DateTime::RFC1123, trim($meta->getAttribute('content')));
        // createFromFormat() returns false for unparsable input; treat that as
        // "no datetime" rather than calling a method on false.
        // Only the first matching element is considered.
        return $parsed === false ? null : $parsed->getTimestamp();
    }
    return null;
}