userFolder = $serverContainer->getUserFolder($UserId);
}
/**
* @PublicPage
* @NoAdminRequired
* @NoCSRFRequired
*/
public function timeGate($url) {
    // Memento-style TimeGate: looks through the user's stored HTML snapshots for pages that
    // declare the given URL as their original, picks the snapshot closest to the datetime
    // requested through the Accept-Datetime header, and redirects to it.
    // Returns a 404 response when no snapshot matches, a 400 response when the
    // Accept-Datetime header cannot be parsed, and a 302 redirect otherwise.

    // Get all HTML files the user owns.
    $files = $this->userFolder->searchByMime('text/html');

    // Filter them for pages that have a <link rel="original"> referring to the given URL.
    $normalisedUrl = normaliseUrl($url); // loop-invariant, so computed once
    $matchingFiles = [];
    foreach ($files as $file) {
        $content = $file->getContent();
        if (!is_string($content) || $content === '') {
            // Nothing to parse (DOMDocument::loadHTML rejects empty input).
            continue;
        }
        try {
            $DOM = new DOMDocument;
            $DOM->loadHTML($content);
            $root = $DOM->documentElement;
            $head = ($root === null) ? null : $root->getElementsByTagName('head')->item(0);
            if ($head === null) {
                // No <head>, hence no <link>/<meta> to inspect.
                continue;
            }
            $originals = getOriginals($head);
            foreach ($originals as $original) {
                if (normaliseUrl($original) === $normalisedUrl) {
                    // Found a match!
                    $matchingFiles[] = [
                        'file' => $file,
                        'original' => $original,
                        'datetime' => getDatetime($head)
                    ];
                }
            }
        } catch (\Throwable $e) {
            // Deliberately best-effort: a failure while inspecting one file must not break the
            // lookup for all the others. \Throwable (not just Exception) is needed because
            // PHP reports null-dereferences and invalid arguments as Error subclasses.
            continue;
        }
    }

    // Choose one of the matched files, if any.
    if (count($matchingFiles) === 0) {
        // No matches. :(
        $message = "\nNo snapshots found for requested URL. :(\n";
        return new DataDisplayResponse($message, 404);
    }

    if (count($matchingFiles) === 1) {
        // One match; no need to choose.
        $chosenFile = $matchingFiles[0];
    } else {
        // Multiple matches: choose based on requested date.
        $acceptDatetimeHeader = $this->request->getHeader('Accept-Datetime');
        if ($acceptDatetimeHeader) {
            // createFromFormat() signals a parse failure by returning false, not by throwing.
            $parsed = DateTime::createFromFormat(DateTime::RFC1123, $acceptDatetimeHeader);
            if ($parsed === false) {
                return new DataDisplayResponse("Invalid Accept-Datetime header.", 400);
            }
            $requestedDatetime = $parsed->getTimestamp();
        } else {
            // Not sending the header means requesting the most recent version.
            $requestedDatetime = time();
        }
        // Pick the one closest to the requested date (either before or after it).
        // A snapshot whose 'datetime' is null takes part in the subtraction as 0.
        $chosenFile = minBy($matchingFiles, function ($matchingFile) use ($requestedDatetime) {
            return abs($matchingFile['datetime'] - $requestedDatetime);
        });
    }

    // Send a 302 Found redirect pointing to the chosen file.
    $absoluteFilePath = $chosenFile['file']->getPath();
    $relativeFilePath = $this->userFolder->getRelativePath($absoluteFilePath);
    $fileUrl = joinPaths("/apps/raw/files", $relativeFilePath); // XXX hardcoded dependency
    $originalUrl = $chosenFile['original'];
    $response = new RedirectResponse($fileUrl);
    $response->setStatus(302);
    $response->addHeader('Vary', 'accept-datetime');
    $response->addHeader('Link', "<$originalUrl>; rel=\"original\"");
    return $response;
}
}
/**
 * Joins two path pieces with exactly one slash at the seam.
 *
 * Trailing slashes of the first piece and leading slashes of the second piece are dropped
 * before the two are glued together with a single '/'.
 *
 * @param string $piece1 Left-hand part of the path.
 * @param string $piece2 Right-hand part of the path.
 * @return string The combined path.
 */
function joinPaths($piece1, $piece2) {
    return rtrim($piece1, '/') . '/' . ltrim($piece2, '/');
}
/**
 * Reads the hrefs from any <link> with relation type "original".
 *
 * Note the plural: pages that claim to correspond to multiple original URLs are accepted too.
 * The rel attribute is treated as a whitespace-separated, case-insensitive token list, and
 * hrefs that do not pass FILTER_VALIDATE_URL are ignored.
 *
 * @param mixed $head DOM node whose descendant <link> elements are inspected
 *                    (must provide getElementsByTagName()).
 * @return string[] The valid original URLs, in document order.
 */
function getOriginals($head) {
    $originals = [];
    foreach ($head->getElementsByTagName('link') as $link) {
        $rels = preg_split('/\s+/', strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
        if (!in_array('original', $rels, true)) {
            continue;
        }
        // FILTER_VALIDATE_URL already requires a scheme; the former FILTER_FLAG_SCHEME_REQUIRED
        // flag was redundant and no longer exists as of PHP 8.0.
        $href = filter_var($link->getAttribute('href'), FILTER_VALIDATE_URL);
        if ($href) {
            $originals[] = $href;
        }
    }
    return $originals;
}
/**
 * Reads the content of the first <meta http-equiv="Memento-Datetime">, if any.
 *
 * The http-equiv value is matched case-insensitively and the content is parsed as an
 * RFC 1123 date. Only the first matching <meta> is considered.
 *
 * @param mixed $head DOM node whose descendant <meta> elements are inspected
 *                    (must provide getElementsByTagName()).
 * @return int|null Unix timestamp of the snapshot, or null when there is no such <meta>
 *                  or its content cannot be parsed.
 */
function getDatetime($head) {
    foreach ($head->getElementsByTagName('meta') as $meta) {
        if (strtolower($meta->getAttribute('http-equiv')) !== 'memento-datetime') {
            continue;
        }
        // createFromFormat() returns false (it does not throw) on unparseable input, so the
        // result has to be checked before calling getTimestamp() on it.
        $parsed = DateTime::createFromFormat(DateTime::RFC1123, $meta->getAttribute('content'));
        return ($parsed === false) ? null : $parsed->getTimestamp();
    }
    return null;
}
/**
 * Brings a URL into the loose canonical form used when comparing URLs.
 *
 * Trailing slashes are stripped first; after that every run of two or more slashes is
 * collapsed into one. The collapsing mirrors what Nextcloud will already have done to the
 * queried URL (e.g. 'http://abc' arrives to us as 'http:/abc').
 *
 * @param string $url The URL to normalise.
 * @return string The normalised URL.
 */
function normaliseUrl($url) {
    $withoutTrailingSlashes = rtrim($url, '/');
    return preg_replace('%/{2,}%', '/', $withoutTrailingSlashes);
}
/**
 * Returns the element of $array for which $iteratee yields the smallest value.
 *
 * The array is walked once; when several elements share the smallest value, the first of
 * them wins.
 *
 * @param array $array Elements to choose from.
 * @param callable $iteratee Called with one element; returns the value to minimise.
 * @return mixed The minimising element, or null when $array is empty.
 */
function minBy($array, $iteratee) {
    $best = null;
    $bestScore = null;
    $seenAny = false;
    foreach ($array as $item) {
        $score = $iteratee($item);
        // Strict '<' keeps the earliest element on ties.
        if (!$seenAny || $score < $bestScore) {
            $best = $item;
            $bestScore = $score;
            $seenAny = true;
        }
    }
    return $best;
}