userFolder = $serverContainer->getUserFolder($UserId); }

    /**
     * TimeGate endpoint: redirects to the stored snapshot of $url that best matches the
     * requested date.
     *
     * Every HTML file in the user folder is inspected for <link rel="original"> references
     * to $url. With no match a 404 is returned; with one match that file is used; with
     * several, the one whose Memento-Datetime is closest to the Accept-Datetime request
     * header (or to "now" when the header is absent) wins.
     *
     * @PublicPage
     * @NoAdminRequired
     * @NoCSRFRequired
     *
     * @param string $url the original URL a snapshot is requested for
     * @return DataDisplayResponse|RedirectResponse 404 if nothing matches, 400 if the
     *         Accept-Datetime header cannot be parsed, otherwise a 302 redirect
     */
    public function timeGate($url) {
        $matchingFiles = $this->findMatchingSnapshots($url);
        $matchCount = count($matchingFiles);

        // Choose one of the matched files, if any.
        if ($matchCount === 0) {
            // No matches. :(
            $message = "

No snapshots found for requested URL. :(

";
            return new DataDisplayResponse($message, 404);
        }

        if ($matchCount === 1) {
            // One match; no need to choose.
            $chosenFile = $matchingFiles[0];
        } else {
            // Multiple matches: choose based on requested date.
            $acceptDatetimeHeader = $this->request->getHeader('Accept-Datetime');
            if ($acceptDatetimeHeader) {
                // createFromFormat() signals failure by returning false, not by throwing,
                // so it has to be checked explicitly to be able to answer with a 400.
                $parsed = \DateTime::createFromFormat(\DateTime::RFC1123, $acceptDatetimeHeader);
                if ($parsed === false) {
                    return new DataDisplayResponse("Invalid Accept-Datetime header.", 400);
                }
                $requestedDatetime = $parsed->getTimestamp();
            } else {
                // Not sending the header means requesting the most recent version.
                $requestedDatetime = time();
            }

            // Pick the one closest to the requested date (either before or after it).
            $chosenFile = minBy($matchingFiles, function ($matchingFile) use ($requestedDatetime) {
                if ($matchingFile['datetime'] === null) {
                    // Undated snapshots lose against any dated one instead of being
                    // silently treated as if they were taken at the Unix epoch.
                    return PHP_INT_MAX;
                }
                return abs($matchingFile['datetime'] - $requestedDatetime);
            });
        }

        // Send a 302 Found redirect pointing to the chosen file.
        $absoluteFilePath = $chosenFile['file']->getPath();
        $relativeFilePath = $this->userFolder->getRelativePath($absoluteFilePath);
        // File names may contain spaces, '#', '?', '%', ...: percent-encode every path
        // segment (but not the separators) so the Location header is a valid URL.
        $encodedFilePath = implode('/', array_map('rawurlencode', explode('/', $relativeFilePath)));
        $fileUrl = joinPaths("/apps/raw/files", $encodedFilePath); // XXX hardcoded dependency
        $originalUrl = $chosenFile['original'];

        $response = new RedirectResponse($fileUrl);
        $response->setStatus(302);
        $response->addHeader('Vary', 'accept-datetime');
        $response->addHeader('Link', "<$originalUrl>; rel=\"original\"");
        return $response;
    }

    /**
     * Scans the user's HTML files for pages that have a <link rel="original"> referring
     * to the given URL.
     *
     * Files that cannot be read or parsed, or that lack a <head>, are skipped so that a
     * single broken file does not take the whole endpoint down.
     *
     * @param string $url the original URL to look for (compared after normaliseUrl())
     * @return array list of ['file' => node, 'original' => string, 'datetime' => int|null]
     */
    private function findMatchingSnapshots($url) {
        $normalisedUrl = normaliseUrl($url);
        $matchingFiles = [];

        // Saved pages rarely parse cleanly; have libxml buffer its complaints instead of
        // raising a PHP warning for each markup error. The previous setting is restored below.
        $previousLibxmlSetting = libxml_use_internal_errors(true);
        try {
            // Get all HTML files the user owns.
            foreach ($this->userFolder->searchByMime('text/html') as $file) {
                try {
                    $content = $file->getContent();
                    if (!is_string($content) || $content === '') {
                        continue; // Nothing to parse.
                    }

                    $DOM = new \DOMDocument;
                    $DOM->loadHTML($content);
                    libxml_clear_errors();

                    if ($DOM->documentElement === null) {
                        continue;
                    }
                    $head = $DOM->documentElement->getElementsByTagName('head')->item(0);
                    if ($head === null) {
                        continue;
                    }

                    foreach (getOriginals($head) as $original) {
                        if (normaliseUrl($original) === $normalisedUrl) {
                            // Found a match!
                            $matchingFiles[] = [
                                'file' => $file,
                                'original' => $original,
                                'datetime' => getDatetime($head),
                            ];
                        }
                    }
                } catch (\Throwable $e) {
                    // Throwable rather than Exception: DOM and date handling report many
                    // failures as Error/ValueError/TypeError, which Exception does not cover.
                    continue;
                }
            }
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousLibxmlSetting);
        }

        return $matchingFiles;
    }
}

/**
 * Joins two path pieces with exactly one slash between them.
 *
 * Trailing slashes of the first piece and leading slashes of the second are dropped.
 *
 * @param string $piece1 left-hand piece
 * @param string $piece2 right-hand piece
 * @return string the joined path
 */
function joinPaths($piece1, $piece2) {
    $left = rtrim($piece1, '/');
    $right = ltrim($piece2, '/');
    return "$left/$right";
}

// Reads hrefs from any <link> with relation type "original".
// (note the plural: we also accept pages that claim to correspond to multiple original URLs)

/**
 * Collects the href of every <link> below $head whose rel attribute lists "original".
 *
 * The rel value is treated as a whitespace-separated, case-insensitive token list.
 * Hrefs that are not valid absolute URLs are ignored.
 *
 * @param DOMElement $head element whose <link> descendants are inspected
 * @return string[] the validated hrefs, in document order
 */
function getOriginals($head) {
    $originals = [];
    foreach ($head->getElementsByTagName('link') as $link) {
        // Tokens may be separated by any run of whitespace, not just a single space.
        $rels = preg_split('/\s+/', strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
        if (!in_array('original', $rels, true)) {
            continue;
        }
        // FILTER_VALIDATE_URL already insists on a scheme; the FILTER_FLAG_SCHEME_REQUIRED
        // flag formerly passed here no longer exists in PHP 8.
        $href = filter_var(trim($link->getAttribute('href')), FILTER_VALIDATE_URL);
        if ($href !== false) {
            $originals[] = $href;
        }
    }
    return $originals;
}

/**
 * Reads the content of the first <meta http-equiv="memento-datetime">, if any.
 *
 * @param DOMElement $head element whose <meta> descendants are inspected
 * @return int|null Unix timestamp, or null when there is no such element or its content
 *                  is not an RFC 1123 date
 */
function getDatetime($head) {
    foreach ($head->getElementsByTagName('meta') as $meta) {
        // Let's match case-insensitively, I guess?
        if (strtolower($meta->getAttribute('http-equiv')) !== 'memento-datetime') {
            continue;
        }
        // Only the first match counts. createFromFormat() returns false on a malformed
        // date; report that as "no datetime" instead of calling a method on false.
        $parsed = \DateTime::createFromFormat(\DateTime::RFC1123, trim($meta->getAttribute('content')));
        return $parsed === false ? null : $parsed->getTimestamp();
    }
    return null;
}

/**
 * Reduces a URL to the form used for comparing a snapshot's original URL with the query.
 *
 * @param string $url
 * @return string the URL without trailing slashes and with slash runs collapsed
 */
function normaliseUrl($url) {
    // Ignore trailing slashes. Because everybody does.
    $url = rtrim($url, '/');
    // Replace multiple slashes with a single one. Because Nextcloud will have already done this to
    // the queried url (e.g. 'http://abc' arrives to us as 'http:/abc')
    $url = preg_replace('%/{2,}%', '/', $url);
    return $url;
}

/**
 * Returns the element of $array for which $iteratee yields the smallest value.
 *
 * Keys are preserved while iterating, so associative arrays work too. On ties the
 * first such element wins.
 *
 * @param array $array candidates
 * @param callable $iteratee maps a candidate to a comparable value
 * @return mixed the winning element, or null for an empty array
 */
function minBy($array, $iteratee) {
    $best = null;
    $bestValue = null;
    $found = false;
    foreach ($array as $element) {
        $value = $iteratee($element);
        if (!$found || $value < $bestValue) {
            $best = $element;
            $bestValue = $value;
            $found = true;
        }
    }
    return $best;
}