|
123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155 |
- <?php
- namespace OCA\Memento\Controller;
-
- use DOMDocument;
- use DateTime;
-
- use OCP\IRequest;
- use OCP\IServerContainer;
- use OCP\AppFramework\Controller;
- use OCP\AppFramework\Http\RedirectResponse;
- use OCP\AppFramework\Http\DataDisplayResponse;
-
/**
 * Memento TimeGate: given an original URL, redirects to the stored snapshot
 * (an HTML file in the user's folder) that declares itself a copy of that URL
 * and whose Memento-Datetime is closest to the requested Accept-Datetime.
 */
class TimeGateController extends Controller {
    /** Folder of the user whose files are searched for snapshots. */
    private $userFolder;

    public function __construct(
        $AppName,
        IRequest $request,
        $UserId,
        IServerContainer $serverContainer
    ) {
        parent::__construct($AppName, $request);
        $this->userFolder = $serverContainer->getUserFolder($UserId);
    }

    /**
     * Responds with a 302 redirect to the best matching snapshot of $url,
     * a 404 if no snapshot matches, or a 400 if several snapshots match and
     * the Accept-Datetime header cannot be parsed.
     *
     * @PublicPage
     * @NoAdminRequired
     * @NoCSRFRequired
     */
    public function timeGate($url) {
        $matchingFiles = $this->findMatchingFiles($url);

        // Choose one of the matched files, if any.
        if (count($matchingFiles) === 0) {
            // No matches. :(
            $message = "<h1>No snapshots found for requested URL. :(</h1>";
            return new DataDisplayResponse($message, 404);
        } else if (count($matchingFiles) === 1) {
            // One match; no need to choose (the header is not even looked at).
            $chosenFile = $matchingFiles[0];
        } else {
            // Multiple matches: choose based on requested date.
            $requestedDatetime = $this->getRequestedTimestamp();
            if ($requestedDatetime === false) {
                return new DataDisplayResponse("Invalid Accept-Datetime header.", 400);
            }
            // Pick the one closest to the requested date (either before or after it).
            $chosenFile = minBy($matchingFiles, function ($matchingFile) use ($requestedDatetime) {
                if ($matchingFile['datetime'] === null) {
                    // Snapshots without a usable Memento-Datetime are only
                    // chosen when nothing dated is available.
                    return PHP_INT_MAX;
                }
                return abs($matchingFile['datetime'] - $requestedDatetime);
            });
        }

        // Send a 302 Found redirect pointing to the chosen file.
        $absoluteFilePath = $chosenFile['file']->getPath();
        $relativeFilePath = $this->userFolder->getRelativePath($absoluteFilePath);
        $fileUrl = joinPaths("/apps/raw/files", $relativeFilePath); // XXX hardcoded dependency
        $originalUrl = $chosenFile['original'];
        $response = new RedirectResponse($fileUrl);
        $response->setStatus(302);
        $response->addHeader('Vary', 'accept-datetime');
        $response->addHeader('Link', "<$originalUrl>; rel=\"original\"");
        return $response;
    }

    /**
     * Scans the user's HTML files for pages having a <link rel="original">
     * that refers to the given URL.
     *
     * Files that cannot be read or parsed are skipped.
     *
     * @param string $url The original URL being looked for.
     * @return array List of ['file' => ..., 'original' => string, 'datetime' => int|null].
     */
    private function findMatchingFiles($url) {
        $wantedUrl = normaliseUrl($url);
        $matchingFiles = [];

        // Real-world HTML is rarely valid; collect libxml's complaints
        // internally instead of letting them surface as PHP warnings.
        $previousSetting = libxml_use_internal_errors(true);
        try {
            foreach ($this->userFolder->searchByMime('text/html') as $file) {
                try {
                    $content = $file->getContent();
                    if (!is_string($content) || $content === '') {
                        continue;
                    }
                    $DOM = new DOMDocument;
                    $loaded = $DOM->loadHTML($content);
                    libxml_clear_errors();
                    if (!$loaded || $DOM->documentElement === null) {
                        continue;
                    }
                    $head = $DOM->documentElement->getElementsByTagName('head')->item(0);
                    if ($head === null) {
                        continue;
                    }
                    foreach (getOriginals($head) as $original) {
                        if (normaliseUrl($original) === $wantedUrl) {
                            // Found a match!
                            $matchingFiles[] = [
                                'file' => $file,
                                'original' => $original,
                                'datetime' => getDatetime($head)
                            ];
                        }
                    }
                } catch (\Exception $e) {
                    // Fully qualified on purpose: a bare `Exception` would be
                    // resolved inside this namespace and never match.
                    continue;
                }
            }
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previousSetting);
        }

        return $matchingFiles;
    }

    /**
     * Determines the point in time the client asked for.
     *
     * @return int|false Unix timestamp from the Accept-Datetime header, the
     *                   current time if the header is absent, or false if the
     *                   header is present but not a valid RFC 1123 date.
     */
    private function getRequestedTimestamp() {
        $acceptDatetimeHeader = $this->request->getHeader('Accept-Datetime');
        if ($acceptDatetimeHeader === null || $acceptDatetimeHeader === '') {
            // Not sending the header means requesting the most recent version.
            return time();
        }
        // createFromFormat() signals failure by returning false, not by throwing.
        $parsed = DateTime::createFromFormat(DateTime::RFC1123, $acceptDatetimeHeader);
        if ($parsed === false) {
            return false;
        }
        return $parsed->getTimestamp();
    }
}
-
/**
 * Concatenates two path pieces with exactly one slash between them.
 *
 * Trailing slashes of the first piece and leading slashes of the second piece
 * are dropped before joining.
 *
 * @param string $piece1 Left-hand part of the path.
 * @param string $piece2 Right-hand part of the path.
 * @return string The joined path.
 */
function joinPaths($piece1, $piece2) {
    return rtrim($piece1, '/') . '/' . ltrim($piece2, '/');
}
-
/**
 * Reads hrefs from any <link> with relation type "original".
 * (note the plural: we also accept pages that claim to correspond to multiple original URLs)
 *
 * The rel attribute is treated as a whitespace-separated, case-insensitive
 * token list. Hrefs that do not pass FILTER_VALIDATE_URL are ignored.
 *
 * @param \DOMElement $head The document's <head> element.
 * @return string[] The validated original URLs, in document order.
 */
function getOriginals($head) {
    $originals = [];
    foreach ($head->getElementsByTagName('link') as $link) {
        $rels = preg_split('/\s+/', strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
        if (!in_array('original', $rels, true)) {
            continue;
        }
        // FILTER_VALIDATE_URL always requires a scheme; the former
        // FILTER_FLAG_SCHEME_REQUIRED flag no longer exists in PHP 8.
        $href = filter_var($link->getAttribute('href'), FILTER_VALIDATE_URL);
        if ($href !== false) {
            $originals[] = $href;
        }
    }
    return $originals;
}
-
/**
 * Read the content of the first <meta http-equiv="Memento-Datetime">, if any.
 *
 * @param \DOMElement $head The document's <head> element.
 * @return int|null Unix timestamp of the snapshot, or null if there is no
 *                  such <meta> or its content is not a valid RFC 1123 date.
 */
function getDatetime($head) {
    foreach ($head->getElementsByTagName('meta') as $meta) {
        // Let's match case-insensitively, I guess?
        if (strtolower($meta->getAttribute('http-equiv')) !== 'memento-datetime') {
            continue;
        }
        // createFromFormat() returns false (it does not throw) on bad input.
        $parsed = DateTime::createFromFormat(DateTime::RFC1123, $meta->getAttribute('content'));
        // Decide at the first match, whether or not it could be parsed.
        return $parsed === false ? null : $parsed->getTimestamp();
    }
    return null;
}
-
/**
 * Brings a URL into the simplified form used for comparing URLs.
 *
 * Trailing slashes are ignored (because everybody does), and every run of
 * several slashes is collapsed into a single one, because Nextcloud will have
 * already done this to the queried url (e.g. 'http://abc' arrives to us as
 * 'http:/abc').
 *
 * @param string $url The URL to normalise.
 * @return string The normalised URL.
 */
function normaliseUrl($url) {
    $withoutTrailingSlashes = rtrim($url, '/');
    return preg_replace('%/{2,}%', '/', $withoutTrailingSlashes);
}
-
/**
 * Returns the element of $array for which $iteratee yields the smallest value.
 *
 * The iteratee is called once per element. On ties the earliest element wins.
 *
 * @param array    $array    The candidates.
 * @param callable $iteratee Maps a candidate to a comparable score.
 * @return mixed The candidate with the lowest score, or null if $array is empty.
 */
function minBy($array, $iteratee) {
    $bestItem = null;
    $bestScore = null;
    $hasBest = false;
    foreach ($array as $item) {
        $score = $iteratee($item);
        // Strict "less than" keeps the first of several equally good items.
        if (!$hasBest || $score < $bestScore) {
            $bestItem = $item;
            $bestScore = $score;
            $hasBest = true;
        }
    }
    return $bestItem;
}
|