|
|
@@ -1,7 +1,8 @@ |
|
|
|
<?php |
|
|
|
namespace OCA\Memento\Controller; |
|
|
|
|
|
|
|
use DOMDocument; |
|
|
|
require_once __DIR__ . '/findMementos.php'; |
|
|
|
|
|
|
|
use DateTime; |
|
|
|
|
|
|
|
use OCP\IRequest; |
|
|
@@ -10,6 +11,8 @@ use OCP\AppFramework\Controller; |
|
|
|
use OCP\AppFramework\Http\RedirectResponse; |
|
|
|
use OCP\AppFramework\Http\DataDisplayResponse; |
|
|
|
|
|
|
|
use findMementos; |
|
|
|
|
|
|
|
class TimeGateController extends Controller { |
|
|
|
private $userFolder; |
|
|
|
|
|
|
@@ -29,42 +32,16 @@ class TimeGateController extends Controller { |
|
|
|
* @NoCSRFRequired |
|
|
|
*/ |
|
|
|
public function timeGate($url) { |
|
|
|
// Get all HTML files the user owns. |
|
|
|
$files = $this->userFolder->searchByMime('text/html'); |
|
|
|
|
|
|
|
// Filter them for pages that have a <link rel="original"> referring to the given URL. |
|
|
|
$matchingFiles = array(); |
|
|
|
foreach ($files as $file) { |
|
|
|
$content = $file->getContent(); |
|
|
|
try { |
|
|
|
$DOM = new DOMDocument; |
|
|
|
$DOM->loadHTML($content); |
|
|
|
$head = $DOM->documentElement->getElementsByTagName('head')[0]; |
|
|
|
$originals = getOriginals($head); |
|
|
|
foreach ($originals as $original) { |
|
|
|
if (normaliseUrl($original) === normaliseUrl($url)) { |
|
|
|
// Found a match! |
|
|
|
$datetime = getDatetime($head); |
|
|
|
$matchingFiles[] = [ |
|
|
|
'file' => $file, |
|
|
|
'original' => $original, |
|
|
|
'datetime' => $datetime |
|
|
|
]; |
|
|
|
} |
|
|
|
} |
|
|
|
} catch (Exception $e) { |
|
|
|
continue; |
|
|
|
} |
|
|
|
} |
|
|
|
$matchingMementos = findMementos($this->userFolder, $url); |
|
|
|
|
|
|
|
// Choose one of the matched files, if any. |
|
|
|
if (count($matchingFiles) === 0) { |
|
|
|
// Choose one of the matched mementos, if any. |
|
|
|
if (count($matchingMementos) === 0) { |
|
|
|
// No matches. :( |
|
|
|
$message = "<h1>No snapshots found for requested URL. :(</h1>"; |
|
|
|
return new DataDisplayResponse($message, 404); |
|
|
|
} else if (count($matchingFiles) === 1) { |
|
|
|
} else if (count($matchingMementos) === 1) { |
|
|
|
// One match; no need to choose. |
|
|
|
$chosenFile = $matchingFiles[0]; |
|
|
|
$chosenMemento = $matchingMementos[0]; |
|
|
|
} else { |
|
|
|
// Multiple matches: choose based on requested date. |
|
|
|
$acceptDatetimeHeader = $this->request->getHeader('Accept-Datetime'); |
|
|
@@ -80,73 +57,22 @@ class TimeGateController extends Controller { |
|
|
|
$requestedDatetime = time(); |
|
|
|
} |
|
|
|
// Pick the one closest to the requested date (either before or after it). |
|
|
|
$chosenFile = minBy($matchingFiles, function ($matchingFile) use ($requestedDatetime) { |
|
|
|
return abs($matchingFile['datetime'] - $requestedDatetime); |
|
|
|
}); |
|
|
|
$chosenMemento = minBy($matchingMementos, |
|
|
|
function ($matchingMemento) use ($requestedDatetime) { |
|
|
|
return abs($matchingMemento['datetime'] - $requestedDatetime); |
|
|
|
} |
|
|
|
); |
|
|
|
} |
|
|
|
|
|
|
|
// Send a 302 Found redirect pointing to the chosen file. |
|
|
|
$absoluteFilePath = $chosenFile['file']->getPath(); |
|
|
|
$relativeFilePath = $this->userFolder->getRelativePath($absoluteFilePath); |
|
|
|
$fileUrl = joinPaths("/apps/raw/files", $relativeFilePath); // XXX hardcoded dependency |
|
|
|
$originalUrl = $chosenFile['original']; |
|
|
|
$response = new RedirectResponse($fileUrl); |
|
|
|
// Send a 302 Found redirect pointing to the chosen memento. |
|
|
|
$response = new RedirectResponse($chosenMemento['mementoUrl']); |
|
|
|
$response->setStatus(302); |
|
|
|
$response->addHeader('Vary', 'accept-datetime'); |
|
|
|
$response->addHeader('Link', "<$originalUrl>; rel=\"original\""); |
|
|
|
$response->addHeader('Link', "<{$chosenMemento['originalUrl']}>; rel=\"original\""); |
|
|
|
return $response; |
|
|
|
} |
|
|
|
} |
|
|
|
|
|
|
|
/**
 * Glue two path fragments together with exactly one slash between them.
 *
 * Any slashes at the end of the first fragment and at the start of the
 * second fragment are discarded before the two are joined.
 *
 * @param string $piece1 Left-hand fragment.
 * @param string $piece2 Right-hand fragment.
 * @return string The joined path.
 */
function joinPaths($piece1, $piece2) {
    return rtrim($piece1, '/') . '/' . ltrim($piece2, '/');
}
|
|
|
|
|
|
|
/**
 * Reads hrefs from any <link> with relation type "original".
 *
 * Note the plural: pages that claim to correspond to multiple original URLs
 * are accepted, and every valid href is returned.
 *
 * @param \DOMElement|null $head The document's <head> element; null (a page
 *                               without a <head>) yields an empty result.
 * @return string[] The hrefs that passed URL validation, in document order.
 */
function getOriginals($head) {
    // A document without a <head> has no links to inspect.
    if ($head === null) {
        return [];
    }

    $originals = [];
    foreach ($head->getElementsByTagName('link') as $link) {
        // The rel attribute is a whitespace-separated list of case-insensitive
        // keywords, so split on any whitespace run and compare in lower case.
        $rels = preg_split('/\s+/', strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
        if (!in_array('original', $rels, true)) {
            continue;
        }

        // FILTER_VALIDATE_URL already requires a scheme; the former
        // FILTER_FLAG_SCHEME_REQUIRED flag no longer exists in PHP 8.
        $href = filter_var($link->getAttribute('href'), FILTER_VALIDATE_URL);
        if ($href !== false) {
            $originals[] = $href;
        }
    }
    return $originals;
}
|
|
|
|
|
|
|
/**
 * Read the content of the first <meta http-equiv="Memento-Datetime">, if any.
 *
 * The attribute name is matched case-insensitively and the content is parsed
 * using the DateTime::RFC1123 format.
 *
 * @param \DOMElement $head The document's <head> element.
 * @return int|null Unix timestamp of the first such meta element, or null when
 *                  there is none or its content cannot be parsed.
 */
function getDatetime($head) {
    foreach ($head->getElementsByTagName('meta') as $meta) {
        if (strtolower($meta->getAttribute('http-equiv')) !== 'memento-datetime') {
            continue;
        }

        // Only the first matching element is considered.
        $parsed = DateTime::createFromFormat(DateTime::RFC1123, $meta->getAttribute('content'));

        // createFromFormat() returns false for malformed input; calling
        // getTimestamp() on that would abort the request with an Error.
        return $parsed === false ? null : $parsed->getTimestamp();
    }
    return null;
}
|
|
|
|
|
|
|
/**
 * Bring a URL into the loose canonical form used when comparing URLs.
 *
 * Two adjustments are made, in this order:
 *  1. trailing slashes are dropped, because they are commonly ignored;
 *  2. every run of two or more slashes is collapsed into one, because
 *     Nextcloud will have already done this to the queried url
 *     (e.g. 'http://abc' arrives to us as 'http:/abc').
 *
 * @param string $url The URL to normalise.
 * @return string The normalised URL.
 */
function normaliseUrl($url) {
    return preg_replace('%/{2,}%', '/', rtrim($url, '/'));
}
|
|
|
|
|
|
|
function minBy($array, $iteratee) { |
|
|
|
// is there any simpler way for this in php? |
|
|
|
$values = array_map($iteratee, $array); |
|
|
|