diff --git a/lib/Controller/TimeGateController.php b/lib/Controller/TimeGateController.php
index 394db7c..49842d7 100644
--- a/lib/Controller/TimeGateController.php
+++ b/lib/Controller/TimeGateController.php
@@ -1,7 +1,8 @@
userFolder->searchByMime('text/html');
-
- // Filter them for pages that have a <link rel="original"> referring to the given URL.
- $matchingFiles = array();
- foreach ($files as $file) {
- $content = $file->getContent();
- try {
- $DOM = new DOMDocument;
- $DOM->loadHTML($content);
- $head = $DOM->documentElement->getElementsByTagName('head')[0];
- $originals = getOriginals($head);
- foreach ($originals as $original) {
- if (normaliseUrl($original) === normaliseUrl($url)) {
- // Found a match!
- $datetime = getDatetime($head);
- $matchingFiles[] = [
- 'file' => $file,
- 'original' => $original,
- 'datetime' => $datetime
- ];
- }
- }
- } catch (Exception $e) {
- continue;
- }
- }
+ $matchingMementos = findMementos($this->userFolder, $url);
- // Choose one of the matched files, if any.
- if (count($matchingFiles) === 0) {
+ // Choose one of the matched mementos, if any.
+ if (count($matchingMementos) === 0) {
// No matches. :(
$message = "
No snapshots found for requested URL. :(
";
return new DataDisplayResponse($message, 404);
- } else if (count($matchingFiles) === 1) {
+ } else if (count($matchingMementos) === 1) {
// One match; no need to choose.
- $chosenFile = $matchingFiles[0];
+ $chosenMemento = $matchingMementos[0];
} else {
// Multiple matches: choose based on requested date.
$acceptDatetimeHeader = $this->request->getHeader('Accept-Datetime');
@@ -80,73 +57,22 @@ class TimeGateController extends Controller {
$requestedDatetime = time();
}
// Pick the one closest to the requested date (either before or after it).
- $chosenFile = minBy($matchingFiles, function ($matchingFile) use ($requestedDatetime) {
- return abs($matchingFile['datetime'] - $requestedDatetime);
- });
+ $chosenMemento = minBy($matchingMementos,
+ function ($matchingMemento) use ($requestedDatetime) {
+ return abs($matchingMemento['datetime'] - $requestedDatetime);
+ }
+ );
}
- // Send a 302 Found redirect pointing to the chosen file.
- $absoluteFilePath = $chosenFile['file']->getPath();
- $relativeFilePath = $this->userFolder->getRelativePath($absoluteFilePath);
- $fileUrl = joinPaths("/apps/raw/files", $relativeFilePath); // XXX hardcoded dependency
- $originalUrl = $chosenFile['original'];
- $response = new RedirectResponse($fileUrl);
+ // Send a 302 Found redirect pointing to the chosen memento.
+ $response = new RedirectResponse($chosenMemento['mementoUrl']);
$response->setStatus(302);
$response->addHeader('Vary', 'accept-datetime');
- $response->addHeader('Link', "<$originalUrl>; rel=\"original\"");
+ $response->addHeader('Link', "<{$chosenMemento['originalUrl']}>; rel=\"original\"");
return $response;
}
}
-function joinPaths($piece1, $piece2) {
- $left = rtrim($piece1, '/');
- $right = ltrim($piece2, '/');
- return "$left/$right";
-}
-
-// Reads hrefs from any <link> with relation type "original".
-// (note the plural: we also accept pages that claim to correspond to multiple original URLs)
-function getOriginals($head) {
- $originals = [];
- $links = $head->getElementsByTagName('link');
- foreach ($links as $link) {
- $rels = explode(' ', $link->getAttribute('rel'));
- if (in_array('original', $rels)) {
- $href = $link->getAttribute('href');
- $href = filter_var($href, FILTER_VALIDATE_URL, FILTER_FLAG_SCHEME_REQUIRED);
- if ($href) {
- $originals[] = $href;
- }
- }
- }
- return $originals;
-}
-
-// Read the content of the first <meta http-equiv="Memento-Datetime">, if any.
-function getDatetime($head) {
- $metas = $head->getElementsByTagName('meta');
- foreach($metas as $meta) {
- // Let's match case-insensitively, I guess?
- if (strtolower($meta->getAttribute('http-equiv')) === 'memento-datetime') {
- $datetime = $meta->getAttribute('content');
- $datetime = DateTime::createFromFormat(DateTime::RFC1123, $datetime)->getTimestamp();
- return $datetime; // Return directly at the first match
- }
- }
- return null;
-}
-
-function normaliseUrl($url) {
- // Ignore trailing slashes. Because everybody does.
- $url = rtrim($url, '/');
-
- // Replace multiple slashes with a single one. Because Nextcloud will have already done this to
- // the queried url (e.g. 'http://abc' arrives to us as 'http:/abc')
- $url = preg_replace('%/{2,}%', '/', $url);
-
- return $url;
-}
-
function minBy($array, $iteratee) {
// is there any simpler way for this in php?
$values = array_map($iteratee, $array);
diff --git a/lib/Controller/findMementos.php b/lib/Controller/findMementos.php
new file mode 100644
index 0000000..3a104dd
--- /dev/null
+++ b/lib/Controller/findMementos.php
@@ -0,0 +1,93 @@
+searchByMime('text/html');
+
+ // Filter them for pages that have a <link rel="original"> referring to the given URL.
+ $matchingMementos = array();
+ foreach ($files as $file) {
+ $content = $file->getContent();
+ try {
+ $DOM = new DOMDocument;
+ $DOM->loadHTML($content);
+ $headElement = $DOM->documentElement->getElementsByTagName('head')[0];
+ $originalUrls = getOriginalUrls($headElement);
+ foreach ($originalUrls as $originalUrl) {
+ if (normaliseUrl($originalUrl) === normaliseUrl($url)) {
+ // Found a match!
+ // Read its datetime
+ $datetime = getDatetime($headElement);
+ // Construct its URL.
+ $absoluteFilePath = $file->getPath();
+ $relativeFilePath = $folder->getRelativePath($absoluteFilePath);
+ $mementoUrl = joinPaths("/apps/raw/files", $relativeFilePath); // XXX hardcoded dependency
+
+ $matchingMementos[] = [
+ 'mementoUrl' => $mementoUrl,
+ 'originalUrl' => $originalUrl,
+ 'datetime' => $datetime
+ ];
+ }
+ }
+ } catch (Exception $e) {
+ continue;
+ }
+ }
+ return $matchingMementos;
+}
+
+/**
+ * Glue two path fragments together with exactly one slash at the seam.
+ *
+ * Trailing slashes of the first piece and leading slashes of the second piece are dropped before
+ * joining, so the result always contains a single '/' between them (even if both are empty).
+ *
+ * @param string $piece1 Left-hand fragment.
+ * @param string $piece2 Right-hand fragment.
+ * @return string The joined path.
+ */
+function joinPaths($piece1, $piece2) {
+    return rtrim($piece1, '/') . '/' . ltrim($piece2, '/');
+}
+
+/**
+ * Reads hrefs from any <link> with relation type "original".
+ *
+ * Note the plural: pages that claim to correspond to multiple original URLs are accepted, and
+ * every valid href is returned in document order.
+ *
+ * @param DOMElement|null $headElement Element whose descendant <link> elements are inspected;
+ *                                     null (e.g. a document without a <head>) yields no URLs.
+ * @return string[] The hrefs that pass FILTER_VALIDATE_URL.
+ */
+function getOriginalUrls($headElement) {
+    $originalUrls = [];
+    if ($headElement === null) {
+        // Calling a method on null would raise an Error, which a `catch (Exception)` misses.
+        return $originalUrls;
+    }
+    foreach ($headElement->getElementsByTagName('link') as $link) {
+        // rel is a whitespace-separated token list (spaces, tabs, newlines) whose keywords
+        // are matched case-insensitively.
+        $rels = preg_split('/\s+/', strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
+        if (!in_array('original', $rels, true)) {
+            continue;
+        }
+        // FILTER_VALIDATE_URL already requires a scheme. The explicit FILTER_FLAG_SCHEME_REQUIRED
+        // flag was deprecated in PHP 7.3 and removed in PHP 8.0, so it must not be referenced.
+        $href = filter_var($link->getAttribute('href'), FILTER_VALIDATE_URL);
+        if ($href !== false) {
+            $originalUrls[] = $href;
+        }
+    }
+    return $originalUrls;
+}
+
+/**
+ * Read the content of the first <meta http-equiv="Memento-Datetime">, if any.
+ *
+ * @param DOMElement|null $headElement Element whose descendant <meta> elements are inspected;
+ *                                     null (e.g. a document without a <head>) yields null.
+ * @return int|null Unix timestamp parsed from the first matching element's content attribute,
+ *                  or null when there is no such element or its content is not an RFC 1123 date.
+ */
+function getDatetime($headElement) {
+    if ($headElement === null) {
+        // Calling a method on null would raise an Error, which a `catch (Exception)` misses.
+        return null;
+    }
+    foreach ($headElement->getElementsByTagName('meta') as $meta) {
+        // Let's match case-insensitively, I guess?
+        if (strtolower($meta->getAttribute('http-equiv')) !== 'memento-datetime') {
+            continue;
+        }
+        // createFromFormat() returns false for unparsable input; chaining ->getTimestamp() onto
+        // that would raise an Error, so check the result before using it.
+        $parsed = DateTime::createFromFormat(DateTime::RFC1123, trim($meta->getAttribute('content')));
+        // Decide directly at the first match, whether or not it parsed.
+        return $parsed === false ? null : $parsed->getTimestamp();
+    }
+    return null;
+}
+
+/**
+ * Reduce a URL to the loose form used when comparing a requested URL with a page's original URL.
+ *
+ * @param string $url URL to normalise.
+ * @return string The URL without trailing slashes and with every run of slashes collapsed to one.
+ */
+function normaliseUrl($url) {
+    // Trailing slashes are ignored. Because everybody does.
+    $withoutTrailingSlashes = rtrim($url, '/');
+
+    // HACK. Runs of slashes are squashed into a single one, because Nextcloud will have already
+    // done this to the queried url (e.g. 'http://abc' arrives to us as 'http:/abc').
+    return preg_replace('%/{2,}%', '/', $withoutTrailingSlashes);
+}