Browse Source

Also search public files, add single-user option.

tags/v0.1.0
Gerben 6 years ago
parent
commit
eb6d583c4e
4 changed files with 225 additions and 70 deletions
  1. +6
    -2
      appinfo/routes.php
  2. +39
    -16
      lib/Controller/TimeGateController.php
  3. +35
    -12
      lib/Controller/TimeMapController.php
  4. +145
    -40
      lib/Controller/findMementos.php

+ 6
- 2
appinfo/routes.php View File

@@ -2,9 +2,13 @@


return [ return [
'routes' => [ 'routes' => [
['name' => 'timeGate#timeGate', 'url' => '/timegate/{url}',
['name' => 'timeGate#singleUserTimeGate', 'url' => '/u/{userId}/timegate/{url}',
'requirements' => array('url' => '.+')], 'requirements' => array('url' => '.+')],
['name' => 'timeMap#timeMap', 'url' => '/timemap/{url}',
['name' => 'timeGate#allUsersTimeGate', 'url' => '/timegate/{url}',
'requirements' => array('url' => '.+')],
['name' => 'timeMap#singleUserTimeMap', 'url' => '/u/{userId}/timemap/{url}',
'requirements' => array('url' => '.+')],
['name' => 'timeMap#allUsersTimeMap', 'url' => '/timemap/{url}',
'requirements' => array('url' => '.+')], 'requirements' => array('url' => '.+')],
] ]
]; ];

+ 39
- 16
lib/Controller/TimeGateController.php View File

@@ -6,37 +6,56 @@ require_once __DIR__ . '/datetimeConversion.php';
require_once __DIR__ . '/getUrlParameter.php'; require_once __DIR__ . '/getUrlParameter.php';


use OCP\IRequest; use OCP\IRequest;
use OCP\IURLGenerator;
use OCP\IServerContainer; use OCP\IServerContainer;
use OCP\AppFramework\Controller; use OCP\AppFramework\Controller;
use OCP\AppFramework\Http\RedirectResponse; use OCP\AppFramework\Http\RedirectResponse;
use OCP\AppFramework\Http\DataDisplayResponse; use OCP\AppFramework\Http\DataDisplayResponse;


class TimeGateController extends Controller { class TimeGateController extends Controller {
private $userFolder;
private $URLGenerator;
use MementoFinder;

private $loggedInUserId;
private $serverContainer;


public function __construct( public function __construct(
$AppName, $AppName,
IRequest $request, IRequest $request,
$UserId, $UserId,
IServerContainer $serverContainer,
IURLGenerator $URLGenerator
IServerContainer $serverContainer
) { ) {
parent::__construct($AppName, $request); parent::__construct($AppName, $request);
$this->userFolder = $serverContainer->getUserFolder($UserId);
$this->URLGenerator = $URLGenerator;
$this->loggedInUserId = $UserId;
$this->serverContainer = $serverContainer;
} }


/** /**
* @NoAdminRequired * @NoAdminRequired
* @NoCSRFRequired * @NoCSRFRequired
* @PublicPage
*/ */
public function timeGate($url) {
$url = getUrlParameter('timegate'); // XXX workaround, as nextcloud corrupts the $url parameter.
public function singleUserTimeGate($userId, $url) {
    // XXX workaround, as nextcloud corrupts the $url parameter: the value passed in
    // is ignored and replaced by whatever getUrlParameter() returns for this route.
    $requestedUrl = getUrlParameter("u/$userId/timegate");

    // Look up the mementos visible for this user, then let makeResponse() build the reply.
    return $this->makeResponse(
        $requestedUrl,
        $this->findSingleUserMementosForUrl($userId, $requestedUrl)
    );
}


$matchingMementos = findMementos($this->userFolder, $url);
/**
 * TimeGate endpoint spanning all users: finds the mementos matching the
 * requested URL via findAllUsersMementosForUrl() and passes them to makeResponse().
 *
 * @NoAdminRequired
 * @NoCSRFRequired
 * @PublicPage
 */
public function allUsersTimeGate($url) {
    // The $url argument is replaced by the value getUrlParameter() returns for this route.
    $requestedUrl = getUrlParameter('timegate');

    return $this->makeResponse(
        $requestedUrl,
        $this->findAllUsersMementosForUrl($requestedUrl)
    );
}


private function makeResponse($url, $matchingMementos) {
// Choose one of the matched mementos, if any. // Choose one of the matched mementos, if any.
if (count($matchingMementos) === 0) { if (count($matchingMementos) === 0) {
// No matches. :( // No matches. :(
@@ -69,12 +88,17 @@ class TimeGateController extends Controller {
// Send a 302 Found redirect pointing to the chosen memento. // Send a 302 Found redirect pointing to the chosen memento.
$response = new RedirectResponse($chosenMemento['mementoUrl']); $response = new RedirectResponse($chosenMemento['mementoUrl']);
$response->setStatus(302); $response->setStatus(302);
$response->addHeader('Vary', 'accept-datetime');
// Both the requested datetime and the authenticated user influence the response.
$response->addHeader('Vary', 'accept-datetime, cookie');


// Add a link to the original and to the timemap.
$originalLink = "<{$chosenMemento['originalUrl']}>;rel=\"original\"";
// Add a link to the original(s) and to the timemap.
$originalLinks = implode(", ", array_map(
function ($originalUrl) { return "<$originalUrl>;rel=\"original\""; },
$chosenMemento['originalUrls']
));
// XXX hardcoding the route URL. // XXX hardcoding the route URL.
$timeMapUrl = $this->URLGenerator->getAbsoluteUrl("/apps/memento/timemap/$url");
$timeMapUrl = $this->serverContainer->getURLGenerator()
->getAbsoluteUrl("/apps/memento/timemap/$url");
$firstDatetime = datetimeTimestampToString($matchingMementos[0]['datetime']); $firstDatetime = datetimeTimestampToString($matchingMementos[0]['datetime']);
$lastMemento = $matchingMementos[count($matchingMementos)-1]; $lastMemento = $matchingMementos[count($matchingMementos)-1];
$lastDatetime = datetimeTimestampToString($lastMemento['datetime']); $lastDatetime = datetimeTimestampToString($lastMemento['datetime']);
@@ -82,14 +106,13 @@ class TimeGateController extends Controller {
. ";rel=\"timemap\"" . ";rel=\"timemap\""
. ";type=\"application/link-format\"" . ";type=\"application/link-format\""
. ";from=\"$firstDatetime\";until=\"$lastDatetime\""; . ";from=\"$firstDatetime\";until=\"$lastDatetime\"";
$response->addHeader('Link', "$originalLink, $timeMapLink");
$response->addHeader('Link', "$originalLinks, $timeMapLink");


return $response; return $response;
} }
} }


function minBy($array, $iteratee) { function minBy($array, $iteratee) {
// is there any simpler way for this in php?
$values = array_map($iteratee, $array); $values = array_map($iteratee, $array);
$argmin = array_search(min($values), $values); $argmin = array_search(min($values), $values);
return $array[$argmin]; return $array[$argmin];


+ 35
- 12
lib/Controller/TimeMapController.php View File

@@ -12,36 +12,59 @@ use OCP\AppFramework\Controller;
use OCP\AppFramework\Http\DataDisplayResponse; use OCP\AppFramework\Http\DataDisplayResponse;


class TimeMapController extends Controller { class TimeMapController extends Controller {
private $userFolder;
private $URLGenerator;
use MementoFinder;

private $loggedInUserId;
private $serverContainer;


public function __construct( public function __construct(
$AppName, $AppName,
IRequest $request, IRequest $request,
$UserId, $UserId,
IServerContainer $serverContainer,
IURLGenerator $URLGenerator
IServerContainer $serverContainer
) { ) {
parent::__construct($AppName, $request); parent::__construct($AppName, $request);
$this->userFolder = $serverContainer->getUserFolder($UserId);
$this->URLGenerator = $URLGenerator;
$this->loggedInUserId = $UserId;
$this->serverContainer = $serverContainer;
} }


/** /**
* @NoAdminRequired * @NoAdminRequired
* @NoCSRFRequired * @NoCSRFRequired
* @PublicPage
*/ */
public function timeMap($url) {
$url = getUrlParameter('timemap'); // XXX workaround, as nextcloud corrupts the $url parameter.
public function singleUserTimeMap($userId, $url) {
    // The same prefix is used to read the URL and, in makeResponse(), to build the links.
    $routePrefix = "u/$userId/";

    // XXX workaround, as nextcloud corrupts the $url parameter: the value passed in
    // is ignored and replaced by whatever getUrlParameter() returns for this route.
    $requestedUrl = getUrlParameter("{$routePrefix}timemap");

    return $this->makeResponse(
        $requestedUrl,
        $this->findSingleUserMementosForUrl($userId, $requestedUrl),
        $routePrefix
    );
}


$matchingMementos = findMementos($this->userFolder, $url);
/**
 * TimeMap endpoint spanning all users: finds the mementos matching the
 * requested URL via findAllUsersMementosForUrl() and passes them, together
 * with an empty route prefix, to makeResponse().
 *
 * @NoAdminRequired
 * @NoCSRFRequired
 * @PublicPage
 */
public function allUsersTimeMap($url) {
    // No per-user path segment on this route.
    $routePrefix = "";

    // The $url argument is replaced by the value getUrlParameter() returns for this route.
    $requestedUrl = getUrlParameter("{$routePrefix}timemap");

    return $this->makeResponse(
        $requestedUrl,
        $this->findAllUsersMementosForUrl($requestedUrl),
        $routePrefix
    );
}


private function makeResponse($url, $matchingMementos, $routePrefix) {
// Build the list of links. // Build the list of links.
// $timeMapUrl = $this->URLGenerator->linkToRouteAbsolute('timeMap#timeMap', [ 'url' => $url ]); // $timeMapUrl = $this->URLGenerator->linkToRouteAbsolute('timeMap#timeMap', [ 'url' => $url ]);
// $timeGateUrl = $this->URLGenerator->linkToRouteAbsolute('timeGate#timeGate', [ 'url' => $url ]); // $timeGateUrl = $this->URLGenerator->linkToRouteAbsolute('timeGate#timeGate', [ 'url' => $url ]);
// FIXME ...is linkToRouteAbsolute broken? Hardcoding the path then.. // FIXME ...is linkToRouteAbsolute broken? Hardcoding the path then..
$timeMapUrl = $this->URLGenerator->getAbsoluteUrl("/apps/memento/timemap/$url");
$timeGateUrl = $this->URLGenerator->getAbsoluteUrl("/apps/memento/timegate/$url");
$URLGenerator = $this->serverContainer->getURLGenerator();
$timeMapUrl = $URLGenerator->getAbsoluteUrl("/apps/memento/{$routePrefix}timemap/$url");
$timeGateUrl = $URLGenerator->getAbsoluteUrl("/apps/memento/{$routePrefix}timegate/$url");
if (count($matchingMementos) > 0) { if (count($matchingMementos) > 0) {
$firstDatetime = datetimeTimestampToString($matchingMementos[0]['datetime']); $firstDatetime = datetimeTimestampToString($matchingMementos[0]['datetime']);
$lastMemento = $matchingMementos[count($matchingMementos)-1]; $lastMemento = $matchingMementos[count($matchingMementos)-1];
@@ -58,7 +81,7 @@ class TimeMapController extends Controller {
$maybeFirst = $index === 0 ? 'first ' : ''; $maybeFirst = $index === 0 ? 'first ' : '';
$maybeLast = $index === count($matchingMementos)-1 ? 'last ' : ''; $maybeLast = $index === count($matchingMementos)-1 ? 'last ' : '';
// Make absolute, as the spec says URLs are to be interpreted relative to the *original* url! // Make absolute, as the spec says URLs are to be interpreted relative to the *original* url!
$absoluteMementoUrl = $this->URLGenerator->getAbsoluteURL($memento['mementoUrl']);
$absoluteMementoUrl = $URLGenerator->getAbsoluteURL($memento['mementoUrl']);
$links[] = "<$absoluteMementoUrl>" $links[] = "<$absoluteMementoUrl>"
. ";rel=\"{$maybeFirst}{$maybeLast}memento\"" . ";rel=\"{$maybeFirst}{$maybeLast}memento\""
. ";datetime=\"$datetime\""; . ";datetime=\"$datetime\"";


+ 145
- 40
lib/Controller/findMementos.php View File

@@ -1,61 +1,172 @@
<?php <?php
use DOMDocument;
use DateTime;
namespace OCA\Memento\Controller;

use \DOMDocument;
use \DateTime;
use OCP\Share;
use OCP\Files\FileInfo;


// Finds HTML files that claim to be a snapshot of the given URL; // Finds HTML files that claim to be a snapshot of the given URL;
// Returns an array of mementos, sorted by datetime, with each memento represented by an array: // Returns an array of mementos, sorted by datetime, with each memento represented by an array:
// [ // [
// 'mementoUrl' => URL of the file, relative to the nextcloud instance // 'mementoUrl' => URL of the file, relative to the nextcloud instance
// 'originalUrl' => original URL, presumably equal to the given $url, except we normalise a bit
// 'originalUrls' => original URLs, usually just one.
// 'datetime' => snapshot datetime as a unix timestamp // 'datetime' => snapshot datetime as a unix timestamp
// ] // ]
function findMementos($folder, $url) {
// Get all HTML files the user owns.
$files = $folder->searchByMime('text/html');
//
// Each mementoUrl is hardcoded to /apps/raw/..., thus relying on the 'raw' app to serve the files.

/**
 * Memento lookup shared by the controllers.
 *
 * The using class must provide two properties that the methods read:
 *   - $this->serverContainer: used to obtain the share manager, user manager and user folders;
 *   - $this->loggedInUserId: id of the authenticated user (presumably null for anonymous
 *     requests — confirm against the controller constructors).
 *
 * Both methods return a list of mementos (see findPublicMementos/findPrivateMementos for
 * the element shape) matching $url, sorted by datetime, oldest first.
 */
trait MementoFinder {
    /**
     * Finds the mementos of one user that match the given URL.
     *
     * @param string $userId id of the user whose mementos are searched
     * @param string $url    the original URL to look for
     * @return array matching mementos, oldest first
     */
    public function findSingleUserMementosForUrl($userId, $url) {
        // Get the user's public mementos.
        $foundMementos = findPublicMementos($this->serverContainer->getShareManager(), $userId);

        // If logged in, and asking for one's own mementos, get private mementos too.
        if ($this->loggedInUserId === $userId) {
            $userFolder = $this->serverContainer->getUserFolder($this->loggedInUserId);
            $moreMementos = findPrivateMementos($userFolder);
            $foundMementos = mergeMementos($foundMementos, $moreMementos);
        }

        // Filter those that match the requested URL, and sort them.
        $matchingMementos = filterMementosByUrl($foundMementos, $url);
        // sortMementos() works on a copy, so its return value must be kept. Besides ordering,
        // this also reindexes the array (the filter step leaves gaps in the keys), which the
        // callers rely on when they read the first and last element by index.
        $matchingMementos = sortMementos($matchingMementos);
        return $matchingMementos;
    }

    /**
     * Finds the mementos of all users that match the given URL.
     *
     * @param string $url the original URL to look for
     * @return array matching mementos, oldest first
     */
    public function findAllUsersMementosForUrl($url) {
        $foundMementos = [];

        // Get the public mementos of every user.
        $allUserIds = [];
        $this->serverContainer->getUserManager()->callForAllUsers(
            function ($user) use (&$allUserIds) { $allUserIds[] = $user->getUID(); }
        );
        $shareManager = $this->serverContainer->getShareManager();
        foreach ($allUserIds as $userId) {
            $moreMementos = findPublicMementos($shareManager, $userId);
            $foundMementos = mergeMementos($foundMementos, $moreMementos);
        }

        // If logged in, get current user's private mementos too.
        // Compared explicitly rather than by truthiness, so that a user id of "0" still counts.
        if ($this->loggedInUserId !== null && $this->loggedInUserId !== '') {
            $userFolder = $this->serverContainer->getUserFolder($this->loggedInUserId);
            $moreMementos = findPrivateMementos($userFolder);
            $foundMementos = mergeMementos($foundMementos, $moreMementos);
        }

        // Filter those that match the requested URL, and sort them.
        $matchingMementos = filterMementosByUrl($foundMementos, $url);
        $matchingMementos = sortMementos($matchingMementos);
        return $matchingMementos;
    }
}

function findPrivateMementos($folder) {
$urlForFile = function ($file) use ($folder) {
$absoluteFilePath = $file->getPath();
$relativeFilePath = $folder->getRelativePath($absoluteFilePath);
$rawFileUrl = joinPaths("/apps/raw/files", $relativeFilePath); // XXX hardcoded dependency
return $rawFileUrl;
};

// Peek into each HTML file the user owns, and return those that are mementos.
$files = $folder->searchByMime('text/html');
$foundMementos = [];
foreach ($files as $file) { foreach ($files as $file) {
$content = $file->getContent();
try {
$DOM = new DOMDocument;
$DOM->loadHTML($content);
$headElement = $DOM->documentElement->getElementsByTagName('head')[0];
$originalUrls = getOriginalUrls($headElement);
foreach ($originalUrls as $originalUrl) {
if (normaliseUrl($originalUrl) === normaliseUrl($url)) {
// Found a match!
// Read its datetime
$datetime = getDatetime($headElement);
// Construct its URL.
$absoluteFilePath = $file->getPath();
$relativeFilePath = $folder->getRelativePath($absoluteFilePath);
$mementoUrl = joinPaths("/apps/raw/files", $relativeFilePath); // XXX hardcoded dependency

$matchingMementos[] = [
'mementoUrl' => $mementoUrl,
'originalUrl' => $originalUrl,
'datetime' => $datetime
];
}
$mementoInfo = extractMementoInfo($file);
if ($mementoInfo) {
$mementoInfo['mementoUrl'] = $urlForFile($file);
$foundMementos[] = $mementoInfo;
}
}
return $foundMementos;
}

function findPublicMementos($shareManager, $userId) {
$shares = $shareManager->getSharesBy(
$userId,
Share::SHARE_TYPE_LINK,
null, /* path */
true, /* include reshares */
-1 /* no limit */
);

$urlForShare = function ($share) {
return "/apps/raw/s/" . $share->getToken(); // XXX hardcoded dependency
};

// Look into every shared file to see if it is a memento.
$foundMementos = [];
foreach ($shares as $share) {
$node = $share->getNode();
if ($node->getType() === FileInfo::TYPE_FILE) {
$mementoInfo = extractMementoInfo($node);
if ($mementoInfo) {
$mementoInfo['mementoUrl'] = $urlForShare($share);
$foundMementos[] = $mementoInfo;
} }
} catch (Exception $e) {
continue;
} else {
// TODO add files inside shared folders? How to make URLs for those?
} }
} }
return $foundMementos;
}


// Sort mementos by their datetime. Oldest first.
usort($matchingMementos, function ($m1, $m2) { return $m1['datetime'] <=> $m2['datetime']; });
/**
 * Concatenates two lists of mementos into one.
 *
 * @param array $mementos1 first list
 * @param array $mementos2 second list, appended after the first
 * @return array the combined list
 */
function mergeMementos($mementos1, $mementos2) {
    // TODO deduplicate (we'll get public & private URLs for the same files)
    $combined = array_merge($mementos1, $mementos2);

    return $combined;
}


function filterMementosByUrl($mementos, $url) {
$matchingMementos = array_filter($mementos, function ($mementoInfo) use ($url) {
return matchesUrl($mementoInfo, $url);
});
return $matchingMementos; return $matchingMementos;
} }


/**
 * Tells whether a memento claims to be a snapshot of the given URL.
 *
 * URLs are compared after passing both through normaliseUrl().
 *
 * @param array  $mementoInfo memento with an 'originalUrls' entry (list of URLs)
 * @param string $url         the URL being looked for
 * @return bool true if any of the memento's original URLs equals $url after normalisation
 */
function matchesUrl($mementoInfo, $url) {
    // Normalise the requested URL once instead of on every iteration.
    $wantedUrl = normaliseUrl($url);

    foreach ($mementoInfo['originalUrls'] as $originalUrl) {
        if (normaliseUrl($originalUrl) === $wantedUrl) {
            return true;
        }
    }
    return false;
}

/**
 * Normalises a URL for comparison purposes.
 *
 * @param string $url the URL to normalise
 * @return string the URL with all trailing slashes removed
 */
function normaliseUrl($url) {
    // Ignore trailing slashes. Because everybody does.
    return rtrim($url, '/');
}

// Returns the given mementos ordered by their datetime, oldest first.
// The array is received by value: the caller's array is left untouched, so the
// sorted result is only available through the return value.
function sortMementos($mementos) {
    $byDatetimeAscending = function ($left, $right) {
        return $left['datetime'] <=> $right['datetime'];
    };
    usort($mementos, $byDatetimeAscending);

    return $mementos;
}

function joinPaths($piece1, $piece2) { function joinPaths($piece1, $piece2) {
$left = rtrim($piece1, '/'); $left = rtrim($piece1, '/');
$right = ltrim($piece2, '/'); $right = ltrim($piece2, '/');
return "$left/$right"; return "$left/$right";
} }


/**
 * Reads the memento metadata out of an HTML file.
 *
 * @param object $file anything offering getContent(), which returns the file's contents
 * @return array|null ['originalUrls' => ..., 'datetime' => ...] as produced by
 *                    getOriginalUrls() and getDatetime() on the document's <head>;
 *                    null when the content is empty, cannot be parsed, or has no <head>.
 */
function extractMementoInfo($file) {
    $content = $file->getContent();
    // Nothing to parse. Handing an empty string to loadHTML() fails (an error on newer
    // PHP versions, a warning plus an empty document on older ones), so bail out early.
    if (!is_string($content) || trim($content) === '') {
        return null;
    }

    // Real-world HTML is rarely well-formed; collect libxml's complaints internally
    // instead of emitting a PHP warning for each, then restore the previous setting.
    $previousSetting = libxml_use_internal_errors(true);
    $DOM = new DOMDocument;
    $loaded = $DOM->loadHTML($content);
    libxml_clear_errors();
    libxml_use_internal_errors($previousSetting);

    // A failed or empty parse leaves no root element to search in.
    if (!$loaded || !$DOM->documentElement) {
        return null;
    }

    $headElement = $DOM->documentElement->getElementsByTagName('head')->item(0);
    if (!$headElement) return null; // possibly $content was not HTML at all.

    $originalUrls = getOriginalUrls($headElement);
    $datetime = getDatetime($headElement);
    return [
        'originalUrls' => $originalUrls,
        'datetime' => $datetime
    ];
}

// Reads hrefs from any <link> with relation type "original". // Reads hrefs from any <link> with relation type "original".
// (note the plural: we also accept pages that claim to correspond to multiple original URLs) // (note the plural: we also accept pages that claim to correspond to multiple original URLs)
function getOriginalUrls($headElement) { function getOriginalUrls($headElement) {
@@ -87,9 +198,3 @@ function getDatetime($headElement) {
} }
return null; return null;
} }

function normaliseUrl($url) {
// Ignore trailing slashes. Because everybody does.
$url = rtrim($url, '/');
return $url;
}

Loading…
Cancel
Save