<?php
use DOMDocument;
use DateTime;

// Finds HTML files that claim to be a snapshot of the given URL.
//
// $folder: object providing searchByMime() and getRelativePath(); the files it returns
//          must provide getContent() and getPath().
// $url:    the original URL to look for; compared after normaliseUrl() on both sides.
//
// Returns an array of mementos, sorted by datetime (oldest first), with each memento represented by an array:
// [
//   'mementoUrl' => URL of the file, relative to the nextcloud instance
//   'originalUrl' => original URL as written in the file; equal to the given $url up to normalisation
//   'datetime' => snapshot datetime as a unix timestamp, or null if getDatetime() found none
// ]
// Files that are empty, cannot be parsed, or have no <head> are skipped.
function findMementos($folder, $url) {
	// The wanted URL does not change per file, so normalise it only once.
	$wantedUrl = normaliseUrl($url);

	// Ask the folder for all files with an HTML mime type.
	$files = $folder->searchByMime('text/html');

	// Ordinary real-world HTML makes libxml emit many warnings from loadHTML();
	// collect them internally while we parse, and restore the caller's setting afterwards.
	$previousLibxmlSetting = libxml_use_internal_errors(true);

	// Filter them for pages that have a <link rel="original"> referring to the given URL.
	$matchingMementos = [];
	try {
		foreach ($files as $file) {
			$content = $file->getContent();
			// DOMDocument::loadHTML() rejects empty input (a ValueError on PHP 8), so skip it up front.
			if (!is_string($content) || $content === '') {
				continue;
			}
			try {
				$DOM = new DOMDocument;
				if (!$DOM->loadHTML($content) || $DOM->documentElement === null) {
					continue;
				}
				// item(0) yields null when there is no <head>; without this guard the helpers
				// below would be called on null.
				$headElement = $DOM->documentElement->getElementsByTagName('head')->item(0);
				if ($headElement === null) {
					continue;
				}
				foreach (getOriginalUrls($headElement) as $originalUrl) {
					if (normaliseUrl($originalUrl) !== $wantedUrl) {
						continue;
					}
					// Found a match!
					// Read its datetime
					$datetime = getDatetime($headElement);
					// Construct its URL.
					$absoluteFilePath = $file->getPath();
					$relativeFilePath = $folder->getRelativePath($absoluteFilePath);
					$mementoUrl = joinPaths("/apps/raw/files", $relativeFilePath); // XXX hardcoded dependency

					$matchingMementos[] = [
						'mementoUrl' => $mementoUrl,
						'originalUrl' => $originalUrl,
						'datetime' => $datetime
					];
				}
			} catch (\Throwable $e) {
				// Best effort: one unreadable file must not abort the whole search.
				// Throwable rather than Exception, because since PHP 7 engine failures are
				// Error instances, which "catch (Exception)" silently lets through.
				continue;
			}
		}
	} finally {
		libxml_clear_errors();
		libxml_use_internal_errors($previousLibxmlSetting);
	}

	// Sort mementos by their datetime. Oldest first.
	usort($matchingMementos, function ($m1, $m2) { return $m1['datetime'] <=> $m2['datetime']; });

	return $matchingMementos;
}

// Joins two path pieces with exactly one '/' between them: trailing slashes of the
// first piece and leading slashes of the second piece are dropped before joining.
function joinPaths($piece1, $piece2) {
	return rtrim($piece1, '/') . '/' . ltrim($piece2, '/');
}

// Reads hrefs from any <link> with relation type "original".
// (note the plural: we also accept pages that claim to correspond to multiple original URLs)
//
// $headElement: DOM element whose <link> descendants are inspected.
// Returns the hrefs that pass FILTER_VALIDATE_URL, in document order (possibly empty).
function getOriginalUrls($headElement) {
	$originalUrls = [];
	$links = $headElement->getElementsByTagName('link');
	foreach ($links as $link) {
		// rel holds whitespace-separated tokens (not just single spaces), and we match
		// them case-insensitively, so split on any whitespace run and lower-case first.
		$rels = preg_split('/\s+/', strtolower($link->getAttribute('rel')), -1, PREG_SPLIT_NO_EMPTY);
		if (!in_array('original', $rels, true)) {
			continue;
		}
		// FILTER_VALIDATE_URL already requires a scheme. The former FILTER_FLAG_SCHEME_REQUIRED
		// flag was deprecated in PHP 7.3 and removed in PHP 8.0, where referencing it is an Error.
		$href = filter_var(trim($link->getAttribute('href')), FILTER_VALIDATE_URL);
		if ($href !== false) {
			$originalUrls[] = $href;
		}
	}
	return $originalUrls;
}

// Read the content of the first <meta http-equiv="Memento-Datetime">, if any.
//
// $headElement: DOM element whose <meta> descendants are inspected.
// Returns the datetime as a unix timestamp, or null when there is no such <meta>
// or when the first one does not hold a date in RFC 1123 format.
function getDatetime($headElement) {
	$metas = $headElement->getElementsByTagName('meta');
	foreach ($metas as $meta) {
		// Let's match case-insensitively, I guess?
		if (strtolower($meta->getAttribute('http-equiv')) !== 'memento-datetime') {
			continue;
		}
		$parsed = DateTime::createFromFormat(DateTime::RFC1123, trim($meta->getAttribute('content')));
		// createFromFormat() returns false for malformed input; calling getTimestamp()
		// on that would be a fatal Error, so report "no datetime" instead.
		// Either way we decide at the first match and do not look at later <meta>s.
		return $parsed === false ? null : $parsed->getTimestamp();
	}
	return null;
}

// Normalises a URL for comparison. Currently this only strips any trailing slashes,
// so that URLs differing solely in trailing slashes compare as equal.
function normaliseUrl($url) {
	return rtrim($url, '/');
}