// See https://wicg.github.io/scroll-to-text-fragment/
// Based on the version of 13 August 2020. <https://raw.githubusercontent.com/WICG/scroll-to-text-fragment/2dcfbd6e272f51e5b250c58076b6d1cc57656fce/index.html>
// Some terms used in the spec (would be great if these could be expressed more precisely in TypeScript)
// A string the spec requires to be non-empty. The alias is plain `string`, so the constraint is documentation only and is not checked by the compiler.
type nonEmptyString = string;
// An integer the spec requires to be ≥ 0. The alias is plain `number`, so neither integrality nor sign is checked by the compiler.
type nonNegativeInteger = number;
// A count as used in the spec; also an alias of plain `number`.
type count = number; // same as nonNegativeInteger?
// An integer as used in the spec; also an alias of plain `number`.
type integer = number; // XXX each use in the spec looks like it should be a nonnegative integer.
// “The fragment directive delimiter is the string ":~:", that is the three consecutive code points U+003A (:), U+007E (~), U+003A (:).”
// Declared exactly once (and exported): a `const` binding cannot be redeclared in the same scope, so the non-exported duplicate is dropped.
export const fragmentDirectiveDelimiter = ':~:';
// The function below implements most of the specified amendment to the ‘create and initialize a Document object’ steps. It applies the newly introduced steps on an ‘unmodified’ document. Instead of actually setting the document’s URL and fragment directive, it returns the values they should have obtained.
// XXX Should the new procedure really “replace steps 7 and 8”? Which version of the HTML spec was this written for? In the version of 6 August 2020, steps 4, 5 and 9 seem more related.
@@ -85,7 +109,7 @@ export function initializeDocumentFragmentDirective(document: Document): { docum
// “To parse a text directive, on a string textDirectiveString, run these steps:”
function parseTextDirective(textDirectiveInput: TextDirective): ParsedTextDirective | null { // XXX The spec writes “textDirectiveString” here, but probably meant “text directive input”.
export function parseTextDirective(textDirectiveInput: TextDirective): ParsedTextDirective | null { // XXX The spec writes “textDirectiveString” here, but probably meant “text directive input”.
// 1. “Assert: textDirectiveString matches the production TextDirective.” XXX again, this should be “text directive input” (Note the 'TextDirective' subtype of string is intended to express this assertion)
// “A ParsedTextDirective is a struct that consists of four strings: textStart, textEnd, prefix, and suffix. textStart is required to be non-null. The other three items may be set to null, indicating they weren’t provided. The empty string is not a valid value for any of these items.”
// “Move the scroll an element into view algorithm’s steps 3-14 into a new algorithm scroll a DOMRect into view, with input DOMRect bounding box, ScrollIntoViewOptions dictionary options, and element startingElement.”
// “Also move the recursive behavior described at the top of the scroll an element into view algorithm to the scroll a DOMRect into view algorithm: "run these steps for each ancestor element or viewport of startingElement that establishes a scrolling box scrolling box, in order of innermost to outermost scrolling box".”
function scrollDomRectIntoView(boundingBox: DOMRect, options: ScrollIntoViewOptions, startingElement: Element): void {
export function scrollDomRectIntoView(boundingBox: DOMRect, options: ScrollIntoViewOptions, startingElement: Element): void {
// TODO Create/borrow a complete implementation.
// TEMP assume the window is the only scrolling box, block=vertical and inline=horizontal, …
function applyScrollLogicalPosition({
@@ -390,7 +413,7 @@ function scrollDomRectIntoView(boundingBox: DOMRect, options: ScrollIntoViewOpti
// “Replace steps 3-14 of the scroll an element into view algorithm with a call to scroll a DOMRect into view:”
// (note the recursive behaviour is already removed due to the lines above)
// Based on the <https://drafts.csswg.org/cssom-view-1/#scroll-an-element-into-view> version of 20 February 2020
function scrollElementIntoView(element: Element, behavior: ScrollBehavior, block: ScrollLogicalPosition, inline: ScrollLogicalPosition) {
// 1. (from original) “If the Document associated with element is not same origin with the Document associated with the element or viewport associated with box, terminate these steps.”
// TODO (if this makes sense here at all?)
@@ -404,7 +427,7 @@ function scrollElementIntoView(element: Element, behavior: ScrollBehavior, block
// “Define a new algorithm scroll a Range into view, with input range range, element containingElement, and a ScrollIntoViewOptions dictionary options:”
function scrollRangeIntoView(range: Range, containingElement: Element, options: ScrollIntoViewOptions): void {
export function scrollRangeIntoView(range: Range, containingElement: Element, options: ScrollIntoViewOptions): void {
// 1. “Let bounding rect be the DOMRect that is the return value of invoking getBoundingClientRect() on range.”
// To find a string in range for a string query in a given range range, run these steps:
function findStringInRange(query: string, searchRange: Range): Range | null { // XXX The spec calls it 'range' here, but 'searchRange' afterwards.
export function findStringInRange(query: string, searchRange: Range): Range | null { // XXX The spec calls it 'range' here, but 'searchRange' afterwards.
// 1. “While searchRange is not collapsed:”
while (!searchRange.collapsed) {
// 1. “Let curNode be searchRange’s start node.”
@@ -763,7 +786,7 @@ function findStringInRange(query: string, searchRange: Range): Range | null { //
// “To find the nearest block ancestor of a node follow the steps:”
function nearestBlockAncestorOf(node: Node): Node {
export function nearestBlockAncestorOf(node: Node): Node {
// 1. “While node is non-null”
// XXX We replace node with a new variable curNode for walking up the tree, as we will still need a non-null node in step 2 (and also it needs the type Node | null).
let curNode: Node | null = node;
@@ -852,7 +875,7 @@ function nearestBlockAncestorOf(node: Node): Node {
// “startLocale and endLocale must be a valid [BCP47] language tag, or the empty string. An empty string indicates that the primary language is unknown.” <https://tools.ietf.org/html/bcp47>
// XXX Is this, or should this be a step? (should locale strings be validated?)
// “In shadow-including tree order is shadow-including preorder, depth-first traversal of a node tree. Shadow-including preorder, depth-first traversal of a node tree tree is preorder, depth-first traversal of tree, with for each shadow host encountered in tree, shadow-including preorder, depth-first traversal of that element’s shadow root’s node tree just after it is encountered.”
function nextNodeInShadowIncludingTreeOrder(node: Node): Node | null {
// “An object A is a shadow-including descendant of an object B, if A is a descendant of B, or A’s root is a shadow root and A’s root’s host is a shadow-including inclusive descendant of B.”
function isShadowIncludingDescendant(nodeA: Node, nodeB: Node): boolean {
if (isDescendant(nodeA, nodeB))
return true;
const nodeARoot = nodeA.getRootNode();
if (nodeARoot instanceof ShadowRoot && isShadowIncludingInclusiveDescendant(nodeARoot.host, nodeB))
// “To substring data with node node, offset offset, and count count, run these steps:”
function substringData(
node: CharacterData, // XXX The spec says “node node”, but reads “node’s data” which is only defined for CharacterData nodes.
offset: number,
count: count
): string {
// 1. “Let length be node’s length.”
const length = nodeLength(node);
// 2. “If offset is greater than length, then throw an "IndexSizeError" DOMException.”
if (offset > length)
throw new DOMException('', 'IndexSizeError');
// 3. “If offset plus count is greater than length, return a string whose value is the code units from the offsetth code unit to the end of node’s data, and then return.”
if (offset + count > length) {
return node.data.substring(offset);
}
// TODO verify: “Return a string whose value is the code units from the offsetth code unit to the offset+countth code unit in node’s data.”
// “To determine the language of a node, user agents must look at the nearest ancestor element (including the element itself if the node is an element) that has a lang attribute in the XML namespace set or is an HTML element and has a lang in no namespace attribute set. That attribute specifies the language of the node (regardless of its value).”
let curNode: Node | null = node;
while (curNode !== null) {
if (isElement(curNode)) {
// “If both the lang attribute in no namespace and the lang attribute in the XML namespace are set on an element, user agents must use the lang attribute in the XML namespace, and the lang attribute in no namespace must be ignored for the purposes of determining the element's language.”
const language = curNode.getAttributeNS(xmlNamespace, 'lang') ?? curNode.getAttributeNS(null, 'lang');
if (language !== null)
return language;
}
curNode = curNode.parentNode;
}
// “If node's inclusive ancestors do not have either attribute set, but there is a pragma-set default language set, then that is the language of the node.”
// “If there is no pragma-set default language set, then language information from a higher-level protocol (such as HTTP), if any, must be used as the final fallback language instead.”
// Probably not available to us. (Well, perhaps we could try to fetch document.URL from the cache and read its headers…)
// “In the absence of any such language information, and in cases where the higher-level protocol reports multiple languages, the language of the node is unknown, and the corresponding language tag is the empty string.”
// This implementation is a workaround, since we cannot read the pragma-set default language from the DOM. We simply rerun the steps the user agent should have executed to determine this value, when the corresponding <meta> elements are inserted into the document.
// (note that we assume the meta elements were not modified after creation; in scenarios with attribute modifications our result could deviate from the correct result)
function getPragmaSetDefaultLanguage(): string | undefined {
// “Content language state (http-equiv="content-language")”
// “This pragma sets the pragma-set default language. Until such a pragma is successfully processed, there is no pragma-set default language.”
let pragmaSetDefaultLanguage: string | undefined = undefined;
// “An opaque origin”: “An internal value, with no serialization it can be recreated from (it is serialized as "null" per serialization of an origin), for which the only meaningful operation is testing for equality.”
// Modelled as a `symbol`, presumably because distinct symbols can only be told apart by an equality test — TODO confirm this representation is adequate.
type opaqueOrigin = symbol; // I guess?
// “A tuple consists of:
// • A scheme (a scheme).
// • A host (a host).
// • A port (a port).
// • A domain (null or a domain). Null unless stated otherwise.”
type tupleOrigin = [
// (using primitive types here; specifying these further is beyond scope)
string,
string | integer | integer[], // integers for IP addresses
// “For the purposes of the following algorithm, an element serializes as void if its element type is one of the void elements, or is basefont, bgsound, frame, or keygen.”
function serializesAsVoid(element: Element): boolean {
// From § 2.1.3 XML Compatibility, <https://html.spec.whatwg.org/multipage/infrastructure.html#element-type>:
// “The term element type is used to refer to the set of elements that have a given local name and namespace.”
// “Except where otherwise stated, all elements defined or mentioned in this specification are in the HTML namespace ("http://www.w3.org/1999/xhtml")”
// “An element is being rendered if it has any associated CSS layout boxes, SVG layout boxes, or some equivalent in other styling languages.”
// Temporary approximation of the “being rendered” check: reports true unless the element itself carries a `hidden` attribute.
// It does not inspect layout boxes, computed style, or ancestors, so an element hidden by other means is still reported as rendered.
function isBeingRendered(element: Element): boolean {
  // “Note … The presence of the hidden attribute normally means the element is not being rendered, though this might be overridden by the style sheets.”
  // TODO figure out what exactly we should/could test.
  return !element.hasAttribute('hidden'); // TEMP
}
////////////////////////////////////////////////
/// Required pieces of the WHATWG Infra Spec ///
////////////////////////////////////////////////
// Based on the version of 6 August 2020 <https://infra.spec.whatwg.org/commit-snapshots/38caa3d54ec94b757326b18b0b6cfb39c454f1de/>
// “In shadow-including tree order is shadow-including preorder, depth-first traversal of a node tree. Shadow-including preorder, depth-first traversal of a node tree tree is preorder, depth-first traversal of tree, with for each shadow host encountered in tree, shadow-including preorder, depth-first traversal of that element’s shadow root’s node tree just after it is encountered.”
export function nextNodeInShadowIncludingTreeOrder(node: Node): Node | null {
// “An object A is a shadow-including descendant of an object B, if A is a descendant of B, or A’s root is a shadow root and A’s root’s host is a shadow-including inclusive descendant of B.”
export function isShadowIncludingDescendant(nodeA: Node, nodeB: Node): boolean {
if (isDescendant(nodeA, nodeB))
return true;
const nodeARoot = nodeA.getRootNode();
if (nodeARoot instanceof ShadowRoot && isShadowIncludingInclusiveDescendant(nodeARoot.host, nodeB))
// “To substring data with node node, offset offset, and count count, run these steps:”
export function substringData(
node: CharacterData, // XXX The spec says “node node”, but reads “node’s data” which is only defined for CharacterData nodes.
offset: number,
count: count
): string {
// 1. “Let length be node’s length.”
const length = nodeLength(node);
// 2. “If offset is greater than length, then throw an "IndexSizeError" DOMException.”
if (offset > length)
throw new DOMException('', 'IndexSizeError');
// 3. “If offset plus count is greater than length, return a string whose value is the code units from the offsetth code unit to the end of node’s data, and then return.”
if (offset + count > length) {
return node.data.substring(offset);
}
// TODO verify: “Return a string whose value is the code units from the offsetth code unit to the offset+countth code unit in node’s data.”
// “To determine the language of a node, user agents must look at the nearest ancestor element (including the element itself if the node is an element) that has a lang attribute in the XML namespace set or is an HTML element and has a lang in no namespace attribute set. That attribute specifies the language of the node (regardless of its value).”
let curNode: Node | null = node;
while (curNode !== null) {
if (isElement(curNode)) {
// “If both the lang attribute in no namespace and the lang attribute in the XML namespace are set on an element, user agents must use the lang attribute in the XML namespace, and the lang attribute in no namespace must be ignored for the purposes of determining the element's language.”
const language = curNode.getAttributeNS(xmlNamespace, 'lang') ?? curNode.getAttributeNS(null, 'lang');
if (language !== null)
return language;
}
curNode = curNode.parentNode;
}
// “If node's inclusive ancestors do not have either attribute set, but there is a pragma-set default language set, then that is the language of the node.”
// “If there is no pragma-set default language set, then language information from a higher-level protocol (such as HTTP), if any, must be used as the final fallback language instead.”
// Probably not available to us. (Well, perhaps we could try to fetch document.URL from the cache and read its headers…)
// “In the absence of any such language information, and in cases where the higher-level protocol reports multiple languages, the language of the node is unknown, and the corresponding language tag is the empty string.”
// This implementation is a workaround, since we cannot read the pragma-set default language from the DOM. We simply rerun the steps the user agent should have executed to determine this value, when the corresponding <meta> elements are inserted into the document.
// (note that we assume the meta elements were not modified after creation; in scenarios with attribute modifications our result could deviate from the correct result)
export function getPragmaSetDefaultLanguage(): string | undefined {
// “Content language state (http-equiv="content-language")”
// “This pragma sets the pragma-set default language. Until such a pragma is successfully processed, there is no pragma-set default language.”
let pragmaSetDefaultLanguage: string | undefined = undefined;
// “An opaque origin”: “An internal value, with no serialization it can be recreated from (it is serialized as "null" per serialization of an origin), for which the only meaningful operation is testing for equality.”
// “For the purposes of the following algorithm, an element serializes as void if its element type is one of the void elements, or is basefont, bgsound, frame, or keygen.”
export function serializesAsVoid(element: Element): boolean {
// From § 2.1.3 XML Compatibility, <https://html.spec.whatwg.org/multipage/infrastructure.html#element-type>:
// “The term element type is used to refer to the set of elements that have a given local name and namespace.”
// “Except where otherwise stated, all elements defined or mentioned in this specification are in the HTML namespace ("http://www.w3.org/1999/xhtml")”
// “An element is being rendered if it has any associated CSS layout boxes, SVG layout boxes, or some equivalent in other styling languages.”
export function isBeingRendered(element: Element) {
// “Note … The presence of the hidden attribute normally means the element is not being rendered, though this might be overridden by the style sheets.”
// TODO figure out what exactly we should/could test.