text-fragments-ts/src/ index.ts
1129 lines
59 KiB

  1. //////////////////////
  2. /// Text Fragments ///
  3. //////////////////////
  4. // An implementation of (most of) the Text Fragments draft spec.
  5. // See https://wicg.github.io/scroll-to-text-fragment/
  6. // Based on the version of 13 August 2020. <https://raw.githubusercontent.com/WICG/scroll-to-text-fragment/2dcfbd6e272f51e5b250c58076b6d1cc57656fce/index.html>
  7. import {
  8. nonEmptyString,
  9. integer,
  10. locale,
  11. isElement,
  12. nextNode,
  13. } from './common.js';
  14. import {
  15. followsInTree,
  16. nodeLength,
  17. nextNodeInShadowIncludingTreeOrder,
  18. isShadowIncludingDescendant,
  19. isShadowIncludingInclusiveAncestor,
  20. substringData,
  21. BoundaryPoint,
  22. } from './whatwg-dom.js';
  23. import {
  24. languageOf,
  25. origin,
  26. serializesAsVoid,
  27. isBeingRendered,
  28. } from './whatwg-html.js';
  29. import {
  30. htmlNamespace,
  31. } from './whatwg-infra.js';
  // § 3.3.1. Parsing the fragment directive

  // https://wicg.github.io/scroll-to-text-fragment/#fragment-directive-delimiter
  // “The fragment directive delimiter is the string ":~:", that is the three consecutive code points U+003A (:), U+007E (~), U+003A (:).”
  export const fragmentDirectiveDelimiter = ':~:';

  /**
   * Implements most of the specified amendment to the ‘create and initialize a Document object’ steps, applying the newly introduced steps to an ‘unmodified’ document.
   *
   * Instead of actually setting the document’s URL and fragment directive, this returns the values they should have obtained.
   *
   * XXX Should the new procedure really “replace steps 7 and 8”? Which version of the HTML spec was this written for? In the version of 6 August 2020, steps 4, 5 and 9 seem more related.
   *
   * @param document - The document whose serialised URL (`document.URL`) is taken apart.
   * @returns `documentUrl`: the URL with the fragment directive and its delimiter removed from the fragment (unchanged if the fragment holds no delimiter); `documentFragmentDirective`: the text following the first delimiter in the fragment, or null if there is no fragment or no delimiter.
   */
  export function initializeDocumentFragmentDirective(document: Document): { documentUrl: string, documentFragmentDirective: string | null } {
    // We mock the document’s URL and the document’s fragment directive using plain variables.
    // As far as I can tell, we cannot access the document’s URL directly — only this serialised version (see <https://dom.spec.whatwg.org/#dom-document-url> as of 29 June 2020).
    let documentUrl: string = document.URL;

    // “Each document has an associated fragment directive which is either null or an ASCII string holding data used by the UA to process the resource. It is initially null.”
    let documentFragmentDirective: string | null = null;

    // 7. “Let url be null”
    let url: string | null = null;

    // XXX What is the idea of the new steps 8 and 9? These could at least use an explanatory note:
    // 8. “If request is non-null, then set document’s URL to request’s current URL.”
    // XXX should this perhaps be “url” instead of “document’s URL”? Otherwise we ignore the fragment directive completely.
    // 9. “Otherwise, set url to response’s URL.”
    // XXX should be “navigationParams's response”? Also, note its URL could be null.
    // In any case, we deviate from the spec in these steps, to allow testing this implementation without access to the request and response. We just take the document’s URL instead.
    url = documentUrl;

    // 10. “Let raw fragment be equal to url’s fragment.”
    // (as we only have access to the serialised URL, we extract the fragment again)
    // The fragment is everything after the FIRST ‘#’; it may itself contain further ‘#’ characters, so we must not split on every ‘#’.
    const hashIndex = url.indexOf('#');
    const rawFragment = hashIndex === -1 ? null : url.substring(hashIndex + 1);

    // XXX The spec seems to neglect that a URL’s fragment can be null (or is it somehow guaranteed to be non-null?). If it is null, I suppose we should skip the subsequent steps.
    if (rawFragment !== null) {
      // (a sane implementation would simply use rawFragment.indexOf(…) or rawFragment.split(…) instead of the steps below)
      // 11. “Let fragmentDirectivePosition be an integer initialized to 0.”
      let fragmentDirectivePosition = 0;

      // 12. “While the substring of raw fragment starting at position fragmentDirectivePosition does not begin with the fragment directive delimiter and fragmentDirectivePosition does not point past the end of raw fragment:”
      while (
        !rawFragment.startsWith(fragmentDirectiveDelimiter, fragmentDirectivePosition)
        && !(fragmentDirectivePosition >= rawFragment.length)
      ) {
        // 1. “Increment fragmentDirectivePosition by 1.”
        fragmentDirectivePosition += 1;
      }

      // 13. “If fragmentDirectivePosition does not point past the end of raw fragment:”
      if (!(fragmentDirectivePosition >= rawFragment.length)) {
        // 1. “Let fragment be the substring of raw fragment starting at 0 of count fragmentDirectivePosition.”
        const fragment = rawFragment.substring(0, fragmentDirectivePosition);

        // 2. “Advance fragmentDirectivePosition by the length of fragment directive delimiter.”
        fragmentDirectivePosition += fragmentDirectiveDelimiter.length;

        // 3. “Let fragment directive be the substring of raw fragment starting at fragmentDirectivePosition.”
        const fragmentDirective = rawFragment.substring(fragmentDirectivePosition);

        // 4. “Set url’s fragment to fragment.”
        // (as we only have access to the serialised URL, we manually replace its fragment part)
        // Note that fragment is always a string here (possibly empty), so the ‘#’ is always kept.
        url = url.substring(0, hashIndex) + '#' + fragment;

        // 5. “Set document’s fragment directive to fragment directive. (Note: this is stored on the document but not web-exposed)”
        documentFragmentDirective = fragmentDirective;
      }
    }

    // 14. “Set document’s URL to be url.”
    documentUrl = url;

    // For testing/trying purposes, we return what should now be the document’s URL and fragment directive.
    return { documentUrl, documentFragmentDirective };
  }
  // https://wicg.github.io/scroll-to-text-fragment/#parse-a-text-directive
  // “To parse a text directive, on a string textDirectiveString, run these steps:”
  /**
   * Splits a text directive (`text=[prefix-,]textStart[,textEnd][,-suffix]`) into its percent-decoded parts.
   *
   * Deviations from the spec text, both of which keep the ParsedTextDirective invariant (“The empty string is not a valid value for any of these items”) and keep this function from throwing on bad URL input:
   * - a prefix or suffix token consisting of only the dash (`-`) makes the directive unparsable (null) instead of yielding an empty prefix/suffix;
   * - a malformed percent-escape makes the directive unparsable (null) instead of letting decodeURIComponent’s URIError escape.
   *
   * @param textDirectiveInput - The directive, including its leading `text=`.
   * @returns The parsed directive, or null if it cannot be parsed.
   */
  export function parseTextDirective(textDirectiveInput: TextDirective): ParsedTextDirective | null { // XXX The spec writes “textDirectiveString” here, but probably meant “text directive input”.
    // 1. “Assert: textDirectiveString matches the production TextDirective.” XXX again, this should be “text directive input” (Note the 'TextDirective' subtype of string is intended to express this assertion)
    // assert(isTextFragmentDirective(textDirectiveInput));

    // 2. “Let textDirectiveString be the substring of text directive input starting at index 5.”
    const textDirectiveString = textDirectiveInput.substring(5);

    // 3. “Let tokens be a list of strings that is the result of splitting textDirectiveString on commas.”
    const tokens = textDirectiveString.split(',');

    // 4. “If tokens has size less than 1 or greater than 4, return null.”
    if (tokens.length < 1 || tokens.length > 4)
      return null;

    // 5. “If any of tokens’s items are the empty string, return null.”
    if (tokens.some(token => token === ''))
      return null;

    // 6. “Let retVal be a ParsedTextDirective with each of its items initialized to null.”
    const retVal: Partial<ParsedTextDirective> = {
      // XXX Initialising textStart to null would conflict with the type definition; hence using Partial<…> instead. Is this temporary type mismatch acceptable in the spec?
      textEnd: null,
      prefix: null,
      suffix: null,
    };

    try {
      // 7. “Let potential prefix be the first item of tokens.”
      const potentialPrefix = tokens[0];

      // 8. “If the last character of potential prefix is U+002D (-), then:”
      if (potentialPrefix.endsWith('-')) {
        // 1. “Set retVal’s prefix to the result of removing the last character from potential prefix.”
        const rawPrefix = potentialPrefix.substring(0, potentialPrefix.length - 1);
        // (not in spec) A lone dash would give an empty prefix, which is not a valid value.
        if (rawPrefix === '')
          return null;
        retVal.prefix = decodeURIComponent(rawPrefix); // XXX spec forgets “…the percent-decoding of…”

        // 2. “Remove the first item of the list tokens.”
        tokens.shift();
      }

      // 9. “Let potential suffix be the last item of tokens, if one exists, null otherwise.”
      const potentialSuffix = tokens[tokens.length - 1] ?? null;

      // 10. “If potential suffix is non-null and its first character is U+002D (-), then:”
      if (potentialSuffix !== null && potentialSuffix.startsWith('-')) {
        // 1. “Set retVal’s suffix to the result of removing the first character from potential suffix.”
        const rawSuffix = potentialSuffix.substring(1);
        // (not in spec) A lone dash would give an empty suffix, which is not a valid value.
        if (rawSuffix === '')
          return null;
        retVal.suffix = decodeURIComponent(rawSuffix); // XXX spec forgets “…the percent-decoding of…”

        // 2. “Remove the last item of the list tokens.”
        tokens.pop();
      }

      // 11. “If tokens has size not equal to 1 nor 2 then return null.”
      if (tokens.length !== 1 && tokens.length !== 2)
        return null;

      // 12. “Set retVal’s textStart be the first item of tokens.”
      retVal.textStart = decodeURIComponent(tokens[0]); // XXX spec forgets “…the percent-decoding of…”

      // 13. “If tokens has size 2, then set retVal’s textEnd be the last item of tokens.”
      if (tokens.length === 2)
        retVal.textEnd = decodeURIComponent(tokens[tokens.length - 1]); // XXX spec forgets “…the percent-decoding of…”
    } catch (error) {
      // (not in spec) decodeURIComponent throws a URIError on malformed percent-escapes; treat such a directive as unparsable.
      if (error instanceof URIError)
        return null;
      throw error;
    }

    // 14. “Return retVal.”
    return retVal as ParsedTextDirective;
  }
  // https://wicg.github.io/scroll-to-text-fragment/#parsedtextdirective
  // “A ParsedTextDirective is a struct that consists of four strings: textStart, textEnd, prefix, and suffix. textStart is required to be non-null. The other three items may be set to null, indicating they weren’t provided. The empty string is not a valid value for any of these items.”
  /**
   * The four parts of a parsed text directive.
   * Only `textStart` is mandatory; a null `textEnd`, `prefix` or `suffix` means that part was not provided.
   */
  export interface ParsedTextDirective {
    /** The (start of the) text to find. */
    textStart: nonEmptyString;
    /** The end of the text to find, when the directive describes a start…end span. */
    textEnd: nonEmptyString | null;
    /** Text required to appear before the match. */
    prefix: nonEmptyString | null;
    /** Text required to appear after the match. */
    suffix: nonEmptyString | null;
  }
  // § 3.3.2. Fragment directive grammar

  // https://wicg.github.io/scroll-to-text-fragment/#valid-fragment-directive
  // “A valid fragment directive is a sequence of characters that appears in the fragment directive that matches the production:”
  export type ValidFragmentDirective = string; // could be `unique string`, when (if) TypeScript will support that.

  /**
   * Type guard telling whether the input is a valid fragment directive.
   *
   * The grammar check itself is not implemented yet: every string is currently accepted.
   * A null input (the initial value of a document’s fragment directive) is never valid; claiming otherwise would let callers treat null as a string.
   *
   * @param input - The document’s fragment directive, possibly null.
   * @returns false for null; true for any string (TEMP).
   */
  export function isValidFragmentDirective(input: string | null): input is ValidFragmentDirective {
    // TODO check input against the FragmentDirective production (use PEG.js?)
    return input !== null; // TEMP
  }
  // https://wicg.github.io/scroll-to-text-fragment/#text-fragment-directive
  // “The text fragment directive is one such fragment directive that enables specifying a piece of text on the page, that matches the production:”
  export type TextDirective = string; // could be `unique string`, when (if) TypeScript will support that.

  /**
   * Type guard telling whether a single directive is a text directive.
   * The full TextDirective production is not checked yet; only the leading `text=` is.
   */
  export function isTextFragmentDirective(input: string): input is TextDirective {
    // TODO match against the TextDirective production (use PEG.js?)
    const requiredPrefix = 'text=';
    return input.slice(0, requiredPrefix.length) === requiredPrefix; // TEMP
  }
  // § 3.4. Security and Privacy

  // § 3.4.4 Restricting the Text Fragment

  // https://wicg.github.io/scroll-to-text-fragment/#should-allow-a-text-fragment
  // “To determine whether a navigation should allow a text fragment, given as input a boolean is user triggered, an origin incumbentNavigationOrigin, and Document document; follow these steps:”
  /**
   * Decides whether a navigation may use a text fragment.
   *
   * Only steps 1, 2 and 6 are implemented; steps 3–5 are skipped, so `document` is currently not consulted and the answer is true only when incumbentNavigationOrigin is null.
   */
  export function shouldAllowTextFragment(isUserTriggered: boolean, incumbentNavigationOrigin: origin | null, document: Document): boolean {
    // 1. “If incumbentNavigationOrigin is null, return true.”
    // XXX This implies null is a potential value of the parameter (is this implicitly allowed in web specs?). (note that an opaque origin is serialised to null, but that is presumably not the situation meant here)
    const hasIncumbentOrigin = incumbentNavigationOrigin !== null;
    if (!hasIncumbentOrigin) {
      return true;
    }

    // 2. “If is user triggered is false, return false.”
    if (!isUserTriggered) {
      return false;
    }

    // 3. “If the document of the latest entry in document’s browsing context's session history is equal to document, return false.”
    // 4. “If incumbentNavigationOrigin is equal to the origin of document return true.”
    // 5. “If document’s browsing context is a top-level browsing context and its group’s browsing context set has length 1 return true.”
    // NOT IMPLEMENTED. Implementing these steps would require reproducing a bunch of browser internals. Probably not worthwhile.

    // 6. “Otherwise, return false.”
    return false;
  }
  // https://wicg.github.io/scroll-to-text-fragment/#allowtextfragmentdirective
  // “To set the allowTextFragmentDirective flag, follow these steps:”
  // XXX Is this line supposed to be there? It looks like it may have been left behind by accident when the approach was changed.
  // “Amend the page load processing model for HTML files to insert these steps after step 1:”
  // 2. “Let is user activated be true if the current navigation was initiated from a window that had a transient activation at the time the navigation was initiated.”
  // 3. “Set document’s allowTextFragmentDirective flag to the result of running should allow a text fragment with is user activated, incumbentNavigationOrigin, and the document.”
  // “Amend the try to scroll to the fragment steps by replacing the steps of the task queued in step 2:”
  // 1. “If document has no parser, or its parser has stopped parsing, or the user agent has reason to believe the user is no longer interested in scrolling to the fragment, then clear document’s allowTextFragmentDirective flag and abort these steps.”
  // 2. “Scroll to the fragment given in document’s URL. If this does not find an indicated part of the document, then try to scroll to the fragment for document.”
  // 3. “Clear document’s allowTextFragmentDirective flag”
  // NOT IMPLEMENTED. Implementing these amendments would require reproducing a bunch of browser internals. Not deemed worthwhile here.
  // § 3.5. Navigating to a Text Fragment
  // https://wicg.github.io/scroll-to-text-fragment/#navigating-to-text-fragment

  /**
   * Implements the amended version of step 3 of the HTML spec’s “scroll to the fragment” steps: <https://html.spec.whatwg.org/multipage/browsing-the-web.html#scroll-to-the-fragment-identifier>
   *
   * @param indicatedPart - The indicated part of the document: the target element, plus the matched range if the part was found through a text directive (null otherwise).
   */
  export function scrollToTheFragment(indicatedPart: [Element, Range | null]): void {
    // (note that step 1 and 2 are irrelevant if the indicated part is an Element/Range, which we require here)

    // “Replace step 3.1 of the scroll to the fragment algorithm with the following:”
    // 3. “Otherwise:”
    // XXX this line above seems superfluous (and possibly confusing).
    // 1. (new) “Let target, range be the element and range that is the indicated part of the document.”
    const target = indicatedPart[0];
    const range = indicatedPart[1];

    // 2. (from original) “Set the Document's target element to target.”
    // TODO Perhaps we could fake this by applying any stylesheet rules for :target to target?

    // “Replace step 3.3 of the scroll to the fragment algorithm with the following:”
    // 3. (new) “Get the policy value for force-load-at-top in the Document. If the result is true, abort these steps.”
    // TODO (but this would require access to HTTP headers)

    // 5. (new) “Otherwise:” (handled first here, as an early exit)
    if (range === null) {
      // 1. (equals original step 3.3) “Scroll target into view, with behavior set to "auto", block set to "start", and inline set to "nearest".”
      scrollElementIntoView(target, 'auto', 'start', 'nearest');
      return;
    }

    // 4. (new) “If range is non-null:”
    // 1. “If the UA supports scrolling of text fragments on navigation, invoke Scroll range into view, with containingElement target, behavior set to "auto", block set to "center", and inline set to "nearest".”
    // XXX …and “with range range”?
    scrollRangeIntoView(range, target, { behavior: 'auto', block: 'center', inline: 'nearest' });
  }
  // “Add the following steps to the beginning of the processing model for the indicated part of the document:”
  /**
   * Implements only the newly introduced steps. To help testing it out, the required inputs are passed as arguments, and the resulting indicated part (if any) is returned, along with the list of ranges (if any).
   *
   * @returns `documentIndicatedPart` is the [element, range] pair for the first range found, or undefined when text fragments are not allowed or nothing was found; `ranges` is only present when an indicated part was found.
   */
  export function indicatedPartOfTheDocument_beginning(
    { document, documentFragmentDirective, documentAllowTextFragmentDirective }:
    { document: Document, documentFragmentDirective: string | null, documentAllowTextFragmentDirective: boolean }
  ): { documentIndicatedPart: [Element, Range] | undefined, ranges?: Range[] } {
    // 1. “Let fragment directive string be the document’s fragment directive.”
    const fragmentDirectiveString = documentFragmentDirective;

    // 2. “If the document’s allowTextFragmentDirective flag is true then:”
    if (documentAllowTextFragmentDirective !== true)
      return { documentIndicatedPart: undefined };

    // 1. “Let ranges be a list that is the result of running the process a fragment directive steps with fragment directive string and the document.”
    const ranges = processFragmentDirective(fragmentDirectiveString, document);

    // 2. “If ranges is non-empty, then:”
    if (ranges.length === 0)
      return { documentIndicatedPart: undefined };

    // 1. “Let range be the first item of ranges.”
    const range = ranges[0];

    // 2. “Let node be the first common ancestor of range’s start node and start node.”
    // XXX This looks silly. Was “start node and end node” meant here?
    let node = firstCommonAncestor(range.startContainer, range.startContainer);

    // 3. “While node is not an element, set node to node’s parent.”
    // XXX Could loop forever! Or is it guaranteed that node has an element as ancestor? This may be a valid but fragile assumption.
    while (!isElement(node))
      node = node.parentNode as Node;

    // 4. “The indicated part of the document is node and range; return.”
    // (the ranges are returned too, to allow testing it out)
    return { documentIndicatedPart: [node, range], ranges };
  }
  // https://wicg.github.io/scroll-to-text-fragment/#first-common-ancestor
  // “To find the first common ancestor of two nodes nodeA and nodeB, follow these steps:”
  /**
   * Walks up from nodeA, crossing shadow boundaries, until reaching a node that is a shadow-including inclusive ancestor of nodeB.
   *
   * XXX If the nodes are not part of the same tree, the walk runs past the root: the spec’s loop would never end. Should be “While commonAncestor is not null and …”
   */
  export function firstCommonAncestor(nodeA: Node, nodeB: Node): Node | never {
    // 1. “Let commonAncestor be nodeA.”
    let candidate = nodeA;

    // 2. “While commonAncestor is not a shadow-including inclusive ancestor of nodeB, let commonAncestor be commonAncestor’s shadow-including parent.”
    for (;;) {
      // 3. “Return commonAncestor.”
      if (isShadowIncludingInclusiveAncestor(candidate, /* of */ nodeB))
        return candidate;
      candidate = shadowIncludingParent(candidate) as Node;
    }
  }
  // “To find the shadow-including parent of node follow these steps:”
  /**
   * Returns the node one level up in the shadow-including tree.
   *
   * @returns The host for a shadow root; otherwise the node’s parent, which is null for a node without parent.
   */
  export function shadowIncludingParent(node: Node): Node | null {
    // 1. “If node is a shadow root, return node’s host.”
    // 2. “Otherwise, return node’s parent.”
    return node instanceof ShadowRoot ? node.host : node.parentNode;
  }
  // § 3.5.1. Scroll a DOMRect into view

  // https://wicg.github.io/scroll-to-text-fragment/#scroll-a-domrect-into-view
  // “Move the scroll an element into view algorithm’s steps 3-14 into a new algorithm scroll a DOMRect into view, with input DOMRect bounding box, ScrollIntoViewOptions dictionary options, and element startingElement.”
  // “Also move the recursive behavior described at the top of the scroll an element into view algorithm to the scroll a DOMRect into view algorithm: "run these steps for each ancestor element or viewport of startingElement that establishes a scrolling box scrolling box, in order of innermost to outermost scrolling box".”
  /**
   * Scrolls the window so that the given rectangle is positioned as requested by `options`.
   *
   * TEMP This is a simplification: it assumes the window is the only scrolling box, block=vertical and inline=horizontal, … (`startingElement` is therefore not used yet).
   * TODO Create/borrow a complete implementation.
   *
   * @param boundingBox - The rectangle to reveal, read as relative to the viewport (its top/left are added to the window’s scroll offsets).
   * @param options - `block` defaults to 'start' and `inline` to 'nearest' (presuming the same defaults as for Element.scrollIntoView); `behavior` is passed on to window.scroll.
   * @param startingElement - Unused for now.
   */
  export function scrollDomRectIntoView(boundingBox: DOMRect, options: ScrollIntoViewOptions, startingElement: Element): void {
    // Computes, for one axis, the absolute scroll offset to scroll to; undefined means ‘leave this axis alone’.
    const resolveAxis = (axis: {
      position: ScrollLogicalPosition,
      relativeBegin: number, // rectangle’s begin edge, relative to the scroll box’s begin edge
      relativeEnd: number, // rectangle’s end edge, relative to the scroll box’s begin edge
      size: number, // rectangle’s size along this axis
      scrollOffset: number, // current scroll offset along this axis
      viewportSize: number, // scroll box’s size along this axis
    }): number | undefined => {
      const { position, relativeBegin, size, scrollOffset, viewportSize } = axis;
      const absoluteBegin = scrollOffset + relativeBegin;
      const absoluteEnd = absoluteBegin + size;
      // Measure the end edge relative to the scroll box’s end, not its start.
      const endOverflow = axis.relativeEnd - viewportSize;

      if (position === 'start')
        return absoluteBegin;
      if (position === 'end')
        return absoluteEnd - viewportSize;
      if (position === 'center')
        return absoluteBegin + size / 2 - viewportSize / 2;
      if (position === 'nearest') {
        const startsBefore = relativeBegin < 0;
        const endsAfter = endOverflow > 0;
        // XXX CSSWG spec seems to forget the case in which the sizes are equal. Here we interpret “greater than” as “greater than or equal to”.
        const fitsInView = size < viewportSize;

        // Sticking out on both sides: do nothing.
        if (startsBefore && endsAfter)
          return undefined;
        // Align the begin edges.
        if (startsBefore && fitsInView || endsAfter && !fitsInView)
          return absoluteBegin;
        // Align the end edges.
        if (startsBefore && !fitsInView || endsAfter && fitsInView)
          return absoluteEnd - viewportSize;
      }
      // Already fully in view (or an unknown position): nothing to do.
      return undefined;
    };

    const viewport = document.documentElement;

    const top = resolveAxis({
      position: options.block ?? 'start', // presuming same default as for Element.scrollIntoView
      relativeBegin: boundingBox.top,
      relativeEnd: boundingBox.bottom,
      size: boundingBox.height,
      scrollOffset: window.scrollY,
      viewportSize: viewport.clientHeight,
    });

    const left = resolveAxis({
      position: options.inline ?? 'nearest', // presuming same default as for Element.scrollIntoView
      relativeBegin: boundingBox.left,
      relativeEnd: boundingBox.right,
      size: boundingBox.width,
      scrollOffset: window.scrollX,
      viewportSize: viewport.clientWidth,
    });

    window.scroll({ top, left, behavior: options.behavior });
  }
  // “Replace steps 3-14 of the scroll an element into view algorithm with a call to scroll a DOMRect into view:”
  // (note the recursive behaviour is already removed due to the lines above)
  // Basing on the <https://drafts.csswg.org/cssom-view-1/#scroll-an-element-into-view> version of 20 February 2020
  /**
   * Scrolls an element into view by handing its bounding client rect to scrollDomRectIntoView.
   */
  export function scrollElementIntoView(element: Element, behavior: ScrollBehavior, block: ScrollLogicalPosition, inline: ScrollLogicalPosition) {
    // 1. (from original) “If the Document associated with element is not same origin with the Document associated with the element or viewport associated with box, terminate these steps.”
    // TODO (if this makes sense here at all?)

    // 3. (new) “Perform scroll a DOMRect into view given element bounding border box, options and element.”
    // XXX There is no “options” defined; presumably that should be { behavior, block, inline }.
    const options: ScrollIntoViewOptions = { behavior, block, inline };

    // 2. (from original) “Let element bounding border box be the box that the return value of invoking getBoundingClientRect() on element represents.”
    scrollDomRectIntoView(element.getBoundingClientRect(), options, element);
  }
  // https://wicg.github.io/scroll-to-text-fragment/#scroll-a-range-into-view
  // “Define a new algorithm scroll a Range into view, with input range range, element containingElement, and a ScrollIntoViewOptions dictionary options:”
  /**
   * Scrolls a range into view by handing its bounding client rect to scrollDomRectIntoView.
   */
  export function scrollRangeIntoView(range: Range, containingElement: Element, options: ScrollIntoViewOptions): void {
    // 1. “Let bounding rect be the DOMRect that is the return value of invoking getBoundingClientRect() on range.”
    // 2. “Perform scroll a DOMRect into view given bounding rect, options, and containingElement.”
    scrollDomRectIntoView(range.getBoundingClientRect(), options, containingElement);
  }
  // § 3.5.2 Finding Ranges in a Document

  // https://wicg.github.io/scroll-to-text-fragment/#process-a-fragment-directive
  /**
   * Finds the ranges indicated by the text directives contained in a fragment directive.
   *
   * @param fragmentDirectiveInput - The document’s fragment directive; null (its initial value) yields an empty list.
   * @param document - The document to search in.
   * @returns One range per text directive that could be parsed and found, in directive order.
   */
  export function processFragmentDirective(fragmentDirectiveInput: string | null, document: Document): Range[] {
    // 1. “If fragment directive input is not a valid fragment directive, then return an empty list.”
    // (null is checked explicitly: it can never be valid, and must not reach the split below)
    if (fragmentDirectiveInput === null || !isValidFragmentDirective(fragmentDirectiveInput)) {
      return [];
    }

    // 2. “Let directives be a list of strings that is the result of strictly splitting the string fragment directive input on "&".”
    const directives = fragmentDirectiveInput.split('&');

    // 3. “Let ranges be a list of ranges, initially empty.”
    const ranges: Range[] = [];

    // 4. “For each string directive of directives:”
    for (const directive of directives) {
      // 1. “If directive does not match the production TextDirective, then continue.”
      if (!isTextFragmentDirective(directive))
        continue;

      // 2. “Let parsedValues be the result of running the parse a text directive steps on directive.”
      const parsedValues = parseTextDirective(directive);

      // 3. “If parsedValues is null then continue.”
      if (parsedValues === null)
        continue;

      // 4. “If the result of running find a range from a text directive given parsedValues and document is non-null, then append it to ranges.”
      const range = findRangeFromTextDirective(parsedValues, document);
      if (range !== null)
        ranges.push(range);
    }

    // 5. “Return ranges.”
    return ranges;
  }
  // https://wicg.github.io/scroll-to-text-fragment/#find-a-range-from-a-text-directive
  // “To find a range from a text directive, given a ParsedTextDirective parsedValues and Document document, run the following steps:”
  /**
   * Searches the document for the first piece of text that satisfies the parsed text directive.
   *
   * The spec repeats its ‘textEnd’ steps in both the prefix and the no-prefix branch; here they are written once, after the branches (the order of operations is unchanged).
   *
   * @returns The matching range, or null if nothing in the document matches.
   */
  export function findRangeFromTextDirective(parsedValues: ParsedTextDirective, document: Document): Range | null {
    const { prefix, textStart, textEnd, suffix } = parsedValues;

    // 1. “Let searchRange be a range with start (document, 0) and end (document, document’s length)”
    const searchRange = document.createRange();
    searchRange.setStart(document, 0);
    searchRange.setEnd(document, document.childNodes.length);

    // Creates “a range whose start is <match>’s end and whose end is searchRange’s end” (the spec needs this for matchRange, textEndRange and suffixRange).
    const rangeFromEndOf = (match: Range): Range => {
      const remainder = document.createRange();
      remainder.setStart(...getEnd(match));
      remainder.setEnd(...getEnd(searchRange));
      return remainder;
    };

    // 2. “While searchRange is not collapsed:”
    while (!searchRange.collapsed) {
      // 1. “Let potentialMatch be null.”
      let potentialMatch;

      // 2. “If parsedValues’s prefix is not null:”
      if (prefix !== null) {
        // 1. “Let prefixMatch be the the result of running the find a string in range steps given parsedValues’s prefix and searchRange”.
        const prefixMatch = findStringInRange(prefix, searchRange);

        // 2. “If prefixMatch is null, return null.”
        if (prefixMatch === null)
          return null;

        // 3. “Set searchRange’s start to the first boundary point after prefixMatch’s start”
        // XXX I suppose we can be certain a next boundary point always exists in this case; can we prove this?
        searchRange.setStart(...firstBoundaryPointAfter(getStart(prefixMatch)) as BoundaryPoint);

        // 4. “Let matchRange be a range whose start is prefixMatch’s end and end is searchRange’s end.”
        const matchRange = rangeFromEndOf(prefixMatch);

        // 5. “Advance matchRange’s start to the next non-whitespace position.”
        advanceRangeStartToNextNonWhitespacePosition(matchRange);

        // 6. “If matchRange is collapsed return null.”
        if (matchRange.collapsed)
          return null;

        // 7. “Assert: matchRange’s start node is a Text node.”
        // assert(matchRange.startContainer.nodeType === Node.TEXT_NODE);

        // 8. “Set potentialMatch to the result of running the find a string in range steps given parsedValues’s textStart and matchRange.”
        potentialMatch = findStringInRange(textStart, matchRange);

        // 9. “If potentialMatch is null, return null.”
        if (potentialMatch === null)
          return null;

        // 10. “If potentialMatch’s start is not matchRange’s start, then and continue.” XXX then ~~and~~ continue
        if (!samePoint(getStart(potentialMatch), getStart(matchRange)))
          continue;
      }
      // 3. “Otherwise:”
      else {
        // 1. “Set potentialMatch to the result of running the find a string in range steps given parsedValues’s textStart and searchRange.”
        potentialMatch = findStringInRange(textStart, searchRange);

        // 2. “If potentialMatch is null, return null.”
        if (potentialMatch === null)
          return null;

        // 3. “Set searchRange’s start to the first boundary point after potentialMatch’s start”
        // XXX I suppose we can be certain a next boundary point always exists in this case; can we prove this?
        searchRange.setStart(...firstBoundaryPointAfter(getStart(potentialMatch)) as BoundaryPoint);
      }

      // (step 2.11 and step 3.4, which are identical) “If parsedValues’s textEnd item is non-null, then:”
      if (textEnd !== null) {
        // 1. “Let textEndRange be a range whose start is potentialMatch’s end and whose end is searchRange’s end.”
        const textEndRange = rangeFromEndOf(potentialMatch);

        // 2. “Let textEndMatch be the result of running the find a string in range steps given parsedValue’s textEnd and textEndRange.”
        const textEndMatch = findStringInRange(textEnd, textEndRange);

        // 3. “If textEndMatch is null then return null.”
        if (textEndMatch === null)
          return null;

        // 4. “Set potentialMatch’s end to textEndMatch’s end.”
        potentialMatch.setEnd(...getEnd(textEndMatch));
      }

      // 4. “Assert: potentialMatch is non-null, not collapsed and represents a range exactly containing an instance of matching text.” XXX the last assertion sounds rather vague.
      // assert(
      //   potentialMatch !== null
      //   && !potentialMatch.collapsed
      //   && new RegExp('^' + escapeRegExp(textStart) + '.*' + escapeRegExp(textEnd) + '$').test(potentialMatch.toString()) // …or something similar?
      // );

      // 5. “If parsedValues’s suffix is null, return potentialMatch.”
      if (suffix === null)
        return potentialMatch;

      // 6. “Let suffixRange be a range with start equal to potentialMatch’s end and end equal to searchRange’s end.”
      const suffixRange = rangeFromEndOf(potentialMatch);

      // 7. “Advance suffixRange’s start to the next non-whitespace position.”
      advanceRangeStartToNextNonWhitespacePosition(suffixRange);

      // 8. “Let suffixMatch be result of running the find a string in range steps given parsedValues’s suffix and suffixRange.”
      const suffixMatch = findStringInRange(suffix, suffixRange);

      // 9. “If suffixMatch is null then return null.”
      if (suffixMatch === null)
        return null;

      // 10. “If suffixMatch’s start is suffixRange’s start, return potentialMatch.”
      if (samePoint(getStart(suffixMatch), getStart(suffixRange)))
        return potentialMatch;
    }

    // XXX Not in spec; by intention or accident?
    return null;
  }
  // https://wicg.github.io/scroll-to-text-fragment/#next-non-whitespace-position
  // “To advance a range range’s start to the next non-whitespace position follow the steps:”
  //
  // Mutates `range` in place: its start is moved forward past non-searchable subtrees, nodes that are not visible text nodes, the literal character sequences "&nbsp;" and "&nbsp", and code points that have the White_Space property.
  // It stops at the first other character, or when the range has become collapsed.
  export function advanceRangeStartToNextNonWhitespacePosition(range: Range) {
    // Moves the range’s start to the start of the given node.
    // XXX Not in spec: there may be no next node (end of the tree). The start cannot advance any further then, so we collapse the range to its end, which terminates the loop below.
    const moveStartToStartOf = (next: Node | null): void => {
      if (next === null)
        range.collapse(false);
      else
        range.setStart(next, 0); // XXX presumably we should set the offset to zero?
    };

    // 1. “While range is not collapsed:”
    while (!range.collapsed) {
      // 1. “Let node be range’s start node.”
      const node = range.startContainer;

      // 2. “Let offset be range’s start offset.”
      const offset = range.startOffset;

      // 3. “If node is part of a non-searchable subtree then:”
      if (partOfNonSearchableSubtree(node)) {
        // 1. “Set range’s start node to the next node, in shadow-including tree order, that isn’t a shadow-including descendant of node.”
        moveStartToStartOf(nextNodeInShadowIncludingTreeOrderThatIsNotAShadowIncludingDescendantOf(node));
        // 2. “Continue.”
        continue;
      }

      // 4. “If node is not a visible text node:”
      if (!isVisibleTextNode(node)) {
        // 1. “Set range’s start node to the next node, in shadow-including tree order.”
        moveStartToStartOf(nextNodeInShadowIncludingTreeOrder(node));
        // 2. “Continue.”
        continue;
      }

      // XXX Not in spec: the start may already be at the very end of this text node (e.g. when a match ends exactly where its text node ends).
      // There is no code point at that offset, so the steps below would return without ever reaching the text in the following node. Move on to the next node instead.
      if (offset >= nodeLength(node)) {
        moveStartToStartOf(nextNodeInShadowIncludingTreeOrder(node));
        continue;
      }

      // 5. “If the substring data of node at offset offset and count 6 is equal to the string "&nbsp;" then:” XXX Why only "&nbsp;", and not e.g. "&thinspace;" or others? Is there no existing spec for whitespace that can be used here?
      if (substringData(node as CharacterData, offset, 6) === '&nbsp;') {
        // 1. “Add 6 to range’s start offset.”
        range.setStart(range.startContainer, range.startOffset + 6);
      }
      // 6. “Otherwise, if the substring data of node at offset offset and count 5 is equal to the string "&nbsp" then:”
      else if (substringData(node as CharacterData, offset, 5) === '&nbsp') {
        // 1. “Add 5 to range’s start offset.”
        range.setStart(range.startContainer, range.startOffset + 5);
      }
      // 7. “Otherwise”
      else {
        // 1. “Let cp be the code point at the offset index in node’s data.”
        // TODO verify if this is correct. We use the index to count code *units*, but we read the code *point*, which smells fishy but may be correct.
        const cp = (node as CharacterData).data.codePointAt(offset) as number;

        // 2. “If cp does not have the White_Space property set, return.”
        if (!hasWhiteSpaceProperty(cp)) return;

        // 3. “Add 1 to range’s start offset.”
        range.setStart(range.startContainer, range.startOffset + 1);
      }

      // 8. “If range’s start offset is equal to node’s length, set range’s start node to the next node in shadow-including tree order.”
      if (range.startOffset === nodeLength(node)) {
        moveStartToStartOf(nextNodeInShadowIncludingTreeOrder(node));
      }
    }
  }
  // https://wicg.github.io/scroll-to-text-fragment/#find-a-string-in-range
  // To find a string in range for a string query in a given range range, run these steps:
  //
  // Walks forward through searchRange, one block of text at a time, and returns a new Range for the first match of query that findARangeFromANodeList reports, or null if there is none.
  // Note that searchRange is mutated: its start is advanced while searching.
  export function findStringInRange(query: string, searchRange: Range): Range | null { // XXX The spec calls it 'range' here, but 'searchRange' afterwards.
    // 1. “While searchRange is not collapsed:”
    while (!searchRange.collapsed) {
      // 1. “Let curNode be searchRange’s start node.”
      let curNode: Node | null = searchRange.startContainer;

      // 2. “If curNode is part of a non-searchable subtree:”
      if (partOfNonSearchableSubtree(curNode)) {
        // 1. “Set searchRange’s start node to the next node, in shadow-including tree order, that isn’t a shadow-including descendant of curNode”
        const nextSearchableNode = nextNodeInShadowIncludingTreeOrderThatIsNotAShadowIncludingDescendantOf(curNode);
        // XXX Not in spec: there may be no such node (end of the tree); then nothing is left to search.
        if (nextSearchableNode === null)
          return null;
        searchRange.setStart(
          nextSearchableNode,
          0, // XXX presumably we should set the offset to zero?
        );

        // 2. “Continue.”
        continue;
      }

      // 3. “If curNode is not a visible text node:”
      if (!isVisibleTextNode(curNode)) {
        // 1. “Set searchRange’s start node to the next node, in shadow-including tree order.”
        // XXX Setting a range’s start fails if the node is a doctype (see <https://dom.spec.whatwg.org/#concept-range-bp-set> as of 29 June 2020)
        // Take the next node that is not a doctype.
        curNode = nextNodeInShadowIncludingTreeOrder(curNode);
        while (curNode && curNode.nodeType === Node.DOCUMENT_TYPE_NODE)
          curNode = nextNodeInShadowIncludingTreeOrder(curNode);
        // XXX Not in spec: there may be no next node (end of the tree); then nothing is left to search.
        if (curNode === null)
          return null;
        searchRange.setStart(
          curNode,
          0, // XXX presumably we should set the offset to zero?
        );

        // 2. “Continue.”
        continue;
      }

      // 4. “Otherwise:” XXX unnecessary due to the 'continue' (and confusing after two 'if's, should the latter be 'else if'?)
      else {
        // 1. “Let blockAncestor be the nearest block ancestor of curNode.”
        const blockAncestor = nearestBlockAncestorOf(curNode);

        // 2. “Let textNodeList be a list of Text nodes, initially empty.”
        const textNodeList: Text[] = [];

        // 3. “While curNode is a shadow-including descendant of blockAncestor and it does not follow searchRange’s end node:”
        while (
          curNode && isShadowIncludingDescendant(curNode, /* of */ blockAncestor)
          // XXX “it does not follow searchRange’s end node” seems mistaken: *every* node follows Document, which is usually the end node…
          // && !followsInTree(curNode, searchRange.endContainer)
          // XXX …so we check instead whether curNode starts after searchRange.
          && searchRange.comparePoint(curNode, 0) !== 1
        ) {
          // 1. “If curNode has block-level display then break.”
          if (hasBlockLevelDisplay(curNode)) {
            break;
          }

          // 2. “If curNode is search invisible:”
          if (isSearchInvisible(curNode)) {
            // 1. “Set curNode to the next node in shadow-including tree order whose ancestor is not curNode.”
            // XXX Is this a *shadow-including* ancestor? Presumably yes, but making it explicit may be better.
            // XXX Two other places in the spec use the equivalent phrasing “that isn’t a shadow-including descendant of”. Best to then use the same phrasing here (or vice versa, but “whose ancestor” seems slightly less clear as it suggests there is only one ancestor).
            curNode = nextNodeInShadowIncludingTreeOrderThatIsNotAShadowIncludingDescendantOf(curNode);

            // 2. “Continue.”
            continue;
          }

          // 3. “If curNode is a visible text node then append it to textNodeList.”
          if (isVisibleTextNode(curNode)) {
            textNodeList.push(curNode);
          }

          // 4. “Set curNode to the next node in shadow-including tree order.”
          curNode = nextNodeInShadowIncludingTreeOrder(curNode);
        }

        // 4. “Run the find a range from a node list steps given query, searchRange, and textNodeList, as input. If the resulting range is not null, then return it.”
        const resultingRange = findARangeFromANodeList(query, searchRange, textNodeList);
        if (resultingRange !== null) {
          return resultingRange;
        }

        // XXX curNode may be null (if we reach the end of tree).
        if (curNode === null)
          break;

        // 5. “Assert: curNode follows searchRange’s start node.”
        // assert(followsInTree(curNode, searchRange.startContainer))

        // 6. “Set searchRange’s start to the boundary point (curNode, 0).”
        searchRange.setStart(curNode, 0);
      }
    }

    // 2. “Return null.”
    return null;
  }
  // https://wicg.github.io/scroll-to-text-fragment/#search-invisible
  // “A node is search invisible…”
  //
  // Returns true only for elements in the HTML namespace that meet one of the conditions below; every other node yields false.
  export function isSearchInvisible(node: Node): boolean {
    // “…if it is in the HTML namespace and meets any of the following conditions:”
    // XXX Namespace for nodes is inapplicable/deprecated? Presuming this was meant: “…if it is an element in the HTML namespace…”
    if (isElement(node) && node.namespaceURI === htmlNamespace) {
      // 1. “The computed value of its display property is none.”
      if (getComputedStyle(node).display === 'none')
        return true;

      // 2. “If the node serializes as void.”
      if (serializesAsVoid(node))
        return true;

      // 3. “Is any of the following types: HTMLIFrameElement, HTMLImageElement, HTMLMeterElement, HTMLObjectElement, HTMLProgressElement, HTMLStyleElement, HTMLScriptElement, HTMLVideoElement, HTMLAudioElement”
      // The interfaces are matched through their elements’ local names. Note that the local name of an HTMLImageElement is 'img', not 'image'.
      // TODO verify: do class names and localName map one-to-one? (hopefully yes, as the term ‘element type’ seems used for both concepts)
      if (['iframe', 'img', 'meter', 'object', 'progress', 'style', 'script', 'video', 'audio'].includes(node.localName))
        return true;

      // 4. “Is a select element whose multiple content attribute is absent.”
      if (node.localName === 'select' && !node.hasAttribute('multiple'))
        return true;
    }
    return false;
  }
  // https://wicg.github.io/scroll-to-text-fragment/#non-searchable-subtree
  // “A node is part of a non-searchable subtree if it is or has an ancestor that is search invisible.”
  //
  // Checks the node itself and then each parentNode in turn, up to the root.
  export function partOfNonSearchableSubtree(node: Node): boolean {
    // XXX I would expect the walk to use the “shadow-including ancestor”, i.e. shadowIncludingParent(ancestor) rather than ancestor.parentNode.
    for (let ancestor: Node | null = node; ancestor; ancestor = ancestor.parentNode) {
      if (isSearchInvisible(ancestor)) {
        return true;
      }
    }
    return false;
  }
  // https://wicg.github.io/scroll-to-text-fragment/#visible-text-node
  // “A node is a visible text node if it is a Text node, the computed value of its visibility property is visible, and it is being rendered.”
  export type VisibleTextNode = Text; // could be `unique Text`, when (if) TypeScript will support that.

  // Type guard for the definition above. A text node without a parent element is never considered visible.
  export function isVisibleTextNode(node: Node): node is VisibleTextNode {
    if (node.nodeType !== Node.TEXT_NODE) {
      return false;
    }

    // XXX How are “the computed value of its visibility property” and “being rendered” defined for non-element nodes? Using the text node’s parent element instead!
    const parent = node.parentElement;
    if (!parent) {
      return false;
    }
    if (getComputedStyle(parent).visibility !== 'visible') {
      return false;
    }
    if (!isBeingRendered(parent)) {
      return false;
    }
    return true;
  }
  // https://wicg.github.io/scroll-to-text-fragment/#has-block-level-display
  // “A node has block-level display if the computed value of its display property is any of block, table, flow-root, grid, flex, list-item.”
  export function hasBlockLevelDisplay(node: Node): boolean {
    // XXX How is “the computed value of its display property” defined for non-element nodes? Assuming here it only applies to elements!
    if (!isElement(node)) {
      return false;
    }
    switch (getComputedStyle(node).display) {
      case 'block':
      case 'table':
      case 'flow-root':
      case 'grid':
      case 'flex':
      case 'list-item':
        return true;
      default:
        return false;
    }
  }
  // https://wicg.github.io/scroll-to-text-fragment/#nearest-block-ancestor
  // “To find the nearest block ancestor of a node follow the steps:”
  //
  // Returns the node itself or its closest ancestor (following parentNode) that is not a Text node and has block-level display; if there is none, returns the document element of the node’s document.
  export function nearestBlockAncestorOf(node: Node): Node {
    // 1. “While node is non-null”
    // XXX The walk uses its own variable instead of overwriting node: node is still needed, non-null, in step 2.
    for (let candidate: Node | null = node; candidate !== null; candidate = candidate.parentNode) {
      // 1. “If node is not a Text node and it has block-level display then return node.”
      const isTextNode = candidate.nodeType === Node.TEXT_NODE;
      if (!isTextNode && hasBlockLevelDisplay(candidate)) {
        return candidate;
      }
      // 2. “Otherwise, set node to node’s parent.” (done by the loop’s update expression)
    }

    // 2. “Return node’s node document's document element.”
    // XXX In the spec, node would be null now! Hence the separate loop variable.
    // A Document has no ownerDocument; in that case node is taken to be the document itself.
    const nodeDocument = node.ownerDocument ?? (node as Document);
    return nodeDocument.documentElement;
  }
  // Lower-cases text for case-insensitive comparison while guaranteeing the result has the same length as the input, so that indices into the result are also valid indices into the original text.
  // String.prototype.toLowerCase may lengthen a string (e.g. "İ" becomes two code units); in that case each code point is lower-cased separately, and any code point whose lower-case form has a different length is left as it is.
  function lowerCasePreservingLength(text: string): string {
    const lowered = text.toLowerCase();
    if (lowered.length === text.length)
      return lowered;
    return Array.from(text, character => {
      const loweredCharacter = character.toLowerCase();
      return loweredCharacter.length === character.length ? loweredCharacter : character;
    }).join('');
  }

  // https://wicg.github.io/scroll-to-text-fragment/#find-a-range-from-a-node-list
  // “To find a range from a node list given a search string queryString, a range searchRange, and a list of nodes nodes, follow the steps”
  //
  // Searches the concatenated data of nodes for the first word-bounded, case-insensitive occurrence of queryString, and returns a new Range around it.
  // Returns null if there is no such occurrence, or if the occurrence extends beyond the end of searchRange.
  export function findARangeFromANodeList(queryString: string, searchRange: Range, nodes: Text[]): Range | null {
    // 1. “Assert: each item in nodes is a Text node.”
    // XXX Could this not just be asserted through the parameter type, like is done in “get boundary point at index”? Applying this already.
    // assert(nodes.every(node => node.nodeType === Node.TEXT_NODE));

    // 2. “Let searchBuffer be the concatenation of the data of each item in in nodes.”
    // XXX typo: “in in nodes”
    const searchBuffer = nodes.map(node => node.data).join('');

    // 3. “Let searchStart be 0.”
    let searchStart = 0;

    // 4. “If the first item in nodes is searchRange’s start node then set searchStart to searchRange’s start offset.”
    if (nodes[0] === searchRange.startContainer)
      searchStart = searchRange.startOffset;

    // 5. “Let start and end be boundary points, initially null.”
    let start: BoundaryPoint | null = null;
    let end: BoundaryPoint | null = null;

    // 6. “Let matchIndex be null.”
    let matchIndex: number | null = null;

    // TEMP case-insensitive string match will have to suffice for now (see step 7.1).
    // Both strings are folded once, outside the loop, and in a way that keeps their lengths, so that matchIndex and queryString’s length remain valid for the unmodified searchBuffer.
    const foldedSearchBuffer = lowerCasePreservingLength(searchBuffer);
    const foldedQueryString = lowerCasePreservingLength(queryString);

    // 7. “While matchIndex is null”
    while (matchIndex === null) {
      // 1. “Let matchIndex be an integer set to the index of the first instance of queryString in searchBuffer, starting at searchStart. The string search must be performed using a base character comparison, or the primary level, as defined in [UTS10].”
      // XXX “Let matchIndex be an integer” sounds like a redeclaration; presumably a mistake?
      // TODO implement base character comparison (i.e. ignoring accents, etc.)
      // XXX It would be helpful to have more specific guidance than merely a link to UTS10 <https://www.unicode.org/reports/tr10/tr10-43.html>
      matchIndex = foldedSearchBuffer.indexOf(foldedQueryString, searchStart);

      // XXX If queryString does not appear in searchString, I suppose we should return.
      if (matchIndex === -1)
        return null;

      // 2. “Let endIx be matchIndex + queryString’s length.”
      const endIx = matchIndex + queryString.length;

      // 3. “Set start be the boundary point result of get boundary point at index matchIndex run over nodes with isEnd false.”
      // XXX typo: “Set start be”
      start = getBoundaryPointAtIndex(matchIndex, nodes, false);

      // 4. “Set end be the boundary point result of get boundary point at index endIx run over nodes with isEnd true.”
      // XXX typo: “Set end be”
      end = getBoundaryPointAtIndex(endIx, nodes, true);

      // XXX Not in spec: start and end should be non-null here, as matchIndex and endIx lie within the search text. Should they be null regardless, there is no range to return.
      if (start === null || end === null)
        return null;

      // 5. “If the substring of searchBuffer starting at matchIndex and of length queryString’s length is not word bounded, given the language from each of start and end’s nodes as the startLocale and endLocale:”
      if (!isWordBounded(searchBuffer, matchIndex, queryString.length, languageOf(start[0]), languageOf(end[0]))) {
        // 1. “Let searchStart be matchIndex + 1.”
        // XXX “Let … be” should be “Set … to”?
        searchStart = matchIndex + 1;

        // 2. “Set matchIndex to null.”
        matchIndex = null;
      }
    }

    // 8. “Let endInset be 0.”
    let endInset = 0;

    // 9. “If the last item in nodes is searchRange’s end node then set endInset to (searchRange’s end node's length − searchRange’s end offset)”
    if (nodes[nodes.length - 1] === searchRange.endContainer)
      endInset = (searchRange.endContainer as Text).length - searchRange.endOffset;

    // 10. “If matchIndex + queryString’s length is greater than or equal to searchBuffer’s length − endInset return null.”
    // XXX This comparison should be strictly greater than: a boundary point can be right after the last character.
    if (matchIndex + queryString.length > searchBuffer.length - endInset)
      return null;

    // 11. “Assert: start and end are non-null, valid boundary points in searchRange.”
    // assert(start !== null && end !== null && searchRange.comparePoint(...start) === 0 && searchRange.comparePoint(...end) === 0);
    start = start as BoundaryPoint;
    end = end as BoundaryPoint;

    // 12. “Return a range with start start and end end.”
    const result = document.createRange();
    result.setStart(...start);
    result.setEnd(...end);
    return result;
  }
  // https://wicg.github.io/scroll-to-text-fragment/#get-boundary-point-at-index
  // “To get boundary point at index, given an integer index, list of Text nodes nodes, and a boolean isEnd, follow these steps:”
  //
  // Translates an index into the concatenated data of nodes to a (node, offset) pair.
  // With isEnd true, an index that falls exactly at the end of a node resolves to that node (at offset equal to its length) rather than to the start of the next one.
  // Returns null if the index lies beyond the nodes.
  export function getBoundaryPointAtIndex(index: integer, nodes: Text[], isEnd: boolean): BoundaryPoint | null {
    // 2.2. “If isEnd is true, add 1 to nodeEnd.”
    const endAllowance = isEnd ? 1 : 0;

    // 1. “Let counted be 0.”
    let lengthBefore = 0;

    // 2. “For each curNode of nodes:”
    for (const textNode of nodes) {
      const ownLength = textNode.length;

      // 1. “Let nodeEnd be counted + curNode’s length.”
      const nodeEnd = lengthBefore + ownLength + endAllowance;

      // 3. “If nodeEnd is greater than index then:”
      if (index < nodeEnd) {
        // 1. “Return the boundary point (curNode, index − counted).”
        return [textNode, index - lengthBefore];
      }

      // 4. “Increment counted by curNode’s length.”
      lengthBefore += ownLength;
    }

    // 3. “Return null.”
    return null;
  }
  // § 3.5.3 Word Boundaries

  // https://wicg.github.io/scroll-to-text-fragment/#word-bounded
  // “To determine if a substring of a larger string is word bounded, given a string text, an integer startPosition, number count, and locales startLocale and endLocale, follow these steps:”
  // “startLocale and endLocale must be a valid [BCP47] language tag, or the empty string. An empty string indicates that the primary language is unknown.” <https://tools.ietf.org/html/bcp47>
  // XXX Is this, or should this be a step? (should locale strings be validated?)
  export function isWordBounded(text: string, startPosition: integer, count: number, startLocale: locale, endLocale: locale): boolean {
    // 1. “Using locale startLocale, let left bound be the last word boundary in text that precedes startPositionth code point of text.”
    // XXX It seems that “startPositionth” involves zero-based indexing; is that considered self-evident?
    // “A string will always contain at least 2 word boundaries before the first code point and after the last code point of the string.
    // XXX Is this really true, even for a string with only white space? Or an empty string?
    // XXX typo: missing a colon before “before”
    const startsAtBoundary = nearestWordBoundary(text, startPosition, 'before', startLocale) === startPosition;

    // 2. “If the first code point of text following left bound is not at position startPosition return false.”
    // We should be able to assume the left bound is not inside a multi-unit code point.
    if (!startsAtBoundary) {
      return false;
    }

    // 3. “Let endPosition be (startPosition + count − 1).”
    const endPosition = startPosition + count - 1;

    // 4. “Using locale endLocale, let right bound be the first word boundary in text after the endPositionth code point.”
    // XXX It seems that “endPositionth” involves zero-based indexing; is that considered self-evident?
    const rightBound = nearestWordBoundary(text, endPosition, 'after', endLocale);

    // 5. “If the first code point of text preceding right bound is not at position endPosition return false.”
    // 6. “Return true.”
    // TEMP (TODO figure this out)
    return rightBound - 1 === endPosition;
  }
  // https://wicg.github.io/scroll-to-text-fragment/#feature-detectability
  // § 3.8. Feature Detectability

  // “For feature detectability, we propose adding a new FragmentDirective interface that is exposed via document.fragmentDirective if the UA supports the feature.
  //     [Exposed=Document]
  //     interface FragmentDirective {
  //     };
  //  We amend the Document interface to include a fragmentDirective property:
  //     partial interface Document {
  //         [SameObject] readonly attribute FragmentDirective fragmentDirective;
  //     };”
  export interface FragmentDirective {
  }
  // TODO Can and should we modify the Document interface?

  // Reports whether the environment exposes the fragmentDirective property that the spec proposes for feature detection.
  // The property is looked up on the document *instance*: it is an attribute of Document objects, not a static member of the Document constructor, so a check like `'fragmentDirective' in Document` could never succeed.
  // Returns false, rather than throwing, where no document or window exists (e.g. outside a browser).
  export function browserSupportsTextFragments(): boolean {
    return (
      (typeof document !== 'undefined' && 'fragmentDirective' in document)
      // Also check in window.location, which was in the spec until & including the version of 12 August 2020. See commit <https://github.com/WICG/scroll-to-text-fragment/commit/2dcfbd6e272f51e5b250c58076b6d1cc57656fce>.
      || (typeof window !== 'undefined' && 'fragmentDirective' in window.location)
    );
  }
  845. //////////////////////////////////////
  846. /// Simple helpers for readability ///
  847. //////////////////////////////////////
  // Returns the start of the range as a [node, offset] boundary point.
  function getStart(range: Range): BoundaryPoint {
    const { startContainer, startOffset } = range;
    return [startContainer, startOffset];
  }
  // Returns the end of the range as a [node, offset] boundary point.
  function getEnd(range: Range): BoundaryPoint {
    const { endContainer, endOffset } = range;
    return [endContainer, endOffset];
  }
  // Tells whether two boundary points refer to the same node (by identity) and the same offset.
  function samePoint(point1: BoundaryPoint, point2: BoundaryPoint): boolean {
    const [node1, offset1] = point1;
    const [node2, offset2] = point2;
    if (node1 !== node2) {
      return false;
    }
    return offset1 === offset2;
  }
  // Returns the first node after `node`, in shadow-including tree order, that is not one of its shadow-including descendants; or null if no such node exists.
  function nextNodeInShadowIncludingTreeOrderThatIsNotAShadowIncludingDescendantOf(node: Node): Node | null {
    for (
      let candidate: Node | null = nextNodeInShadowIncludingTreeOrder(node);
      candidate;
      candidate = nextNodeInShadowIncludingTreeOrder(candidate)
    ) {
      if (!isShadowIncludingDescendant(candidate, node)) {
        return candidate;
      }
    }
    return null;
  }
  864. ///////////
  865. // Other //
  866. ///////////
  // The code points that have the Unicode White_Space property.
  // Values taken from <https://www.unicode.org/Public/UCD/latest/ucd/PropList.txt> version of 2019-11-27
  const WHITE_SPACE_CODE_POINTS: ReadonlySet<number> = new Set([
    0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x0020, 0x0085, 0x00A0,
    0x1680,
    0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, 0x2006, 0x2007, 0x2008, 0x2009, 0x200A,
    0x2028, 0x2029, 0x202F, 0x205F,
    0x3000,
  ]);

  // Tells whether the given code point is one of the listed White_Space code points.
  function hasWhiteSpaceProperty(codePoint: number): boolean {
    // Soon to be widely supported in browsers. <https://caniuse.com/#feat=mdn-javascript_builtins_regexp_property_escapes>
    // return !!String.fromCodePoint(codePoint).match(/\p{White_Space}/u);
    return WHITE_SPACE_CODE_POINTS.has(codePoint);
  }
  // XXX Is this supposed to be self-evident, or should these steps perhaps be included in the spec?
  // Returns the boundary point one position further within the same node, or, when already at the node’s last boundary point, the start of the node that nextNode returns. Returns null if there is no such node.
  function firstBoundaryPointAfter([node, offset]: BoundaryPoint): BoundaryPoint | null {
    // (note that N children/characters makes for N+1 boundary points)
    if (offset < nodeLength(node)) {
      return [node, offset + 1];
    }
    const following = nextNode(node);
    return following !== null ? [following, 0] : null;
  }
  // XXX Is this supposed to be self-evident, or should these steps perhaps be included in the spec?
  // Returns the index (in code units) of the word boundary in text nearest to position: with direction 'before', the last boundary at or before position; with 'after', the first boundary after position.
  // If no boundary qualifies (position lies outside the text), the start (0) or the end (text’s length) of the text is returned, respectively.
  // The locale is currently not taken into account.
  function nearestWordBoundary(text: string, position: integer, direction: 'before' | 'after', locale: locale): integer {
    // From <https://wicg.github.io/scroll-to-text-fragment/#word-bounded>:
    // “A word boundary is defined in [UAX29] in Unicode Text Segmentation §Word_Boundaries. Unicode Text Segmentation §Default_Word_Boundaries defines a default set of what constitutes a word boundary, but as the specification mentions, a more sophisticated algorithm should be used based on the locale.”
    // TODO Look into the referenced unicode spec.
    // TEMP Just use transitions between word and non-word characters, whitespace, and the string’s start and end.
    // The regular expression’s own \b only knows ASCII word characters, and would thus report a boundary next to every non-ASCII letter (e.g. inside “café”). Therefore the transitions are spelled out with look-arounds, counting letters, marks, numbers and connector punctuation (which includes the underscore) as word characters. For ASCII-only text this finds the same positions as \b.
    const boundaryPattern = /^|(?<=[\p{L}\p{M}\p{N}\p{Pc}])(?![\p{L}\p{M}\p{N}\p{Pc}])|(?<![\p{L}\p{M}\p{N}\p{Pc}])(?=[\p{L}\p{M}\p{N}\p{Pc}])|\s|$/gu;

    // Matches are produced in ascending order of position, so a single pass suffices.
    let lastBoundaryAtOrBefore = 0;
    for (const match of text.matchAll(boundaryPattern)) {
      const boundaryPosition = match.index as number;
      if (boundaryPosition > position) {
        if (direction === 'after')
          return boundaryPosition; // The first boundary after position.
        break; // For 'before', the last boundary at or before position has already been seen.
      }
      lastBoundaryAtOrBefore = boundaryPosition;
    }
    return direction === 'before' ? lastBoundaryAtOrBefore : text.length;
  }