text-fragments-ts/src/ whatwg-html.ts
128 lines
6.6 KiB

  1. ///////////////////////////////////////////////
  2. /// Required pieces of the WHATWG HTML Spec ///
  3. ///////////////////////////////////////////////
  4. // Based on the version of 13 August 2020 <https://html.spec.whatwg.org/commit-snapshots/3c52fe139d9c637eb901932a77d743d6d5ecaa56/>
  5. import {
  6. locale,
  7. isElement,
  8. } from './common.js';
  9. import {
  10. htmlNamespace,
  11. AsciiWhitespace,
  12. xmlNamespace,
  13. } from './whatwg-infra.js';
  14. // § 3.2.6.2 The lang and xml:lang attributes
  15. // https://html.spec.whatwg.org/multipage/dom.html#language
  /**
   * Determine the language of a node, following HTML § 3.2.6.2
   * “The lang and xml:lang attributes”.
   *
   * Resolution order:
   *  1. The nearest inclusive ancestor element that carries a `lang` attribute in
   *     the XML namespace or, failing that, a `lang` attribute in no namespace.
   *     The attribute's value is returned as-is (even if it is the empty string).
   *  2. The pragma-set default language, as computed by
   *     `getPragmaSetDefaultLanguage()`.
   *  3. The empty string, which denotes an unknown language.
   *
   * NOTE: the no-namespace `lang` attribute is honoured on any element here; this
   * function does not check that the element is an HTML element.
   *
   * @param node - The node whose language is requested.
   * @returns The language tag that applies to `node`, or `''` if unknown.
   */
  export function languageOf(node: Node): locale {
    // Walk from the node itself up through its ancestors.
    for (let ancestor: Node | null = node; ancestor !== null; ancestor = ancestor.parentNode) {
      if (!isElement(ancestor))
        continue;

      // “If both the lang attribute in no namespace and the lang attribute in the XML namespace
      // are set on an element, user agents must use the lang attribute in the XML namespace”,
      // so xml:lang is consulted first.
      const xmlLang = ancestor.getAttributeNS(xmlNamespace, 'lang');
      if (xmlLang !== null)
        return xmlLang;

      const plainLang = ancestor.getAttributeNS(null, 'lang');
      if (plainLang !== null)
        return plainLang;
    }

    // No inclusive ancestor declares a language: fall back to the pragma-set default, if any.
    const fallback = getPragmaSetDefaultLanguage();
    if (fallback !== undefined)
      return fallback;

    // The spec would next consult a higher-level protocol (such as HTTP); that information is
    // not consulted here, so the language is reported as unknown (the empty string).
    return '';
  }
  37. // § 4.2.5.3 Pragma directives
  38. // https://html.spec.whatwg.org/multipage/semantics.html#pragma-set-default-language
  39. // This implementation is a workaround, since we cannot read the pragma-set default language from the DOM. We simply rerun the steps the user agent should have executed to determine this value, when the corresponding <meta> elements are inserted into the document.
  40. // (note that we assume the meta elements were not modified after creation; in scenarios with attribute modifications our result could deviate from the correct result)
  /**
   * Recompute the document's pragma-set default language (HTML § 4.2.5.3,
   * “Content language state (http-equiv="content-language")”).
   *
   * The value cannot be read from the DOM, so the spec's processing steps are
   * re-run over every `<meta http-equiv="content-language">` element currently
   * in `document`, in the order `querySelectorAll` returns them. Each element
   * that is processed successfully overwrites the previous result, so the last
   * successful one wins.
   *
   * An element is skipped when it has no `content` attribute, when that
   * attribute contains a comma, or when it holds no non-whitespace characters.
   *
   * @returns The first whitespace-delimited token of the winning `content`
   *          attribute, or `undefined` if no element was processed successfully.
   */
  export function getPragmaSetDefaultLanguage(): string | undefined {
    // “Until such a pragma is successfully processed, there is no pragma-set default language.”
    let pragmaSetDefaultLanguage: string | undefined = undefined;

    const metaElements = document.querySelectorAll('meta[http-equiv="content-language"]');
    for (const element of Array.from(metaElements)) {
      // 1. “If the meta element has no content attribute, then return.”
      // 3. “Let input be the value of the element's content attribute.”
      // (getAttribute yields null exactly when the attribute is absent, covering both steps.)
      const input = element.getAttribute('content');
      if (input === null)
        continue;

      // 2. “If the element's content attribute contains a U+002C COMMA character (,) then return.”
      if (input.includes(','))
        continue;

      // 4. “Let position point at the first character of input.”
      let position = 0;

      // 5. “Skip ASCII whitespace within input given position.”
      while (position < input.length && AsciiWhitespace.includes(input.charAt(position)))
        position++;

      // 6. “Collect a sequence of code points that are not ASCII whitespace from input given position.”
      // The length check ends the scan at the end of input when there is no trailing whitespace.
      const start = position;
      while (position < input.length && !AsciiWhitespace.includes(input.charAt(position)))
        position++;

      // 7. “Let candidate be the string that resulted from the previous step.”
      const candidate = input.slice(start, position);

      // 8. “If candidate is the empty string, return.”
      if (candidate === '')
        continue;

      // 9. “Set the pragma-set default language to candidate.”
      pragmaSetDefaultLanguage = candidate;
    }

    return pragmaSetDefaultLanguage;
  }
  76. // § 12.1.2 Elements
  77. // https://html.spec.whatwg.org/multipage/syntax.html#void-elements
  /** Local names of the HTML void elements (HTML § 12.1.2 “Elements”). */
  export const voidElements = [
    'area',
    'base',
    'br',
    'col',
    'embed',
    'hr',
    'img',
    'input',
    'link',
    'meta',
    'param',
    'source',
    'track',
    'wbr',
  ];
  79. // § 12.2 Parsing HTML documents
  80. // https://html.spec.whatwg.org/multipage/parsing.html#serializes-as-void
  81. // “For the purposes of the following algorithm, an element serializes as void if its element type is one of the void elements, or is basefont, bgsound, frame, or keygen.”
  /**
   * Tell whether an element “serializes as void” in the HTML fragment
   * serialization algorithm (HTML § 12.2).
   *
   * “An element serializes as void if its element type is one of the void elements,
   * or is basefont, bgsound, frame, or keygen.” An element type is a local name
   * together with a namespace (§ 2.1.3), and the elements named in the spec are in
   * the HTML namespace, so both are checked.
   *
   * @param element - The element to classify.
   * @returns `true` if `element` is in the HTML namespace and its local name is a
   *          void element or one of `basefont`, `bgsound`, `frame`, `keygen`;
   *          `false` otherwise.
   */
  export function serializesAsVoid(element: Element): boolean {
    // Elements outside the HTML namespace never serialize as void.
    if (element.namespaceURI !== htmlNamespace)
      return false;

    const name = element.localName;
    return voidElements.includes(name)
      || ['basefont', 'bgsound', 'frame', 'keygen'].includes(name);
  }
  93. // § 14.1 Rendering → Introduction
  94. // https://html.spec.whatwg.org/multipage/rendering.html#being-rendered
  95. // “An element is being rendered if it has any associated CSS layout boxes, SVG layout boxes, or some equivalent in other styling languages.”
  /**
   * Approximate the spec's “being rendered” test (HTML § 14.1) for an element.
   *
   * The spec defines it in terms of associated layout boxes. This placeholder only
   * looks at the `hidden` attribute, whose presence “normally means the element is
   * not being rendered, though this might be overridden by the style sheets”.
   *
   * @param element - The element to inspect.
   * @returns `false` if `element` has a `hidden` attribute, `true` otherwise.
   */
  export function isBeingRendered(element: Element) {
    // TODO figure out what exactly we should/could test.
    const hasHiddenAttribute = element.hasAttribute('hidden'); // TEMP
    return !hasHiddenAttribute;
  }