text-fragments-ts/lib/ whatwg-html.js
94 lines
6.5 KiB

  1. ///////////////////////////////////////////////
  2. /// Required pieces of the WHATWG HTML Spec ///
  3. ///////////////////////////////////////////////
  4. // Based on the version of 13 August 2020 <https://html.spec.whatwg.org/commit-snapshots/3c52fe139d9c637eb901932a77d743d6d5ecaa56/>
  5. import { isElement, } from './common.js';
  6. import { htmlNamespace, AsciiWhitespace, xmlNamespace, } from './whatwg-infra.js';
  7. // § 3.2.6.2 The lang and xml:lang attributes
  8. // https://html.spec.whatwg.org/multipage/dom.html#language
  /**
   * Determine the language of a node, following the HTML Standard's
   * “language of a node” rules.
   *
   * @param {Node|null} node - Starting node; it and its ancestors (reached via
   *   `parentNode`) are inspected.
   * @returns {string} The value of the nearest language attribute found, else
   *   the pragma-set default language, else the empty string (unknown language).
   */
  export function languageOf(node) {
    // The nearest inclusive ancestor element that carries a language attribute
    // decides the language, regardless of the attribute's value.
    for (let current = node; current !== null; current = current.parentNode) {
      if (!isElement(current))
        continue;
      // “If both the lang attribute in no namespace and the lang attribute in the XML namespace are set on an element, user agents must use the lang attribute in the XML namespace […]”
      const xmlLang = current.getAttributeNS(xmlNamespace, 'lang');
      if (xmlLang !== null && xmlLang !== undefined)
        return xmlLang;
      const plainLang = current.getAttributeNS(null, 'lang');
      if (plainLang !== null)
        return plainLang;
    }
    // No inclusive ancestor had either attribute: fall back to the pragma-set
    // default language, if one is set.
    const fallbackLanguage = getPragmaSetDefaultLanguage();
    // The spec's next fallback is language information from a higher-level
    // protocol (such as HTTP). That is probably not available to us (perhaps we
    // could try to fetch document.URL from the cache and read its headers), so
    // without a pragma-set default the language is unknown: the empty string.
    return fallbackLanguage !== undefined ? fallbackLanguage : '';
  }
  31. // § 4.2.5.3 Pragma directives
  32. // https://html.spec.whatwg.org/multipage/semantics.html#pragma-set-default-language
  33. // This implementation is a workaround, since we cannot read the pragma-set default language from the DOM. We simply rerun the steps the user agent should have executed to determine this value, when the corresponding <meta> elements are inserted into the document.
  34. // (note that we assume the meta elements were not modified after creation; in scenarios with attribute modifications our result could deviate from the correct result)
  /**
   * Recompute the document's pragma-set default language by re-running the
   * “content language state” steps on every
   * `<meta http-equiv="content-language">` element found in `document`.
   *
   * The elements are processed in the order `querySelectorAll` returns them;
   * each one that is successfully processed overrides the previous result.
   *
   * @returns {string|undefined} The pragma-set default language, or `undefined`
   *   if no such pragma was successfully processed.
   */
  export function getPragmaSetDefaultLanguage() {
    // “Content language state (http-equiv="content-language")”
    // “This pragma sets the pragma-set default language. Until such a pragma is successfully processed, there is no pragma-set default language.”
    let pragmaSetDefaultLanguage = undefined;
    const metaElements = document.querySelectorAll('meta[http-equiv="content-language"]');
    metaElements.forEach(element => {
      // 3. “Let input be the value of the element's content attribute.”
      // (read first, so that a single null check also covers step 1)
      const input = element.getAttribute('content');
      // 1. “If the meta element has no content attribute, then return.”
      if (input === null)
        return;
      // 2. “If the element's content attribute contains a U+002C COMMA character (,) then return.”
      if (input.includes(','))
        return;
      // 4. “Let position point at the first character of input.”
      let position = 0;
      // 5. “Skip ASCII whitespace within input given position.”
      while (position < input.length && AsciiWhitespace.includes(input[position]))
        position++;
      // 6. “Collect a sequence of code points that are not ASCII whitespace from input given position.”
      // 7. “Let candidate be the string that resulted from the previous step.”
      // The length check is essential: past the end of the string
      // input[position] is undefined, which never counts as whitespace, so an
      // unbounded loop would not terminate.
      const start = position;
      while (position < input.length && !AsciiWhitespace.includes(input[position]))
        position++;
      const candidate = input.slice(start, position);
      // 8. “If candidate is the empty string, return.”
      if (candidate === '')
        return;
      // 9. “Set the pragma-set default language to candidate.”
      pragmaSetDefaultLanguage = candidate;
    });
    return pragmaSetDefaultLanguage;
  }
  70. // § 12.1.2 Elements
  71. // https://html.spec.whatwg.org/multipage/syntax.html#void-elements
  // Local names of the HTML void elements listed in § 12.1.2 of the referenced
  // spec snapshot.
  export const voidElements = [
    'area',
    'base',
    'br',
    'col',
    'embed',
    'hr',
    'img',
    'input',
    'link',
    'meta',
    'param',
    'source',
    'track',
    'wbr',
  ];
  73. // § 12.2 Parsing HTML documents
  74. // https://html.spec.whatwg.org/multipage/parsing.html#serializes-as-void
  75. // “For the purposes of the following algorithm, an element serializes as void if its element type is one of the void elements, or is basefont, bgsound, frame, or keygen.”
  /**
   * Tell whether an element “serializes as void”: its element type is one of
   * the void elements, or is basefont, bgsound, frame, or keygen.
   *
   * @param {Element} element - Element whose `namespaceURI` and `localName` are examined.
   * @returns {boolean} `true` if the element serializes as void, `false` otherwise.
   */
  export function serializesAsVoid(element) {
    // From § 2.1.3 XML Compatibility, <https://html.spec.whatwg.org/multipage/infrastructure.html#element-type>:
    // an “element type” is identified by local name *and* namespace, and the
    // elements named by the spec are in the HTML namespace unless stated
    // otherwise. So an element outside the HTML namespace never qualifies.
    if (element.namespaceURI !== htmlNamespace)
      return false;
    const name = element.localName;
    const legacyVoidNames = ['basefont', 'bgsound', 'frame', 'keygen'];
    return voidElements.includes(name) || legacyVoidNames.includes(name);
  }
  86. // § 14.1 Rendering → Introduction
  87. // https://html.spec.whatwg.org/multipage/rendering.html#being-rendered
  88. // “An element is being rendered if it has any associated CSS layout boxes, SVG layout boxes, or some equivalent in other styling languages.”
  /**
   * Rough approximation of the spec's “being rendered” concept.
   *
   * @param {Element} element - Element to test.
   * @returns {boolean} `false` if the element has a `hidden` attribute, `true` otherwise.
   */
  export function isBeingRendered(element) {
    // “Note … The presence of the hidden attribute normally means the element is not being rendered, though this might be overridden by the style sheets.”
    // TODO figure out what exactly we should/could test.
    // TEMP: only the hidden attribute is consulted; layout boxes are not inspected.
    const hasHiddenAttribute = element.hasAttribute('hidden');
    return !hasHiddenAttribute;
  }
  94. //# sourceMappingURL=whatwg-html.js.map