Browser extension that demonstrates the Web Annotation Discovery mechanism: subscribe to people’s annotation collections/‘feeds’, to see their notes on the web; and create & publish annotations yourself.

web-annotation-discovery-we.../src/storage/ AnnotationSource.ts
483 lines
15 KiB

  1. import { RpcClient } from 'webextension-rpc';
  2. import { contentScriptRpcServer } from '../content_script';
  3. import { asArray, asSingleValue, completeAnnotationStub } from 'web-annotation-utils';
  4. import type { WebAnnotation, ZeroOrMore } from 'web-annotation-utils';
  5. import { Annotation } from './Annotation';
  6. import { db } from './db';
  /**
   * The kinds of annotation source this file knows how to read:
   * - `'container'`: annotations are fetched over HTTP from the source's URL.
   * - `'embeddedJsonld'`: annotations are extracted from an opened web page by
   *   the content script.
   */
  export type AnnotationSourceType =
    | 'container'
    | 'embeddedJsonld';

  /**
   * Names of authentication schemes for a source.
   * NOTE(review): not referenced by the code visible in this file; confirm
   * where it is consumed.
   */
  export type AnnotationSourceAuthType =
    | 'HttpBasicAuth';
  /**
   * Plain record describing one annotation source, in the shape that is stored
   * in `db.annotationSources`.
   */
  export class IAnnotationSource {
    /** Key of this record in `db.annotationSources`. */
    public _id: number;
    /** URL of the source; also the address new annotations are POSTed to. */
    public url: string;
    /** How annotations are obtained from this source. */
    public type: AnnotationSourceType;
    /** Whether the source is refreshed without being forced. */
    public active?: boolean;
    /** Title of the source (presumably for display; not read in this file). */
    public title?: string;
    /** Set once a test annotation could be created on the source. */
    public writable?: boolean;
    /** Whether newly created annotations are stored in this source. */
    public useForNewAnnotations?: boolean;
    /** Set when the server answered a write request with 401. */
    public needsAuth?: boolean;
    /** When the annotations of this source were last reloaded. */
    public lastUpdate?: Date;
    /** When this record was last written to the database. */
    public lastModified?: Date;

    constructor(
      _id: number,
      url: string,
      type: AnnotationSourceType,
      active?: boolean,
      title?: string,
      writable?: boolean,
      useForNewAnnotations?: boolean,
      needsAuth?: boolean,
      lastUpdate?: Date,
      lastModified?: Date,
    ) {
      // Assigned in parameter order, so that the property order on the
      // instance stays the same as with parameter properties.
      this._id = _id;
      this.url = url;
      this.type = type;
      this.active = active;
      this.title = title;
      this.writable = writable;
      this.useForNewAnnotations = useForNewAnnotations;
      this.needsAuth = needsAuth;
      this.lastUpdate = lastUpdate;
      this.lastModified = lastModified;
    }
  }
  /**
   * What has to be known about a source in order to subscribe to it: where it
   * lives (`url`), how to read it (`type`), and optionally its `title`.
   */
  export type AnnotationSourceDescriptor = Pick<IAnnotationSource, 'type' | 'url' | 'title'>;
  /**
   * A subscribed annotation source. Wraps the stored record (`data`) and offers
   * the operations to reload its annotations into the local database and to
   * create, update and delete annotations on its server.
   */
  export class AnnotationSource {
    /**
     * Age in seconds after which `getSourcesNeedingUpdate` considers the last
     * refresh of a source stale.
     */
    static sourceUpdatePeriod = 10 * 60;

    constructor(public data: IAnnotationSource) {}

    /** Headers shared by the POST, PUT and DELETE requests. */
    protected static requestHeaders(): Record<string, string> {
      const mediaType =
        'application/ld+json;profile="http://www.w3.org/ns/anno.jsonld"';
      return { 'Content-Type': mediaType, Accept: mediaType };
    }

    /**
     * Turn an unexpected response into an error.
     * When the server answered 401, `data.needsAuth` is set first. The flag is
     * only changed in memory; nothing is saved here.
     * @param response The response that was not acceptable.
     * @param message Message of the error that is thrown.
     * @throws Always.
     */
    protected rejectResponse(response: Response, message: string): never {
      if (response.status === 401) {
        this.data.needsAuth = true;
      }
      throw new Error(message);
    }

    /**
     * Write `data` to `db.annotationSources`; the stored copy gets the current
     * time as `lastModified`.
     */
    protected async save(): Promise<void> {
      await db.annotationSources.put({
        ...this.data,
        lastModified: new Date(),
      });
    }

    /** Remove this source and its locally stored annotations from the database. */
    async delete(): Promise<void> {
      await this.deleteAnnotationsLocally();
      await db.annotationSources.delete(this.data._id);
      console.log(`Deleted source ${this.data.url}`);
    }

    /** Remove all locally stored annotations that belong to this source. */
    protected async deleteAnnotationsLocally(): Promise<void> {
      const count = await db.annotations
        .where('source')
        .equals(this.data._id)
        .delete();
      console.log(`Deleted ${count} annotations for source ${this.data.url}`);
    }

    /**
     * Reload all annotations from this source.
     * Dirty annotations are uploaded first if the source is writable.
     * @param force Also refresh if the source is not active.
     */
    async refresh(force = false): Promise<void> {
      if (!(this.data.active || force)) return;

      if (this.data.writable) {
        await this.uploadDirtyAnnotations();
      }

      // Fetch before deleting, so that a failing fetch leaves the local copy intact.
      const webAnnotations = await this.fetchAllAnnotations();

      // Delete all existing items from this source, to avoid duplicates/zombies.
      // TODO Make this a little smarter.
      await this.deleteAnnotationsLocally();

      // Insert annotations.
      await Promise.all(
        webAnnotations.map(async (webAnnotation) => {
          await Annotation.new({
            annotation: webAnnotation,
            source: this.data._id,
          });
        }),
      );
      console.log(
        `Inserted ${webAnnotations.length} annotations for source ${this.data.url}`,
      );

      // Update source metadata.
      this.data.lastUpdate = new Date();
      await this.save();
    }

    /**
     * Get the annotations this source currently offers, in the way that fits
     * its `type`.
     * @throws If the source type is not supported.
     */
    protected async fetchAllAnnotations(): Promise<WebAnnotation[]> {
      if (this.data.type === 'container')
        return await getAllAnnotationsFromContainerSource(this.data.url);
      if (this.data.type === 'embeddedJsonld')
        return await getAnnotationsFromEmbeddedJsonld(this.data.url);
      throw new Error(
        `Getting annotations from source of type '${this.data.type}' is not yet implemented.`,
      );
    }

    /**
     * Create an annotation in this source: store it locally marked as dirty,
     * POST it, then replace the local copy by what the server created.
     * @param annotationStub The properties to create the annotation from.
     * @returns The locally stored annotation.
     * @throws If uploading fails; the local, dirty annotation is left in place.
     */
    protected async createAnnotation(annotationStub: Partial<WebAnnotation>) {
      const webAnnotation = completeAnnotationStub(annotationStub);
      const annotation = await Annotation.new({
        annotation: webAnnotation,
        source: this.data._id,
        dirty: true,
      });
      try {
        const createdAnnotation = await this.postAnnotation(
          annotation.data.annotation,
        );
        await annotation.setDirty(false, { annotation: createdAnnotation });
        return annotation;
      } catch (error: unknown) {
        // Anything can be thrown; only an Error is known to carry a message.
        const reason = error instanceof Error ? error.message : String(error);
        throw new Error(`Error while uploading created annotation: ${reason}`);
      }
    }

    /**
     * Send an annotation to the server: POST it if it has no `id` yet, PUT it
     * otherwise. The annotation returned for a POST is not used here.
     */
    async uploadAnnotation(annotation: WebAnnotation): Promise<void> {
      if (!annotation.id) await this.postAnnotation(annotation);
      else await this.putAnnotation(annotation);
    }

    /**
     * POST an annotation (without its `id`) to the URL of this source.
     * @returns The annotation as created by the server, taken from the response
     * body if the server says that is what it sent, or fetched otherwise.
     * @throws If the response is not `201 Created`, lacks a `Location` header,
     * or the created annotation has no `target`.
     */
    protected async postAnnotation(
      annotation: WebAnnotation,
    ): Promise<WebAnnotation> {
      const webAnnotation: Omit<WebAnnotation, 'id'> & {
        id?: WebAnnotation['id'];
      } = { ...annotation };
      delete webAnnotation.id;

      // This one was not yet uploaded. POST it.
      const response = await fetch(this.data.url, {
        method: 'POST',
        credentials: 'include',
        headers: AnnotationSource.requestHeaders(),
        body: JSON.stringify(webAnnotation),
      });
      if (response.status !== 201) {
        this.rejectResponse(
          response,
          `Could not POST the annotation, got: ${response.status} ${response.statusText} (expected: 201 Created)`,
        );
      }

      const location = response.headers.get('Location');
      if (!location)
        throw new Error(
          'Server did not provide Location header for created annotation.',
        );
      // Both headers may hold relative URLs; resolve them against the response URL.
      const locationUrl = new URL(location, response.url).href;
      const contentLocation = response.headers.get('Content-Location');
      const contentLocationUrl =
        contentLocation && new URL(contentLocation, response.url).href;

      // Replace the local annotation with the one from the server, to update its id (and possibly other properties).
      let createdAnnotation: WebAnnotation;
      if (contentLocationUrl === locationUrl) {
        // Easy: the server responded with the annotation itself.
        createdAnnotation = await response.json();
      } else {
        // If we did not receive it, then we fetch it.
        createdAnnotation = await resolveSingle(locationUrl);
      }

      // TODO better validation.
      if (!createdAnnotation.target) {
        throw new Error('Server returned something else than an annotation.');
      }
      return createdAnnotation;
    }

    /**
     * PUT an existing annotation to its own URL (its `id`).
     * @throws If the `id` does not start with the URL of this source, or the
     * server does not answer with a success status.
     */
    protected async putAnnotation(annotation: WebAnnotation): Promise<void> {
      // This annotation exists already. PUT it.
      const annotationUrl = annotation.id;
      if (!annotationUrl.startsWith(this.data.url)) {
        throw new Error(
          `Annotation to be updated is not part of this collection.`,
        );
      }
      const response = await fetch(annotationUrl, {
        method: 'PUT',
        credentials: 'include',
        headers: AnnotationSource.requestHeaders(),
        body: JSON.stringify(annotation),
      });
      if (!response.ok) {
        this.rejectResponse(
          response,
          `Could not PUT the annotation, got: ${response.status} ${response.statusText}`,
        );
      }
    }

    /**
     * Process every locally stored annotation of this source that is marked
     * dirty: those flagged `toDelete` are deleted, the others are uploaded.
     */
    async uploadDirtyAnnotations(): Promise<void> {
      const dirtyAnnotations = await db.annotations
        .where('source')
        .equals(this.data._id)
        .filter(({ dirty }) => !!dirty)
        .toArray();
      // PUT/POST each one individually.
      await Promise.all(
        dirtyAnnotations
          .map((annotation) => new Annotation(annotation))
          .map(async (annotation) => {
            if (annotation.data.toDelete) await annotation.delete();
            else await this.uploadAnnotation(annotation.data.annotation);
          }),
      );
    }

    /**
     * Send a DELETE request for an annotation to its own URL (its `id`).
     * @throws If the `id` does not start with the URL of this source, or the
     * server does not answer with a success status.
     */
    async deleteAnnotationRemotely(annotation: WebAnnotation): Promise<void> {
      const annotationUrl = annotation.id;
      if (!annotationUrl.startsWith(this.data.url)) {
        throw new Error(
          `Annotation to be deleted is not part of this collection.`,
        );
      }
      const response = await fetch(annotationUrl, {
        method: 'DELETE',
        credentials: 'include',
        headers: AnnotationSource.requestHeaders(),
      });
      if (!response.ok) {
        this.rejectResponse(
          response,
          `Could not delete the annotation, got: ${response.status} ${response.statusText}`,
        );
      }
    }

    /**
     * Find out whether annotations can be created on this source, by creating
     * a test annotation. On success the source is marked `writable` and saved,
     * after which the test annotation is deleted again.
     * @throws If the test annotation could not be created or deleted.
     */
    async testWritable(): Promise<void> {
      const createdAnnotation = await this.postAnnotation(
        completeAnnotationStub({
          target: 'http://example.com/page1',
          bodyValue: 'Test annotation, should have been deleted directly',
        }),
      );
      this.data.writable = true;
      await this.save();
      await this.deleteAnnotationRemotely(createdAnnotation);
    }

    /** Set whether new annotations are stored in this source, and save that. */
    async useForNewAnnotations(value: boolean): Promise<void> {
      this.data.useForNewAnnotations = value;
      await this.save();
    }

    /**
     * Store a new source record and wrap it.
     * @param data The record, without `_id`; the database provides the key.
     * @returns The source, whose `data` equals the stored record (including its
     * `lastModified`) plus the `_id` it was given.
     */
    static async new(
      data: Omit<IAnnotationSource, '_id'>,
    ): Promise<AnnotationSource> {
      // @ts-ignore: _id is not needed in put()
      const source: IAnnotationSource = { ...data, lastModified: new Date() };
      const key = (await db.annotationSources.put(
        source,
      )) as IAnnotationSource['_id'];
      return new this({ ...source, _id: key });
    }

    /**
     * Subscribe to a source and load its annotations.
     * @param sourceDescriptor Describes the source to add.
     * @param active Whether to keep the source up to date; if omitted, only
     * sources of type `container` are active.
     * @returns The new source, or `undefined` if a source with this URL exists already.
     */
    static async addSource(
      sourceDescriptor: AnnotationSourceDescriptor,
      active?: boolean,
    ): Promise<AnnotationSource | undefined> {
      if (await this.exists(sourceDescriptor)) return;

      // Auto-update from containers, but not from pages with embedded annotations.
      if (active === undefined) {
        active = sourceDescriptor.type === 'container';
      }

      const source = await AnnotationSource.new({
        url: sourceDescriptor.url,
        title: sourceDescriptor.title,
        type: sourceDescriptor.type,
        active,
      });
      // Forced, so that an inactive source gets its annotations loaded once.
      await source.refresh(true);
      return source;
    }

    /** Tell whether a source with the URL of the descriptor is stored already. */
    static async exists(
      sourceDescriptor: AnnotationSourceDescriptor,
    ): Promise<boolean> {
      const source = await db.annotationSources
        .filter((source) => source.url === sourceDescriptor.url)
        .first();
      return source !== undefined;
    }

    /**
     * Get the source stored under the given key.
     * @throws If there is no such source.
     */
    static async get(id: IAnnotationSource['_id']): Promise<AnnotationSource> {
      const source = await db.annotationSources.get(id);
      if (!source) throw new Error(`No annotation source exists with id ${id}.`);
      return new AnnotationSource(source);
    }

    /**
     * Get the first stored source that has the given URL.
     * @throws If there is no such source.
     */
    static async getByUrl(url: string): Promise<AnnotationSource> {
      const source = await db.annotationSources
        .filter((source) => source.url === url)
        .first();
      if (!source)
        throw new Error(`No annotation source exists with url ${url}.`);
      return new AnnotationSource(source);
    }

    /** Get all stored sources. */
    static async getAll(): Promise<AnnotationSource[]> {
      const sources = await db.annotationSources.toArray();
      return sources.map((source) => new AnnotationSource(source));
    }

    /** Get the stored sources that are marked active. */
    static async getActiveSources(): Promise<AnnotationSource[]> {
      // How to do this nicely in Dexie?
      const sources = await db.annotationSources.toArray();
      const activeSources = sources.filter(({ active }) => active);
      return activeSources.map((source) => new AnnotationSource(source));
    }

    /** Get the active sources of type `container`. */
    static async getPossiblyWritableSources(): Promise<AnnotationSource[]> {
      const sources = await this.getActiveSources();
      return sources.filter((source) => source.data.type === 'container');
    }

    /**
     * Get the active sources that were never refreshed, or whose last refresh
     * is more than `sourceUpdatePeriod` seconds ago.
     */
    static async getSourcesNeedingUpdate(): Promise<AnnotationSource[]> {
      const cutoffDate = new Date(Date.now() - this.sourceUpdatePeriod * 1000);
      // Using filters rather than where('lastUpdate').belowOrEqual(cutoffDate);
      // the where() clause cannot get items with lastUpdate===undefined
      const sources = await db.annotationSources
        .filter(({ lastUpdate }) => !lastUpdate || lastUpdate < cutoffDate)
        .filter(({ active }) => !!active)
        .toArray();
      return sources.map((source) => new AnnotationSource(source));
    }

    /**
     * Create an annotation and upload it.
     * @param annotationStub The properties to create the annotation from.
     * @param sourceId Key of the source to store the annotation in. If omitted,
     * the active container source marked `useForNewAnnotations` is used.
     * @returns The locally stored annotation.
     * @throws If no suitable source exists, or creating the annotation fails.
     */
    static async createAnnotation(
      annotationStub: Partial<WebAnnotation>,
      sourceId?: AnnotationSource['data']['_id'],
    ) {
      let target: AnnotationSource;
      if (sourceId) {
        target = await AnnotationSource.get(sourceId);
      } else {
        const candidates = await AnnotationSource.getPossiblyWritableSources();
        if (candidates.length === 0)
          throw new Error(
            'Please first subscribe to the annotation collection where you want to store your annotations.',
          );
        // There should only be one source marked with useForNewAnnotations;
        // if there are more, the first one is used.
        const selected = candidates.find(
          (candidate) => candidate.data.useForNewAnnotations,
        );
        if (!selected)
          throw new Error(
            'Please select (in the extension’s popup menu) in which collection to store your annotations.',
          );
        target = selected;
      }
      return await target.createAnnotation(annotationStub);
    }
  }
  /**
   * Fetch all annotations offered at the URL of a `container`-type source.
   *
   * The URL may identify a single annotation, which is then the only result,
   * or an annotation collection, of which all pages are read by following
   * `first` and then each page's `next`. A collection without a first page
   * yields an empty list.
   *
   * @param sourceUrl URL of the annotation or annotation collection.
   * @returns The annotations found.
   * @throws If the fetched node is neither an `Annotation` nor an
   * `AnnotationCollection`.
   */
  async function getAllAnnotationsFromContainerSource(
    sourceUrl: string,
  ): Promise<WebAnnotation[]> {
    console.log(`Fetching annotations from ${sourceUrl}`);
    const annotationSourceData = await resolveSingle(sourceUrl);

    // Check what type of source we got.
    const nodeTypes = asArray(annotationSourceData.type);

    // If the source is a single annotation, import that one.
    if (nodeTypes.includes('Annotation')) {
      return [annotationSourceData];
    }

    // If the source is an annotation container/collection, import all its items.
    if (nodeTypes.includes('AnnotationCollection')) {
      const annotations: WebAnnotation[] = [];
      // Walk the pages. `resolveSingle` gives nothing when `first` or `next`
      // is absent, which ends the loop; an empty collection has no pages at all.
      let page = await resolveSingle(annotationSourceData.first);
      while (page) {
        annotations.push(...asArray(page.items));
        page = await resolveSingle(page.next);
      }
      return annotations;
    }

    throw new Error(
      `Annotation source is neither AnnotationCollection nor Annotation.`,
    );
  }
  /**
   * Given the value of an `@id`-property, get the ‘actual’ value:
   * - if the node is nested, return value as-is.
   * - if the value is a string (= a URL), fetch and return its data.
   * - if there is no value, return `undefined`.
   * - if there are multiple values, process only the first.
   *
   * @param valuesOrIds A nested node, a URL, a list of either, or nothing.
   * @returns The nested node, the fetched JSON object, or `undefined`.
   * @throws If the URL does not give a success response, or its body is not
   * JSON, or the JSON is not an object.
   *
   * TODO Consider using json-ld tools:
   * - https://github.com/LDflex/LDflex/
   * - https://github.com/assemblee-virtuelle/LDP-navigator
   */
  async function resolveSingle(
    valuesOrIds: ZeroOrMore<string> | object,
  ): Promise<undefined | any> {
    const valueOrId = asSingleValue(valuesOrIds);

    // If it’s a value (or undefined), we are done.
    if (typeof valueOrId !== 'string') return valueOrId;

    // It’s an id, i.e. a URL. (TODO use correct base for relative URLs)
    const response = await fetch(valueOrId, {
      headers: {
        Accept: 'application/ld+json;profile="http://www.w3.org/ns/anno.jsonld"',
        Prefer:
          'return=representation;include="http://www.w3.org/ns/oa#PreferContainedDescriptions"',
      },
      cache: 'no-cache',
    });

    // An error response may well have a JSON body; do not mistake it for data.
    if (!response.ok) {
      throw new Error(
        `Could not fetch <${valueOrId}>, got: ${response.status} ${response.statusText}`,
      );
    }

    let data: unknown;
    try {
      data = await response.json();
    } catch (error) {
      throw new Error(`Received invalid JSON from URL <${valueOrId}>, ${error}`);
    }
    // `typeof null` is 'object' too, hence the explicit check.
    if (typeof data !== 'object' || data === null)
      throw new Error('Response is valid JSON but not an object.');
    return data;
  }
  /**
   * Get the annotations that are embedded in a web page, by asking the content
   * script in a tab showing that page to discover them.
   * The page has to be open in some tab. (though we could fetch&parse the html ourselves)
   *
   * @param url URL of the page; anything from the first `#` on is ignored when
   * looking for its tab.
   * @returns What the content script reports.
   * @throws If no tab has a URL starting with the given one.
   */
  async function getAnnotationsFromEmbeddedJsonld(
    url: string,
  ): Promise<WebAnnotation[]> {
    const urlWithoutFragment = url.split('#')[0];
    const openTabs = await browser.tabs.query({});
    const sourceTab = openTabs.find((tab) =>
      tab.url?.startsWith(urlWithoutFragment),
    );

    if (!sourceTab) {
      throw new Error(
        `To refresh annotations extracted from a web page, first open that page.`,
      );
    }

    const contentScriptRpc = new RpcClient<typeof contentScriptRpcServer>({
      tabId: sourceTab.id,
    });
    const discover = contentScriptRpc.func('discoverAnnotationsEmbeddedAsJSONLD');
    return await discover();
  }