Add <track> as a URL property element with itemprop reflecting src
[microdatajs:evo42-microdatajs.git] / jquery.microdata.rdf.js
1 /* -*- mode: js2; js2-basic-offset: 2; indent-tabs-mode: nil -*- */
2
3 // a small set of prefixes used by the microdata spec.
4 // additional prefixes can be added externally, e.g.:
5 //
6 // jQuery.extend(jQuery.microdata.rdf.prefix, {
7 //   'foo': 'http://example.com/foo#'
8 // });
// Namespace object for the RDF conversion. `prefix` maps a short prefix name
// to the namespace URI it abbreviates; the Turtle serializer walks the table
// in insertion order and uses the first entry whose URI is a leading substring
// of the term being written.
jQuery.microdata.rdf = {
  prefix: {
    xhv: 'http://www.w3.org/1999/xhtml/vocab#',
    rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    owl: 'http://www.w3.org/2002/07/owl#',
    cc: 'http://creativecommons.org/ns#',
    dc: 'http://purl.org/dc/terms/',
    vcard: 'http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fmicroformats.org%2Fprofile%2Fhcard%23%3A',
    vevent: 'http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fmicroformats.org%2Fprofile%2Fhcalendar%23vevent%3A',
    work: 'http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fn.whatwg.org%2Fwork%23%3A'
  }
};
20
21 // http://www.whatwg.org/specs/web-apps/current-work/multipage/converting-html-to-other-formats.html#rdf
22 (function() {
23   function splitTokens(s) {
24     if (s && /\S/.test(s))
25       return s.replace(/^\s+|\s+$/g,'').split(/\s+/);
26     return [];
27   }
28
29   function URI(uri) {
30     if (uri)
31       this.uri = uri; // URI node
32     else
33       this.uri = '_:n'+URI.prototype.blanks++; // blank node
34   }
35   URI.prototype.isBlank = function() {
36     return this.uri.substr(0, 2) == '_:';
37   };
38   URI.prototype.equals = function(other) {
39     return other instanceof URI && this.uri == other.uri;
40   };
41   function Literal(string, lang) {
42     this.string = string;
43     this.lang = lang;
44   }
45
46   function Triple(s, p, o) {
47     this.s = s;
48     this.p = p;
49     this.o = o;
50   }
51
52   // http://www.whatwg.org/specs/web-apps/current-work/multipage/urls.html#absolute-url
53   function isAbsoluteURL(url) {
54     // FIXME: not really!
55     return url.substr(0, 7) == 'http://';
56   }
57
58   function getLang($elem) {
59     for (; $elem.get(0); $elem = $elem.parent()) {
60       if ($elem.attr('lang'))
61         return $elem.attr('lang');
62     }
63     return undefined;
64   }
65
66   // http://www.whatwg.org/specs/web-apps/current-work/multipage/converting-html-to-other-formats.html#extracting-rdf
67   function extractDocumentTriples(triples) {
68     var $title = jQuery('title').first();
69     if ($title.length == 1)
70       triples.push(new Triple(new URI(document.location.href),
71                               new URI('http://purl.org/dc/terms/title'),
72                               new Literal($title.text(), getLang($title))));
73
74     jQuery('a[rel][href],area[rel][href],link[rel][href]').each(function(i, elm) {
75       var $elm = jQuery(elm);
76       var tokens = {};
77       jQuery.each(splitTokens($elm.attr('rel')), function(i, t) {
78         t = t.toLowerCase();
79         if (tokens[t])
80           tokens[t]++;
81         else
82           tokens[t] = 1;
83       });
84       if (tokens.up && tokens.up > 1)
85         delete tokens.up;
86       if (tokens.alternate && tokens.stylesheet) {
87         delete tokens.alternate;
88         delete tokens.stylesheet;
89         tokens['ALTERNATE-STYLESHEET'] = 1;
90       }
91       for (t in tokens) {
92         var predicate;
93         if (t.indexOf(':') == -1)
94           predicate = 'http://www.w3.org/1999/xhtml/vocab#'+encodeURIComponent(t);
95         else if (isAbsoluteURL(t))
96           predicate = t;
97         else
98           continue;
99         // FIXME: resolve href
100         triples.push(new Triple(new URI(document.location.href),
101                                 new URI(predicate),
102                                 new URI(elm.href)));
103       }
104     });
105
106     jQuery('meta[name][content]').each(function(i, meta) {
107       var $meta = jQuery(meta);
108       var name = $meta.attr('name');
109       var predicate;
110       if (name.indexOf(':') == -1)
111         predicate = 'http://www.w3.org/1999/xhtml/vocab#'+encodeURIComponent(name.toLowerCase());
112       else if (isAbsoluteURL(name))
113         predicate = name;
114       else
115         return;
116       triples.push(new Triple(new URI(document.location.href),
117                               new URI(predicate),
118                               new Literal($meta.attr('content'), getLang($meta))));
119     });
120
121     jQuery('blockquote[cite],q[cite]').each(function(i, elm) {
122       // FIXME: resolve cite attribute
123       triples.push(new Triple(new URI(document.location.href),
124                               new URI('http://purl.org/dc/terms/source'),
125                               new URI(jQuery(elm).attr('cite'))));
126     });
127
128     // list of {item: ..., subject: ...} objects
129     var memory = [];
130     jQuery(document).items().each(function(i, item) {
131       var t = new Triple(new URI(document.location.href),
132                          new URI('http://www.w3.org/1999/xhtml/microdata#item'),
133                          generateItemTriples(item, triples, memory));
134       triples.push(t);
135     });
136   }
137
138   // http://www.whatwg.org/specs/web-apps/current-work/multipage/converting-html-to-other-formats.html#generate-the-triples-for-an-item
139   function generateItemTriples(item, triples, memory, fallbackType) {
140     var $item = jQuery(item);
141     var subject;
142     jQuery.each(memory, function(i, m) {
143       if (m.item == item) {
144         subject = m.subject;
145         return false;
146       }
147     });
148     if (!subject) {
149       subject = isAbsoluteURL($item.itemId()) ? new URI($item.itemId()) : new URI(/*blank*/);
150       memory.push({item: item, subject: subject});
151     }
152     var type = '';
153     if (isAbsoluteURL($item.itemType())) {
154       type = $item.itemType();
155       triples.push(new Triple(subject,
156                               new URI('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
157                               new URI(type)));
158       if (type.indexOf('#') == -1)
159         type += '#';
160       if (type.indexOf(':') < type.indexOf('#'))
161         type += ':';
162     }
163     if (!type && fallbackType)
164       type = fallbackType;
165     $item.properties().each(function(i, prop) {
166       var $prop = jQuery(prop);
167       $prop.itemProp().each(function(i, name) {
168         if (!type && !isAbsoluteURL(name))
169           return;
170         var value;
171         if ($prop.itemScope()) {
172           value = generateItemTriples(prop, triples, memory, type);
173         } else if (/^A|AREA|AUDIO|EMBED|IFRAME|IMG|LINK|OBJECT|SOURCE|TRACK|VIDEO$/.test(prop.tagName.toUpperCase())) {
174           value = new URI($prop.itemValue());
175         } else {
176           value = new Literal($prop.itemValue(), getLang($prop));
177         }
178         var predicate;
179         if (isAbsoluteURL(name)) {
180           predicate = name;
181         } else if (name.indexOf(':') == -1) {
182           predicate = 'http://www.w3.org/1999/xhtml/microdata#'+encodeURIComponent(type+name);
183         }
184         triples.push(new Triple(subject, new URI(predicate), value));
185       });
186     });
187     return subject;
188   }
189
190   function getTurtle(triples) {
191     var used = [];
192
193     function format(term) {
194       if (term instanceof Triple) {
195         return format(term.s)+' '+format(term.p)+' '+format(term.o)+' .';
196       } else if (term instanceof URI) {
197         // blank nodes
198         if (term.isBlank())
199           return term.uri;
200         // prefixed notation
201         for (name in jQuery.microdata.rdf.prefix) {
202           var uri = jQuery.microdata.rdf.prefix[name];
203           if (term.uri.substr(0, uri.length) == uri) {
204             if (jQuery.inArray(name, used) == -1)
205               used.push(name);
206             return name+':'+term.uri.substr(uri.length);
207           }
208         }
209         // plain URIs
210         return '<'+term.uri+'>';
211       } else if (term instanceof Literal) {
212         return '"'+term.string.replace(/([\\"])/g, '\\$1').replace(/\r/g, '\\r').replace(/\n/g, '\\n')+'"'+
213           (term.lang ? ('@'+term.lang) : '');
214       }
215     }
216
217     var body = '';
218     while (triples.length) {
219       var subject = triples[0].s;
220       var batch = [];
221       // extract all triples that share same subject into batch
222       triples = jQuery.grep(triples, function (t) {
223         if (subject.equals(t.s)) {
224           batch.push(t);
225           return false;
226         }
227         // leave for the next round
228         return true;
229       });
230
231       // print batch with same subject
232       if (batch.length == 1) {
233         // single-line output
234         body += format(batch[0])+'\n';
235       } else {
236         // subject on first line, predicate-objects follow indented
237         body += format(batch[0].s)+'\n';
238         jQuery.each(batch, function(i, t) {
239           body += '  '+format(t.p)+' '+format(t.o)+' '+((i+1<batch.length)?';':'.')+'\n';
240         });
241       }
242     }
243
244     var head = '';
245     jQuery.each(used, function(i, name) {
246         head += '@prefix '+name+': <'+jQuery.microdata.rdf.prefix[name]+'> .\n';
247     });
248     return head+'\n'+body;
249   }
250
251   jQuery.microdata.turtle = function(options) {
252     options = jQuery.extend({doc:false,owl:false}, options);
253
254     URI.prototype.blanks = 0;
255     var triples = [];
256     if (options.doc) {
257       extractDocumentTriples(triples);
258     } else {
259       memory = [];
260       jQuery(document).items().each(function(i, item) {
261         generateItemTriples(item, triples, memory);
262       });
263     }
264
265     if (options.owl) {
266       triples.push(new Triple(new URI('http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fn.whatwg.org%2Fwork%23%3Awork'),
267                               new URI('http://www.w3.org/2002/07/owl#equivalentProperty'),
268                               new URI('http://www.w3.org/2002/07/owl#sameAs')));
269       triples.push(new Triple(new URI('http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fn.whatwg.org%2Fwork%23%3Atitle'),
270                               new URI('http://www.w3.org/2002/07/owl#equivalentProperty'),
271                               new URI('http://purl.org/dc/terms/title')));
272       triples.push(new Triple(new URI('http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fn.whatwg.org%2Fwork%23%3Aauthor'),
273                               new URI('http://www.w3.org/2002/07/owl#equivalentProperty'),
274                               new URI('http://creativecommons.org/ns#attributionName')));
275       triples.push(new Triple(new URI('http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fn.whatwg.org%2Fwork%23%3Alicense'),
276                               new URI('http://www.w3.org/2002/07/owl#equivalentProperty'),
277                               new URI('http://www.w3.org/1999/xhtml/vocab#license')));
278     }
279
280     return getTurtle(triples);
281   };
282 })();