Add <track> as a URL property element with itemprop reflecting src
[microdatajs:evo42-microdatajs.git] / microdata.rdf.js
1 // http://www.whatwg.org/specs/web-apps/current-work/multipage/converting-html-to-other-formats.html#rdf
/**
 * Convert the current document into a list of RDF triples.
 *
 * Reads the global `document`: its <title>, its <meta name content> elements
 * and its microdata items (via document.getItems()). Every call starts blank
 * node numbering again at _:n0.
 *
 * @returns {Array} Triple objects; each has `s`, `p`, `o` members and a
 *                  toTurtle() method.
 */
function getRDF() {
    /**
     * RDF node identified by a URI. Called without a (truthy) argument it
     * becomes a fresh blank node named _:n<counter>.
     */
    function URI(uri) {
        if (uri)
            this.uri = uri; // URI node
        else
            this.uri = '_:n'+URI.prototype.blanks++; // blank node
    }
    // Counter shared by all URI instances; used to number blank nodes
    URI.prototype.blanks = 0;
    URI.prototype.isBlank = function() {
        return this.uri.substr(0, 2) == '_:';
    };
    URI.prototype.equals = function(other) {
        return other instanceof URI && this.uri == other.uri;
    };
    /**
     * Serialize as Turtle. `prefixes` is an optional array of {name, uri}
     * objects; the first entry whose uri is a prefix of this URI is used
     * and gets its `used` flag set.
     */
    URI.prototype.toTurtle = function(prefixes) {
        // blank nodes
        if (this.isBlank())
            return this.uri;
        // prefixed notation
        for (var i=0; prefixes && i<prefixes.length; i++) {
            var p = prefixes[i];
            if (this.uri.substr(0, p.uri.length) == p.uri) {
                p.used = true;
                return p.name+':'+this.uri.substr(p.uri.length);
            }
        }
        // plain URIs
        return '<'+this.uri+'>';
    };

    /** RDF literal with an optional language tag. */
    function Literal(string, lang) {
        this.string = string;
        this.lang = lang;
    }
    Literal.prototype.toTurtle = function() {
        // Escape backslash and double quote, and spell out CR/LF so the
        // literal stays on a single line
        return '"'+this.string.replace(/([\\"])/g, '\\$1').replace(/\r/g, '\\r').replace(/\n/g, '\\n')+'"'+
            (this.lang ? ('@'+this.lang) : '');
    };

    /** A subject/predicate/object statement. */
    function Triple(s, p, o) {
        this.s = s;
        this.p = p;
        this.o = o;
    }
    Triple.prototype.toTurtle = function() {
        return this.s.toTurtle() + ' ' + this.p.toTurtle() + ' ' + this.o.toTurtle() + ' .\n';
    };

    function isAbsoluteURL(url) {
        // FIXME: not really! Only the http:// scheme is recognized.
        // Non-string input (e.g. a missing itemId) is simply not a URL.
        return typeof url == 'string' && url.substr(0, 7) == 'http://';
    }

    function getLang(elem) {
        // FIXME: the spec isn't 100% clear about how to get the language.
        // Only the element's own lang is used; ancestors are not consulted.
        return elem.lang;
    }

    // Tag names of the elements whose itemValue is a URL. The alternation
    // must be grouped: without the group, ^ applies only to the first
    // alternative and $ only to the last, so e.g. ADDRESS or TEXTAREA
    // would match as well.
    var urlPropertyElement = /^(?:A|AREA|AUDIO|EMBED|IFRAME|IMG|LINK|OBJECT|SOURCE|TRACK|VIDEO)$/;

    var triples = [];

    var title = document.getElementsByTagName('title')[0];
    if (title)
        triples.push(new Triple(new URI(document.location.href),
                                new URI('http://purl.org/dc/terms/title'),
                                new Literal(title.textContent, getLang(title))));

    // FIXME: a, area, link (elements carrying both rel and href) are not
    // converted yet

    var metas = document.getElementsByTagName('meta');
    for (var metaIndex = 0; metaIndex < metas.length; metaIndex++) {
        var meta = metas[metaIndex];
        if (meta.hasAttribute('name') && meta.hasAttribute('content')) {
            var subject = new URI(document.location.href);
            var object = new Literal(meta.content, getLang(meta));
            if (meta.name.indexOf(':') == -1)
                // Names without a colon go into the XHTML vocabulary
                triples.push(new Triple(subject,
                                        new URI('http://www.w3.org/1999/xhtml/vocab#'+encodeURIComponent(meta.name.toLowerCase())),
                                        object));
            else if (isAbsoluteURL(meta.name))
                triples.push(new Triple(subject, new URI(meta.name), object));
            // Any other name containing a colon is dropped
        }
    }

    // FIXME: blockquote, q

    /**
     * Append the triples for one microdata item (and, recursively, its
     * sub-items) to `triples`.
     *
     * @param item  element with itemId, itemType and properties members
     * @param type  item type inherited from the enclosing item, used when
     *              this item has no absolute itemType of its own
     * @returns the URI node (possibly blank) that is the item's subject
     */
    function generateItemTriples(item, type) {
        var subject = isAbsoluteURL(item.itemId) ? new URI(item.itemId) : new URI(/*blank*/);
        if (isAbsoluteURL(item.itemType)) {
            triples.push(new Triple(subject,
                                    new URI('http://www.w3.org/1999/02/22-rdf-syntax-ns#type'),
                                    new URI(item.itemType)));
            type = item.itemType;
        }
        for (var propIndex=0; propIndex<item.properties.length; propIndex++) {
            var prop = item.properties[propIndex];
            var value;
            if (prop.itemScope) {
                // FIXME: the spec doesn't pass on type to subitems like this
                value = generateItemTriples(prop, type);
            } else if (urlPropertyElement.test(prop.tagName.toUpperCase())) {
                value = new URI(prop.itemValue);
            } else {
                value = new Literal(prop.itemValue, getLang(prop));
            }
            for (var nameIndex=0; nameIndex<prop.itemProp.length; nameIndex++) {
                var name = prop.itemProp[nameIndex];
                if (isAbsoluteURL(name)) {
                    triples.push(new Triple(subject, new URI(name), value));
                } else if (name.indexOf(':') == -1 && type) {
                    // Predicate is microdata# followed by the URI-encoded
                    // string "<type>[#]:<encoded name>"
                    var predicate = type;
                    if (predicate.indexOf('#') == -1)
                        predicate += '#';
                    predicate += ':';
                    predicate += encodeURIComponent(name);
                    predicate = 'http://www.w3.org/1999/xhtml/microdata#'+encodeURIComponent(predicate);
                    triples.push(new Triple(subject, new URI(predicate), value));
                }
            }
        }
        return subject;
    }

    var items = document.getItems();
    for (var i=0; i<items.length; i++) {
        // generateItemTriples() pushes the item's own triples before the
        // linking triple below is pushed
        var t = new Triple(new URI(document.location.href),
                           new URI('http://www.w3.org/1999/xhtml/microdata#item'),
                           generateItemTriples(items[i]));
        triples.push(t);
    }

    // Extra equivalence statements emitted when the document contains at
    // least one http://n.whatwg.org/work item
    if (document.getItems('http://n.whatwg.org/work').length > 0) {
        triples.push(new Triple(new URI('http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fn.whatwg.org%2Fwork%23%3Awork'),
                                new URI('http://www.w3.org/2002/07/owl#equivalentProperty'),
                                new URI('http://www.w3.org/2002/07/owl#sameAs')));
        triples.push(new Triple(new URI('http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fn.whatwg.org%2Fwork%23%3Atitle'),
                                new URI('http://www.w3.org/2002/07/owl#equivalentProperty'),
                                new URI('http://purl.org/dc/elements/1.1/title')));
        triples.push(new Triple(new URI('http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fn.whatwg.org%2Fwork%23%3Aauthor'),
                                new URI('http://www.w3.org/2002/07/owl#equivalentProperty'),
                                new URI('http://creativecommons.org/ns#attributionName')));
        triples.push(new Triple(new URI('http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fn.whatwg.org%2Fwork%23%3Alicense'),
                                new URI('http://www.w3.org/2002/07/owl#equivalentProperty'),
                                new URI('http://www.w3.org/1999/xhtml/vocab#license')));
    }

    return triples;
}
165
// Namespace prefixes offered to the pretty Turtle serializer, keyed by
// prefix name. Entry order is significant: prefixes are tried in this
// order and the first one matching a URI wins.
var prefixMap = {
    // General-purpose vocabularies
    rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
    owl: 'http://www.w3.org/2002/07/owl#',
    xhv: 'http://www.w3.org/1999/xhtml/vocab#',
    dc: 'http://purl.org/dc/elements/1.1/',
    dct: 'http://purl.org/dc/terms/',
    cc: 'http://creativecommons.org/ns#',
    // Predicates generated from microdata item types (URI-encoded type
    // followed by an encoded ':')
    hcard: 'http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fmicroformats.org%2Fprofile%2Fhcard%23%3A',
    vevent: 'http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fmicroformats.org%2Fprofile%2Fhcalendar%23vevent%3A',
    work: 'http://www.w3.org/1999/xhtml/microdata#http%3A%2F%2Fn.whatwg.org%2Fwork%23%3A'
};
175
/**
 * Serialize the triples returned by getRDF() as Turtle.
 *
 * @param {boolean} [pretty] When omitted entirely, defaults to true. In
 *     pretty mode URIs are abbreviated with the prefixes from prefixMap,
 *     triples sharing a subject are grouped with ';', and @prefix
 *     declarations for the prefixes actually used are emitted first.
 *     Otherwise one full triple is written per line with no abbreviation.
 * @returns {string} the Turtle text
 */
function getTurtle(pretty) {
    // Only a missing argument selects the default; an explicit falsy
    // value (including undefined) selects the plain form
    if (arguments.length < 1)
        pretty = true;

    var triples = getRDF();
    var body = '';

    if (!pretty) {
        triples.forEach(function (t) {
            body += t.s.toTurtle()+' '+t.p.toTurtle()+' '+t.o.toTurtle()+' .\n';
        });
        return body;
    }

    // One mutable record per prefix; toTurtle() sets `used` on the records
    // it applies
    var prefixes = Object.keys(prefixMap).map(function (name) {
        return {'name': name, 'uri': prefixMap[name]};
    });

    // Each pass emits every triple that shares the first remaining triple's
    // subject and defers the others to the next pass
    while (triples.length) {
        var rest = [];
        var subject = null;
        var indent = '';
        triples.forEach(function (t) {
            if (subject === null) {
                var subjstr = t.s.toTurtle(prefixes)+' ';
                body += subjstr+t.p.toTurtle(prefixes)+' '+t.o.toTurtle(prefixes);
                // Align continuation lines under the predicate for short
                // subjects; fall back to a single tab for long ones
                var indentlen = subjstr.length;
                if (indentlen > 8)
                    indent = '\t';
                else
                    while (indentlen-- > 0)
                        indent += ' ';
                subject = t.s;
            } else if (subject.equals(t.s)) {
                body += ' ;\n'+indent+t.p.toTurtle(prefixes)+' '+t.o.toTurtle(prefixes);
            } else {
                rest.push(t);
            }
        });
        body += ' .\n';
        triples = rest;
    }

    // The header is built after the body because `used` is only known once
    // every node has been serialized
    var head = '';
    prefixes.forEach(function(p) {
        if (p.used)
            head += '@prefix '+p.name+': <'+p.uri+'> .\n';
    });
    return head+'\n'+body;
}