2 if(!defined('DOKU_INC')) die('meh.');
3 require_once DOKU_INC . 'inc/parser/lexer.php';
4 require_once DOKU_INC . 'inc/parser/handler.php';
8 * Define various types of modes used by the parser - they are used to
9 * populate the list of modes another mode accepts
// Mode categories. The mode classes in this file merge these lists to build
// the set of child modes they accept. The keys are looked up by name
// (e.g. $PARSER_MODES['substition']), so their spelling must not be changed.
12 $PARSER_MODES = array(
13 // containers are complex modes that can contain many other modes
14 // hr breaks the principle but they shouldn't be used in tables / lists
15 // so they are put here
16 'container' => array('listblock','table','quote','hr'),
18 // some modes are allowed inside the base mode only
19 'baseonly' => array('header'),
21 // modes for styling text -- footnote behaves similar to styling
22 'formatting' => array('strong', 'emphasis', 'underline', 'monospace',
23 'subscript', 'superscript', 'deleted', 'footnote'),
25 // modes where the token is simply replaced - they can not contain any
// NOTE(review): the rest of the sentence above is not visible here -- presumably "other modes"
27 'substition' => array('acronym','smiley','wordblock','entity',
28 'camelcaselink', 'internallink','media',
29 'externallink','linebreak','emaillink',
30 'windowssharelink','filelink','notoc',
31 'nocache','multiplyentity','quotes','rss'),
33 // modes which have a start and end token but inside which
34 // no other modes should be applied
35 'protected' => array('preformatted','code','file','php','html','htmlblock','phpblock'),
37 // inside this mode no wiki markup should be applied but line endings
38 // and whitespace aren't preserved
39 'disabled' => array('unformatted'),
41 // used to mark paragraph boundaries
42 'paragraphs' => array('eol')
45 //-------------------------------------------------------------------
48 * Sets up the Lexer with modes and points it to the Handler
49 * For an intro to the Lexer see: wiki:parser
56 * @var Doku_Lexer $Lexer
62 var $connected = false;
// Stores $BaseMode (by reference) under the key 'base', creates the shared
// Doku_Lexer if none exists yet, and gives the base mode a reference to it.
64 function addBaseMode(& $BaseMode) {
65 $this->modes['base'] =& $BaseMode;
// Build the Lexer only once; it is constructed with the Handler, 'base' and true
// NOTE(review): the meaning of the 2nd/3rd constructor arguments is defined by Doku_Lexer -- confirm there
66 if ( !$this->Lexer ) {
67 $this->Lexer = new Doku_Lexer($this->Handler,'base', true);
69 $this->modes['base']->Lexer =& $this->Lexer;
73 * PHP preserves order of associative elements
74 * Mode sequence is important
// Registers $Mode under $name. When no base mode has been registered yet a
// default Doku_Parser_Mode_base is added first (which also creates the Lexer).
// NOTE(review): a `new` expression is passed to addBaseMode()'s by-reference
// parameter here -- verify this is accepted by the PHP versions you target.
76 function addMode($name, & $Mode) {
77 if ( !isset($this->modes['base']) ) {
78 $this->addBaseMode(new Doku_Parser_Mode_base());
// All modes share the parser's single Lexer
80 $Mode->Lexer = & $this->Lexer;
81 $this->modes[$name] =& $Mode;
// Wires all registered modes together. For each registered mode other than
// 'base': preConnect(), then connectTo($cm) for every registered mode $cm
// whose accepts() returns true for it, then postConnect(). Sets $connected
// when finished.
84 function connectModes() {
// NOTE(review): the body of this check is not visible here -- presumably an early return so wiring happens once
86 if ( $this->connected ) {
// Outer loop: $mode is the mode being connected
90 foreach ( array_keys($this->modes) as $mode ) {
92 // Base isn't connected to anything
// NOTE(review): body not visible here -- presumably skips to the next mode
93 if ( $mode == 'base' ) {
96 $this->modes[$mode]->preConnect();
// Inner loop: $cm is a candidate container; this includes 'base' and $mode itself
98 foreach ( array_keys($this->modes) as $cm ) {
100 if ( $this->modes[$cm]->accepts($mode) ) {
101 $this->modes[$mode]->connectTo($cm);
106 $this->modes[$mode]->postConnect();
109 $this->connected = true;
// Runs the Lexer over $doc and returns the Handler's collected calls.
// The visible work only happens when a Lexer exists (one is created by
// addBaseMode()).
// NOTE(review): what is returned when no Lexer exists is not visible here.
112 function parse($doc) {
113 if ( $this->Lexer ) {
114 $this->connectModes();
115 // Normalize CRs and pad doc
// Only "\r\n" is rewritten to "\n". The document is wrapped in "\n" on both
// sides; many patterns in this file start with '\n' or look ahead for one.
116 $doc = "\n".str_replace("\r\n","\n",$doc)."\n";
117 $this->Lexer->parse($doc);
// Let the Handler finish up before its call list is handed back
118 $this->Handler->_finalize();
119 return $this->Handler->calls;
127 //-------------------------------------------------------------------
129 * This class and all the subclasses below are
130 * used to reduce the effort required to register
131 * modes with the Lexer. For performance these
132 * could all be eliminated later perhaps, or
133 * the Parser could be serialized to a file once
134 * all modes are registered
136 * @author Harry Fuecks <hfuecks@gmail.com>
// Base class for parser modes. The connect hooks do nothing here and
// accepts() answers from $allowedModes; subclasses override what they need.
138 class Doku_Parser_Mode {
141 * @var Doku_Lexer $Lexer
// Names of the modes that may occur inside this mode
145 var $allowedModes = array();
147 // returns a number used to determine in which order modes are added
// Default raises a warning naming the concrete class that did not override it
149 trigger_error('getSort() not implemented in '.get_class($this), E_USER_WARNING);
152 // Called before any calls to connectTo
153 function preConnect() {}
// Hook that receives the name of a mode this mode is being connected to
156 function connectTo($mode) {}
158 // Called after all calls to connectTo
159 function postConnect() {}
// True when $mode is listed in $allowedModes. The (array) cast tolerates a
// non-array value; in_array() is used with its default loose comparison.
161 function accepts($mode) {
162 return in_array($mode, (array) $this->allowedModes );
167 //-------------------------------------------------------------------
// Base mode. Its constructor allows the modes of these seven $PARSER_MODES
// categories: container, baseonly, paragraphs, formatting, substition,
// protected and disabled.
168 class Doku_Parser_Mode_base extends Doku_Parser_Mode {
// PHP4-style constructor (method named after the class)
170 function Doku_Parser_Mode_base() {
171 global $PARSER_MODES;
173 $this->allowedModes = array_merge (
174 $PARSER_MODES['container'],
175 $PARSER_MODES['baseonly'],
176 $PARSER_MODES['paragraphs'],
177 $PARSER_MODES['formatting'],
178 $PARSER_MODES['substition'],
179 $PARSER_MODES['protected'],
180 $PARSER_MODES['disabled']
189 //-------------------------------------------------------------------
// Footnote mode: text between "((" and "))". Accepts container, formatting,
// substition, protected and disabled modes, minus 'footnote' itself.
190 class Doku_Parser_Mode_footnote extends Doku_Parser_Mode {
192 function Doku_Parser_Mode_footnote() {
193 global $PARSER_MODES;
195 $this->allowedModes = array_merge (
196 $PARSER_MODES['container'],
197 $PARSER_MODES['formatting'],
198 $PARSER_MODES['substition'],
199 $PARSER_MODES['protected'],
200 $PARSER_MODES['disabled']
// Remove 'footnote' from the merged list so a footnote cannot contain a footnote.
// NOTE(review): assumes 'footnote' is present; array_search() returns false
// when it is not, and that result would be used as the key to unset.
203 unset($this->allowedModes[array_search('footnote', $this->allowedModes)]);
206 function connectTo($mode) {
// \x28\x28 is "((" ; the lookahead only enters when a "))" follows
207 $this->Lexer->addEntryPattern(
208 '\x28\x28(?=.*\x29\x29)',$mode,'footnote'
212 function postConnect() {
// \x29\x29 is "))"
213 $this->Lexer->addExitPattern(
214 '\x29\x29','footnote'
223 //-------------------------------------------------------------------
// Headline mode: registers a special pattern for "== title ==" style lines.
224 class Doku_Parser_Mode_header extends Doku_Parser_Mode {
226 function connectTo($mode) {
227 //we're not picky about the closing ones, two are enough
// Pattern: optional blanks, two or more '=', at least one non-newline
// character, two or more '=', optional blanks, then a newline (looked
// ahead, not consumed).
// NOTE(review): the remaining arguments of this call are not visible here.
228 $this->Lexer->addSpecialPattern(
229 '[ \t]*={2,}[^\n]+={2,}[ \t]*(?=\n)',
240 //-------------------------------------------------------------------
// Registers the literal macro ~~NOTOC~~ as a special pattern named 'notoc'.
241 class Doku_Parser_Mode_notoc extends Doku_Parser_Mode {
243 function connectTo($mode) {
244 $this->Lexer->addSpecialPattern('~~NOTOC~~',$mode,'notoc');
252 //-------------------------------------------------------------------
// Registers the literal macro ~~NOCACHE~~ as a special pattern named 'nocache'.
253 class Doku_Parser_Mode_nocache extends Doku_Parser_Mode {
255 function connectTo($mode) {
256 $this->Lexer->addSpecialPattern('~~NOCACHE~~',$mode,'nocache');
264 //-------------------------------------------------------------------
// Forced line break: two backslashes followed by a blank or the end of the line.
265 class Doku_Parser_Mode_linebreak extends Doku_Parser_Mode {
267 function connectTo($mode) {
// \x5C{2} is two backslashes; a following space/tab is consumed with the
// match, a following newline is only looked ahead at.
268 $this->Lexer->addSpecialPattern('\x5C{2}(?:[ \t]|(?=\n))',$mode,'linebreak');
276 //-------------------------------------------------------------------
// End-of-line mode: registers a newline pattern named 'eol' in the modes it
// is connected to, with special handling for list blocks and tables.
277 class Doku_Parser_Mode_eol extends Doku_Parser_Mode {
279 function connectTo($mode) {
// listblock and table each register '\n' as their own exit pattern in this file
280 $badModes = array('listblock','table');
// NOTE(review): the body of this check is not visible here -- presumably returns without adding the pattern
281 if ( in_array($mode, $badModes) ) {
284 // see FS#1652, pattern extended to swallow preceding whitespace to avoid issues with lines that only contain whitespace
285 $this->Lexer->addSpecialPattern('(?:^[ \t]*)?\n',$mode,'eol');
293 //-------------------------------------------------------------------
// Horizontal rule: a line consisting of four or more dashes.
294 class Doku_Parser_Mode_hr extends Doku_Parser_Mode {
296 function connectTo($mode) {
// Leading newline is consumed, optional blanks around the dashes are allowed,
// and the closing newline is only looked ahead at.
297 $this->Lexer->addSpecialPattern('\n[ \t]*-{4,}[ \t]*(?=\n)',$mode,'hr');
305 //-------------------------------------------------------------------
307 * This class sets the markup for bold (=strong),
308 * italic (=emphasis), underline etc.
// One class for all inline text styles. An instance is created for a single
// type name, which selects its entry/exit patterns and sort value from the
// $formatting table.
// NOTE(review): several lines of this class are not visible here, including
// the 'exit' and 'sort' entries read further down and the assignment of
// $this->type -- confirm against the full file.
310 class Doku_Parser_Mode_formatting extends Doku_Parser_Mode {
// type name => pattern settings; most entry patterns look ahead for the
// closing marker before entering the mode
313 var $formatting = array (
// "**" with a later "**"
315 'entry'=>'\*\*(?=.*\*\*)',
// "//" -- the lookahead differs from the others (see the bug numbers)
321 'entry'=>'//(?=[^\x00]*[^:])', //hack for bugs #384 #763 #1468
326 'underline'=> array (
327 'entry'=>'__(?=.*__)',
332 'monospace'=> array (
// \x27\x27 is two single quotes
333 'entry'=>'\x27\x27(?=.*\x27\x27)',
338 'subscript'=> array (
339 'entry'=>'<sub>(?=.*</sub>)',
344 'superscript'=> array (
345 'entry'=>'<sup>(?=.*</sup>)',
351 'entry'=>'<del>(?=.*</del>)',
// $type must be one of the keys of $formatting; otherwise a warning is raised
357 function Doku_Parser_Mode_formatting($type) {
358 global $PARSER_MODES;
360 if ( !array_key_exists($type, $this->formatting) ) {
361 trigger_error('Invalid formatting type '.$type, E_USER_WARNING);
366 // formatting may contain other formatting but not itself
367 $modes = $PARSER_MODES['formatting'];
368 $key = array_search($type, $modes);
// array_search() returns an int key on a hit and false on a miss
// NOTE(review): body not visible here -- presumably removes $type from $modes
369 if ( is_int($key) ) {
373 $this->allowedModes = array_merge (
375 $PARSER_MODES['substition'],
376 $PARSER_MODES['disabled']
380 function connectTo($mode) {
382 // Can't nest formatting in itself
383 if ( $mode == $this->type ) {
387 $this->Lexer->addEntryPattern(
388 $this->formatting[$this->type]['entry'],
394 function postConnect() {
396 $this->Lexer->addExitPattern(
397 $this->formatting[$this->type]['exit'],
// Sort value comes from the table entry of this instance's type
404 return $this->formatting[$this->type]['sort'];
408 //-------------------------------------------------------------------
// List mode: lines indented by two or more spaces, or one or more tabs,
// followed by '-' or '*'. Accepts formatting, substition, disabled and
// protected modes.
409 class Doku_Parser_Mode_listblock extends Doku_Parser_Mode {
411 function Doku_Parser_Mode_listblock() {
412 global $PARSER_MODES;
414 $this->allowedModes = array_merge (
415 $PARSER_MODES['formatting'],
416 $PARSER_MODES['substition'],
417 $PARSER_MODES['disabled'],
418 $PARSER_MODES['protected'] #XXX new
421 // $this->allowedModes[] = 'footnote';
424 function connectTo($mode) {
// Entry: optional trailing blanks of the previous line, the newline, the
// indent (spaces or tabs variant) and the bullet character
425 $this->Lexer->addEntryPattern('[ \t]*\n {2,}[\-\*]',$mode,'listblock');
426 $this->Lexer->addEntryPattern('[ \t]*\n\t{1,}[\-\*]',$mode,'listblock');
// Same item markers registered as patterns inside 'listblock' itself
428 $this->Lexer->addPattern('\n {2,}[\-\*]','listblock');
429 $this->Lexer->addPattern('\n\t{1,}[\-\*]','listblock');
433 function postConnect() {
// A newline is the exit pattern of the list
434 $this->Lexer->addExitPattern('\n','listblock');
442 //-------------------------------------------------------------------
// Table mode: rows start with '^' or '|' at the beginning of a line.
// Accepts formatting, substition, disabled and protected modes.
443 class Doku_Parser_Mode_table extends Doku_Parser_Mode {
445 function Doku_Parser_Mode_table() {
446 global $PARSER_MODES;
448 $this->allowedModes = array_merge (
449 $PARSER_MODES['formatting'],
450 $PARSER_MODES['substition'],
451 $PARSER_MODES['disabled'],
452 $PARSER_MODES['protected']
456 function connectTo($mode) {
// Entry: a newline directly followed by '^' or '|'
457 $this->Lexer->addEntryPattern('\n\^',$mode,'table');
458 $this->Lexer->addEntryPattern('\n\|',$mode,'table');
461 function postConnect() {
// Patterns registered inside 'table': a newline followed by '^' or '|' ...
462 $this->Lexer->addPattern('\n\^','table');
463 $this->Lexer->addPattern('\n\|','table');
// ... a ':::' marker (optional blanks around it) directly before a cell separator ...
464 $this->Lexer->addPattern('[\t ]*:::[\t ]*(?=[\|\^])','table');
// ... runs of blanks, and the cell separators themselves
465 $this->Lexer->addPattern('[\t ]+','table');
466 $this->Lexer->addPattern('\^','table');
467 $this->Lexer->addPattern('\|','table');
// A newline is the exit pattern of the table
468 $this->Lexer->addExitPattern('\n','table');
476 //-------------------------------------------------------------------
// Unformatted text in two spellings: <nowiki>...</nowiki> and %%...%%.
// The second spelling uses its own lexer mode name ('unformattedalt') which
// is mapped to the 'unformatted' handler.
477 class Doku_Parser_Mode_unformatted extends Doku_Parser_Mode {
479 function connectTo($mode) {
// Both entry patterns look ahead for their closing marker
480 $this->Lexer->addEntryPattern('<nowiki>(?=.*</nowiki>)',$mode,'unformatted');
481 $this->Lexer->addEntryPattern('%%(?=.*%%)',$mode,'unformattedalt');
484 function postConnect() {
485 $this->Lexer->addExitPattern('</nowiki>','unformatted');
486 $this->Lexer->addExitPattern('%%','unformattedalt');
487 $this->Lexer->mapHandler('unformattedalt','unformatted');
495 //-------------------------------------------------------------------
// Embedded PHP: lowercase <php>...</php> enters mode 'php', uppercase
// <PHP>...</PHP> enters mode 'phpblock'.
496 class Doku_Parser_Mode_php extends Doku_Parser_Mode {
498 function connectTo($mode) {
// Each entry pattern looks ahead for the matching closing tag
499 $this->Lexer->addEntryPattern('<php>(?=.*</php>)',$mode,'php');
500 $this->Lexer->addEntryPattern('<PHP>(?=.*</PHP>)',$mode,'phpblock');
503 function postConnect() {
504 $this->Lexer->addExitPattern('</php>','php');
505 $this->Lexer->addExitPattern('</PHP>','phpblock');
513 //-------------------------------------------------------------------
// Embedded HTML: lowercase <html>...</html> enters mode 'html', uppercase
// <HTML>...</HTML> enters mode 'htmlblock'.
514 class Doku_Parser_Mode_html extends Doku_Parser_Mode {
516 function connectTo($mode) {
// Each entry pattern looks ahead for the matching closing tag
517 $this->Lexer->addEntryPattern('<html>(?=.*</html>)',$mode,'html');
518 $this->Lexer->addEntryPattern('<HTML>(?=.*</HTML>)',$mode,'htmlblock');
521 function postConnect() {
522 $this->Lexer->addExitPattern('</html>','html');
523 $this->Lexer->addExitPattern('</HTML>','htmlblock');
531 //-------------------------------------------------------------------
// Preformatted text: lines that start with a newline followed by spaces or
// a tab, and are not list items.
532 class Doku_Parser_Mode_preformatted extends Doku_Parser_Mode {
534 function connectTo($mode) {
535 // Has hard coded awareness of lists...
// The negative lookahead rejects an indent directly followed by '*' or '-'
// NOTE(review): the number of spaces in the first pattern may have been
// collapsed in this copy of the file -- confirm the literal
536 $this->Lexer->addEntryPattern('\n (?![\*\-])',$mode,'preformatted');
537 $this->Lexer->addEntryPattern('\n\t(?![\*\-])',$mode,'preformatted');
539 // How to effect a sub pattern with the Lexer!
// The indent of following lines is registered as a pattern inside the mode
540 $this->Lexer->addPattern('\n ','preformatted');
541 $this->Lexer->addPattern('\n\t','preformatted');
545 function postConnect() {
// A newline is the exit pattern of the mode
546 $this->Lexer->addExitPattern('\n','preformatted');
554 //-------------------------------------------------------------------
// Code block: from "<code" up to "</code>".
555 class Doku_Parser_Mode_code extends Doku_Parser_Mode {
557 function connectTo($mode) {
// The entry pattern stops after "<code" (no '>') and requires a later "</code>"
// NOTE(review): presumably so that text after the tag name is left for the handler -- confirm there
558 $this->Lexer->addEntryPattern('<code(?=.*</code>)',$mode,'code');
561 function postConnect() {
562 $this->Lexer->addExitPattern('</code>','code');
570 //-------------------------------------------------------------------
// File block: from "<file" up to "</file>".
571 class Doku_Parser_Mode_file extends Doku_Parser_Mode {
573 function connectTo($mode) {
// The entry pattern stops after "<file" (no '>') and requires a later "</file>"
// NOTE(review): presumably so that text after the tag name is left for the handler -- confirm there
574 $this->Lexer->addEntryPattern('<file(?=.*</file>)',$mode,'file');
577 function postConnect() {
578 $this->Lexer->addExitPattern('</file>','file');
586 //-------------------------------------------------------------------
// Quote mode: lines that start with one or more '>' characters.
// Accepts formatting, substition, disabled and protected modes.
587 class Doku_Parser_Mode_quote extends Doku_Parser_Mode {
589 function Doku_Parser_Mode_quote() {
590 global $PARSER_MODES;
592 $this->allowedModes = array_merge (
593 $PARSER_MODES['formatting'],
594 $PARSER_MODES['substition'],
595 $PARSER_MODES['disabled'],
596 $PARSER_MODES['protected'] #XXX new
598 #$this->allowedModes[] = 'footnote';
599 #$this->allowedModes[] = 'preformatted';
600 #$this->allowedModes[] = 'unformatted';
603 function connectTo($mode) {
// Entry: a newline followed by one or more '>'
604 $this->Lexer->addEntryPattern('\n>{1,}',$mode,'quote');
607 function postConnect() {
// The same marker is registered as a pattern inside 'quote'; a newline is the exit pattern
608 $this->Lexer->addPattern('\n>{1,}','quote');
609 $this->Lexer->addExitPattern('\n','quote');
617 //-------------------------------------------------------------------
// Acronym mode: builds one alternation pattern from the configured acronyms
// and registers it as a special pattern named 'acronym'.
618 class Doku_Parser_Mode_acronym extends Doku_Parser_Mode {
620 var $acronyms = array();
// $acronyms: list of acronym strings; it is sorted with _compare() before being stored
623 function Doku_Parser_Mode_acronym($acronyms) {
624 usort($acronyms,array($this,'_compare'));
625 $this->acronyms = $acronyms;
628 function preConnect() {
629 if(!count($this->acronyms)) return;
// $bound: the ASCII characters that are neither digits nor letters
// (\x30-\x39, \x41-\x5a and \x61-\x7a are left out of the ranges)
631 $bound = '[\x00-\x2f\x3a-\x40\x5b-\x60\x7b-\x7f]';
632 $acronyms = array_map('Doku_Lexer_Escape',$this->acronyms);
// An acronym must be preceded by the start or a boundary character and
// followed by a boundary character
633 $this->pattern = '(?<=^|'.$bound.')(?:'.join('|',$acronyms).')(?='.$bound.')';
636 function connectTo($mode) {
637 if(!count($this->acronyms)) return;
639 if ( strlen($this->pattern) > 0 ) {
640 $this->Lexer->addSpecialPattern($this->pattern,$mode,'acronym');
649 * sort callback to order by string length descending
// NOTE(review): the lines computing $a_len/$b_len and the return values are not visible here
651 function _compare($a,$b) {
654 if ($a_len > $b_len) {
656 } else if ($a_len < $b_len) {
664 //-------------------------------------------------------------------
// Smiley mode: builds a pattern from the configured smileys and registers it
// as a special pattern named 'smiley'.
665 class Doku_Parser_Mode_smiley extends Doku_Parser_Mode {
667 var $smileys = array();
670 function Doku_Parser_Mode_smiley($smileys) {
671 $this->smileys = $smileys;
674 function preConnect() {
// Nothing to do without smileys or when the pattern has already been built
675 if(!count($this->smileys) || $this->pattern != '') return;
// Each smiley is escaped and must have a non-word character (or the
// start/end) on both sides.
// NOTE(review): the definition and update of $sep are not visible here.
678 foreach ( $this->smileys as $smiley ) {
679 $this->pattern .= $sep.'(?<=\W|^)'.Doku_Lexer_Escape($smiley).'(?=\W|$)';
684 function connectTo($mode) {
685 if(!count($this->smileys)) return;
687 if ( strlen($this->pattern) > 0 ) {
688 $this->Lexer->addSpecialPattern($this->pattern,$mode,'smiley');
697 //-------------------------------------------------------------------
// Word block mode: builds a pattern from the configured bad words and
// registers it as a special pattern named 'wordblock'.
698 class Doku_Parser_Mode_wordblock extends Doku_Parser_Mode {
700 var $badwords = array();
703 function Doku_Parser_Mode_wordblock($badwords) {
704 $this->badwords = $badwords;
707 function preConnect() {
// NOTE(review): body not visible here -- presumably returns when there are no words or the pattern already exists
709 if ( count($this->badwords) == 0 || $this->pattern != '') {
// Each word is escaped, matched case-insensitively ((?i) ... (?-i)) and
// must sit between word boundaries.
// NOTE(review): the definition and update of $sep are not visible here.
714 foreach ( $this->badwords as $badword ) {
715 $this->pattern .= $sep.'(?<=\b)(?i)'.Doku_Lexer_Escape($badword).'(?-i)(?=\b)';
721 function connectTo($mode) {
722 if ( strlen($this->pattern) > 0 ) {
723 $this->Lexer->addSpecialPattern($this->pattern,$mode,'wordblock');
732 //-------------------------------------------------------------------
// Entity mode: builds a pattern from the configured entity strings and
// registers it as a special pattern named 'entity'.
733 class Doku_Parser_Mode_entity extends Doku_Parser_Mode {
735 var $entities = array();
738 function Doku_Parser_Mode_entity($entities) {
739 $this->entities = $entities;
742 function preConnect() {
// Nothing to do without entities or when the pattern has already been built
743 if(!count($this->entities) || $this->pattern != '') return;
// Entities are escaped and appended without any boundary conditions.
// NOTE(review): the definition and update of $sep are not visible here.
746 foreach ( $this->entities as $entity ) {
747 $this->pattern .= $sep.Doku_Lexer_Escape($entity);
752 function connectTo($mode) {
753 if(!count($this->entities)) return;
755 if ( strlen($this->pattern) > 0 ) {
756 $this->Lexer->addSpecialPattern($this->pattern,$mode,'entity');
765 //-------------------------------------------------------------------
766 // Implements the 640x480 replacement
// Matches dimension-like text such as 640x480 as 'multiplyentity'.
767 class Doku_Parser_Mode_multiplyentity extends Doku_Parser_Mode {
769 function connectTo($mode) {
// Between word boundaries: a first number that is a single digit 1-9 or any
// run of two or more digits, then 'x' or 'X', then one or more digits.
771 $this->Lexer->addSpecialPattern(
772 '(?<=\b)(?:[1-9]|\d{2,})[xX]\d+(?=\b)',$mode,'multiplyentity'
782 //-------------------------------------------------------------------
// Typographic quotes: classifies ' and " as opening/closing quotes or
// apostrophes from the characters around them.
// NOTE(review): $conf and $punc are used below but their declarations are
// not visible here; the extent of the typography check is not visible either.
783 class Doku_Parser_Mode_quotes extends Doku_Parser_Mode {
785 function connectTo($mode) {
// Body of a character class: whitespace plus a set of separator characters
// (\x28 and \x29 are the round brackets)
788 $ws = '\s/\#~:+=&%@\-\x28\x29\]\[{}><"\''; // whitespace
// Single-quote handling is only registered for typography setting 2
791 if($conf['typography'] == 2){
// Opening: after the start or a $ws character, before a character that is neither $ws nor $punc
792 $this->Lexer->addSpecialPattern(
793 "(?<=^|[$ws])'(?=[^$ws$punc])",$mode,'singlequoteopening'
// Closing: after the start, a non-$ws character or a $punc character; before the end or a $ws/$punc character
795 $this->Lexer->addSpecialPattern(
796 "(?<=^|[^$ws]|[$punc])'(?=$|[$ws$punc])",$mode,'singlequoteclosing'
// Apostrophe: after the start or a character that is neither $ws nor $punc, before the end or such a character
798 $this->Lexer->addSpecialPattern(
799 "(?<=^|[^$ws$punc])'(?=$|[^$ws$punc])",$mode,'apostrophe'
// Double quotes: same opening rule as for single quotes ...
803 $this->Lexer->addSpecialPattern(
804 "(?<=^|[$ws])\"(?=[^$ws$punc])",$mode,'doublequoteopening'
// ... and a bare '"' pattern for the closing quote
// NOTE(review): presumably relies on the opening pattern being tried first -- confirm in the Lexer
806 $this->Lexer->addSpecialPattern(
807 "\"",$mode,'doublequoteclosing'
818 //-------------------------------------------------------------------
// CamelCase words are registered as a special pattern named 'camelcaselink'.
819 class Doku_Parser_Mode_camelcaselink extends Doku_Parser_Mode {
821 function connectTo($mode) {
// Between word boundaries: one or more uppercase letters, one or more
// lowercase letters, another uppercase letter, then any ASCII letters.
822 $this->Lexer->addSpecialPattern(
823 '\b[A-Z]+[a-z]+[A-Z][A-Za-z]*\b',$mode,'camelcaselink'
832 //-------------------------------------------------------------------
// Double-bracket links ([[...]]) are registered as 'internallink'.
833 class Doku_Parser_Mode_internallink extends Doku_Parser_Mode {
835 function connectTo($mode) {
// Two alternatives between "[[" and "]]": bracket-free text followed by one
// bracketed "[...]" part, or any text (both matched lazily).
837 $this->Lexer->addSpecialPattern("\[\[(?:(?:[^[\]]*?\[.*?\])|.*?)\]\]",$mode,'internallink');
845 //-------------------------------------------------------------------
// Double-brace markup ({{...}}) is registered as 'media'.
846 class Doku_Parser_Mode_media extends Doku_Parser_Mode {
848 function connectTo($mode) {
// Content is one or more characters other than '}'
850 $this->Lexer->addSpecialPattern("\{\{[^\}]+\}\}",$mode,'media');
858 //-------------------------------------------------------------------
// {{rss>...}} markup is registered as 'rss'.
859 class Doku_Parser_Mode_rss extends Doku_Parser_Mode {
861 function connectTo($mode) {
// After the "rss>" prefix: one or more characters other than '}'
862 $this->Lexer->addSpecialPattern("\{\{rss>[^\}]+\}\}",$mode,'rss');
870 //-------------------------------------------------------------------
// External links: one pattern per URL scheme returned by getSchemes(), plus
// two patterns for scheme-less "www." and "ftp." style addresses.
// NOTE(review): $ltrs, $host and $punc are used below but their definitions
// are not visible here.
871 class Doku_Parser_Mode_externallink extends Doku_Parser_Mode {
872 var $schemes = array();
873 var $patterns = array();
875 function preConnect() {
// Patterns are built only once
876 if(count($this->patterns)) return;
// Non-letter characters that may appear inside a URL
879 $gunk = '/\#~:.?+=&%@!\-\[\]';
// Every character that may be part of a URL
882 $any = $ltrs.$gunk.$punc;
884 $this->schemes = getSchemes();
// scheme (case-insensitive) + "://" + URL characters, matched lazily up to
// optional trailing punctuation followed by a non-URL character; the
// lookahead keeps that trailing part out of the match
885 foreach ( $this->schemes as $scheme ) {
886 $this->patterns[] = '\b(?i)'.$scheme.'(?-i)://['.$any.']+?(?=['.$punc.']*[^'.$any.'])';
// NOTE(review): in "www?" and "ftp?" the '?' makes the last letter optional,
// so "ww." and "ft." prefixes match as well -- confirm whether that is intended
889 $this->patterns[] = '\b(?i)www?(?-i)\.['.$host.']+?\.['.$host.']+?['.$any.']+?(?=['.$punc.']*[^'.$any.'])';
890 $this->patterns[] = '\b(?i)ftp?(?-i)\.['.$host.']+?\.['.$host.']+?['.$any.']+?(?=['.$punc.']*[^'.$any.'])';
893 function connectTo($mode) {
// Every prepared pattern is registered under the same name
895 foreach ( $this->patterns as $pattern ) {
896 $this->Lexer->addSpecialPattern($pattern,$mode,'externallink');
905 //-------------------------------------------------------------------
// file:// links are registered as a special pattern named 'filelink'.
// NOTE(review): $ltrs and $punc are used below but their definitions are not
// visible here.
906 class Doku_Parser_Mode_filelink extends Doku_Parser_Mode {
910 function preConnect() {
// Non-letter characters that may appear inside the link
913 $gunk = '/\#~:.?+=&%@!\-';
916 $any = $ltrs.$gunk.$punc;
// "file" (case-insensitive) + "://" + link characters, then optional
// punctuation and one non-link character.
// NOTE(review): unlike the externallink patterns in this file, the trailing
// part is not wrapped in a lookahead, so it is part of the match -- confirm
// that the handler expects this
918 $this->pattern = '\b(?i)file(?-i)://['.$any.']+?['.
919 $punc.']*[^'.$any.']';
922 function connectTo($mode) {
923 $this->Lexer->addSpecialPattern(
924 $this->pattern,$mode,'filelink');
932 //-------------------------------------------------------------------
// Windows share paths (\\host\share\...) are registered as 'windowssharelink'.
933 class Doku_Parser_Mode_windowssharelink extends Doku_Parser_Mode {
937 function preConnect() {
// Double-quoted string: every "\\" below becomes one backslash in the regex.
// Resulting regex: two literal backslashes, a host of word characters, then
// one or more segments of a literal backslash followed by word characters,
// '-' or '$'.
938 $this->pattern = "\\\\\\\\\w+?(?:\\\\[\w-$]+)+";
941 function connectTo($mode) {
942 $this->Lexer->addSpecialPattern(
943 $this->pattern,$mode,'windowssharelink');
951 //-------------------------------------------------------------------
// E-mail addresses wrapped in angle brackets are registered as 'emaillink'.
952 class Doku_Parser_Mode_emaillink extends Doku_Parser_Mode {
954 function connectTo($mode) {
955 // pattern below is defined in inc/mail.php
// The constant supplies the address part; '<' and '>' are added around it here
956 $this->Lexer->addSpecialPattern('<'.PREG_PATTERN_VALID_EMAIL.'>',$mode,'emaillink');
965 //Setup VIM: ex: et ts=4 :