3 * GeSHi - Generic Syntax Highlighter
5 * The GeSHi class for Generic Syntax Highlighting. Please refer to the
6 * documentation at http://qbnz.com/highlighter/documentation.php for more
7 * information about how to use this class.
9 * For changes, release notes, TODOs etc, see the relevant files in the docs/
12 * This file is part of GeSHi.
14 * GeSHi is free software; you can redistribute it and/or modify
15 * it under the terms of the GNU General Public License as published by
16 * the Free Software Foundation; either version 2 of the License, or
17 * (at your option) any later version.
19 * GeSHi is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU General Public License for more details.
24 * You should have received a copy of the GNU General Public License
25 * along with GeSHi; if not, write to the Free Software
26 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
30 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
31 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
32 * @license http://gnu.org/copyleft/gpl.html GNU GPL
38 // You should use these constant names in your programs instead of
39 // their values - you never know when a value may change in a future
// version.
43 /** The version of this GeSHi file */
44 define('GESHI_VERSION', '1.0.8.10');
46 // Define the root directory for the GeSHi code tree
// (skipped when the including script has already defined GESHI_ROOT)
47 if (!defined('GESHI_ROOT')) {
48 /** The root directory for GeSHi: the directory containing this file, with a trailing separator */
49 define('GESHI_ROOT', dirname(__FILE__) . DIRECTORY_SEPARATOR);
51 /** The language file directory for GeSHi: the "geshi" directory below GESHI_ROOT */
53 define('GESHI_LANG_ROOT', GESHI_ROOT . 'geshi' . DIRECTORY_SEPARATOR);
55 // Define if GeSHi should be paranoid about security
// (skipped when the including script has already defined GESHI_SECURITY_PARANOID)
56 if (!defined('GESHI_SECURITY_PARANOID')) {
57 /** Tells GeSHi to be paranoid about security settings; off by default */
58 define('GESHI_SECURITY_PARANOID', false);
61 // Line numbers - use with enable_line_numbers()
62 /** Use no line numbers when building the result */
63 define('GESHI_NO_LINE_NUMBERS', 0);
64 /** Use normal line numbers when building the result */
65 define('GESHI_NORMAL_LINE_NUMBERS', 1);
66 /** Use fancy line numbers when building the result */
67 define('GESHI_FANCY_LINE_NUMBERS', 2);
69 // Container HTML type - use with set_header_type()
70 /** Use nothing to surround the source */
71 define('GESHI_HEADER_NONE', 0);
72 /** Use a "div" to surround the source */
73 define('GESHI_HEADER_DIV', 1);
74 /** Use a "pre" to surround the source */
75 define('GESHI_HEADER_PRE', 2);
76 /** Use a "pre" to wrap lines when line numbers are enabled or to wrap the whole code. */
77 define('GESHI_HEADER_PRE_VALID', 3);
79 * Use a "table" to surround the source:
82 * <thead><tr><td colspan="2">$header</td></tr></thead>
83 * <tbody><tr><td><pre>$linenumbers</pre></td><td><pre>$code</pre></td></tr></tbody>
84 * <tfoot><tr><td colspan="2">$footer</td></tr></tfoot>
87 * this is essentially only a workaround for Firefox, see sf#1651996 or take a look at
88 * https://bugzilla.mozilla.org/show_bug.cgi?id=365805
89 * @note when line numbers are disabled this is essentially the same as GESHI_HEADER_PRE
91 define('GESHI_HEADER_PRE_TABLE', 4);
93 // Capitalisation constants
94 /** Leave keywords found as the case that they are */
95 define('GESHI_CAPS_NO_CHANGE', 0);
96 /** Uppercase keywords found */
97 define('GESHI_CAPS_UPPER', 1);
98 /** Lowercase keywords found */
99 define('GESHI_CAPS_LOWER', 2);
101 // Link style constants
102 /** Links in the source in the :link state */
103 define('GESHI_LINK', 0);
104 /** Links in the source in the :hover state */
105 define('GESHI_HOVER', 1);
106 /** Links in the source in the :active state */
107 define('GESHI_ACTIVE', 2);
108 /** Links in the source in the :visited state */
109 define('GESHI_VISITED', 3);
111 // Important string starter/finisher
112 // Note that if you change these, they should be as-is: i.e., don't
113 // write them as if they had been run through htmlentities()
114 /** The marker that starts an important part of the source */
115 define('GESHI_START_IMPORTANT', '<BEGIN GeSHi>');
116 /** The marker that ends an important part of the source */
117 define('GESHI_END_IMPORTANT', '<END GeSHi>');
122 // When strict mode applies for a language
123 /** Strict mode never applies (this is the most common) */
124 define('GESHI_NEVER', 0);
125 /** Strict mode *might* apply, and can be enabled or
126 disabled by {@link GeSHi->enable_strict_mode()} */
127 define('GESHI_MAYBE', 1);
128 /** Strict mode always applies */
129 define('GESHI_ALWAYS', 2);
131 // Advanced regexp handling constants, used in language files
// (each one is a key into the array that describes a single regexp)
132 /** The key of the regex array defining what to search for */
133 define('GESHI_SEARCH', 0);
134 /** The key of the regex array defining what bracket group in a
135 matched search to use as a replacement */
136 define('GESHI_REPLACE', 1);
137 /** The key of the regex array defining any modifiers to the regular expression */
138 define('GESHI_MODIFIERS', 2);
139 /** The key of the regex array defining what bracket group in a
140 matched search to put before the replacement */
141 define('GESHI_BEFORE', 3);
142 /** The key of the regex array defining what bracket group in a
143 matched search to put after the replacement */
144 define('GESHI_AFTER', 4);
145 /** The key of the regex array defining a custom keyword to use
146 for this regexp's html tag class */
147 define('GESHI_CLASS', 5);
149 /** Used in language files to mark comments */
150 define('GESHI_COMMENTS', 0);
/** Used to work around missing PHP features: true unless PHP is newer than 4.3.3 **/
define('GESHI_PHP_PRE_433', !(version_compare(PHP_VERSION, '4.3.3') === 1));

/** make sure we can call stripos **/
if (!function_exists('stripos')) {
    // the offset param of preg_match is not supported below PHP 4.3.3
    if (GESHI_PHP_PRE_433) {
        /**
         * Replacement for stripos() on PHP versions without an offset
         * parameter for preg_match().
         *
         * @param string The string to search in
         * @param string The string to search for (matched case-insensitively)
         * @param int    Optional position at which to start searching
         * @return int|false Position of the first match within the full
         *                   haystack, or false if there is none
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            // Number of leading bytes cut off; added back so the result
            // refers to the full haystack rather than to the tail
            $skipped = 0;
            if (!is_null($offset)) {
                $tail = substr($haystack, $offset);
                $skipped = strlen($haystack) - strlen($tail);
                $haystack = $tail;
            }
            // "i" modifier: stripos() is the case-insensitive variant
            if (preg_match('/' . preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE)) {
                return $match[0][1] + $skipped;
            }
            return false;
        }
    } else {
        /**
         * Replacement for stripos() built on preg_match() with an offset.
         *
         * @param string The string to search in
         * @param string The string to search for (matched case-insensitively)
         * @param int    Optional position at which to start searching
         * @return int|false Position of the first match, or false if there is none
         * @ignore
         */
        function stripos($haystack, $needle, $offset = null) {
            // "i" modifier: stripos() is the case-insensitive variant
            if (preg_match('/' . preg_quote($needle, '/') . '/i', $haystack, $match, PREG_OFFSET_CAPTURE, (int) $offset)) {
                return $match[0][1];
            }
            return false;
        }
    }
}
185 /** Some old PHP / PCRE versions only support up to xxx subpatterns in
186 regular expressions. Set this to false if your PCRE lib is up to date
187 @see GeSHi->optimize_regexp_list() */
189 define('GESHI_MAX_PCRE_SUBPATTERNS', 500);
190 /** It's also important not to generate regular expressions that are too long.
191 Be generous here... but keep in mind that when reaching this limit we
192 still have to close open patterns. 12k should do just fine on a 16k limit.
193 @see GeSHi->optimize_regexp_list() */
195 define('GESHI_MAX_PCRE_LENGTH', 12288);
197 //Number format specification
// Every value below is a distinct power of two (presumably so that several
// formats can be combined into one bit mask - confirm against the users).
198 /** Basic number format for integers */
199 define('GESHI_NUMBER_INT_BASIC', 1); //Default integers \d+
200 /** Enhanced number format for integers like seen in C */
201 define('GESHI_NUMBER_INT_CSTYLE', 2); //Default C-Style \d+[lL]?
202 /** Number format to highlight binary numbers with a suffix "b" */
203 define('GESHI_NUMBER_BIN_SUFFIX', 16); //[01]+[bB]
204 /** Number format to highlight binary numbers with a prefix % */
205 define('GESHI_NUMBER_BIN_PREFIX_PERCENT', 32); //%[01]+
206 /** Number format to highlight binary numbers with a prefix 0b (C) */
207 define('GESHI_NUMBER_BIN_PREFIX_0B', 64); //0b[01]+
208 /** Number format to highlight octal numbers with a leading zero */
209 define('GESHI_NUMBER_OCT_PREFIX', 256); //0[0-7]+
210 /** Number format to highlight octal numbers with a prefix 0o (logtalk) */
211 define('GESHI_NUMBER_OCT_PREFIX_0O', 512); //0o[0-7]+
212 /** Number format to highlight octal numbers with a leading @ (Used in HiSofts Devpac series). */
213 define('GESHI_NUMBER_OCT_PREFIX_AT', 1024); //@[0-7]+
214 /** Number format to highlight octal numbers with a suffix of o */
215 define('GESHI_NUMBER_OCT_SUFFIX', 2048); //[0-7]+[oO]
216 /** Number format to highlight hex numbers with a prefix 0x */
217 define('GESHI_NUMBER_HEX_PREFIX', 4096); //0x[0-9a-fA-F]+
218 /** Number format to highlight hex numbers with a prefix $ */
219 define('GESHI_NUMBER_HEX_PREFIX_DOLLAR', 8192); //$[0-9a-fA-F]+
220 /** Number format to highlight hex numbers with a suffix of h */
221 define('GESHI_NUMBER_HEX_SUFFIX', 16384); //[0-9][0-9a-fA-F]*h
222 /** Number format to highlight floating-point numbers without support for scientific notation */
223 define('GESHI_NUMBER_FLT_NONSCI', 65536); //\d+\.\d+
224 /** Number format to highlight floating-point numbers with a suffix of f, without support for scientific notation */
225 define('GESHI_NUMBER_FLT_NONSCI_F', 131072); //\d+(\.\d+)?f
226 /** Number format to highlight floating-point numbers with support for scientific notation (E) and optional leading zero */
227 define('GESHI_NUMBER_FLT_SCI_SHORT', 262144); //\.\d+e\d+
228 /** Number format to highlight floating-point numbers with support for scientific notation (E) and required leading digit */
229 define('GESHI_NUMBER_FLT_SCI_ZERO', 524288); //\d+(\.\d+)?e\d+
230 //Custom formats are passed by RX array
232 // Error detection - use these to analyse faults
// The codes are used as keys of the GeSHi::$error_messages array.
233 /** No sourcecode to highlight was specified */
236 define('GESHI_ERROR_NO_INPUT', 1);
237 /** The language specified does not exist */
238 define('GESHI_ERROR_NO_SUCH_LANG', 2);
239 /** GeSHi could not open a file for reading (generally a language file) */
240 define('GESHI_ERROR_FILE_NOT_READABLE', 3);
241 /** The header type passed to {@link GeSHi->set_header_type()} was invalid */
242 define('GESHI_ERROR_INVALID_HEADER_TYPE', 4);
243 /** The line number type passed to {@link GeSHi->enable_line_numbers()} was invalid */
244 define('GESHI_ERROR_INVALID_LINE_NUMBER_TYPE', 5);
251 * Please refer to the documentation for GeSHi 1.0.X that is available
252 * at http://qbnz.com/highlighter/documentation.php for more information
253 * about how to use this class.
256 * @author Nigel McNie <nigel@geshi.org>, Benny Baumann <BenBE@omorphia.de>
257 * @copyright (C) 2004 - 2007 Nigel McNie, (C) 2007 - 2008 Benny Baumann
264 * The source code to highlight
270 * The language to use when highlighting
276 * The data for the language used
279 var $language_data = array();
282 * The path to the language files
285 var $language_path = GESHI_LANG_ROOT;
288 * The error message associated with an error
290 * @todo check err reporting works
295 * Possible error messages
298 var $error_messages = array(
299 GESHI_ERROR_NO_SUCH_LANG => 'GeSHi could not find the language {LANGUAGE} (using path {PATH})',
300 GESHI_ERROR_FILE_NOT_READABLE => 'The file specified for load_from_file was not readable',
301 GESHI_ERROR_INVALID_HEADER_TYPE => 'The header type specified is invalid',
302 GESHI_ERROR_INVALID_LINE_NUMBER_TYPE => 'The line number type specified is invalid'
306 * Whether highlighting is strict or not
309 var $strict_mode = false;
312 * Whether to use CSS classes in output
315 var $use_classes = false;
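318 * The type of header to use. Can be one of the following
321 * - GESHI_HEADER_PRE: Source is outputted in a "pre" HTML element.
322 * - GESHI_HEADER_DIV: Source is outputted in a "div" HTML element.
323 * - GESHI_HEADER_NONE: No header is outputted.
 * - GESHI_HEADER_PRE_VALID: A "pre" wraps each line when line numbers are
 *   enabled, otherwise the whole code.
 * - GESHI_HEADER_PRE_TABLE: Source is outputted in a "table" HTML element.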
327 var $header_type = GESHI_HEADER_PRE;
330 * Array of permissions for which lexics should be highlighted
333 var $lexic_permissions = array(
334 'KEYWORDS' => array(),
335 'COMMENTS' => array('MULTI' => true),
336 'REGEXPS' => array(),
337 'ESCAPE_CHAR' => true,
347 * The time it took to parse the code
353 * The content of the header block
356 var $header_content = '';
359 * The content of the footer block
362 var $footer_content = '';
365 * The style of the header block
368 var $header_content_style = '';
371 * The style of the footer block
374 var $footer_content_style = '';
377 * Tells if a block around the highlighted source should be forced
378 * if not using line numbering
381 var $force_code_block = false;
384 * The styles for hyperlinks in the code
387 var $link_styles = array();
390 * Whether important blocks should be recognised or not
393 * @todo REMOVE THIS FUNCTIONALITY!
395 var $enable_important_blocks = false;
398 * Styles for important parts of the code
401 * @todo As above - rethink the whole idea of important blocks as it is buggy and
402 * will be hard to implement in 1.2
404 var $important_styles = 'font-weight: bold; color: red;'; // Styles for important parts of the code
407 * Whether CSS IDs should be added to the code
410 var $add_ids = false;
413 * Lines that should be highlighted extra
416 var $highlight_extra_lines = array();
419 * Styles of lines that should be highlighted extra
422 var $highlight_extra_lines_styles = array();
425 * Styles of extra-highlighted lines
428 var $highlight_extra_lines_style = 'background-color: #ffc;';
432 * If null, nl2br() will be used on the result string.
433 * Otherwise, all instances of \n will be replaced with $line_ending
436 var $line_ending = null;
439 * Number at which line numbering should start
442 var $line_numbers_start = 1;
445 * The overall style for this code block
448 var $overall_style = 'font-family:monospace;';
451 * The style for the actual code
454 var $code_style = 'font: normal normal 1em/1.2em monospace; margin:0; padding:0; background:none; vertical-align:top;';
457 * The overall class for this code block
460 var $overall_class = '';
463 * The overall ID for this code block
466 var $overall_id = '';
472 var $line_style1 = 'font-weight: normal; vertical-align:top;';
475 * Line number styles for fancy lines
478 var $line_style2 = 'font-weight: bold; vertical-align:top;';
481 * Style for line numbers when GESHI_HEADER_PRE_TABLE is chosen
484 var $table_linenumber_style = 'width:1px;text-align:right;margin:0;padding:0 2px;vertical-align:top;';
487 * Flag for how line numbers are displayed
490 var $line_numbers = GESHI_NO_LINE_NUMBERS;
493 * Flag to decide if multi line spans are allowed. Set it to false to make sure
494 * each tag is closed before and reopened after each linefeed.
497 var $allow_multiline_span = true;
500 * The "nth" value for fancy line highlighting
503 var $line_nth_row = 0;
506 * The size of tab stops
512 * Should we use language-defined tab stop widths?
515 var $use_language_tab_width = false;
518 * Default target for keyword links
521 var $link_target = '';
524 * The encoding to use for entity encoding
525 * NOTE: Used with Escape Char Sequences to fix UTF-8 handling (cf. SF#2037598)
528 var $encoding = 'utf-8';
531 * Should keywords be linked?
534 var $keyword_links = true;
537 * Currently loaded language file
541 var $loaded_language = '';
544 * Whether the caches needed for parsing are built or not
549 var $parse_cache_built = false;
552 * Work around for Suhosin Patch with disabled /e modifier
554 * Note from suhosins author in config file:
556 * The /e modifier inside <code>preg_replace()</code> allows code execution.
557 * Often it is the cause for remote code execution exploits. It is wise to
558 * deactivate this feature and test where in the application it is used.
559 * The developer using the /e modifier should be made aware that he should
560 * use <code>preg_replace_callback()</code> instead
566 var $_kw_replace_group = 0;
570 * some "callback parameters" for handle_multiline_regexps
576 var $_hmr_before = '';
577 var $_hmr_replace = '';
578 var $_hmr_after = '';
/**
 * Creates a new GeSHi object, with source and language.
 *
 * The set-up logic lives in __construct() so that it also runs on PHP
 * versions that no longer treat a method named after the class as the
 * constructor. GeSHi() below remains as the PHP 4 constructor and for
 * callers that invoke it explicitly.
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory. <b>This
 *               is deprecated!</b> I've backported the auto path
 *               detection from the 1.1.X dev branch, so now it
 *               should be automatically set correctly. If you have
 *               renamed the language directory however, you will
 *               still need to set the path using this parameter or
 *               {@link GeSHi->set_language_path()}
 */
function __construct($source = '', $language = '', $path = '') {
    // empty('0') is true, so strings are compared explicitly: a source
    // consisting of the single character "0" is valid input
    if (is_string($source) ? '' !== $source : !empty($source)) {
        $this->set_source($source);
    }
    if (!empty($language)) {
        $this->set_language($language);
    }
    $this->set_language_path($path);
}

/**
 * PHP 4 style constructor; forwards to {@link GeSHi->__construct()}.
 *
 * @param string The source code to highlight
 * @param string The language to highlight the source with
 * @param string The path to the language file directory (deprecated)
 */
function GeSHi($source = '', $language = '', $path = '') {
    $this->__construct($source, $language, $path);
}
608 * Returns an error message associated with the last GeSHi operation,
609 * or false if no error has occurred
611 * @return string|false An error message if there has been an error, else false
// NOTE(review): the definition line and parts of the body of this method are
// not visible in this excerpt; the comments below describe the visible lines only.
616 //Put some template variables for debugging here ...
// Maps each placeholder that may occur in a message template to its current value
617 $debug_tpl_vars = array(
618 '{LANGUAGE}' => $this->language,
619 '{PATH}' => $this->language_path
622 array_keys($debug_tpl_vars),
623 array_values($debug_tpl_vars),
// The message template is looked up by the current error code
624 $this->error_messages[$this->error]);
// NOTE(review): $msg is embedded in HTML without escaping - confirm that the
// language name and path are sanitised before they are stored.
626 return "<br /><strong>GeSHi Error:</strong> $msg (code {$this->error})<br />";
/**
 * Gets a human-readable language name (thanks to Simon Patterson).
 *
 * The name is read from the LANG_NAME entry of the language data. When the
 * current error is GESHI_ERROR_NO_SUCH_LANG, " (Unknown Language)" is
 * appended to it.
 *
 * @return string The name for the current language
 */
function get_language_name() {
    $name = $this->language_data['LANG_NAME'];
    if ($this->error == GESHI_ERROR_NO_SUCH_LANG) {
        $name .= ' (Unknown Language)';
    }
    return $name;
}
/**
 * Sets the source code for this object.
 *
 * Any lines previously marked for extra highlighting are forgotten.
 *
 * @param string The source code to highlight
 */
function set_source($source) {
    // Start with an empty list of extra-highlighted lines for the new source
    $this->highlight_extra_lines = array();
    $this->source = $source;
}
657 * Sets the language for this object
 *
 * The name is reduced to the characters a-z, A-Z, 0-9, "-" and "_", then
 * lowercased, and the language file is looked up as
 * <language_path><name>.php.
659 * @note since 1.0.8 this function won't reset language-settings by default anymore!
660 * if you need this set $force_reset = true
662 * @param string The name of the language to use
 * @param boolean Whether to force a reset of the language settings (see the
 *                note above); defaults to false
665 function set_language($language, $force_reset = false) {
// NOTE(review): presumably only executed when $force_reset is set - the
// guarding condition is not visible in this excerpt, confirm.
667 $this->loaded_language = false;
670 //Clean up the language name to prevent malicious code injection
671 $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
673 $language = strtolower($language);
675 //Retrieve the full filename
676 $file_name = $this->language_path . $language . '.php';
677 if ($file_name == $this->loaded_language) {
678 // this language is already loaded!
682 $this->language = $language;
// Clear the previous error and strict mode setting before loading
684 $this->error = false;
685 $this->strict_mode = GESHI_NEVER;
687 //Check if we can read the desired file
688 if (!is_readable($file_name)) {
689 $this->error = GESHI_ERROR_NO_SUCH_LANG;
693 // Load the language for parsing
// NOTE(review): presumably skipped when the file is not readable - the
// control flow after the error assignment is not visible here, confirm.
694 $this->load_language($file_name);
698 * Sets the path to the directory containing the language files. Note
699 * that this path is relative to the directory of the script that included
700 * geshi.php, NOT geshi.php itself.
 *
 * The path is checked against several security rules first; once stored it
 * always ends in "/" and the current language is set again so that the new
 * path takes effect.
702 * @param string The path to the language directory
704 * @deprecated The path to the language files should now be automatically
705 * detected, so this method should no longer be needed. The
706 * 1.1.X branch handles manual setting of the path differently
707 * so this method will disappear in 1.2.0.
709 function set_language_path($path) {
// NOTE(review): strpos() returns 0 for a path that starts with ":", which is
// falsy, so such a path does not enter this branch - confirm whether
// "false !== strpos(...)" was intended.
710 if(strpos($path,':')) {
711 //Security Fix to prevent external directories using fopen wrappers.
712 if(DIRECTORY_SEPARATOR == "\\") {
// On Windows: the path has to start with a drive letter plus ":" and must not
// contain another ":" from offset 2 onwards
713 if(!preg_match('#^[a-zA-Z]:#', $path) || false !== strpos($path, ':', 2)) {
// Any character outside the whitelist in this pattern marks the path as unsafe
720 if(preg_match('#[^/a-zA-Z0-9_\.\-\\\s:]#', $path)) {
721 //Security Fix to prevent external directories using fopen wrappers.
// Extra checks that only apply when GESHI_SECURITY_PARANOID is true
724 if(GESHI_SECURITY_PARANOID && false !== strpos($path, '/.')) {
725 //Security Fix to prevent external directories using fopen wrappers.
728 if(GESHI_SECURITY_PARANOID && false !== strpos($path, '..')) {
729 //Security Fix to prevent external directories using fopen wrappers.
// Make sure the stored path ends with a slash.
// NOTE(review): indexing the last character assumes $path is not empty, yet
// the constructor passes '' by default - presumably guarded by a check that
// is not visible in this excerpt, confirm.
733 $this->language_path = ('/' == $path[strlen($path) - 1]) ? $path : $path . '/';
734 $this->set_language($this->language); // otherwise set_language_path has no effect
739 * Get supported langs or an associative array lang=>full_name.
 *
 * The language directory is scanned for files named <lang>.php; directories
 * and any other entries are ignored.
740 * @param boolean $full_names Whether to look up the full name of each language
743 function get_supported_languages($full_names=false)
748 // we walk the lang root
// NOTE(review): no check that dir() succeeded is visible here - confirm that
// an unreadable language path cannot reach the read() call below.
749 $dir = dir($this->language_path);
752 while (false !== ($entry = $dir->read()))
754 $full_path = $this->language_path.$entry;
// Directories cannot be language files
757 if (is_dir($full_path)) {
761 // we only want lang.php files
// (a name without any dot, followed by ".php"; the name is captured)
762 if (!preg_match('/^([^.]+)\.php$/', $entry, $matches)) {
766 // Raw lang name is here
767 $langname = $matches[1];
769 // We want the fullname too?
// (strict comparison: only the boolean true enables full names)
770 if ($full_names === true)
772 if (false !== ($fullname = $this->get_language_fullname($langname)))
774 $back[$langname] = $fullname; // we go associative
779 // just store raw langname
790 * Get full_name for a lang or false.
 *
 * The full name is extracted from the text of the language file by looking
 * for its 'LANG_NAME' => '...' entry; the file is read, not executed.
791 * @param string $language short langname (html4strict for example)
794 function get_language_fullname($language)
796 //Clean up the language name to prevent malicious code injection
797 $language = preg_replace('#[^a-zA-Z0-9\-_]#', '', $language);
799 $language = strtolower($language);
801 // get fullpath-filename for a langname
802 $fullpath = $this->language_path.$language.'.php';
804 // we need to get contents :S
805 if (false === ($data = file_get_contents($fullpath))) {
// NOTE(review): the error is set to a message string here and below, whereas
// elsewhere it holds a GESHI_ERROR_* code that is used as a key of
// $error_messages - confirm this is intended.
806 $this->error = sprintf('Geshi::get_lang_fullname() Unknown Language: %s', $language);
810 // match the langname
// (captures the single-quoted value that follows 'LANG_NAME' =>)
811 if (!preg_match('/\'LANG_NAME\'\s*=>\s*\'((?:[^\']|\\\')+)\'/', $data, $matches)) {
812 $this->error = sprintf('Geshi::get_lang_fullname(%s): Regex can not detect language', $language);
816 // return fullname for langname
// (backslash escapes in the captured text are resolved first)
817 return stripcslashes($matches[1]);
821 * Sets the type of header to be used.
823 * If GESHI_HEADER_DIV is used, the code is surrounded in a "div". This
824 * means more source code but more control over tab width and line-wrapping.
825 * GESHI_HEADER_PRE means that a "pre" is used - less source, but less
826 * control. Default is GESHI_HEADER_PRE.
828 * From 1.0.7.2, you can use GESHI_HEADER_NONE to specify that no header code
829 * should be outputted.
 *
 * GESHI_HEADER_PRE_VALID and GESHI_HEADER_PRE_TABLE are accepted as well.
 * Any other value sets the error to GESHI_ERROR_INVALID_HEADER_TYPE.
831 * @param int The type of header to be used
834 function set_header_type($type) {
835 //Check if we got a valid header type
// NOTE(review): in_array() compares loosely here, so values that merely
// compare equal to one of the constants also pass - confirm this is intended.
836 if (!in_array($type, array(GESHI_HEADER_NONE, GESHI_HEADER_DIV,
837 GESHI_HEADER_PRE, GESHI_HEADER_PRE_VALID, GESHI_HEADER_PRE_TABLE))) {
838 $this->error = GESHI_ERROR_INVALID_HEADER_TYPE;
842 //Set that new header type
// NOTE(review): presumably not reached for an invalid type - the control flow
// after the error assignment is not visible in this excerpt, confirm.
843 $this->header_type = $type;
/**
 * Sets the styles for the code that will be outputted when this object is
 * parsed. The style should be a string of valid stylesheet declarations.
 *
 * @param string  The overall style for the outputted code block
 * @param boolean Whether to append the style to the current overall style
 *                (true) or to replace the current style with it (false)
 */
function set_overall_style($style, $preserve_defaults = false) {
    $this->overall_style = $preserve_defaults
        ? $this->overall_style . $style
        : $style;
}
/**
 * Sets the overall class name for this block of code, so that a stylesheet
 * can address this object's output through that class.
 *
 * @param string The class name to use for this block of code
 */
function set_overall_class($class) {
    $this->overall_class = $class;
}
/**
 * Sets the overall ID for this block of code, so that a stylesheet can
 * address this object's output through that ID.
 *
 * @param string The ID to use for this block of code
 */
function set_overall_id($id) {
    $this->overall_id = $id;
}
/**
 * Sets whether CSS classes should be used to highlight the source.
 * Classes are off by default; calling this method without an argument
 * turns them on.
 *
 * @param boolean Whether to turn classes on or not
 */
function enable_classes($flag = true) {
    $this->use_classes = (bool) $flag;
}
/**
 * Sets the style for the actual code. This should be a string containing
 * valid stylesheet declarations.
 *
 * Note: Use this method to override any style changes you made to the line
 * numbers if you are using line numbers, else the line of code will have
 * the same style as the line number! Consult the GeSHi documentation for
 * more information about this.
 *
 * @param string  The style to use for actual code
 * @param boolean Whether to append the style to the current code style
 *                (true) or to replace the current style with it (false)
 */
function set_code_style($style, $preserve_defaults = false) {
    $this->code_style = $preserve_defaults
        ? $this->code_style . $style
        : $style;
}
921 * Sets the styles for the line numbers.
 *
 * May be called as set_line_style($style1, $style2, $preserve_defaults) or
 * with a boolean in second position, which is then taken as the
 * $preserve_defaults flag.
923 * @param string The style for the line numbers that are "normal"
924 * @param string|boolean If a string, this is the style of the line
925 * numbers that are "fancy", otherwise if boolean then this
926 * defines whether the normal styles should be merged with the
927 * new normal styles or not
928 * @param boolean If set, is the flag for whether to merge the "fancy"
929 * styles with the current styles or not
932 function set_line_style($style1, $style2 = '', $preserve_defaults = false) {
933 //Check if we got 2 or three parameters
934 if (is_bool($style2)) {
935 $preserve_defaults = $style2;
// NOTE(review): $style2 still holds the boolean at this point - presumably it
// is reset on a line that is not visible in this excerpt, confirm.
939 //Actually set the new styles
// (replace both styles, or append to both when $preserve_defaults is set)
940 if (!$preserve_defaults) {
941 $this->line_style1 = $style1;
942 $this->line_style2 = $style2;
944 $this->line_style1 .= $style1;
945 $this->line_style2 .= $style2;
/**
 * Sets whether line numbers should be displayed.
 *
 * Valid values for the first parameter are:
 *
 *  - GESHI_NO_LINE_NUMBERS: Line numbers will not be displayed
 *  - GESHI_NORMAL_LINE_NUMBERS: Line numbers will be displayed
 *  - GESHI_FANCY_LINE_NUMBERS: Fancy line numbers will be displayed
 *
 * For fancy line numbers, the second parameter is used to signal which
 * lines are to be fancy. For example, if the value of this parameter is 5
 * then every 5th line will be fancy.
 *
 * Any other first parameter sets the error to
 * GESHI_ERROR_INVALID_LINE_NUMBER_TYPE; both values are stored regardless.
 *
 * @param int How line numbers should be displayed
 * @param int Defines which lines are fancy
 */
function enable_line_numbers($flag, $nth_row = 5) {
    $is_known_type = GESHI_NO_LINE_NUMBERS == $flag
        || GESHI_NORMAL_LINE_NUMBERS == $flag
        || GESHI_FANCY_LINE_NUMBERS == $flag;
    if (!$is_known_type) {
        $this->error = GESHI_ERROR_INVALID_LINE_NUMBER_TYPE;
    }
    $this->line_numbers = $flag;
    $this->line_nth_row = $nth_row;
}
/**
 * Sets whether spans and other HTML markup generated by GeSHi can span
 * over multiple lines or not. Defaults to true to reduce overhead.
 * Set it to false if you want to manipulate the output or manually display
 * the code in an ordered list.
 *
 * @param boolean Whether multiline spans are allowed or not
 */
function enable_multiline_span($flag) {
    $this->allow_multiline_span = $flag ? true : false;
}
/**
 * Returns the current setting for multiline spans.
 *
 * @see enable_multiline_span
 * @return boolean The value last stored by enable_multiline_span(), or the
 *                 default if it was never called
 */
function get_multiline_span() {
    $allowed = $this->allow_multiline_span;
    return $allowed;
}
/**
 * Sets the style for a keyword group. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority.
 *
 * Highlighting is switched on for the group if no permission has been
 * recorded for it yet.
 *
 * @param int     The key of the keyword group to change the styles of
 * @param string  The style to make the keywords
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 */
function set_keyword_group_style($key, $style, $preserve_defaults = false) {
    // Only append when a style exists already; appending to a missing key
    // would raise an undefined index notice
    if ($preserve_defaults && isset($this->language_data['STYLES']['KEYWORDS'][$key])) {
        $this->language_data['STYLES']['KEYWORDS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['KEYWORDS'][$key] = $style;
    }

    //Update the lexic permissions
    if (!isset($this->lexic_permissions['KEYWORDS'][$key])) {
        $this->lexic_permissions['KEYWORDS'][$key] = true;
    }
}
/**
 * Turns highlighting on/off for a keyword group.
 *
 * @param int     The key of the keyword group to turn on or off
 * @param boolean Whether to turn highlighting for that group on or off
 */
function set_keyword_group_highlighting($key, $flag = true) {
    $this->lexic_permissions['KEYWORDS'][$key] = (bool) $flag;
}
/**
 * Sets the styles for comment groups. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority.
 *
 * @param int     The key of the comment group to change the styles of
 * @param string  The style to make the comments
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 */
function set_comments_style($key, $style, $preserve_defaults = false) {
    // Only append when a style exists already; appending to a missing key
    // would raise an undefined index notice
    if ($preserve_defaults && isset($this->language_data['STYLES']['COMMENTS'][$key])) {
        $this->language_data['STYLES']['COMMENTS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['COMMENTS'][$key] = $style;
    }
}
/**
 * Turns highlighting on/off for comment groups.
 *
 * @param int     The key of the comment group to turn on or off
 * @param boolean Whether to turn highlighting for that group on or off
 */
function set_comments_highlighting($key, $flag = true) {
    $this->lexic_permissions['COMMENTS'][$key] = (bool) $flag;
}
/**
 * Sets the styles for escaped characters. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority.
 *
 * @param string  The style to make the escape characters
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @param int     Tells the group of escape characters for which style
 *                should be set.
 */
function set_escape_characters_style($style, $preserve_defaults = false, $group = 0) {
    // Only append when a style exists already; appending to a missing key
    // would raise an undefined index notice
    if ($preserve_defaults && isset($this->language_data['STYLES']['ESCAPE_CHAR'][$group])) {
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['ESCAPE_CHAR'][$group] = $style;
    }
}
/**
 * Turns highlighting on/off for escaped characters.
 *
 * @param boolean Whether to turn highlighting for escape characters on or off
 */
function set_escape_characters_highlighting($flag = true) {
    $this->lexic_permissions['ESCAPE_CHAR'] = (bool) $flag;
}
/**
 * Sets the styles for brackets. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority.
 *
 * This method is DEPRECATED: use set_symbols_style instead.
 * This method will be removed in 1.2.X
 *
 * @param string  The style to make the brackets
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @deprecated In favour of set_symbols_style
 */
function set_brackets_style($style, $preserve_defaults = false) {
    // Only append when a style exists already; appending to a missing key
    // would raise an undefined index notice
    if ($preserve_defaults && isset($this->language_data['STYLES']['BRACKETS'][0])) {
        $this->language_data['STYLES']['BRACKETS'][0] .= $style;
    } else {
        $this->language_data['STYLES']['BRACKETS'][0] = $style;
    }
}
/**
 * Turns highlighting on/off for brackets.
 *
 * This method is DEPRECATED: use set_symbols_highlighting instead.
 * This method will be removed in 1.2.X
 *
 * @param boolean Whether to turn highlighting for brackets on or off
 * @deprecated In favour of set_symbols_highlighting
 */
function set_brackets_highlighting($flag) {
    $this->lexic_permissions['BRACKETS'] = (bool) $flag;
}
1129 * Sets the styles for symbols. If $preserve_defaults is
1130 * true, then styles are merged with the default styles, with the
1131 * user defined styles having priority
1133 * @param string The style to make the symbols
1134 * @param boolean Whether to merge the new styles with the old or just
 * replace them
1136 * @param int Tells the group of symbols for which style should be set.
1139 function set_symbols_style($style, $preserve_defaults = false, $group = 0) {
1140 // Update the style of symbols
// (replace the style of the group, or append to it when $preserve_defaults is set)
1141 if (!$preserve_defaults) {
1142 $this->language_data['STYLES']['SYMBOLS'][$group] = $style;
1144 $this->language_data['STYLES']['SYMBOLS'][$group] .= $style;
1147 // For backward compatibility
// The deprecated bracket style is kept in step with the symbol style.
// NOTE(review): presumably limited to a particular $group by a condition that
// is not visible in this excerpt - confirm.
1149 $this->set_brackets_style ($style, $preserve_defaults);
/**
 * Turns highlighting on/off for symbols. The deprecated bracket
 * highlighting switch is set to the same value.
 *
 * @param boolean Whether to turn highlighting for symbols on or off
 */
function set_symbols_highlighting($flag) {
    $enabled = (bool) $flag;

    // Update lexic permissions for the symbols
    $this->lexic_permissions['SYMBOLS'] = $enabled;

    // For backward compatibility
    $this->set_brackets_highlighting($enabled);
}
/**
 * Sets the styles for strings. If $preserve_defaults is
 * true, then styles are merged with the default styles, with the
 * user defined styles having priority.
 *
 * @param string  The style to make the strings
 * @param boolean Whether to merge the new styles with the old or just
 *                to overwrite them
 * @param int     Tells the group of strings for which style should be set.
 */
function set_strings_style($style, $preserve_defaults = false, $group = 0) {
    // Only append when a style exists already; appending to a missing key
    // would raise an undefined index notice
    if ($preserve_defaults && isset($this->language_data['STYLES']['STRINGS'][$group])) {
        $this->language_data['STYLES']['STRINGS'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['STRINGS'][$group] = $style;
    }
}
/**
 * Switches highlighting of strings on or off.
 *
 * @param boolean $flag Whether to turn highlighting for strings on or off
 */
function set_strings_highlighting($flag) {
    $this->lexic_permissions['STRINGS'] = (bool) $flag;
}
/**
 * Sets the style for a group of strict-mode script blocks. If
 * $preserve_defaults is true the new style is appended to the style already
 * stored for the group, otherwise it replaces that style.
 *
 * @param string  $style             The style to make the script blocks
 * @param boolean $preserve_defaults Whether to merge the new styles with the
 *                                   old or just to overwrite them
 * @param int     $group             The group of script blocks for which the
 *                                   style should be set
 */
function set_script_style($style, $preserve_defaults = false, $group = 0) {
    // Only append when there is something to append to; this avoids an
    // "undefined index" notice for groups that have no style yet.
    if ($preserve_defaults && isset($this->language_data['STYLES']['SCRIPT'][$group])) {
        $this->language_data['STYLES']['SCRIPT'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['SCRIPT'][$group] = $style;
    }
}
/**
 * Sets the style for a group of numbers. If $preserve_defaults is true the
 * new style is appended to the style already stored for the group,
 * otherwise it replaces that style.
 *
 * @param string  $style             The style to make the numbers
 * @param boolean $preserve_defaults Whether to merge the new styles with the
 *                                   old or just to overwrite them
 * @param int     $group             The group of numbers for which the style
 *                                   should be set
 */
function set_numbers_style($style, $preserve_defaults = false, $group = 0) {
    // Only append when there is something to append to; this avoids an
    // "undefined index" notice for groups that have no style yet.
    if ($preserve_defaults && isset($this->language_data['STYLES']['NUMBERS'][$group])) {
        $this->language_data['STYLES']['NUMBERS'][$group] .= $style;
    } else {
        $this->language_data['STYLES']['NUMBERS'][$group] = $style;
    }
}
/**
 * Switches highlighting of numbers on or off.
 *
 * @param boolean $flag Whether to turn highlighting for numbers on or off
 */
function set_numbers_highlighting($flag) {
    $this->lexic_permissions['NUMBERS'] = (bool) $flag;
}
/**
 * Sets the style for methods. $key is a number that references the
 * appropriate "object splitter" - see the language file for the language
 * you are highlighting to get this number. If $preserve_defaults is true
 * the new style is appended to the style already stored for that key,
 * otherwise it replaces that style.
 *
 * @param int     $key               The key of the object splitter to change
 *                                   the styles of
 * @param string  $style             The style to make the methods
 * @param boolean $preserve_defaults Whether to merge the new styles with the
 *                                   old or just to overwrite them
 */
function set_methods_style($key, $style, $preserve_defaults = false) {
    // Only append when there is something to append to; this avoids an
    // "undefined index" notice for keys that have no style yet.
    if ($preserve_defaults && isset($this->language_data['STYLES']['METHODS'][$key])) {
        $this->language_data['STYLES']['METHODS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['METHODS'][$key] = $style;
    }
}
/**
 * Switches highlighting of methods on or off.
 *
 * @param boolean $flag Whether to turn highlighting for methods on or off
 */
function set_methods_highlighting($flag) {
    $this->lexic_permissions['METHODS'] = (bool) $flag;
}
/**
 * Sets the style for a regular expression group. If $preserve_defaults is
 * true the new style is appended to the style already stored for that key,
 * otherwise it replaces that style.
 *
 * @param int     $key               The key of the regular expression group
 *                                   to change the style of
 * @param string  $style             The style to make the regular expression
 *                                   matches
 * @param boolean $preserve_defaults Whether to merge the new styles with the
 *                                   old or just to overwrite them
 */
function set_regexps_style($key, $style, $preserve_defaults = false) {
    // Only append when there is something to append to; this avoids an
    // "undefined index" notice for keys that have no style yet.
    if ($preserve_defaults && isset($this->language_data['STYLES']['REGEXPS'][$key])) {
        $this->language_data['STYLES']['REGEXPS'][$key] .= $style;
    } else {
        $this->language_data['STYLES']['REGEXPS'][$key] = $style;
    }
}
/**
 * Switches highlighting of one regular expression group on or off.
 *
 * @param int     $key  The key of the regular expression group to turn on
 *                      or off
 * @param boolean $flag Whether to turn highlighting for the regular
 *                      expression group on or off
 */
function set_regexps_highlighting($key, $flag) {
    $this->lexic_permissions['REGEXPS'][$key] = (bool) $flag;
}
/**
 * Sets whether a keyword group is matched in a case sensitive manner.
 *
 * @param int     $key  The key of the keyword group to change the case
 *                      sensitivity of
 * @param boolean $case Whether to check in a case sensitive manner or not
 */
function set_case_sensitivity($key, $case) {
    $this->language_data['CASE_SENSITIVE'][$key] = (bool) $case;
}
/**
 * Sets the case that keywords should use when found. Use the constants:
 *
 *  - GESHI_CAPS_NO_CHANGE: leave keywords as-is
 *  - GESHI_CAPS_UPPER: convert all keywords to uppercase where found
 *  - GESHI_CAPS_LOWER: convert all keywords to lowercase where found
 *
 * Any other value leaves the current setting unchanged.
 *
 * @param int $case A constant specifying what to do with matched keywords
 */
function set_case_keywords($case) {
    $allowed = array(GESHI_CAPS_NO_CHANGE, GESHI_CAPS_UPPER, GESHI_CAPS_LOWER);
    if (!in_array($case, $allowed)) {
        return;
    }
    $this->language_data['CASE_KEYWORDS'] = $case;
}
/**
 * Sets how many spaces a tab is substituted for.
 *
 * The value is converted to an integer; anything smaller than 1 falls back
 * to the default width of 8.
 *
 * @param int $width The tab width
 */
function set_tab_width($width) {
    $requested = intval($width);

    // Widths below 1 do not fit the constraints: return to the default
    $this->tab_width = ($requested < 1) ? 8 : $requested;
}
/**
 * Sets whether or not to use the tab-stop width specified by the language.
 *
 * @param boolean $use Whether to use language-specific tab-stop widths
 */
function set_use_language_tab_width($use) {
    $this->use_language_tab_width = $use ? true : false;
}
/**
 * Returns the tab width to use, based on the current language and user
 * preference.
 *
 * The language's TAB_WIDTH is returned only when language-specific widths
 * are enabled and the language defines one; otherwise the user-set width
 * is returned.
 *
 * @return int Tab width
 */
function get_real_tab_width() {
    $language_has_width = isset($this->language_data['TAB_WIDTH']);
    if ($this->use_language_tab_width && $language_has_width) {
        return $this->language_data['TAB_WIDTH'];
    }
    return $this->tab_width;
}
/**
 * Enables/disables strict highlighting. Default is off, calling this
 * method without parameters will turn it on. See documentation
 * for more details on strict mode and where to use it.
 *
 * The call only has an effect when the language declares
 * STRICT_MODE_APPLIES as GESHI_MAYBE.
 *
 * @param boolean $mode Whether to enable strict mode or not
 */
function enable_strict_mode($mode = true) {
    if (GESHI_MAYBE != $this->language_data['STRICT_MODE_APPLIES']) {
        return;
    }
    if ($mode) {
        $this->strict_mode = GESHI_ALWAYS;
    } else {
        $this->strict_mode = GESHI_NEVER;
    }
}
/**
 * Disables all highlighting.
 *
 * Thin wrapper that forwards to enable_highlighting() with a false flag.
 *
 * @deprecated In favour of enable_highlighting
 */
function disable_highlighting() {
    $enable = false;
    $this->enable_highlighting($enable);
}
/**
 * Enables all highlighting.
 *
 * The optional flag parameter was added in version 1.0.7.21 and can be used
 * to enable (true) or disable (false) all highlighting. Every entry of the
 * lexic permissions (including each entry of nested per-group arrays) and
 * the important-blocks switch are set to the flag.
 *
 * @param boolean $flag A flag specifying whether to enable or disable all
 *                      highlighting
 */
function enable_highlighting($flag = true) {
    $enabled = (bool) $flag;

    foreach ($this->lexic_permissions as $type => $permission) {
        if (!is_array($permission)) {
            $this->lexic_permissions[$type] = $enabled;
            continue;
        }
        // Per-group permissions: switch every group individually
        foreach (array_keys($permission) as $group) {
            $this->lexic_permissions[$type][$group] = $enabled;
        }
    }

    // Context blocks
    $this->enable_important_blocks = $enabled;
}
/**
 * Given a file extension, this method returns either a valid geshi language
 * name, or the empty string if it couldn't be found.
 *
 * The lookup table is scanned in order and the first language that lists
 * the extension wins. The comparison is case sensitive.
 *
 * @param string $extension The extension to get a language name for
 * @param array  $lookup    A lookup array to use instead of the default one
 * @return string The language name, or '' when no language matches
 * @todo Re-think about how this method works (maybe make it private and/or
 *       make it a extension->lang lookup?)
 */
function get_language_name_from_extension( $extension, $lookup = array() ) {
    $use_default_table = !is_array($lookup) || empty($lookup);
    if ($use_default_table) {
        $lookup = array(
            '6502acme' => array( 'a', 's', 'asm', 'inc' ),
            '6502tasm' => array( 'a', 's', 'asm', 'inc' ),
            '6502kickass' => array( 'a', 's', 'asm', 'inc' ),
            '68000devpac' => array( 'a', 's', 'asm', 'inc' ),
            'abap' => array('abap'),
            'actionscript' => array('as'),
            'ada' => array('a', 'ada', 'adb', 'ads'),
            'apache' => array('conf'),
            'asm' => array('ash', 'asm', 'inc'),
            'asp' => array('asp'),
            'bash' => array('sh'),
            'bf' => array('bf'),
            'c' => array('c', 'h'),
            'c_mac' => array('c', 'h'),
            'caddcl' => array(),
            'cadlisp' => array(),
            'cdfg' => array('cdfg'),
            'cobol' => array('cbl'),
            'cpp' => array('cpp', 'hpp', 'C', 'H', 'CPP', 'HPP'),
            'csharp' => array('cs'),
            'css' => array('css'),
            'delphi' => array('dpk', 'dpr', 'pp', 'pas'),
            'diff' => array('diff', 'patch'),
            'dos' => array('bat', 'cmd'),
            'gdb' => array('kcrash', 'crash', 'bt'),
            'gettext' => array('po', 'pot'),
            'gml' => array('gml'),
            'gnuplot' => array('plt'),
            'groovy' => array('groovy'),
            'haskell' => array('hs'),
            'html4strict' => array('html', 'htm'),
            'ini' => array('ini', 'desktop'),
            'java' => array('java'),
            'javascript' => array('js'),
            'klonec' => array('kl1'),
            'klonecpp' => array('klx'),
            'latex' => array('tex'),
            'lisp' => array('lisp'),
            'lua' => array('lua'),
            'matlab' => array('m'),
            'mysql' => array('sql'),
            'oracle8' => array(),
            'oracle10' => array(),
            'pascal' => array('pas'),
            'perl' => array('pl', 'pm'),
            'php' => array('php', 'php5', 'phtml', 'phps'),
            'povray' => array('pov'),
            'providex' => array('pvc', 'pvx'),
            'prolog' => array('pl'),
            'python' => array('py'),
            'qbasic' => array('bi'),
            'reg' => array('reg'),
            'ruby' => array('rb'),
            'sas' => array('sas'),
            'scala' => array('scala'),
            'scheme' => array('scm'),
            'scilab' => array('sci'),
            'smalltalk' => array('st'),
            'smarty' => array(),
            'tcl' => array('tcl'),
            'vb' => array('bas'),
            'visualfoxpro' => array(),
            'whitespace' => array('ws'),
            'xml' => array('xml', 'svg', 'xrc'),
            'z80' => array('z80', 'asm', 'inc')
        );
    }

    // First language whose extension list contains the extension wins
    foreach ($lookup as $lang => $extensions) {
        if (in_array($extension, $extensions)) {
            return $lang;
        }
    }

    return '';
}
/**
 * Given a file name, this method loads its contents in, and attempts
 * to set the language automatically. An optional lookup table can be
 * passed for looking up the language name. If not specified a default
 * table is used.
 *
 * The language table is in the form
 * <pre>array(
 *   'lang_name' => array('extension', 'extension', ...),
 *   'lang_name' ...
 * );</pre>
 *
 * When the file cannot be read, $this->error is set to
 * GESHI_ERROR_FILE_NOT_READABLE and neither source nor language is changed.
 *
 * @param string $file_name The filename to load the source from
 * @param array  $lookup    A lookup array to use instead of the default one
 * @todo Complete rethink of this and above method
 */
function load_from_file($file_name, $lookup = array()) {
    // is_readable() also succeeds for things file_get_contents() cannot
    // read (e.g. directories), so the read result itself is checked too.
    $contents = is_readable($file_name) ? file_get_contents($file_name) : false;
    if ($contents === false) {
        $this->error = GESHI_ERROR_FILE_NOT_READABLE;
        return;
    }

    $this->set_source($contents);

    // strrchr() returns false when the name contains no dot; treat that
    // (and a trailing dot) as "no extension" instead of passing false on.
    $dot_and_extension = strrchr($file_name, '.');
    $extension = ($dot_and_extension === false)
        ? ''
        : (string) substr($dot_and_extension, 1);

    $this->set_language($this->get_language_name_from_extension($extension, $lookup));
}
/**
 * Adds a keyword to a keyword group for highlighting.
 *
 * A word that is already part of the group is not added again. When the
 * parse cache has been built, the cached regexp list of the group is
 * updated as well.
 *
 * @param int    $key  The key of the keyword group to add the keyword to
 * @param string $word The word to add to the keyword group
 */
function add_keyword($key, $word) {
    // A group that does not exist yet starts out empty, so in_array() is
    // never handed a non-array.
    if (!isset($this->language_data['KEYWORDS'][$key])) {
        $this->language_data['KEYWORDS'][$key] = array();
    }

    if (in_array($word, $this->language_data['KEYWORDS'][$key])) {
        return;
    }
    $this->language_data['KEYWORDS'][$key][] = $word;

    if (!$this->parse_cache_built) {
        return;
    }

    if (empty($this->language_data['CACHED_KEYWORD_LISTS'][$key])) {
        // No cached list for this group (build_parse_cache() skips groups
        // whose highlighting is disabled). Appending here would create a
        // malformed "|word" pattern at index -1, so compile the group.
        $this->optimize_keyword_group($key);
    } else {
        //NEW in 1.0.8 don't recompile the whole optimized regexp, simply append it
        $subkey = count($this->language_data['CACHED_KEYWORD_LISTS'][$key]) - 1;
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$subkey] .= '|' . preg_quote($word, '/');
    }
}
/**
 * Removes a keyword from a keyword group.
 *
 * @param int    $key       The key of the keyword group to remove the
 *                          keyword from
 * @param string $word      The word to remove from the keyword group
 * @param bool   $recompile Whether to automatically recompile the optimized
 *                          regexp list or not.
 *        Note: if you set this to false and @see GeSHi->parse_code() was
 *        already called once for the current language, you have to manually
 *        call @see GeSHi->optimize_keyword_group() or the removed keyword
 *        will stay in cache and still be highlighted! On the other hand it
 *        might be too expensive to recompile the regexp list for every
 *        removal if you want to remove a lot of keywords.
 */
function remove_keyword($key, $word, $recompile = true) {
    $position = array_search($word, $this->language_data['KEYWORDS'][$key]);
    if ($position === false) {
        // Word is not part of the group: nothing to do
        return;
    }

    unset($this->language_data['KEYWORDS'][$key][$position]);

    //NEW in 1.0.8, optionally recompile keyword group
    if ($this->parse_cache_built && $recompile) {
        $this->optimize_keyword_group($key);
    }
}
/**
 * Creates a new keyword group.
 *
 * The group is enabled for highlighting right away. Nothing is created
 * when the word list is empty.
 *
 * @param int     $key            The key of the keyword group to create
 * @param string  $styles         The styles for the keyword group
 * @param boolean $case_sensitive Whether the keyword group is case
 *                                sensitive or not
 * @param array   $words          The words to use for the keyword group
 */
function add_keyword_group($key, $styles, $case_sensitive = true, $words = array()) {
    $word_list = (array) $words;
    if (empty($word_list)) {
        // empty word lists mess up highlighting
        return false;
    }

    //Add the new keyword group internally
    $this->language_data['KEYWORDS'][$key] = $word_list;
    $this->lexic_permissions['KEYWORDS'][$key] = true;
    $this->language_data['CASE_SENSITIVE'][$key] = $case_sensitive;
    $this->language_data['STYLES']['KEYWORDS'][$key] = $styles;

    //NEW in 1.0.8, cache keyword regexp
    if (!$this->parse_cache_built) {
        return;
    }
    $this->optimize_keyword_group($key);
}
/**
 * Removes a keyword group together with its permission, case-sensitivity
 * setting, style and cached regexp list.
 *
 * @param int $key The key of the keyword group to remove
 */
function remove_keyword_group ($key) {
    //Remove the keyword group internally
    unset(
        $this->language_data['KEYWORDS'][$key],
        $this->lexic_permissions['KEYWORDS'][$key],
        $this->language_data['CASE_SENSITIVE'][$key],
        $this->language_data['STYLES']['KEYWORDS'][$key]
    );

    //NEW in 1.0.8: drop the cached regexp list as well
    unset($this->language_data['CACHED_KEYWORD_LISTS'][$key]);
}
/**
 * Compiles the optimized regexp list for a keyword group and stores it in
 * the keyword list cache.
 *
 * When SPACE_AS_WHITESPACE is enabled in the language's PARSER_CONTROL for
 * keywords (globally, or for this group, the group setting taking
 * precedence), every space in the compiled patterns is replaced by "\s+".
 *
 * @param int $key The key of the keyword group to compile & optimize
 */
function optimize_keyword_group($key) {
    $this->language_data['CACHED_KEYWORD_LISTS'][$key] =
        $this->optimize_regexp_list($this->language_data['KEYWORDS'][$key]);

    // isset() copes with missing parent keys, so no nested checks are needed
    $space_as_whitespace = false;
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS']['SPACE_AS_WHITESPACE'];
    }
    // A group-specific setting overrides the global one
    if (isset($this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'])) {
        $space_as_whitespace = $this->language_data['PARSER_CONTROL']['KEYWORDS'][$key]['SPACE_AS_WHITESPACE'];
    }

    if (!$space_as_whitespace) {
        return;
    }

    foreach ($this->language_data['CACHED_KEYWORD_LISTS'][$key] as $rxk => $rxv) {
        $this->language_data['CACHED_KEYWORD_LISTS'][$key][$rxk] =
            str_replace(" ", "\\s+", $rxv);
    }
}
/**
 * Stores the content to use for the header block.
 *
 * @param string $content The content of the header block
 */
function set_header_content($content) {
    $this->header_content = $content;
}
/**
 * Stores the content to use for the footer block.
 *
 * @param string $content The content of the footer block
 */
function set_footer_content($content) {
    $this->footer_content = $content;
}
/**
 * Stores the style to apply to the header content.
 *
 * @param string $style The style for the header content
 */
function set_header_content_style($style) {
    $this->header_content_style = $style;
}
/**
 * Stores the style to apply to the footer content.
 *
 * @param string $style The style for the footer content
 */
function set_footer_content_style($style) {
    $this->footer_content_style = $style;
}
/**
 * Sets whether to force a surrounding block around the highlighted code
 * or not.
 *
 * @param boolean $flag Tells whether to enable or disable this feature
 */
function enable_inner_code_block($flag) {
    $this->force_code_block = $flag ? true : false;
}
/**
 * Sets the base URL to be used for the keywords of one group.
 *
 * @param int    $group The key of the keyword group to set the URL for
 * @param string $url   The URL to set for the group. If {FNAME} is in
 *                      the url somewhere, it is replaced by the keyword
 *                      that the URL is being made for
 */
function set_url_for_keyword_group($group, $url) {
    $this->language_data['URLS'][$group] = $url;
}
/**
 * Sets the styles for links in code, per link state.
 *
 * @param int    $type   A constant that specifies what state the style is
 *                       being set for - e.g. :hover or :visited
 * @param string $styles The styles to use for that state
 */
function set_link_styles($type, $styles) {
    $this->link_styles[$type] = $styles;
}
/**
 * Sets the target for links in code.
 *
 * The value is stored as a ready-made ' target="..."' attribute string; an
 * empty target stores an empty string instead. The target is inserted
 * as-is, without escaping.
 *
 * @param string $target The target for links in the code, e.g. _blank
 */
function set_link_target($target) {
    $this->link_target = $target ? ' target="' . $target . '"' : '';
}
/**
 * Stores the styles used for important parts of the code.
 *
 * @param string $styles The styles to use on important parts of the code
 */
function set_important_styles($styles) {
    $this->important_styles = $styles;
}
/**
 * Sets whether context-important blocks are highlighted.
 *
 * @param boolean $flag Tells whether to enable or disable highlighting of
 *                      important blocks
 * @todo REMOVE THIS SHIZ FROM GESHI!
 */
function enable_important_blocks($flag) {
    $this->enable_important_blocks = (bool) $flag;
}
/**
 * Sets whether CSS IDs should be added to each line.
 *
 * @param boolean $flag If true, IDs will be added to each line.
 */
function enable_ids($flag = true) {
    $this->add_ids = (bool) $flag;
}
/**
 * Specifies which lines to highlight extra.
 *
 * The extra style parameter was added in 1.0.7.21.
 *
 * @param mixed  $lines An array of line numbers to highlight, or just a line
 *                      number on its own.
 * @param string $style A string specifying the style to use for this line.
 *                      If null is specified, the default style is used.
 *                      If false is specified, the line will be removed from
 *                      special highlighting
 * @todo Some data replication here that could be cut down on
 */
function highlight_lines_extra($lines, $style = null) {
    if (is_array($lines)) {
        //Split up the job using single lines at a time
        foreach ($lines as $single_line) {
            $this->highlight_lines_extra($single_line, $style);
        }
        return;
    }

    $line_number = intval($lines);

    if ($style === false) {
        //Remove this line from special highlighting altogether
        unset($this->highlight_extra_lines[$line_number]);
        unset($this->highlight_extra_lines_styles[$line_number]);
        return;
    }

    //Mark the line as being highlighted specially
    $this->highlight_extra_lines[$line_number] = $line_number;

    //Decide on which style to use
    if ($style === null) {
        //Default style: make sure no line-specific style is left over
        unset($this->highlight_extra_lines_styles[$line_number]);
    } else {
        $this->highlight_extra_lines_styles[$line_number] = $style;
    }
}
/**
 * Stores the default style for extra-highlighted lines.
 *
 * @param string $styles The style for extra-highlighted lines
 */
function set_highlight_lines_extra_style($styles) {
    $this->highlight_extra_lines_style = $styles;
}
/**
 * Sets the line-ending. The value is converted to a string.
 *
 * @param string $line_ending The new line-ending
 */
function set_line_ending($line_ending) {
    $this->line_ending = strval($line_ending);
}
/**
 * Sets what number line numbers should start at. Should
 * be a positive integer, and will be converted to one.
 *
 * <b>Warning:</b> Using this method will add the "start"
 * attribute to the <ol> that is used for line numbering.
 * This is <b>not</b> valid XHTML strict, so if that's what you
 * care about then don't use this method. Firefox is getting
 * support for the CSS method of doing this in 1.1 and Opera
 * has support for the CSS method, but (of course) IE doesn't
 * so it's not worth doing it the CSS way yet.
 *
 * @param int $number The number to start line numbers at
 */
function start_line_numbers_at($number) {
    // Integer conversion first, then drop the sign
    $start = (int) $number;
    $this->line_numbers_start = abs($start);
}
/**
 * Sets the encoding of the source, for international support.
 *
 * The encoding name is stored lower-cased. An empty value leaves the
 * current encoding untouched.
 *
 * @param string $encoding The encoding to use for the source
 */
function set_encoding($encoding) {
    if (!$encoding) {
        return;
    }
    $this->encoding = strtolower($encoding);
}
/**
 * Turns linking of keywords on or off.
 *
 * @param boolean $enable If true, links will be added to keywords
 */
function enable_keyword_links($enable = true) {
    $this->keyword_links = $enable ? true : false;
}
/**
 * Setup caches needed for styling. This is automatically called in
 * parse_code() and get_stylesheet() when appropriate. This function helps
 * stylesheet generators as they rely on some style information being
 * preprocessed.
 *
 * Only does work when number highlighting is permitted. It fills
 * $this->language_data['NUMBERS_CACHE'] and, for bitfield number settings,
 * moves number styles keyed by flag value (1 << n) to the bit index n.
 */
function build_style_cache() {
    //Build the style cache needed to highlight numbers appropriate
    if (!$this->lexic_permissions['NUMBERS']) {
        return;
    }

    //First check what way highlighting information for numbers are given
    if (!isset($this->language_data['NUMBERS'])) {
        $this->language_data['NUMBERS'] = 0;
    }

    if (is_array($this->language_data['NUMBERS'])) {
        //An array is taken over as the cache unchanged
        $this->language_data['NUMBERS_CACHE'] = $this->language_data['NUMBERS'];
        return;
    }

    $this->language_data['NUMBERS_CACHE'] = array();
    if (!$this->language_data['NUMBERS']) {
        //Nothing configured: fall back to basic integers and plain floats
        $this->language_data['NUMBERS'] =
            GESHI_NUMBER_INT_BASIC |
            GESHI_NUMBER_FLT_NONSCI;
    }

    //Walk the bitfield from the lowest bit upwards
    $remaining = $this->language_data['NUMBERS'];
    for ($bit = 0; $remaining > 0; ++$bit, $remaining >>= 1) {
        $flag_value = 1 << $bit;

        //Rearrange style indices if required ...
        if (isset($this->language_data['STYLES']['NUMBERS'][$flag_value])) {
            $this->language_data['STYLES']['NUMBERS'][$bit] =
                $this->language_data['STYLES']['NUMBERS'][$flag_value];
            unset($this->language_data['STYLES']['NUMBERS'][$flag_value]);
        }

        //Check if this bit is set for highlighting
        if (!($remaining & 1)) {
            continue;
        }

        //So this bit is set ...
        //Check if it belongs to group 0 or the actual stylegroup
        if (isset($this->language_data['STYLES']['NUMBERS'][$bit])) {
            $this->language_data['NUMBERS_CACHE'][$bit] = $flag_value;
        } else {
            if (!isset($this->language_data['NUMBERS_CACHE'][0])) {
                $this->language_data['NUMBERS_CACHE'][0] = 0;
            }
            $this->language_data['NUMBERS_CACHE'][0] |= $flag_value;
        }
    }
}
1949 * Setup caches needed for parsing. This is automatically called in parse_code() when appropriate.
1950 * This function makes stylesheet generators much faster as they do not need these caches.
// Builds the caches consumed while parsing: the symbol lookup table and
// search pattern, the optimized keyword lists, the bracket match/replace
// arrays and the number-matching regular expressions. Finishes by setting
// $this->parse_cache_built to true.
1955 function build_parse_cache() {
1956 // cache symbol regexp
1957 //As this is a costy operation, we avoid doing it for multiple groups ...
1958 //Instead we perform it for all symbols at once.
1960 //For this to work, we need to reorganize the data arrays.
1961 if ($this->lexic_permissions['SYMBOLS'] && !empty($this->language_data['SYMBOLS'])) {
1962 $this->language_data['MULTIPLE_SYMBOL_GROUPS'] = count($this->language_data['STYLES']['SYMBOLS']) > 1;
// SYMBOL_DATA maps each escaped symbol to the key of the group it was first
// seen in; symbols listed outside a nested group array are mapped to 0.
1964 $this->language_data['SYMBOL_DATA'] = array();
1965 $symbol_preg_multi = array(); // multi char symbols
1966 $symbol_preg_single = array(); // single char symbols
1967 foreach ($this->language_data['SYMBOLS'] as $key => $symbols) {
1968 if (is_array($symbols)) {
1969 foreach ($symbols as $sym) {
1970 $sym = $this->hsc($sym);
1971 if (!isset($this->language_data['SYMBOL_DATA'][$sym])) {
1972 $this->language_data['SYMBOL_DATA'][$sym] = $key;
1973 if (isset($sym[1])) { // multiple chars
1974 $symbol_preg_multi[] = preg_quote($sym, '/');
1975 } else { // single char
1977 // don't trigger range out of order error
1978 $symbol_preg_single[] = '\-';
1980 $symbol_preg_single[] = preg_quote($sym, '/');
1986 $symbols = $this->hsc($symbols);
1987 if (!isset($this->language_data['SYMBOL_DATA'][$symbols])) {
1988 $this->language_data['SYMBOL_DATA'][$symbols] = 0;
1989 if (isset($symbols[1])) { // multiple chars
1990 $symbol_preg_multi[] = preg_quote($symbols, '/');
1991 } else if ($symbols == '-') {
1992 // don't trigger range out of order error
1993 $symbol_preg_single[] = '\-';
1994 } else { // single char
1995 $symbol_preg_single[] = preg_quote($symbols, '/');
2001 //Now we have an array with each possible symbol as the key and the style as the actual data.
2002 //This way we can set the correct style just the moment we highlight ...
2004 //Now we need to rewrite our array to get a search string that
// Multi-char symbols become an alternation, single-char symbols one
// character class; both parts are joined with "|" into SYMBOL_SEARCH.
// NOTE(review): the reverse sort presumably places a longer symbol before a
// symbol that is its prefix, so the alternation tries it first - confirm.
2005 $symbol_preg = array();
2006 if (!empty($symbol_preg_multi)) {
2007 rsort($symbol_preg_multi);
2008 $symbol_preg[] = implode('|', $symbol_preg_multi);
2010 if (!empty($symbol_preg_single)) {
2011 rsort($symbol_preg_single);
2012 $symbol_preg[] = '[' . implode('', $symbol_preg_single) . ']';
2014 $this->language_data['SYMBOL_SEARCH'] = implode("|", $symbol_preg);
2017 // cache optimized regexp for keyword matching
// The keyword cache is reset first; a group is compiled unless its
// permission entry exists and is false.
2019 $this->language_data['CACHED_KEYWORD_LISTS'] = array();
2020 foreach (array_keys($this->language_data['KEYWORDS']) as $key) {
2021 if (!isset($this->lexic_permissions['KEYWORDS'][$key]) ||
2022 $this->lexic_permissions['KEYWORDS'][$key]) {
2023 $this->optimize_keyword_group($key);
// Bracket cache: inline styles are used only when classes are disabled and
// a style for bracket group 0 exists; otherwise the "br0" class is emitted.
2028 if ($this->lexic_permissions['BRACKETS']) {
2029 $this->language_data['CACHE_BRACKET_MATCH'] = array('[', ']', '(', ')', '{', '}');
2030 if (!$this->use_classes && isset($this->language_data['STYLES']['BRACKETS'][0])) {
2031 $this->language_data['CACHE_BRACKET_REPLACE'] = array(
2032 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">[|>',
2033 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">]|>',
2034 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">(|>',
2035 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">)|>',
2036 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">{|>',
2037 '<| style="' . $this->language_data['STYLES']['BRACKETS'][0] . '">}|>',
2041 $this->language_data['CACHE_BRACKET_REPLACE'] = array(
2042 '<| class="br0">[|>',
2043 '<| class="br0">]|>',
2044 '<| class="br0">(|>',
2045 '<| class="br0">)|>',
2046 '<| class="br0">{|>',
2047 '<| class="br0">}|>',
2052 //Build the parse cache needed to highlight numbers appropriate
2053 if($this->lexic_permissions['NUMBERS']) {
2054 //Check if the style rearrangements have been processed ...
2055 //This also does some preprocessing to check which style groups are useable ...
2056 if(!isset($this->language_data['NUMBERS_CACHE'])) {
2057 $this->build_style_cache();
2060 //Number format specification
2061 //All this formats are matched case-insensitively!
// Maps each GESHI_NUMBER_* flag to the pattern fragment that matches that
// number notation; declared static, so it is initialised only once.
2062 static $numbers_format = array(
2063 GESHI_NUMBER_INT_BASIC =>
2064 '(?:(?<![0-9a-z_\.%$@])|(?<=\.\.))(?<![\d\.]e[+\-])([1-9]\d*?|0)(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2065 GESHI_NUMBER_INT_CSTYLE =>
2066 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])([1-9]\d*?|0)l(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2067 GESHI_NUMBER_BIN_SUFFIX =>
2068 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[01]+?[bB](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2069 GESHI_NUMBER_BIN_PREFIX_PERCENT =>
2070 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])%[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2071 GESHI_NUMBER_BIN_PREFIX_0B =>
2072 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0b[01]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2073 GESHI_NUMBER_OCT_PREFIX =>
2074 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2075 GESHI_NUMBER_OCT_PREFIX_0O =>
2076 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])0o[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2077 GESHI_NUMBER_OCT_PREFIX_AT =>
2078 '(?<![0-9a-z_\.%])(?<![\d\.]e[+\-])\@[0-7]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2079 GESHI_NUMBER_OCT_SUFFIX =>
2080 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])[0-7]+?o(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2081 GESHI_NUMBER_HEX_PREFIX =>
2082 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])0x[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2083 GESHI_NUMBER_HEX_PREFIX_DOLLAR =>
2084 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\$[0-9a-fA-F]+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2085 GESHI_NUMBER_HEX_SUFFIX =>
2086 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d[0-9a-fA-F]*?[hH](?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2087 GESHI_NUMBER_FLT_NONSCI =>
2088 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\d+?\.\d+?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2089 GESHI_NUMBER_FLT_NONSCI_F =>
2090 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)f(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2091 GESHI_NUMBER_FLT_SCI_SHORT =>
2092 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])\.\d+?(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)',
2093 GESHI_NUMBER_FLT_SCI_ZERO =>
2094 '(?<![0-9a-z_\.])(?<![\d\.]e[+\-])(?:\d+?(?:\.\d*?)?|\.\d+?)(?:e[+\-]?\d+?)?(?![0-9a-z]|\.(?:[eE][+\-]?)?\d)'
2097 //At this step we have an associative array with flag groups for a
2098 //specific style or an string denoting a regexp given its index.
// One compiled pattern is stored in NUMBERS_RXCACHE per NUMBERS_CACHE key.
// NOTE(review): a string entry is presumably used directly as the pattern,
// and the loop below presumably collects only the formats whose flag bit is
// set in $rxdata - the lines doing so are not visible here; confirm.
2099 $this->language_data['NUMBERS_RXCACHE'] = array();
2100 foreach($this->language_data['NUMBERS_CACHE'] as $key => $rxdata) {
2101 if(is_string($rxdata)) {
2104 //This is a bitfield of number flags to highlight:
2105 //Build an array, implode them together and make this the actual RX
2107 for($i = 1; $i <= $rxdata; $i<<=1) {
2109 $rxuse[] = $numbers_format[$i];
2112 $regexp = implode("|", $rxuse);
2115 $this->language_data['NUMBERS_RXCACHE'][$key] =
2116 "/(?<!<\|\/)(?<!<\|!REG3XP)(?<!<\|\/NUM!)(?<!\d\/>)($regexp)(?!(?:<DOT>|(?>[^\<]))+>)(?![^<]*>)(?!\|>)(?!\/>)/i"; //
// Default pre-check pattern (any digit) when the language supplies none.
2119 if(!isset($this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'])) {
2120 $this->language_data['PARSER_CONTROL']['NUMBERS']['PRECHECK_RX'] = '#\d#';
2124 $this->parse_cache_built = true;
2128 * Returns the code in $this->source, highlighted and surrounded by the
2131 * This should only be called ONCE, cos it's SLOW! If you want to highlight
2132 * the same source multiple times, you're better off doing a whole lot of
2133 * str_replaces to replace the <span>s
2137 function parse_code () {
2139 $start_time = microtime();
2141 // Replace all newlines to a common form.
2142 $code = str_replace("\r\n", "\n", $this->source);
2143 $code = str_replace("\r", "\n", $code);
2145 // Firstly, if there is an error, we won't highlight
2147 //Escape the source for output
2148 $result = $this->hsc($this->source);
2150 //This fix is related to SF#1923020, but has to be applied regardless of
2151 //actually highlighting symbols.
2152 $result = str_replace(array('<SEMI>', '<PIPE>'), array(';', '|'), $result);
2154 // Timing is irrelevant
2155 $this->set_time($start_time, $start_time);
2156 $this->finalise($result);
2160 // make sure the parse cache is up2date
2161 if (!$this->parse_cache_built) {
2162 $this->build_parse_cache();
2165 // Initialise various stuff
2166 $length = strlen($code);
2167 $COMMENT_MATCHED = false;
2168 $stuff_to_parse = '';
2171 // "Important" selections are handled like multiline comments
2172 // @todo GET RID OF THIS SHIZ
2173 if ($this->enable_important_blocks) {
2174 $this->language_data['COMMENT_MULTI'][GESHI_START_IMPORTANT] = GESHI_END_IMPORTANT;
2177 if ($this->strict_mode) {
2178 // Break the source into bits. Each bit will be a portion of the code
2179 // within script delimiters - for example, HTML between < and >
2183 $next_match_pointer = null;
2184 // we use a copy to unset delimiters on demand (when they are not found)
2185 $delim_copy = $this->language_data['SCRIPT_DELIMITERS'];
2187 while ($i < $length) {
2188 $next_match_pos = $length + 1; // never true
2189 foreach ($delim_copy as $dk => $delimiters) {
2190 if(is_array($delimiters)) {
2191 foreach ($delimiters as $open => $close) {
2192 // make sure the cache is setup properly
2193 if (!isset($matches[$dk][$open])) {
2194 $matches[$dk][$open] = array(
2198 'open' => $open, // needed for grouping of adjacent code blocks (see below)
2199 'open_strlen' => strlen($open),
2202 'close_strlen' => strlen($close),
2205 // Get the next little bit for this opening string
2206 if ($matches[$dk][$open]['next_match'] < $i) {
2207 // only find the next pos if it was not already cached
2208 $open_pos = strpos($code, $open, $i);
2209 if ($open_pos === false) {
2210 // no match for this delimiter ever
2211 unset($delim_copy[$dk][$open]);
2214 $matches[$dk][$open]['next_match'] = $open_pos;
2216 if ($matches[$dk][$open]['next_match'] < $next_match_pos) {
2217 //So we got a new match, update the close_pos
2218 $matches[$dk][$open]['close_pos'] =
2219 strpos($code, $close, $matches[$dk][$open]['next_match']+1);
2221 $next_match_pointer =& $matches[$dk][$open];
2222 $next_match_pos = $matches[$dk][$open]['next_match'];
2226 //So we should match a RegExp as a strict block ...
2228 * The value in $delimiters is expected to be a RegExp
2229 * containing exactly 2 matching groups:
2230 * - Group 1 is the opener
2231 * - Group 2 is the closer
2233 if(!GESHI_PHP_PRE_433 && //Needs proper rewrite to work with PHP >=4.3.0; 4.3.3 is guaranteed to work.
2234 preg_match($delimiters, $code, $matches_rx, PREG_OFFSET_CAPTURE, $i)) {
2235 //We got a match ...
2236 if(isset($matches_rx['start']) && isset($matches_rx['end']))
2238 $matches[$dk] = array(
2239 'next_match' => $matches_rx['start'][1],
2242 'close_strlen' => strlen($matches_rx['end'][0]),
2243 'close_pos' => $matches_rx['end'][1],
2246 $matches[$dk] = array(
2247 'next_match' => $matches_rx[1][1],
2250 'close_strlen' => strlen($matches_rx[2][0]),
2251 'close_pos' => $matches_rx[2][1],
2255 // no match for this delimiter ever
2256 unset($delim_copy[$dk]);
2260 if ($matches[$dk]['next_match'] <= $next_match_pos) {
2261 $next_match_pointer =& $matches[$dk];
2262 $next_match_pos = $matches[$dk]['next_match'];
2267 // non-highlightable text
2269 1 => substr($code, $i, $next_match_pos - $i)
2273 if ($next_match_pos > $length) {
2274 // out of bounds means no next match was found
2278 // highlightable code
2279 $parts[$k][0] = $next_match_pointer['dk'];
2281 //Only combine for non-rx script blocks
2282 if(is_array($delim_copy[$next_match_pointer['dk']])) {
2283 // group adjacent script blocks, e.g. <foobar><asdf> should be one block, not three!
2284 $i = $next_match_pos + $next_match_pointer['open_strlen'];
2286 $close_pos = strpos($code, $next_match_pointer['close'], $i);
2287 if ($close_pos == false) {
2290 $i = $close_pos + $next_match_pointer['close_strlen'];
2291 if ($i == $length) {
2294 if ($code[$i] == $next_match_pointer['open'][0] && ($next_match_pointer['open_strlen'] == 1 ||
2295 substr($code, $i, $next_match_pointer['open_strlen']) == $next_match_pointer['open'])) {
2296 // merge adjacent but make sure we don't merge things like <tag><!-- comment -->
2297 foreach ($matches as $submatches) {
2298 foreach ($submatches as $match) {
2299 if ($match['next_match'] == $i) {
2300 // a different block already matches here!
2310 $close_pos = $next_match_pointer['close_pos'] + $next_match_pointer['close_strlen'];
2314 if ($close_pos === false) {
2315 // no closing delimiter found!
2316 $parts[$k][1] = substr($code, $next_match_pos);
2320 $parts[$k][1] = substr($code, $next_match_pos, $i - $next_match_pos);
2324 unset($delim_copy, $next_match_pointer, $next_match_pos, $matches);
2327 if ($num_parts == 1 && $this->strict_mode == GESHI_MAYBE) {
2328 // when we have only one part, we don't have anything to highlight at all.
2329 // if we have a "maybe" strict language, this should be handled as highlightable code
2344 // Not strict mode - simply dump the source into
2345 // the array at index 1 (the first highlightable block)
2359 //Unset variables we won't need any longer
2362 //Preload some repeatedly used values regarding hardquotes ...
2363 $hq = isset($this->language_data['HARDQUOTE']) ? $this->language_data['HARDQUOTE'][0] : false;
2364 $hq_strlen = strlen($hq);
2366 //Preload if line numbers are to be generated afterwards
2367 //Added a check if line breaks should be forced even without line numbers, fixes SF#1727398
2368 $check_linenumbers = $this->line_numbers != GESHI_NO_LINE_NUMBERS ||
2369 !empty($this->highlight_extra_lines) || !$this->allow_multiline_span;
2371 //preload the escape char for faster checking ...
2372 $escaped_escape_char = $this->hsc($this->language_data['ESCAPE_CHAR']);
2374 // this is used for single-line comments
2375 $sc_disallowed_before = "";
2376 $sc_disallowed_after = "";
2378 if (isset($this->language_data['PARSER_CONTROL'])) {
2379 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS'])) {
2380 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'])) {
2381 $sc_disallowed_before = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_BEFORE'];
2383 if (isset($this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'])) {
2384 $sc_disallowed_after = $this->language_data['PARSER_CONTROL']['COMMENTS']['DISALLOWED_AFTER'];
2389 //Fix for SF#1932083: Multichar Quotemarks unsupported
2390 $is_string_starter = array();
2391 if ($this->lexic_permissions['STRINGS']) {
2392 foreach ($this->language_data['QUOTEMARKS'] as $quotemark) {
2393 if (!isset($is_string_starter[$quotemark[0]])) {
2394 $is_string_starter[$quotemark[0]] = (string)$quotemark;
2395 } else if (is_string($is_string_starter[$quotemark[0]])) {
2396 $is_string_starter[$quotemark[0]] = array(
2397 $is_string_starter[$quotemark[0]],
2400 $is_string_starter[$quotemark[0]][] = $quotemark;
2405 // Now we go through each part. We know that even-indexed parts are
2406 // code that shouldn't be highlighted, and odd-indexed parts should
2408 for ($key = 0; $key < $num_parts; ++$key) {
2411 // If this block should be highlighted...
2413 // Else not a block to highlight
2414 $endresult .= $this->hsc($parts[$key][1]);
2415 unset($parts[$key]);
2420 $part = $parts[$key][1];
2422 $highlight_part = true;
2423 if ($this->strict_mode && !is_null($parts[$key][0])) {
2424 // get the class key for this block of code
2425 $script_key = $parts[$key][0];
2426 $highlight_part = $this->language_data['HIGHLIGHT_STRICT_BLOCK'][$script_key];
2427 if ($this->language_data['STYLES']['SCRIPT'][$script_key] != '' &&
2428 $this->lexic_permissions['SCRIPT']) {
2429 // Add a span element around the source to
2430 // highlight the overall source block
2431 if (!$this->use_classes &&
2432 $this->language_data['STYLES']['SCRIPT'][$script_key] != '') {
2433 $attributes = ' style="' . $this->language_data['STYLES']['SCRIPT'][$script_key] . '"';
2435 $attributes = ' class="sc' . $script_key . '"';
2437 $result .= "<span$attributes>";
2438 $STRICTATTRS = $attributes;
2442 if ($highlight_part) {
2443 // Now, highlight the code in this block. This code
2444 // is really the engine of GeSHi (along with the method
2445 // parse_non_string_part).
2447 // cache comment regexps incrementally
2448 $next_comment_regexp_key = '';
2449 $next_comment_regexp_pos = -1;
2450 $next_comment_multi_pos = -1;
2451 $next_comment_single_pos = -1;
2452 $comment_regexp_cache_per_key = array();
2453 $comment_multi_cache_per_key = array();
2454 $comment_single_cache_per_key = array();
2455 $next_open_comment_multi = '';
2456 $next_comment_single_key = '';
2457 $escape_regexp_cache_per_key = array();
2458 $next_escape_regexp_key = '';
2459 $next_escape_regexp_pos = -1;
2461 $length = strlen($part);
2462 for ($i = 0; $i < $length; ++$i) {
2463 // Get the next char
2467 // update regexp comment cache if needed
2468 if (isset($this->language_data['COMMENT_REGEXP']) && $next_comment_regexp_pos < $i) {
2469 $next_comment_regexp_pos = $length;
2470 foreach ($this->language_data['COMMENT_REGEXP'] as $comment_key => $regexp) {
2472 if (isset($comment_regexp_cache_per_key[$comment_key]) &&
2473 ($comment_regexp_cache_per_key[$comment_key]['pos'] >= $i ||
2474 $comment_regexp_cache_per_key[$comment_key]['pos'] === false)) {
2475 // we have already matched something
2476 if ($comment_regexp_cache_per_key[$comment_key]['pos'] === false) {
2477 // this comment is never matched
2480 $match_i = $comment_regexp_cache_per_key[$comment_key]['pos'];
2482 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2483 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $i), $match, PREG_OFFSET_CAPTURE)) ||
2484 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $i))
2486 $match_i = $match[0][1];
2487 if (GESHI_PHP_PRE_433) {
2491 $comment_regexp_cache_per_key[$comment_key] = array(
2492 'key' => $comment_key,
2493 'length' => strlen($match[0][0]),
2497 $comment_regexp_cache_per_key[$comment_key]['pos'] = false;
2501 if ($match_i !== false && $match_i < $next_comment_regexp_pos) {
2502 $next_comment_regexp_pos = $match_i;
2503 $next_comment_regexp_key = $comment_key;
2504 if ($match_i === $i) {
2511 $string_started = false;
2513 if (isset($is_string_starter[$char])) {
2514 // Possibly the start of a new string ...
2516 //Check which starter it was ...
2517 //Fix for SF#1932083: Multichar Quotemarks unsupported
2518 if (is_array($is_string_starter[$char])) {
2520 foreach ($is_string_starter[$char] as $testchar) {
2521 if ($testchar === substr($part, $i, strlen($testchar)) &&
2522 strlen($testchar) > strlen($char_new)) {
2523 $char_new = $testchar;
2524 $string_started = true;
2527 if ($string_started) {
2531 $testchar = $is_string_starter[$char];
2532 if ($testchar === substr($part, $i, strlen($testchar))) {
2534 $string_started = true;
2537 $char_len = strlen($char);
2540 if ($string_started && ($i != $next_comment_regexp_pos)) {
2541 // Hand out the correct style information for this string
2542 $string_key = array_search($char, $this->language_data['QUOTEMARKS']);
2543 if (!isset($this->language_data['STYLES']['STRINGS'][$string_key]) ||
2544 !isset($this->language_data['STYLES']['ESCAPE_CHAR'][$string_key])) {
2548 // parse the stuff before this
2549 $result .= $this->parse_non_string_part($stuff_to_parse);
2550 $stuff_to_parse = '';
2552 if (!$this->use_classes) {
2553 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS'][$string_key] . '"';
2555 $string_attributes = ' class="st'.$string_key.'"';
2558 // now handle the string
2559 $string = "<span$string_attributes>" . GeSHi::hsc($char);
2560 $start = $i + $char_len;
2561 $string_open = true;
2563 if(empty($this->language_data['ESCAPE_REGEXP'])) {
2564 $next_escape_regexp_pos = $length;
2568 //Get the regular ending pos ...
2569 $close_pos = strpos($part, $char, $start);
2570 if(false === $close_pos) {
2571 $close_pos = $length;
2574 if($this->lexic_permissions['ESCAPE_CHAR']) {
2575 // update escape regexp cache if needed
2576 if (isset($this->language_data['ESCAPE_REGEXP']) && $next_escape_regexp_pos < $start) {
2577 $next_escape_regexp_pos = $length;
2578 foreach ($this->language_data['ESCAPE_REGEXP'] as $escape_key => $regexp) {
2580 if (isset($escape_regexp_cache_per_key[$escape_key]) &&
2581 ($escape_regexp_cache_per_key[$escape_key]['pos'] >= $start ||
2582 $escape_regexp_cache_per_key[$escape_key]['pos'] === false)) {
2583 // we have already matched something
2584 if ($escape_regexp_cache_per_key[$escape_key]['pos'] === false) {
2585 // this escape regexp is never matched
2588 $match_i = $escape_regexp_cache_per_key[$escape_key]['pos'];
2590 //This is to allow use of the offset parameter in preg_match and stay as compatible with older PHP versions as possible
2591 (GESHI_PHP_PRE_433 && preg_match($regexp, substr($part, $start), $match, PREG_OFFSET_CAPTURE)) ||
2592 (!GESHI_PHP_PRE_433 && preg_match($regexp, $part, $match, PREG_OFFSET_CAPTURE, $start))
2594 $match_i = $match[0][1];
2595 if (GESHI_PHP_PRE_433) {
2599 $escape_regexp_cache_per_key[$escape_key] = array(
2600 'key' => $escape_key,
2601 'length' => strlen($match[0][0]),
2605 $escape_regexp_cache_per_key[$escape_key]['pos'] = false;
2609 if ($match_i !== false && $match_i < $next_escape_regexp_pos) {
2610 $next_escape_regexp_pos = $match_i;
2611 $next_escape_regexp_key = $escape_key;
2612 if ($match_i === $start) {
2619 //Find the next simple escape position
2620 if('' != $this->language_data['ESCAPE_CHAR']) {
2621 $simple_escape = strpos($part, $this->language_data['ESCAPE_CHAR'], $start);
2622 if(false === $simple_escape) {
2623 $simple_escape = $length;
2626 $simple_escape = $length;
2629 $next_escape_regexp_pos = $length;
2630 $simple_escape = $length;
2633 if($simple_escape < $next_escape_regexp_pos &&
2634 $simple_escape < $length &&
2635 $simple_escape < $close_pos) {
2636 //The next escape sequence is a simple one ...
2637 $es_pos = $simple_escape;
2639 //Add the stuff not in the string yet ...
2640 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2642 //Get the style for this escaped char ...
2643 if (!$this->use_classes) {
2644 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][0] . '"';
2646 $escape_char_attributes = ' class="es0"';
2649 //Add the style for the escape char ...
2650 $string .= "<span$escape_char_attributes>" .
2651 GeSHi::hsc($this->language_data['ESCAPE_CHAR']);
2653 //Get the byte AFTER the ESCAPE_CHAR we just found
2654 $es_char = $part[$es_pos + 1];
2655 if ($es_char == "\n") {
2656 // don't put a newline around newlines
2657 $string .= "</span>\n";
2658 $start = $es_pos + 2;
2659 } else if (ord($es_char) >= 128) {
2660 //This is a non-ASCII char (UTF8 or single byte)
2661 //This code tries to work around SF#2037598 ...
2662 if(function_exists('mb_substr')) {
2663 $es_char_m = mb_substr(substr($part, $es_pos+1, 16), 0, 1, $this->encoding);
2664 $string .= $es_char_m . '</span>';
2665 } else if (!GESHI_PHP_PRE_433 && 'utf-8' == $this->encoding) {
2666 if(preg_match("/[\xC2-\xDF][\x80-\xBF]".
2667 "|\xE0[\xA0-\xBF][\x80-\xBF]".
2668 "|[\xE1-\xEC\xEE\xEF][\x80-\xBF]{2}".
2669 "|\xED[\x80-\x9F][\x80-\xBF]".
2670 "|\xF0[\x90-\xBF][\x80-\xBF]{2}".
2671 "|[\xF1-\xF3][\x80-\xBF]{3}".
2672 "|\xF4[\x80-\x8F][\x80-\xBF]{2}/s",
2673 $part, $es_char_m, null, $es_pos + 1)) {
2674 $es_char_m = $es_char_m[0];
2676 $es_char_m = $es_char;
2678 $string .= $this->hsc($es_char_m) . '</span>';
2680 $es_char_m = $this->hsc($es_char);
2682 $start = $es_pos + strlen($es_char_m) + 1;
2684 $string .= $this->hsc($es_char) . '</span>';
2685 $start = $es_pos + 2;
2687 } else if ($next_escape_regexp_pos < $length &&
2688 $next_escape_regexp_pos < $close_pos) {
2689 $es_pos = $next_escape_regexp_pos;
2690 //Add the stuff not in the string yet ...
2691 $string .= $this->hsc(substr($part, $start, $es_pos - $start));
2693 //Get the key and length of this match ...
2694 $escape = $escape_regexp_cache_per_key[$next_escape_regexp_key];
2695 $escape_str = substr($part, $es_pos, $escape['length']);
2696 $escape_key = $escape['key'];
2698 //Get the style for this escaped char ...
2699 if (!$this->use_classes) {
2700 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR'][$escape_key] . '"';
2702 $escape_char_attributes = ' class="es' . $escape_key . '"';
2705 //Add the style for the escape char ...
2706 $string .= "<span$escape_char_attributes>" .
2707 $this->hsc($escape_str) . '</span>';
2709 $start = $es_pos + $escape['length'];
2711 //Copy the remainder of the string ...
2712 $string .= $this->hsc(substr($part, $start, $close_pos - $start + $char_len)) . '</span>';
2713 $start = $close_pos + $char_len;
2714 $string_open = false;
2716 } while($string_open);
2718 if ($check_linenumbers) {
2719 // Are line numbers used? If so, we should end the string before
2720 // the newline and begin it again (so that when <li>s are put in, the source
2721 // remains XHTML compliant)
2722 // note to self: This opens up possibility of config files specifying
2723 // that languages can/cannot have multiline strings???
2724 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2731 } else if ($this->lexic_permissions['STRINGS'] && $hq && $hq[0] == $char &&
2732 substr($part, $i, $hq_strlen) == $hq && ($i != $next_comment_regexp_pos)) {
2733 // The start of a hard quoted string
2734 if (!$this->use_classes) {
2735 $string_attributes = ' style="' . $this->language_data['STYLES']['STRINGS']['HARD'] . '"';
2736 $escape_char_attributes = ' style="' . $this->language_data['STYLES']['ESCAPE_CHAR']['HARD'] . '"';
2738 $string_attributes = ' class="st_h"';
2739 $escape_char_attributes = ' class="es_h"';
2741 // parse the stuff before this
2742 $result .= $this->parse_non_string_part($stuff_to_parse);
2743 $stuff_to_parse = '';
2745 // now handle the string
2748 // look for closing quote
2749 $start = $i + $hq_strlen;
2750 while ($close_pos = strpos($part, $this->language_data['HARDQUOTE'][1], $start)) {
2751 $start = $close_pos + 1;
2752 if ($this->lexic_permissions['ESCAPE_CHAR'] && $part[$close_pos - 1] == $this->language_data['HARDCHAR'] &&
2753 (($i + $hq_strlen) != ($close_pos))) { //Support empty string for HQ escapes if Starter = Escape
2754 // make sure this quote is not escaped
2755 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2756 if (substr($part, $close_pos - 1, strlen($hardescape)) == $hardescape) {
2757 // check whether this quote is escaped or if it is something like '\\'
2758 $escape_char_pos = $close_pos - 1;
2759 while ($escape_char_pos > 0
2760 && $part[$escape_char_pos - 1] == $this->language_data['HARDCHAR']) {
2763 if (($close_pos - $escape_char_pos) & 1) {
2764 // uneven number of escape chars => this quote is escaped
2771 // found closing quote
2775 //Found the closing delimiter?
2777 // span till the end of this $part when no closing delimiter is found
2778 $close_pos = $length;
2781 //Get the actual string
2782 $string = substr($part, $i, $close_pos - $i + 1);
2785 // handle escape chars and encode html chars
2786 // (special because when we have escape chars within our string they may not be escaped)
2787 if ($this->lexic_permissions['ESCAPE_CHAR'] && $this->language_data['ESCAPE_CHAR']) {
2790 while ($es_pos = strpos($string, $this->language_data['ESCAPE_CHAR'], $start)) {
2791 // html-escape the stuff before
2792 $new_string .= $this->hsc(substr($string, $start, $es_pos - $start));
2793 // check if this is a hard escape
2794 foreach ($this->language_data['HARDESCAPE'] as $hardescape) {
2795 if (substr($string, $es_pos, strlen($hardescape)) == $hardescape) {
2796 // indeed, this is a hardescape
2797 $new_string .= "<span$escape_char_attributes>" .
2798 $this->hsc($hardescape) . '</span>';
2799 $start = $es_pos + strlen($hardescape);
2803 // not a hard escape, but a normal escape
2804 // they come in pairs of two
2806 while (isset($string[$es_pos + $c]) && isset($string[$es_pos + $c + 1])
2807 && $string[$es_pos + $c] == $this->language_data['ESCAPE_CHAR']
2808 && $string[$es_pos + $c + 1] == $this->language_data['ESCAPE_CHAR']) {
2812 $new_string .= "<span$escape_char_attributes>" .
2813 str_repeat($escaped_escape_char, $c) .
2815 $start = $es_pos + $c;
2817 // this is just a single lonely escape char...
2818 $new_string .= $escaped_escape_char;
2819 $start = $es_pos + 1;
2822 $string = $new_string . $this->hsc(substr($string, $start));
2824 $string = $this->hsc($string);
2827 if ($check_linenumbers) {
2828 // Are line numbers used? If so, we should end the string before
2829 // the newline and begin it again (so that when <li>s are put in, the source
2830 // remains XHTML compliant)
2831 // note to self: This opens up possibility of config files specifying
2832 // that languages can/cannot have multiline strings???
2833 $string = str_replace("\n", "</span>\n<span$string_attributes>", $string);
2836 $result .= "<span$string_attributes>" . $string . '</span>';
2840 //Have a look for regexp comments
2841 if ($i == $next_comment_regexp_pos) {
2842 $COMMENT_MATCHED = true;
2843 $comment = $comment_regexp_cache_per_key[$next_comment_regexp_key];
2844 $test_str = $this->hsc(substr($part, $i, $comment['length']));
2846 //@todo If remove important do remove here
2847 if ($this->lexic_permissions['COMMENTS']['MULTI']) {
2848 if (!$this->use_classes) {
2849 $attributes = ' style="' . $this->language_data['STYLES']['COMMENTS'][$comment['key']] . '"';
2851 $attributes = ' class="co' . $comment['key'] . '"';
2854 $test_str = "<span$attributes>" . $test_str . "</span>";
2856 // Short-cut through all the multiline code
2857 if ($check_linenumbers) {
2858 // strreplace to put close span and open span around multiline newlines
2859 $test_str = str_replace(
2860 "\n", "</span>\n<span$attributes>",
2861 str_replace("\n ", "\n ", $test_str)
2866 $i += $comment['length'] - 1;
2869 $result .= $this->parse_non_string_part($stuff_to_parse);
2870 $stuff_to_parse = '';
2873 // If we haven't matched a regexp comment, try multi-line comments
2874 if (!$COMMENT_MATCHED) {
2875 // Is this a multiline comment?
2876 if (!empty($this->language_data['COMMENT_MULTI']) && $next_comment_multi_pos < $i) {
2877 $next_comment_multi_pos = $length;
2878 foreach ($this->language_data['COMMENT_MULTI'] as $open => $close) {
2880 if (isset($comment_multi_cache_per_key[$open]) &&
2881 ($comment_multi_cache_per_key[$open] >= $i ||
2882 $comment_multi_cache_per_key[$open] === false)) {
2883 // we have already matched something
2884 if ($comment_multi_cache_per_key[$open] === false) {
2885 // this comment is never matched
2888 $match_i = $comment_multi_cache_per_key[$open];
2889 } else if (($match_i = stripos($part, $open, $i)) !== false) {
2890 $comment_multi_cache_per_key[$open] = $match_i;
2892 $comment_multi_cache_per_key[$open] = false;
2895 if ($match_i !== false && $match_i < $next_comment_multi_pos) {
2896 $next_comment_multi_pos = $match_i;
2897 $next_open_comment_multi = $open;
2898 if ($match_i === $i) {