disapproval of revision '18b0a096bd6f21011f16c7b08c1aac016d2c0fff'
[pidgin-git:pidgin-git.git] / libpurple / util.c
1 /*
 * @file util.c Utility Functions
3  * @ingroup core
4  */
5
6 /* Purple is the legal property of its developers, whose names are too numerous
7  * to list here.  Please refer to the COPYRIGHT file distributed with this
8  * source distribution.
9  *
10  * This program is free software; you can redistribute it and/or modify
11  * it under the terms of the GNU General Public License as published by
12  * the Free Software Foundation; either version 2 of the License, or
13  * (at your option) any later version.
14  *
15  * This program is distributed in the hope that it will be useful,
16  * but WITHOUT ANY WARRANTY; without even the implied warranty of
17  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18  * GNU General Public License for more details.
19  *
20  * You should have received a copy of the GNU General Public License
21  * along with this program; if not, write to the Free Software
22  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02111-1301  USA
23  */
24 #include "internal.h"
25
26 #include "cipher.h"
27 #include "conversation.h"
28 #include "core.h"
29 #include "debug.h"
30 #include "notify.h"
31 #include "prpl.h"
32 #include "prefs.h"
33 #include "util.h"
34
35 struct _PurpleUtilFetchUrlData
36 {
37         PurpleUtilFetchUrlCallback callback;
38         void *user_data;
39
40         struct
41         {
42                 char *user;
43                 char *passwd;
44                 char *address;
45                 int port;
46                 char *page;
47
48         } website;
49
50         char *url;
51         int num_times_redirected;
52         gboolean full;
53         char *user_agent;
54         gboolean http11;
55         char *request;
56         gsize request_written;
57         gboolean include_headers;
58
59         gboolean is_ssl;
60         PurpleSslConnection *ssl_connection;
61         PurpleProxyConnectData *connect_data;
62         int fd;
63         guint inpa;
64
65         gboolean got_headers;
66         gboolean has_explicit_data_len;
67         char *webdata;
68         unsigned long len;
69         unsigned long data_len;
70         gssize max_len;
71         gboolean chunked;
72 };
73
/*
 * File-scope strings released (and reset to NULL) by purple_util_uninit().
 * Objects with static storage duration start out as NULL.
 */
static char *custom_user_dir;
static char *user_dir;
76
77
78 PurpleMenuAction *
79 purple_menu_action_new(const char *label, PurpleCallback callback, gpointer data,
80                      GList *children)
81 {
82         PurpleMenuAction *act = g_new0(PurpleMenuAction, 1);
83         act->label = g_strdup(label);
84         act->callback = callback;
85         act->data = data;
86         act->children = children;
87         return act;
88 }
89
90 void
91 purple_menu_action_free(PurpleMenuAction *act)
92 {
93         g_return_if_fail(act != NULL);
94
95         g_free(act->label);
96         g_free(act);
97 }
98
/**
 * Initialise the utility subsystem.
 *
 * Currently a no-op: it is kept as the counterpart of purple_util_uninit()
 * and as a hook for future set-up work.
 */
void
purple_util_init(void)
{
	/* Intentionally empty. */
}
105
106 void
107 purple_util_uninit(void)
108 {
109         /* Free these so we don't have leaks at shutdown. */
110
111         g_free(custom_user_dir);
112         custom_user_dir = NULL;
113
114         g_free(user_dir);
115         user_dir = NULL;
116 }
117
118 /**************************************************************************
119  * Base16 Functions
120  **************************************************************************/
121 gchar *
122 purple_base16_encode(const guchar *data, gsize len)
123 {
124         int i;
125         gchar *ascii = NULL;
126
127         g_return_val_if_fail(data != NULL, NULL);
128         g_return_val_if_fail(len > 0,   NULL);
129
130         ascii = g_malloc(len * 2 + 1);
131
132         for (i = 0; i < len; i++)
133                 snprintf(&ascii[i * 2], 3, "%02hhx", data[i]);
134
135         return ascii;
136 }
137
138 guchar *
139 purple_base16_decode(const char *str, gsize *ret_len)
140 {
141         int len, i, accumulator = 0;
142         guchar *data;
143
144         g_return_val_if_fail(str != NULL, NULL);
145
146         len = strlen(str);
147
148         g_return_val_if_fail(strlen(str) > 0, 0);
149         g_return_val_if_fail(len % 2 == 0,    0);
150
151         data = g_malloc(len / 2);
152
153         for (i = 0; i < len; i++)
154         {
155                 if ((i % 2) == 0)
156                         accumulator = 0;
157                 else
158                         accumulator <<= 4;
159
160                 if (isdigit(str[i]))
161                         accumulator |= str[i] - 48;
162                 else
163                 {
164                         switch(tolower(str[i]))
165                         {
166                                 case 'a':  accumulator |= 10;  break;
167                                 case 'b':  accumulator |= 11;  break;
168                                 case 'c':  accumulator |= 12;  break;
169                                 case 'd':  accumulator |= 13;  break;
170                                 case 'e':  accumulator |= 14;  break;
171                                 case 'f':  accumulator |= 15;  break;
172                         }
173                 }
174
175                 if (i % 2)
176                         data[(i - 1) / 2] = accumulator;
177         }
178
179         if (ret_len != NULL)
180                 *ret_len = len / 2;
181
182         return data;
183 }
184
185 gchar *
186 purple_base16_encode_chunked(const guchar *data, gsize len)
187 {
188         int i;
189         gchar *ascii = NULL;
190
191         g_return_val_if_fail(data != NULL, NULL);
192         g_return_val_if_fail(len > 0,   NULL);
193
194         /* For each byte of input, we need 2 bytes for the hex representation
195          * and 1 for the colon.
196          * The final colon will be replaced by a terminating NULL
197          */
198         ascii = g_malloc(len * 3 + 1);
199
200         for (i = 0; i < len; i++)
201                 g_snprintf(&ascii[i * 3], 4, "%02hhx:", data[i]);
202
203         /* Replace the final colon with NULL */
204         ascii[len * 3 - 1] = 0;
205
206         return ascii;
207 }
208
209
210 /**************************************************************************
211  * Base64 Functions
212  **************************************************************************/
/* Output characters of purple_base64_encode(), indexed by 6-bit value. */
static const char alphabet[] =
	"ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	"abcdefghijklmnopqrstuvwxyz"
	"0123456789"
	"+/";

/* Lowercase hex digits; a character's position is its numeric value. */
static const char xdigits[] = "0123456789abcdef";
219
220 gchar *
221 purple_base64_encode(const guchar *data, gsize len)
222 {
223         char *out, *rv;
224
225         g_return_val_if_fail(data != NULL, NULL);
226         g_return_val_if_fail(len > 0,  NULL);
227
228         rv = out = g_malloc(((len/3)+1)*4 + 1);
229
230         for (; len >= 3; len -= 3)
231         {
232                 *out++ = alphabet[data[0] >> 2];
233                 *out++ = alphabet[((data[0] << 4) & 0x30) | (data[1] >> 4)];
234                 *out++ = alphabet[((data[1] << 2) & 0x3c) | (data[2] >> 6)];
235                 *out++ = alphabet[data[2] & 0x3f];
236                 data += 3;
237         }
238
239         if (len > 0)
240         {
241                 unsigned char fragment;
242
243                 *out++ = alphabet[data[0] >> 2];
244                 fragment = (data[0] << 4) & 0x30;
245
246                 if (len > 1)
247                         fragment |= data[1] >> 4;
248
249                 *out++ = alphabet[fragment];
250                 *out++ = (len < 2) ? '=' : alphabet[(data[1] << 2) & 0x3c];
251                 *out++ = '=';
252         }
253
254         *out = '\0';
255
256         return rv;
257 }
258
259 guchar *
260 purple_base64_decode(const char *str, gsize *ret_len)
261 {
262         guchar *out = NULL;
263         char tmp = 0;
264         const char *c;
265         gint32 tmp2 = 0;
266         int len = 0, n = 0;
267
268         g_return_val_if_fail(str != NULL, NULL);
269
270         c = str;
271
272         while (*c) {
273                 if (*c >= 'A' && *c <= 'Z') {
274                         tmp = *c - 'A';
275                 } else if (*c >= 'a' && *c <= 'z') {
276                         tmp = 26 + (*c - 'a');
277                 } else if (*c >= '0' && *c <= 57) {
278                         tmp = 52 + (*c - '0');
279                 } else if (*c == '+') {
280                         tmp = 62;
281                 } else if (*c == '/') {
282                         tmp = 63;
283                 } else if (*c == '\r' || *c == '\n') {
284                         c++;
285                         continue;
286                 } else if (*c == '=') {
287                         if (n == 3) {
288                                 out = g_realloc(out, len + 2);
289                                 out[len] = (guchar)(tmp2 >> 10) & 0xff;
290                                 len++;
291                                 out[len] = (guchar)(tmp2 >> 2) & 0xff;
292                                 len++;
293                         } else if (n == 2) {
294                                 out = g_realloc(out, len + 1);
295                                 out[len] = (guchar)(tmp2 >> 4) & 0xff;
296                                 len++;
297                         }
298                         break;
299                 }
300                 tmp2 = ((tmp2 << 6) | (tmp & 0xff));
301                 n++;
302                 if (n == 4) {
303                         out = g_realloc(out, len + 3);
304                         out[len] = (guchar)((tmp2 >> 16) & 0xff);
305                         len++;
306                         out[len] = (guchar)((tmp2 >> 8) & 0xff);
307                         len++;
308                         out[len] = (guchar)(tmp2 & 0xff);
309                         len++;
310                         tmp2 = 0;
311                         n = 0;
312                 }
313                 c++;
314         }
315
316         out = g_realloc(out, len + 1);
317         out[len] = 0;
318
319         if (ret_len != NULL)
320                 *ret_len = len;
321
322         return out;
323 }
324
325 /**************************************************************************
326  * Quoted Printable Functions (see RFC 2045).
327  **************************************************************************/
328 guchar *
329 purple_quotedp_decode(const char *str, gsize *ret_len)
330 {
331         char *n, *new;
332         const char *end, *p;
333
334         n = new = g_malloc(strlen (str) + 1);
335         end = str + strlen(str);
336
337         for (p = str; p < end; p++, n++) {
338                 if (*p == '=') {
339                         if (p[1] == '\r' && p[2] == '\n') { /* 5.1 #5 */
340                                 n -= 1;
341                                 p += 2;
342                         } else if (p[1] == '\n') { /* fuzzy case for 5.1 #5 */
343                                 n -= 1;
344                                 p += 1;
345                         } else if (p[1] && p[2]) {
346                                 char *nibble1 = strchr(xdigits, tolower(p[1]));
347                                 char *nibble2 = strchr(xdigits, tolower(p[2]));
348                                 if (nibble1 && nibble2) { /* 5.1 #1 */
349                                         *n = ((nibble1 - xdigits) << 4) | (nibble2 - xdigits);
350                                         p += 2;
351                                 } else { /* This should never happen */
352                                         *n = *p;
353                                 }
354                         } else { /* This should never happen */
355                                 *n = *p;
356                         }
357                 }
358                 else if (*p == '_')
359                         *n = ' ';
360                 else
361                         *n = *p;
362         }
363
364         *n = '\0';
365
366         if (ret_len != NULL)
367                 *ret_len = n - new;
368
369         /* Resize to take less space */
370         /* new = realloc(new, n - new); */
371
372         return (guchar *)new;
373 }
374
375 /**************************************************************************
376  * MIME Functions
377  **************************************************************************/
378 char *
379 purple_mime_decode_field(const char *str)
380 {
381         /*
382          * This is wing's version, partially based on revo/shx's version
383          * See RFC2047 [which apparently obsoletes RFC1342]
384          */
385         typedef enum {
386                 state_start, state_equal1, state_question1,
387                 state_charset, state_question2,
388                 state_encoding, state_question3,
389                 state_encoded_text, state_question4, state_equal2 = state_start
390         } encoded_word_state_t;
391         encoded_word_state_t state = state_start;
392         const char *cur, *mark;
393         const char *charset0 = NULL, *encoding0 = NULL, *encoded_text0 = NULL;
394         char *n, *new;
395
396         /* token can be any CHAR (supposedly ISO8859-1/ISO2022), not just ASCII */
397         #define token_char_p(c) \
398                 (c != ' ' && !iscntrl(c) && !strchr("()<>@,;:\"/[]?.=", c))
399
400         /* But encoded-text must be ASCII; alas, isascii() may not exist */
401         #define encoded_text_char_p(c) \
402                 ((c & 0x80) == 0 && c != '?' && c != ' ' && isgraph(c))
403
404         #define RECOVER_MARKED_TEXT strncpy(n, mark, cur - mark + 1); \
405                 n += cur - mark + 1
406
407         g_return_val_if_fail(str != NULL, NULL);
408
409         /* NOTE: Assuming that we need just strlen(str)+1 *may* be wrong.
410          * It would be wrong if one byte (in some unknown encoding) could
411          * expand to >=4 bytes of UTF-8; I don't know if there are such things.
412          */
413         n = new = g_malloc(strlen(str) + 1);
414
415         /* Here we will be looking for encoded words and if they seem to be
416          * valid then decode them.
417          * They are of this form: =?charset?encoding?text?=
418          */
419
420         for (cur = str, mark = NULL; *cur; cur += 1) {
421                 switch (state) {
422                 case state_equal1:
423                         if (*cur == '?') {
424                                 state = state_question1;
425                         } else {
426                                 RECOVER_MARKED_TEXT;
427                                 state = state_start;
428                         }
429                         break;
430                 case state_question1:
431                         if (token_char_p(*cur)) {
432                                 charset0 = cur;
433                                 state = state_charset;
434                         } else { /* This should never happen */
435                                 RECOVER_MARKED_TEXT;
436                                 state = state_start;
437                         }
438                         break;
439                 case state_charset:
440                         if (*cur == '?') {
441                                 state = state_question2;
442                         } else if (!token_char_p(*cur)) { /* This should never happen */
443                                 RECOVER_MARKED_TEXT;
444                                 state = state_start;
445                         }
446                         break;
447                 case state_question2:
448                         if (token_char_p(*cur)) {
449                                 encoding0 = cur;
450                                 state = state_encoding;
451                         } else { /* This should never happen */
452                                 RECOVER_MARKED_TEXT;
453                                 state = state_start;
454                         }
455                         break;
456                 case state_encoding:
457                         if (*cur == '?') {
458                                 state = state_question3;
459                         } else if (!token_char_p(*cur)) { /* This should never happen */
460                                 RECOVER_MARKED_TEXT;
461                                 state = state_start;
462                         }
463                         break;
464                 case state_question3:
465                         if (encoded_text_char_p(*cur)) {
466                                 encoded_text0 = cur;
467                                 state = state_encoded_text;
468                         } else if (*cur == '?') { /* empty string */
469                                 encoded_text0 = cur;
470                                 state = state_question4;
471                         } else { /* This should never happen */
472                                 RECOVER_MARKED_TEXT;
473                                 state = state_start;
474                         }
475                         break;
476                 case state_encoded_text:
477                         if (*cur == '?') {
478                                 state = state_question4;
479                         } else if (!encoded_text_char_p(*cur)) {
480                                 RECOVER_MARKED_TEXT;
481                                 state = state_start;
482                         }
483                         break;
484                 case state_question4:
485                         if (*cur == '=') { /* Got the whole encoded-word */
486                                 char *charset = g_strndup(charset0, encoding0 - charset0 - 1);
487                                 char *encoding = g_strndup(encoding0, encoded_text0 - encoding0 - 1);
488                                 char *encoded_text = g_strndup(encoded_text0, cur - encoded_text0 - 1);
489                                 guchar *decoded = NULL;
490                                 gsize dec_len;
491                                 if (g_ascii_strcasecmp(encoding, "Q") == 0)
492                                         decoded = purple_quotedp_decode(encoded_text, &dec_len);
493                                 else if (g_ascii_strcasecmp(encoding, "B") == 0)
494                                         decoded = purple_base64_decode(encoded_text, &dec_len);
495                                 else
496                                         decoded = NULL;
497                                 if (decoded) {
498                                         gsize len;
499                                         char *converted = g_convert((const gchar *)decoded, dec_len, "utf-8", charset, NULL, &len, NULL);
500
501                                         if (converted) {
502                                                 n = strncpy(n, converted, len) + len;
503                                                 g_free(converted);
504                                         }
505                                         g_free(decoded);
506                                 }
507                                 g_free(charset);
508                                 g_free(encoding);
509                                 g_free(encoded_text);
510                                 state = state_equal2; /* Restart the FSM */
511                         } else { /* This should never happen */
512                                 RECOVER_MARKED_TEXT;
513                                 state = state_start;
514                         }
515                         break;
516                 default:
517                         if (*cur == '=') {
518                                 mark = cur;
519                                 state = state_equal1;
520                         } else {
521                                 /* Some unencoded text. */
522                                 *n = *cur;
523                                 n += 1;
524                         }
525                         break;
526                 } /* switch */
527         } /* for */
528
529         if (state != state_start) {
530                 RECOVER_MARKED_TEXT;
531         }
532         *n = '\0';
533
534         return new;
535 }
536
537
538 /**************************************************************************
539  * Date/Time Functions
540  **************************************************************************/
541
542 const char *purple_get_tzoff_str(const struct tm *tm, gboolean iso)
543 {
544         static char buf[7];
545         long off;
546         gint8 min;
547         gint8 hrs;
548         struct tm new_tm = *tm;
549
550         mktime(&new_tm);
551
552         if (new_tm.tm_isdst < 0)
553                 g_return_val_if_reached("");
554
555 #ifdef _WIN32
556         if ((off = wpurple_get_tz_offset()) == -1)
557                 return "";
558 #else
559 # ifdef HAVE_TM_GMTOFF
560         off = new_tm.tm_gmtoff;
561 # else
562 #  ifdef HAVE_TIMEZONE
563         tzset();
564         off = -1 * timezone;
565 #  endif /* HAVE_TIMEZONE */
566 # endif /* !HAVE_TM_GMTOFF */
567 #endif /* _WIN32 */
568
569         min = (off / 60) % 60;
570         hrs = ((off / 60) - min) / 60;
571
572         if(iso) {
573                 if (0 == off) {
574                         strcpy(buf, "Z");
575                 } else {
576                         /* please leave the colons...they're optional for iso, but jabber
577                          * wants them */
578                         if(g_snprintf(buf, sizeof(buf), "%+03d:%02d", hrs, ABS(min)) > 6)
579                                 g_return_val_if_reached("");
580                 }
581         } else {
582                 if (g_snprintf(buf, sizeof(buf), "%+03d%02d", hrs, ABS(min)) > 5)
583                         g_return_val_if_reached("");
584         }
585
586         return buf;
587 }
588
589 /* Windows doesn't HAVE_STRFTIME_Z_FORMAT, but this seems clearer. -- rlaager */
590 #if !defined(HAVE_STRFTIME_Z_FORMAT) || defined(_WIN32)
591 static size_t purple_internal_strftime(char *s, size_t max, const char *format, const struct tm *tm)
592 {
593         const char *start;
594         const char *c;
595         char *fmt = NULL;
596
597         /* Yes, this is checked in purple_utf8_strftime(),
598          * but better safe than sorry. -- rlaager */
599         g_return_val_if_fail(format != NULL, 0);
600
601         /* This is fairly efficient, and it only gets
602          * executed on Windows or if the underlying
603          * system doesn't support the %z format string,
604          * for strftime() so I think it's good enough.
605          * -- rlaager */
606         for (c = start = format; *c ; c++)
607         {
608                 if (*c != '%')
609                         continue;
610
611                 c++;
612
613 #ifndef HAVE_STRFTIME_Z_FORMAT
614                 if (*c == 'z')
615                 {
616                         char *tmp = g_strdup_printf("%s%.*s%s",
617                                                     fmt ? fmt : "",
618                                                     c - start - 1,
619                                                     start,
620                                                     purple_get_tzoff_str(tm, FALSE));
621                         g_free(fmt);
622                         fmt = tmp;
623                         start = c + 1;
624                 }
625 #endif
626 #ifdef _WIN32
627                 if (*c == 'Z')
628                 {
629                         char *tmp = g_strdup_printf("%s%.*s%s",
630                                                     fmt ? fmt : "",
631                                                     c - start - 1,
632                                                     start,
633                                                     wpurple_get_timezone_abbreviation(tm));
634                         g_free(fmt);
635                         fmt = tmp;
636                         start = c + 1;
637                 }
638 #endif
639         }
640
641         if (fmt != NULL)
642         {
643                 size_t ret;
644
645                 if (*start)
646                 {
647                         char *tmp = g_strconcat(fmt, start, NULL);
648                         g_free(fmt);
649                         fmt = tmp;
650                 }
651
652                 ret = strftime(s, max, fmt, tm);
653                 g_free(fmt);
654
655                 return ret;
656         }
657
658         return strftime(s, max, format, tm);
659 }
660 #else /* HAVE_STRFTIME_Z_FORMAT && !_WIN32 */
661 #define purple_internal_strftime strftime
662 #endif
663
664 const char *
665 purple_utf8_strftime(const char *format, const struct tm *tm)
666 {
667         static char buf[128];
668         char *locale;
669         GError *err = NULL;
670         int len;
671         char *utf8;
672
673         g_return_val_if_fail(format != NULL, NULL);
674
675         if (tm == NULL)
676         {
677                 time_t now = time(NULL);
678                 tm = localtime(&now);
679         }
680
681         locale = g_locale_from_utf8(format, -1, NULL, NULL, &err);
682         if (err != NULL)
683         {
684                 purple_debug_error("util", "Format conversion failed in purple_utf8_strftime(): %s\n", err->message);
685                 g_error_free(err);
686                 locale = g_strdup(format);
687         }
688
689         /* A return value of 0 is either an error (in
690          * which case, the contents of the buffer are
691          * undefined) or the empty string (in which
692          * case, no harm is done here). */
693         if ((len = purple_internal_strftime(buf, sizeof(buf), locale, tm)) == 0)
694         {
695                 g_free(locale);
696                 return "";
697         }
698
699         g_free(locale);
700
701         utf8 = g_locale_to_utf8(buf, len, NULL, NULL, &err);
702         if (err != NULL)
703         {
704                 purple_debug_error("util", "Result conversion failed in purple_utf8_strftime(): %s\n", err->message);
705                 g_error_free(err);
706         }
707         else
708         {
709                 purple_strlcpy(buf, utf8);
710                 g_free(utf8);
711         }
712
713         return buf;
714 }
715
/**
 * Format a date using the strftime() "%x" conversion.
 *
 * @param tm Passed straight through to purple_utf8_strftime().
 *
 * @return Whatever purple_utf8_strftime() returns for "%x".
 */
const char *
purple_date_format_short(const struct tm *tm)
{
	const char *formatted;

	formatted = purple_utf8_strftime("%x", tm);

	return formatted;
}
721
/**
 * Format a date and time using a translatable strftime() format.
 *
 * @param tm Passed straight through to purple_utf8_strftime().
 *
 * @return Whatever purple_utf8_strftime() returns for the translated format.
 */
const char *
purple_date_format_long(const struct tm *tm)
{
	const char *formatted;

	/*
	 * This string determines how some dates are displayed.  The default
	 * string "%x %X" shows the date then the time.  Translators can
	 * change this to "%X %x" if they want the time to be shown first,
	 * followed by the date.
	 */
	formatted = purple_utf8_strftime(_("%x %X"), tm);

	return formatted;
}
733
/**
 * Format a date and time using the strftime() "%c" conversion.
 *
 * @param tm Passed straight through to purple_utf8_strftime().
 *
 * @return Whatever purple_utf8_strftime() returns for "%c".
 */
const char *
purple_date_format_full(const struct tm *tm)
{
	const char *formatted;

	formatted = purple_utf8_strftime("%c", tm);

	return formatted;
}
739
/**
 * Format a time of day using the strftime() "%X" conversion.
 *
 * @param tm Passed straight through to purple_utf8_strftime().
 *
 * @return Whatever purple_utf8_strftime() returns for "%X".
 */
const char *
purple_time_format(const struct tm *tm)
{
	const char *formatted;

	formatted = purple_utf8_strftime("%X", tm);

	return formatted;
}
745
/**
 * Build a time_t from calendar fields interpreted as local time.
 *
 * @param year  Full year, e.g. 2007.
 * @param month Month, 1-12.
 * @param day   Day of the month, 1-31.
 * @param hour  Hour, 0-23.
 * @param min   Minute, 0-59.
 * @param sec   Second, 0-59; a negative value means "use the seconds of the
 *              current time".
 *
 * @return The result of mktime() for those fields ((time_t)-1 if the time
 *         cannot be represented).
 */
time_t
purple_time_build(int year, int month, int day, int hour, int min, int sec)
{
	struct tm tm;

	/* mktime() reads tm_isdst, and the struct may have extra members on
	 * some platforms, so start from a fully initialised value.  Previously
	 * those members were indeterminate and the result could vary. */
	memset(&tm, 0, sizeof(tm));

	tm.tm_year = year - 1900;
	tm.tm_mon = month - 1;
	tm.tm_mday = day;
	tm.tm_hour = hour;
	tm.tm_min = min;
	tm.tm_sec = sec >= 0 ? sec : time(NULL) % 60;

	/* Negative: let mktime() work out whether DST applies on that date. */
	tm.tm_isdst = -1;

	return mktime(&tm);
}
760
761 time_t
762 purple_str_to_time(const char *timestamp, gboolean utc,
763                  struct tm *tm, long *tz_off, const char **rest)
764 {
765         time_t retval = 0;
766         static struct tm t;
767         const char *c = timestamp;
768         int year = 0;
769         long tzoff = PURPLE_NO_TZ_OFF;
770
771         time(&retval);
772         localtime_r(&retval, &t);
773
774         if (rest != NULL)
775                 *rest = NULL;
776
777         /* 4 digit year */
778         if (sscanf(c, "%04d", &year) && year > 1900)
779         {
780                 c += 4;
781                 if (*c == '-')
782                         c++;
783                 t.tm_year = year - 1900;
784         }
785
786         /* 2 digit month */
787         if (!sscanf(c, "%02d", &t.tm_mon))
788         {
789                 if (rest != NULL && *c != '\0')
790                         *rest = c;
791                 return 0;
792         }
793         c += 2;
794         if (*c == '-' || *c == '/')
795                 c++;
796         t.tm_mon -= 1;
797
798         /* 2 digit day */
799         if (!sscanf(c, "%02d", &t.tm_mday))
800         {
801                 if (rest != NULL && *c != '\0')
802                         *rest = c;
803                 return 0;
804         }
805         c += 2;
806         if (*c == '/')
807         {
808                 c++;
809
810                 if (!sscanf(c, "%04d", &t.tm_year))
811                 {
812                         if (rest != NULL && *c != '\0')
813                                 *rest = c;
814                         return 0;
815                 }
816                 t.tm_year -= 1900;
817         }
818         else if (*c == 'T' || *c == '.')
819         {
820                 c++;
821                 /* we have more than a date, keep going */
822
823                 /* 2 digit hour */
824                 if ((sscanf(c, "%02d:%02d:%02d", &t.tm_hour, &t.tm_min, &t.tm_sec) == 3 && (c = c + 8)) ||
825                     (sscanf(c, "%02d%02d%02d", &t.tm_hour, &t.tm_min, &t.tm_sec) == 3 && (c = c + 6)))
826                 {
827                         gboolean offset_positive = FALSE;
828                         int tzhrs;
829                         int tzmins;
830
831                         t.tm_isdst = -1;
832
833                         if (*c == '.') {
834                                 do {
835                                         c++;
836                                 } while (*c >= '0' && *c <= '9'); /* dealing with precision we don't care about */
837                         }
838                         if (*c == '+')
839                                 offset_positive = TRUE;
840                         if (((*c == '+' || *c == '-') && (c = c + 1)) &&
841                             ((sscanf(c, "%02d:%02d", &tzhrs, &tzmins) == 2 && (c = c + 5)) ||
842                              (sscanf(c, "%02d%02d", &tzhrs, &tzmins) == 2 && (c = c + 4))))
843                         {
844                                 tzoff = tzhrs*60*60 + tzmins*60;
845                                 if (offset_positive)
846                                         tzoff *= -1;
847                         }
848                         else if ((*c == 'Z') && (c = c + 1))
849                         {
850                                 /* 'Z' = Zulu = UTC */
851                                 tzoff = 0;
852                         }
853                         else if (utc)
854                         {
855                                 static struct tm tmptm;
856                                 time_t tmp;
857                                 tmp = mktime(&t);
858                                 /* we care about whether it *was* dst, and the offset, here on this
859                                  * date, not whether we are currently observing dst locally *now*.
860                                  * This isn't perfect, because we would need to know in advance the
861                                  * offset we are trying to work out in advance to be sure this
862                                  * works for times around dst transitions but it'll have to do. */
863                                 localtime_r(&tmp, &tmptm);
864                                 t.tm_isdst = tmptm.tm_isdst;
865 #ifdef HAVE_TM_GMTOFF
866                                 t.tm_gmtoff = tmptm.tm_gmtoff;
867 #endif
868                         }
869
870                         if (rest != NULL && *c != '\0')
871                         {
872                                 if (*c == ' ')
873                                         c++;
874                                 if (*c != '\0')
875                                         *rest = c;
876                         }
877
878                         if (tzoff != PURPLE_NO_TZ_OFF || utc)
879                         {
880 #if defined(_WIN32)
881                                 long sys_tzoff;
882 #endif
883
884 #if defined(_WIN32) || defined(HAVE_TM_GMTOFF) || defined (HAVE_TIMEZONE)
885                                 if (tzoff == PURPLE_NO_TZ_OFF)
886                                         tzoff = 0;
887 #endif
888
889 #ifdef _WIN32
890                                 if ((sys_tzoff = wpurple_get_tz_offset()) == -1)
891                                         tzoff = PURPLE_NO_TZ_OFF;
892                                 else
893                                         tzoff += sys_tzoff;
894 #else
895 #ifdef HAVE_TM_GMTOFF
896                                 tzoff += t.tm_gmtoff;
897 #else
898 #       ifdef HAVE_TIMEZONE
899                                 tzset();    /* making sure */
900                                 tzoff -= timezone;
901 #       endif
902 #endif
903 #endif /* _WIN32 */
904                         }
905                 }
906                 else
907                 {
908                         if (rest != NULL && *c != '\0')
909                                 *rest = c;
910                 }
911         }
912
913         retval = mktime(&t);
914
915         if (tm != NULL)
916                 *tm = t;
917
918         if (tzoff != PURPLE_NO_TZ_OFF)
919                 retval += tzoff;
920
921         if (tz_off != NULL)
922                 *tz_off = tzoff;
923
924         return retval;
925 }
926
927 /**************************************************************************
928  * Markup Functions
929  **************************************************************************/
930
931 const char *
932 purple_markup_unescape_entity(const char *text, int *length)
933 {
934         const char *pln;
935         int len, pound;
936         char temp[2];
937
938         if (!text || *text != '&')
939                 return NULL;
940
941 #define IS_ENTITY(s)  (!g_ascii_strncasecmp(text, s, (len = sizeof(s) - 1)))
942
943         if(IS_ENTITY("&amp;"))
944                 pln = "&";
945         else if(IS_ENTITY("&lt;"))
946                 pln = "<";
947         else if(IS_ENTITY("&gt;"))
948                 pln = ">";
949         else if(IS_ENTITY("&nbsp;"))
950                 pln = " ";
951         else if(IS_ENTITY("&copy;"))
952                 pln = "\302\251";      /* or use g_unichar_to_utf8(0xa9); */
953         else if(IS_ENTITY("&quot;"))
954                 pln = "\"";
955         else if(IS_ENTITY("&reg;"))
956                 pln = "\302\256";      /* or use g_unichar_to_utf8(0xae); */
957         else if(IS_ENTITY("&apos;"))
958                 pln = "\'";
959         else if(*(text+1) == '#' &&
960                         (sscanf(text, "&#%u%1[;]", &pound, temp) == 2 ||
961                          sscanf(text, "&#x%x%1[;]", &pound, temp) == 2) &&
962                         pound != 0) {
963                 static char buf[7];
964                 int buflen = g_unichar_to_utf8((gunichar)pound, buf);
965                 buf[buflen] = '\0';
966                 pln = buf;
967
968                 len = 2;
969                 while(isdigit((gint) text[len])) len++;
970                 if(text[len] == ';') len++;
971         }
972         else
973                 return NULL;
974
975         if (length)
976                 *length = len;
977         return pln;
978 }
979
980 char *
981 purple_markup_get_css_property(const gchar *style,
982                                 const gchar *opt)
983 {
984         const gchar *css_str = style;
985         const gchar *css_value_start;
986         const gchar *css_value_end;
987         gchar *tmp;
988         gchar *ret;
989
990         g_return_val_if_fail(opt != NULL, NULL);
991
992         if (!css_str)
993                 return NULL;
994
995         /* find the CSS property */
996         while (1)
997         {
998                 /* skip whitespace characters */
999                 while (*css_str && g_ascii_isspace(*css_str))
1000                         css_str++;
1001                 if (!g_ascii_isalpha(*css_str))
1002                         return NULL;
1003                 if (g_ascii_strncasecmp(css_str, opt, strlen(opt)))
1004                 {
1005                         /* go to next css property positioned after the next ';' */
1006                         while (*css_str && *css_str != '"' && *css_str != ';')
1007                                 css_str++;
1008                         if(*css_str != ';')
1009                                 return NULL;
1010                         css_str++;
1011                 }
1012                 else
1013                         break;
1014         }
1015
1016         /* find the CSS value position in the string */
1017         css_str += strlen(opt);
1018         while (*css_str && g_ascii_isspace(*css_str))
1019                 css_str++;
1020         if (*css_str != ':')
1021                 return NULL;
1022         css_str++;
1023         while (*css_str && g_ascii_isspace(*css_str))
1024                 css_str++;
1025         if (*css_str == '\0' || *css_str == '"' || *css_str == ';')
1026                 return NULL;
1027
1028         /* mark the CSS value */
1029         css_value_start = css_str;
1030         while (*css_str && *css_str != '"' && *css_str != ';')
1031                 css_str++;
1032         css_value_end = css_str - 1;
1033
1034         /* Removes trailing whitespace */
1035         while (css_value_end > css_value_start && g_ascii_isspace(*css_value_end))
1036                 css_value_end--;
1037
1038         tmp = g_strndup(css_value_start, css_value_end - css_value_start + 1);
1039         ret = purple_unescape_html(tmp);
1040         g_free(tmp);
1041
1042         return ret;
1043 }
1044
1045 gboolean
1046 purple_markup_find_tag(const char *needle, const char *haystack,
1047                                          const char **start, const char **end, GData **attributes)
1048 {
1049         GData *attribs;
1050         const char *cur = haystack;
1051         char *name = NULL;
1052         gboolean found = FALSE;
1053         gboolean in_tag = FALSE;
1054         gboolean in_attr = FALSE;
1055         const char *in_quotes = NULL;
1056         size_t needlelen;
1057
1058         g_return_val_if_fail(    needle != NULL, FALSE);
1059         g_return_val_if_fail(   *needle != '\0', FALSE);
1060         g_return_val_if_fail(  haystack != NULL, FALSE);
1061         g_return_val_if_fail(     start != NULL, FALSE);
1062         g_return_val_if_fail(       end != NULL, FALSE);
1063         g_return_val_if_fail(attributes != NULL, FALSE);
1064
1065         needlelen = strlen(needle);
1066         g_datalist_init(&attribs);
1067
1068         while (*cur && !found) {
1069                 if (in_tag) {
1070                         if (in_quotes) {
1071                                 const char *close = cur;
1072
1073                                 while (*close && *close != *in_quotes)
1074                                         close++;
1075
1076                                 /* if we got the close quote, store the value and carry on from    *
1077                                  * after it. if we ran to the end of the string, point to the NULL *
1078                                  * and we're outta here */
1079                                 if (*close) {
1080                                         /* only store a value if we have an attribute name */
1081                                         if (name) {
1082                                                 size_t len = close - cur;
1083                                                 char *val = g_strndup(cur, len);
1084
1085                                                 g_datalist_set_data_full(&attribs, name, val, g_free);
1086                                                 g_free(name);
1087                                                 name = NULL;
1088                                         }
1089
1090                                         in_quotes = NULL;
1091                                         cur = close + 1;
1092                                 } else {
1093                                         cur = close;
1094                                 }
1095                         } else if (in_attr) {
1096                                 const char *close = cur;
1097
1098                                 while (*close && *close != '>' && *close != '"' &&
1099                                                 *close != '\'' && *close != ' ' && *close != '=')
1100                                         close++;
1101
1102                                 /* if we got the equals, store the name of the attribute. if we got
1103                                  * the quote, save the attribute and go straight to quote mode.
1104                                  * otherwise the tag closed or we reached the end of the string,
1105                                  * so we can get outta here */
1106                                 switch (*close) {
1107                                 case '"':
1108                                 case '\'':
1109                                         in_quotes = close;
1110                                 case '=':
1111                                         {
1112                                                 size_t len = close - cur;
1113
1114                                                 /* don't store a blank attribute name */
1115                                                 if (len) {
1116                                                         g_free(name);
1117                                                         name = g_ascii_strdown(cur, len);
1118                                                 }
1119
1120                                                 in_attr = FALSE;
1121                                                 cur = close + 1;
1122                                                 break;
1123                                         }
1124                                 case ' ':
1125                                 case '>':
1126                                         in_attr = FALSE;
1127                                 default:
1128                                         cur = close;
1129                                         break;
1130                                 }
1131                         } else {
1132                                 switch (*cur) {
1133                                 case ' ':
1134                                         /* swallow extra spaces inside tag */
1135                                         while (*cur && *cur == ' ') cur++;
1136                                         in_attr = TRUE;
1137                                         break;
1138                                 case '>':
1139                                         found = TRUE;
1140                                         *end = cur;
1141                                         break;
1142                                 case '"':
1143                                 case '\'':
1144                                         in_quotes = cur;
1145                                 default:
1146                                         cur++;
1147                                         break;
1148                                 }
1149                         }
1150                 } else {
1151                         /* if we hit a < followed by the name of our tag... */
1152                         if (*cur == '<' && !g_ascii_strncasecmp(cur + 1, needle, needlelen)) {
1153                                 *start = cur;
1154                                 cur = cur + needlelen + 1;
1155
1156                                 /* if we're pointing at a space or a >, we found the right tag. if *
1157                                  * we're not, we've found a longer tag, so we need to skip to the  *
1158                                  * >, but not being distracted by >s inside quotes.                */
1159                                 if (*cur == ' ' || *cur == '>') {
1160                                         in_tag = TRUE;
1161                                 } else {
1162                                         while (*cur && *cur != '"' && *cur != '\'' && *cur != '>') {
1163                                                 if (*cur == '"') {
1164                                                         cur++;
1165                                                         while (*cur && *cur != '"')
1166                                                                 cur++;
1167                                                 } else if (*cur == '\'') {
1168                                                         cur++;
1169                                                         while (*cur && *cur != '\'')
1170                                                                 cur++;
1171                                                 } else {
1172                                                         cur++;
1173                                                 }
1174                                         }
1175                                 }
1176                         } else {
1177                                 cur++;
1178                         }
1179                 }
1180         }
1181
1182         /* clean up any attribute name from a premature termination */
1183         g_free(name);
1184
1185         if (found) {
1186                 *attributes = attribs;
1187         } else {
1188                 *start = NULL;
1189                 *end = NULL;
1190                 *attributes = NULL;
1191         }
1192
1193         return found;
1194 }
1195
1196 gboolean
1197 purple_markup_extract_info_field(const char *str, int len, PurpleNotifyUserInfo *user_info,
1198                                                            const char *start_token, int skip,
1199                                                            const char *end_token, char check_value,
1200                                                            const char *no_value_token,
1201                                                            const char *display_name, gboolean is_link,
1202                                                            const char *link_prefix,
1203                                                            PurpleInfoFieldFormatCallback format_cb)
1204 {
1205         const char *p, *q;
1206
1207         g_return_val_if_fail(str          != NULL, FALSE);
1208         g_return_val_if_fail(user_info    != NULL, FALSE);
1209         g_return_val_if_fail(start_token  != NULL, FALSE);
1210         g_return_val_if_fail(end_token    != NULL, FALSE);
1211         g_return_val_if_fail(display_name != NULL, FALSE);
1212
1213         p = strstr(str, start_token);
1214
1215         if (p == NULL)
1216                 return FALSE;
1217
1218         p += strlen(start_token) + skip;
1219
1220         if (p >= str + len)
1221                 return FALSE;
1222
1223         if (check_value != '\0' && *p == check_value)
1224                 return FALSE;
1225
1226         q = strstr(p, end_token);
1227
1228         /* Trim leading blanks */
1229         while (*p != '\n' && g_ascii_isspace(*p)) {
1230                 p += 1;
1231         }
1232
1233         /* Trim trailing blanks */
1234         while (q > p && g_ascii_isspace(*(q - 1))) {
1235                 q -= 1;
1236         }
1237
1238         /* Don't bother with null strings */
1239         if (p == q)
1240                 return FALSE;
1241
1242         if (q != NULL && (!no_value_token ||
1243                                           (no_value_token && strncmp(p, no_value_token,
1244                                                                                                  strlen(no_value_token)))))
1245         {
1246                 GString *dest = g_string_new("");
1247
1248                 if (is_link)
1249                 {
1250                         g_string_append(dest, "<a href=\"");
1251
1252                         if (link_prefix)
1253                                 g_string_append(dest, link_prefix);
1254
1255                         if (format_cb != NULL)
1256                         {
1257                                 char *reformatted = format_cb(p, q - p);
1258                                 g_string_append(dest, reformatted);
1259                                 g_free(reformatted);
1260                         }
1261                         else
1262                                 g_string_append_len(dest, p, q - p);
1263                         g_string_append(dest, "\">");
1264
1265                         if (link_prefix)
1266                                 g_string_append(dest, link_prefix);
1267
1268                         g_string_append_len(dest, p, q - p);
1269                         g_string_append(dest, "</a>");
1270                 }
1271                 else
1272                 {
1273                         if (format_cb != NULL)
1274                         {
1275                                 char *reformatted = format_cb(p, q - p);
1276                                 g_string_append(dest, reformatted);
1277                                 g_free(reformatted);
1278                         }
1279                         else
1280                                 g_string_append_len(dest, p, q - p);
1281                 }
1282
1283                 purple_notify_user_info_add_pair(user_info, display_name, dest->str);
1284                 g_string_free(dest, TRUE);
1285
1286                 return TRUE;
1287         }
1288
1289         return FALSE;
1290 }
1291
1292 struct purple_parse_tag {
1293         char *src_tag;
1294         char *dest_tag;
1295         gboolean ignore;
1296 };
1297
/*
 * ALLOW_TAG_ALT(x, y): if the input at c is an opening tag named x, emit it
 * as tag y and `continue` the enclosing loop; otherwise do nothing.
 *
 * x and y must be string literals (they are pasted onto other literals).
 * The macro must be used as a statement inside a loop and relies on these
 * variables of the enclosing scope: c (current input position), xhtml and
 * plain (output GStrings, either may be NULL) and tags (stack of open tags).
 *
 * Two forms are recognised:
 *   "<x " ...attributes... ">"  Attributes are copied through with quoted
 *                               values markup-escaped.  If a '<' appears
 *                               outside quotes before the '>', or no '>' is
 *                               found, the tag is rejected and its '<' is
 *                               emitted as literal text instead.
 *   "<x>" or "<x/>"             Copied through as "<y>" / "<y/>".
 * Unless the tag is self-closing, a purple_parse_tag is pushed on tags.
 * A backslash inside a quoted value is not treated as an escape character.
 *
 * The whole expansion is one compound statement so that a guard such as
 * `if (cond) ALLOW_TAG("html");` covers both forms; previously only the
 * first form was guarded.
 */
#define ALLOW_TAG_ALT(x, y) { \
	if(!g_ascii_strncasecmp(c, "<" x " ", strlen("<" x " "))) { \
		const char *o = c + strlen("<" x); \
		const char *p = NULL, *q = NULL, *r = NULL; \
		GString *innards = g_string_new(""); \
		while(*o) { \
			if(!q && (*o == '\"' || *o == '\'') ) { \
				q = o; \
			} else if(q) { \
				if(*o == *q) { \
					char *unescaped = g_strndup(q+1, o-q-1); \
					char *escaped = g_markup_escape_text(unescaped, -1); \
					g_string_append_printf(innards, "%c%s%c", *q, escaped, *q); \
					g_free(unescaped); \
					g_free(escaped); \
					q = NULL; \
				} \
			} else if(*o == '<') { \
				r = o; \
			} else if(*o == '>') { \
				p = o; \
				break; \
			} else { \
				innards = g_string_append_c(innards, *o); \
			} \
			o++; \
		} \
		if(p && !r) { \
			if(*(p-1) != '/') { \
				struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
				pt->src_tag = x; \
				pt->dest_tag = y; \
				tags = g_list_prepend(tags, pt); \
			} \
			if(xhtml) { \
				xhtml = g_string_append(xhtml, "<" y); \
				xhtml = g_string_append(xhtml, innards->str); \
				xhtml = g_string_append_c(xhtml, '>'); \
			} \
			c = p + 1; \
		} else { \
			if(xhtml) \
				xhtml = g_string_append(xhtml, "&lt;"); \
			if(plain) \
				plain = g_string_append_c(plain, '<'); \
			c++; \
		} \
		g_string_free(innards, TRUE); \
		continue; \
	} \
	if(!g_ascii_strncasecmp(c, "<" x, strlen("<" x)) && \
			(*(c+strlen("<" x)) == '>' || \
			 !g_ascii_strncasecmp(c+strlen("<" x), "/>", 2))) { \
		if(xhtml) \
			xhtml = g_string_append(xhtml, "<" y); \
		c += strlen("<" x); \
		if(*c != '/') { \
			struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1); \
			pt->src_tag = x; \
			pt->dest_tag = y; \
			tags = g_list_prepend(tags, pt); \
			if(xhtml) \
				xhtml = g_string_append_c(xhtml, '>'); \
		} else { \
			if(xhtml) \
				xhtml = g_string_append(xhtml, "/>"); \
		} \
		c = strchr(c, '>') + 1; \
		continue; \
	} \
}
/* ALLOW_TAG(x): pass tag x through under its own name. */
#define ALLOW_TAG(x) ALLOW_TAG_ALT(x, x)
1370 void
1371 purple_markup_html_to_xhtml(const char *html, char **xhtml_out,
1372                                                   char **plain_out)
1373 {
1374         GString *xhtml = NULL;
1375         GString *plain = NULL;
1376         GString *url = NULL;
1377         GString *cdata = NULL;
1378         GList *tags = NULL, *tag;
1379         const char *c = html;
1380         char quote = '\0';
1381
1382 #define CHECK_QUOTE(ptr) if (*(ptr) == '\'' || *(ptr) == '\"') \
1383                         quote = *(ptr++); \
1384                 else \
1385                         quote = '\0';
1386
1387 #define VALID_CHAR(ptr) (*(ptr) && *(ptr) != quote && (quote || (*(ptr) != ' ' && *(ptr) != '>')))
1388
1389         g_return_if_fail(xhtml_out != NULL || plain_out != NULL);
1390
1391         if(xhtml_out)
1392                 xhtml = g_string_new("");
1393         if(plain_out)
1394                 plain = g_string_new("");
1395
1396         while(c && *c) {
1397                 if(*c == '<') {
1398                         if(*(c+1) == '/') { /* closing tag */
1399                                 tag = tags;
1400                                 while(tag) {
1401                                         struct purple_parse_tag *pt = tag->data;
1402                                         if(!g_ascii_strncasecmp((c+2), pt->src_tag, strlen(pt->src_tag)) && *(c+strlen(pt->src_tag)+2) == '>') {
1403                                                 c += strlen(pt->src_tag) + 3;
1404                                                 break;
1405                                         }
1406                                         tag = tag->next;
1407                                 }
1408                                 if(tag) {
1409                                         while(tags) {
1410                                                 struct purple_parse_tag *pt = tags->data;
1411                                                 if(xhtml)
1412                                                         g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
1413                                                 if(plain && !strcmp(pt->src_tag, "a")) {
1414                                                         /* if this is a link, we have to add the url to the plaintext, too */
1415                                                         if (cdata && url &&
1416                                                                         (!g_string_equal(cdata, url) && (g_ascii_strncasecmp(url->str, "mailto:", 7) != 0 ||
1417                                                                                                          g_utf8_collate(url->str + 7, cdata->str) != 0)))
1418                                                                 g_string_append_printf(plain, " <%s>", g_strstrip(url->str));
1419                                                         if (cdata) {
1420                                                                 g_string_free(cdata, TRUE);
1421                                                                 cdata = NULL;
1422                                                         }
1423
1424                                                 }
1425                                                 if(tags == tag)
1426                                                         break;
1427                                                 tags = g_list_remove(tags, pt);
1428                                                 g_free(pt);
1429                                         }
1430                                         g_free(tag->data);
1431                                         tags = g_list_remove(tags, tag->data);
1432                                 } else {
1433                                         /* a closing tag we weren't expecting...
1434                                          * we'll let it slide, if it's really a tag...if it's
1435                                          * just a </ we'll escape it properly */
1436                                         const char *end = c+2;
1437                                         while(*end && g_ascii_isalpha(*end))
1438                                                 end++;
1439                                         if(*end == '>') {
1440                                                 c = end+1;
1441                                         } else {
1442                                                 if(xhtml)
1443                                                         xhtml = g_string_append(xhtml, "&lt;");
1444                                                 if(plain)
1445                                                         plain = g_string_append_c(plain, '<');
1446                                                 c++;
1447                                         }
1448                                 }
1449                         } else { /* opening tag */
1450                                 ALLOW_TAG("blockquote");
1451                                 ALLOW_TAG("cite");
1452                                 ALLOW_TAG("div");
1453                                 ALLOW_TAG("em");
1454                                 ALLOW_TAG("h1");
1455                                 ALLOW_TAG("h2");
1456                                 ALLOW_TAG("h3");
1457                                 ALLOW_TAG("h4");
1458                                 ALLOW_TAG("h5");
1459                                 ALLOW_TAG("h6");
1460                                 /* we only allow html to start the message */
1461                                 if(c == html)
1462                                         ALLOW_TAG("html");
1463                                 ALLOW_TAG_ALT("i", "em");
1464                                 ALLOW_TAG_ALT("italic", "em");
1465                                 ALLOW_TAG("li");
1466                                 ALLOW_TAG("ol");
1467                                 ALLOW_TAG("p");
1468                                 ALLOW_TAG("pre");
1469                                 ALLOW_TAG("q");
1470                                 ALLOW_TAG("span");
1471                                 ALLOW_TAG("ul");
1472
1473
1474                                 /* we skip <HR> because it's not legal in XHTML-IM.  However,
1475                                  * we still want to send something sensible, so we put a
1476                                  * linebreak in its place. <BR> also needs special handling
1477                                  * because putting a </BR> to close it would just be dumb. */
1478                                 if((!g_ascii_strncasecmp(c, "<br", 3)
1479                                                         || !g_ascii_strncasecmp(c, "<hr", 3))
1480                                                 && (*(c+3) == '>' ||
1481                                                         !g_ascii_strncasecmp(c+3, "/>", 2) ||
1482                                                         !g_ascii_strncasecmp(c+3, " />", 3))) {
1483                                         c = strchr(c, '>') + 1;
1484                                         if(xhtml)
1485                                                 xhtml = g_string_append(xhtml, "<br/>");
1486                                         if(plain && *c != '\n')
1487                                                 plain = g_string_append_c(plain, '\n');
1488                                         continue;
1489                                 }
1490                                 if(!g_ascii_strncasecmp(c, "<b>", 3) || !g_ascii_strncasecmp(c, "<bold>", strlen("<bold>")) || !g_ascii_strncasecmp(c, "<strong>", strlen("<strong>"))) {
1491                                         struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
1492                                         if (*(c+2) == '>')
1493                                                 pt->src_tag = "b";
1494                                         else if (*(c+2) == 'o')
1495                                                 pt->src_tag = "bold";
1496                                         else
1497                                                 pt->src_tag = "strong";
1498                                         pt->dest_tag = "span";
1499                                         tags = g_list_prepend(tags, pt);
1500                                         c = strchr(c, '>') + 1;
1501                                         if(xhtml)
1502                                                 xhtml = g_string_append(xhtml, "<span style='font-weight: bold;'>");
1503                                         continue;
1504                                 }
1505                                 if(!g_ascii_strncasecmp(c, "<u>", 3) || !g_ascii_strncasecmp(c, "<underline>", strlen("<underline>"))) {
1506                                         struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
1507                                         pt->src_tag = *(c+2) == '>' ? "u" : "underline";
1508                                         pt->dest_tag = "span";
1509                                         tags = g_list_prepend(tags, pt);
1510                                         c = strchr(c, '>') + 1;
1511                                         if (xhtml)
1512                                                 xhtml = g_string_append(xhtml, "<span style='text-decoration: underline;'>");
1513                                         continue;
1514                                 }
1515                                 if(!g_ascii_strncasecmp(c, "<s>", 3) || !g_ascii_strncasecmp(c, "<strike>", strlen("<strike>"))) {
1516                                         struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
1517                                         pt->src_tag = *(c+2) == '>' ? "s" : "strike";
1518                                         pt->dest_tag = "span";
1519                                         tags = g_list_prepend(tags, pt);
1520                                         c = strchr(c, '>') + 1;
1521                                         if(xhtml)
1522                                                 xhtml = g_string_append(xhtml, "<span style='text-decoration: line-through;'>");
1523                                         continue;
1524                                 }
1525                                 if(!g_ascii_strncasecmp(c, "<sub>", 5)) {
1526                                         struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
1527                                         pt->src_tag = "sub";
1528                                         pt->dest_tag = "span";
1529                                         tags = g_list_prepend(tags, pt);
1530                                         c = strchr(c, '>') + 1;
1531                                         if(xhtml)
1532                                                 xhtml = g_string_append(xhtml, "<span style='vertical-align:sub;'>");
1533                                         continue;
1534                                 }
1535                                 if(!g_ascii_strncasecmp(c, "<sup>", 5)) {
1536                                         struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
1537                                         pt->src_tag = "sup";
1538                                         pt->dest_tag = "span";
1539                                         tags = g_list_prepend(tags, pt);
1540                                         c = strchr(c, '>') + 1;
1541                                         if(xhtml)
1542                                                 xhtml = g_string_append(xhtml, "<span style='vertical-align:super;'>");
1543                                         continue;
1544                                 }
1545                                 if (!g_ascii_strncasecmp(c, "<img", 4) && (*(c+4) == '>' || *(c+4) == ' ')) {
1546                                         const char *p = c + 4;
1547                                         GString *src = NULL, *alt = NULL;
1548                                         while (*p && *p != '>') {
1549                                                 if (!g_ascii_strncasecmp(p, "src=", 4)) {
1550                                                         const char *q = p + 4;
1551                                                         if (src)
1552                                                                 g_string_free(src, TRUE);
1553                                                         src = g_string_new("");
1554                                                         CHECK_QUOTE(q);
1555                                                         while (VALID_CHAR(q)) {
1556                                                                 src = g_string_append_c(src, *q);
1557                                                                 q++;
1558                                                         }
1559                                                         p = q;
1560                                                 } else if (!g_ascii_strncasecmp(p, "alt=", 4)) {
1561                                                         const char *q = p + 4;
1562                                                         if (alt)
1563                                                                 g_string_free(alt, TRUE);
1564                                                         alt = g_string_new("");
1565                                                         CHECK_QUOTE(q);
1566                                                         while (VALID_CHAR(q)) {
1567                                                                 alt = g_string_append_c(alt, *q);
1568                                                                 q++;
1569                                                         }
1570                                                         p = q;
1571                                                 } else {
1572                                                         p++;
1573                                                 }
1574                                         }
1575                                         if ((c = strchr(p, '>')) != NULL)
1576                                                 c++;
1577                                         else
1578                                                 c = p;
1579                                         /* src and alt are required! */
1580                                         if(src && xhtml)
1581                                                 g_string_append_printf(xhtml, "<img src='%s' alt='%s' />", g_strstrip(src->str), alt ? alt->str : "");
1582                                         if(alt) {
1583                                                 if(plain)
1584                                                         plain = g_string_append(plain, alt->str);
1585                                                 if(!src && xhtml)
1586                                                         xhtml = g_string_append(xhtml, alt->str);
1587                                                 g_string_free(alt, TRUE);
1588                                         }
1589                                         g_string_free(src, TRUE);
1590                                         continue;
1591                                 }
1592                                 if (!g_ascii_strncasecmp(c, "<a", 2) && (*(c+2) == '>' || *(c+2) == ' ')) {
1593                                         const char *p = c + 2;
1594                                         struct purple_parse_tag *pt;
1595                                         while (*p && *p != '>') {
1596                                                 if (!g_ascii_strncasecmp(p, "href=", 5)) {
1597                                                         const char *q = p + 5;
1598                                                         if (url)
1599                                                                 g_string_free(url, TRUE);
1600                                                         url = g_string_new("");
1601                                                         if (cdata)
1602                                                                 g_string_free(cdata, TRUE);
1603                                                         cdata = g_string_new("");
1604                                                         CHECK_QUOTE(q);
1605                                                         while (VALID_CHAR(q)) {
1606                                                                 int len;
1607                                                                 if ((*q == '&') && (purple_markup_unescape_entity(q, &len) == NULL))
1608                                                                         url = g_string_append(url, "&amp;");
1609                                                                 else
1610                                                                         url = g_string_append_c(url, *q);
1611                                                                 q++;
1612                                                         }
1613                                                         p = q;
1614                                                 } else {
1615                                                         p++;
1616                                                 }
1617                                         }
1618                                         if ((c = strchr(p, '>')) != NULL)
1619                                                 c++;
1620                                         else
1621                                                 c = p;
1622                                         pt = g_new0(struct purple_parse_tag, 1);
1623                                         pt->src_tag = "a";
1624                                         pt->dest_tag = "a";
1625                                         tags = g_list_prepend(tags, pt);
1626                                         if(xhtml)
1627                                                 g_string_append_printf(xhtml, "<a href='%s'>", url ? g_strstrip(url->str) : "");
1628                                         continue;
1629                                 }
1630                                 if(!g_ascii_strncasecmp(c, "<font", 5) && (*(c+5) == '>' || *(c+5) == ' ')) {
1631                                         const char *p = c + 5;
1632                                         GString *style = g_string_new("");
1633                                         struct purple_parse_tag *pt;
1634                                         while (*p && *p != '>') {
1635                                                 if (!g_ascii_strncasecmp(p, "back=", 5)) {
1636                                                         const char *q = p + 5;
1637                                                         GString *color = g_string_new("");
1638                                                         CHECK_QUOTE(q);
1639                                                         while (VALID_CHAR(q)) {
1640                                                                 color = g_string_append_c(color, *q);
1641                                                                 q++;
1642                                                         }
1643                                                         g_string_append_printf(style, "background: %s; ", color->str);
1644                                                         g_string_free(color, TRUE);
1645                                                         p = q;
1646                                                 } else if (!g_ascii_strncasecmp(p, "color=", 6)) {
1647                                                         const char *q = p + 6;
1648                                                         GString *color = g_string_new("");
1649                                                         CHECK_QUOTE(q);
1650                                                         while (VALID_CHAR(q)) {
1651                                                                 color = g_string_append_c(color, *q);
1652                                                                 q++;
1653                                                         }
1654                                                         g_string_append_printf(style, "color: %s; ", color->str);
1655                                                         g_string_free(color, TRUE);
1656                                                         p = q;
1657                                                 } else if (!g_ascii_strncasecmp(p, "face=", 5)) {
1658                                                         const char *q = p + 5;
1659                                                         GString *face = g_string_new("");
1660                                                         CHECK_QUOTE(q);
1661                                                         while (VALID_CHAR(q)) {
1662                                                                 face = g_string_append_c(face, *q);
1663                                                                 q++;
1664                                                         }
1665                                                         g_string_append_printf(style, "font-family: %s; ", g_strstrip(face->str));
1666                                                         g_string_free(face, TRUE);
1667                                                         p = q;
1668                                                 } else if (!g_ascii_strncasecmp(p, "size=", 5)) {
1669                                                         const char *q = p + 5;
1670                                                         int sz;
1671                                                         const char *size = "medium";
1672                                                         CHECK_QUOTE(q);
1673                                                         sz = atoi(q);
1674                                                         switch (sz)
1675                                                         {
1676                                                         case 1:
1677                                                           size = "xx-small";
1678                                                           break;
1679                                                         case 2:
1680                                                           size = "small";
1681                                                           break;
1682                                                         case 3:
1683                                                           size = "medium";
1684                                                           break;
1685                                                         case 4:
1686                                                           size = "large";
1687                                                           break;
1688                                                         case 5:
1689                                                           size = "x-large";
1690                                                           break;
1691                                                         case 6:
1692                                                         case 7:
1693                                                           size = "xx-large";
1694                                                           break;
1695                                                         default:
1696                                                           break;
1697                                                         }
1698                                                         g_string_append_printf(style, "font-size: %s; ", size);
1699                                                         p = q;
1700                                                 } else {
1701                                                         p++;
1702                                                 }
1703                                         }
1704                                         if ((c = strchr(p, '>')) != NULL)
1705                                                 c++;
1706                                         else
1707                                                 c = p;
1708                                         pt = g_new0(struct purple_parse_tag, 1);
1709                                         pt->src_tag = "font";
1710                                         pt->dest_tag = "span";
1711                                         tags = g_list_prepend(tags, pt);
1712                                         if(style->len && xhtml)
1713                                                 g_string_append_printf(xhtml, "<span style='%s'>", g_strstrip(style->str));
1714                                         else
1715                                                 pt->ignore = TRUE;
1716                                         g_string_free(style, TRUE);
1717                                         continue;
1718                                 }
1719                                 if (!g_ascii_strncasecmp(c, "<body ", 6)) {
1720                                         const char *p = c + 6;
1721                                         gboolean did_something = FALSE;
1722                                         while (*p && *p != '>') {
1723                                                 if (!g_ascii_strncasecmp(p, "bgcolor=", 8)) {
1724                                                         const char *q = p + 8;
1725                                                         struct purple_parse_tag *pt = g_new0(struct purple_parse_tag, 1);
1726                                                         GString *color = g_string_new("");
1727                                                         CHECK_QUOTE(q);
1728                                                         while (VALID_CHAR(q)) {
1729                                                                 color = g_string_append_c(color, *q);
1730                                                                 q++;
1731                                                         }
1732                                                         if (xhtml)
1733                                                                 g_string_append_printf(xhtml, "<span style='background: %s;'>", g_strstrip(color->str));
1734                                                         g_string_free(color, TRUE);
1735                                                         if ((c = strchr(p, '>')) != NULL)
1736                                                                 c++;
1737                                                         else
1738                                                                 c = p;
1739                                                         pt->src_tag = "body";
1740                                                         pt->dest_tag = "span";
1741                                                         tags = g_list_prepend(tags, pt);
1742                                                         did_something = TRUE;
1743                                                         break;
1744                                                 }
1745                                                 p++;
1746                                         }
1747                                         if (did_something) continue;
1748                                 }
1749                                 /* this has to come after the special case for bgcolor */
1750                                 ALLOW_TAG("body");
1751                                 if(!g_ascii_strncasecmp(c, "<!--", strlen("<!--"))) {
1752                                         char *p = strstr(c + strlen("<!--"), "-->");
1753                                         if(p) {
1754                                                 if(xhtml)
1755                                                         xhtml = g_string_append(xhtml, "<!--");
1756                                                 c += strlen("<!--");
1757                                                 continue;
1758                                         }
1759                                 }
1760
1761                                 if(xhtml)
1762                                         xhtml = g_string_append(xhtml, "&lt;");
1763                                 if(plain)
1764                                         plain = g_string_append_c(plain, '<');
1765                                 c++;
1766                         }
1767                 } else if(*c == '&') {
1768                         char buf[7];
1769                         const char *pln;
1770                         int len;
1771
1772                         if ((pln = purple_markup_unescape_entity(c, &len)) == NULL) {
1773                                 len = 1;
1774                                 g_snprintf(buf, sizeof(buf), "%c", *c);
1775                                 pln = buf;
1776                         }
1777                         if(xhtml)
1778                                 xhtml = g_string_append_len(xhtml, c, len);
1779                         if(plain)
1780                                 plain = g_string_append(plain, pln);
1781                         if(cdata)
1782                                 cdata = g_string_append_len(cdata, c, len);
1783                         c += len;
1784                 } else {
1785                         if(xhtml)
1786                                 xhtml = g_string_append_c(xhtml, *c);
1787                         if(plain)
1788                                 plain = g_string_append_c(plain, *c);
1789                         if(cdata)
1790                                 cdata = g_string_append_c(cdata, *c);
1791                         c++;
1792                 }
1793         }
1794         if(xhtml) {
1795                 for (tag = tags; tag ; tag = tag->next) {
1796                         struct purple_parse_tag *pt = tag->data;
1797                         if(!pt->ignore)
1798                                 g_string_append_printf(xhtml, "</%s>", pt->dest_tag);
1799                 }
1800         }
1801         g_list_free(tags);
1802         if(xhtml_out)
1803                 *xhtml_out = g_string_free(xhtml, FALSE);
1804         if(plain_out)
1805                 *plain_out = g_string_free(plain, FALSE);
1806         if(url)
1807                 g_string_free(url, TRUE);
1808         if (cdata)
1809                 g_string_free(cdata, TRUE);
1810 #undef CHECK_QUOTE
1811 #undef VALID_CHAR
1812 }
1813
1814 /* The following are probably reasonable changes:
1815  * - \n should be converted to a normal space
1816  * - In addition to <br>, tags such as <p> and <div> should also be converted into \n
1817  * - We want to turn </td>#whitespace<td> sequences into a single tab
1818  * - We want to turn <td> into a single tab (for MSN profile "parsing")
1819  * - We want to turn </tr>#whitespace<tr> sequences into a single \n
1820  * - <script>...</script> and <style>...</style> should be completely removed
1821  */
1822
1823 char *
1824 purple_markup_strip_html(const char *str)
1825 {
1826         int i, j, k, entlen;
1827         gboolean visible = TRUE;
1828         gboolean closing_td_p = FALSE;
1829         gchar *str2;
1830         const gchar *cdata_close_tag = NULL, *ent;
1831         gchar *href = NULL;
1832         int href_st = 0;
1833
1834         if(!str)
1835                 return NULL;
1836
1837         str2 = g_strdup(str);
1838
1839         for (i = 0, j = 0; str2[i]; i++)
1840         {
1841                 if (str2[i] == '<')
1842                 {
1843                         if (cdata_close_tag)
1844                         {
1845                                 /* Note: Don't even assume any other tag is a tag in CDATA */
1846                                 if (g_ascii_strncasecmp(str2 + i, cdata_close_tag,
1847                                                 strlen(cdata_close_tag)) == 0)
1848                                 {
1849                                         i += strlen(cdata_close_tag) - 1;
1850                                         cdata_close_tag = NULL;
1851                                 }
1852                                 continue;
1853                         }
1854                         else if (g_ascii_strncasecmp(str2 + i, "<td", 3) == 0 && closing_td_p)
1855                         {
1856                                 str2[j++] = '\t';
1857                                 visible = TRUE;
1858                         }
1859                         else if (g_ascii_strncasecmp(str2 + i, "</td>", 5) == 0)
1860                         {
1861                                 closing_td_p = TRUE;
1862                                 visible = FALSE;
1863                         }
1864                         else
1865                         {
1866                                 closing_td_p = FALSE;
1867                                 visible = TRUE;
1868                         }
1869
1870                         k = i + 1;
1871
1872                         if(g_ascii_isspace(str2[k]))
1873                                 visible = TRUE;
1874                         else if (str2[k])
1875                         {
1876                                 /* Scan until we end the tag either implicitly (closed start
1877                                  * tag) or explicitly, using a sloppy method (i.e., < or >
1878                                  * inside quoted attributes will screw us up)
1879                                  */
1880                                 while (str2[k] && str2[k] != '<' && str2[k] != '>')
1881                                 {
1882                                         k++;
1883                                 }
1884
1885                                 /* If we've got an <a> tag with an href, save the address
1886                                  * to print later. */
1887                                 if (g_ascii_strncasecmp(str2 + i, "<a", 2) == 0 &&
1888                                     g_ascii_isspace(str2[i+2]))
1889                                 {
1890                                         int st; /* start of href, inclusive [ */
1891                                         int end; /* end of href, exclusive ) */
1892                                         char delim = ' ';
1893                                         /* Find start of href */
1894                                         for (st = i + 3; st < k; st++)
1895                                         {
1896                                                 if (g_ascii_strncasecmp(str2+st, "href=", 5) == 0)
1897                                                 {
1898                                                         st += 5;
1899                                                         if (str2[st] == '"' || str2[st] == '\'')
1900                                                         {
1901                                                                 delim = str2[st];
1902                                                                 st++;
1903                                                         }
1904                                                         break;
1905                                                 }
1906                                         }
1907                                         /* find end of address */
1908                                         for (end = st; end < k && str2[end] != delim; end++)
1909                                         {
1910                                                 /* All the work is done in the loop construct above. */
1911                                         }
1912
1913                                         /* If there's an address, save it.  If there was
1914                                          * already one saved, kill it. */
1915                                         if (st < k)
1916                                         {
1917                                                 char *tmp;
1918                                                 g_free(href);
1919                                                 tmp = g_strndup(str2 + st, end - st);
1920                                                 href = purple_unescape_html(tmp);
1921                                                 g_free(tmp);
1922                                                 href_st = j;
1923                                         }
1924                                 }
1925
1926                                 /* Replace </a> with an ascii representation of the
1927                                  * address the link was pointing to. */
1928                                 else if (href != NULL && g_ascii_strncasecmp(str2 + i, "</a>", 4) == 0)
1929                                 {
1930
1931                                         size_t hrlen = strlen(href);
1932
1933                                         /* Only insert the href if it's different from the CDATA. */
1934                                         if ((hrlen != j - href_st ||
1935                                              strncmp(str2 + href_st, href, hrlen)) &&
1936                                             (hrlen != j - href_st + 7 || /* 7 == strlen("http://") */
1937                                              strncmp(str2 + href_st, href + 7, hrlen - 7)))
1938                                         {
1939                                                 str2[j++] = ' ';
1940                                                 str2[j++] = '(';
1941                                                 g_memmove(str2 + j, href, hrlen);
1942                                                 j += hrlen;
1943                                                 str2[j++] = ')';
1944                                                 g_free(href);
1945                                                 href = NULL;
1946                                         }
1947                                 }
1948
1949                                 /* Check for tags which should be mapped to newline */
1950                                 else if (g_ascii_strncasecmp(str2 + i, "<p>", 3) == 0
1951                                  || g_ascii_strncasecmp(str2 + i, "<tr", 3) == 0
1952                                  || g_ascii_strncasecmp(str2 + i, "<br", 3) == 0
1953                                  || g_ascii_strncasecmp(str2 + i, "<hr", 3) == 0
1954                                  || g_ascii_strncasecmp(str2 + i, "<li", 3) == 0
1955                                  || g_ascii_strncasecmp(str2 + i, "<div", 4) == 0
1956                                  || g_ascii_strncasecmp(str2 + i, "</table>", 8) == 0)
1957                                 {
1958                                         str2[j++] = '\n';
1959                                 }
1960                                 /* Check for tags which begin CDATA and need to be closed */
1961 #if 0 /* FIXME.. option is end tag optional, we can't handle this right now */
1962                                 else if (g_ascii_strncasecmp(str2 + i, "<option", 7) == 0)
1963                                 {
1964                                         /* FIXME: We should not do this if the OPTION is SELECT'd */
1965                                         cdata_close_tag = "</option>";
1966                                 }
1967 #endif
1968                                 else if (g_ascii_strncasecmp(str2 + i, "<script", 7) == 0)
1969                                 {
1970                                         cdata_close_tag = "</script>";
1971                                 }
1972                                 else if (g_ascii_strncasecmp(str2 + i, "<style", 6) == 0)
1973                                 {
1974                                         cdata_close_tag = "</style>";
1975                                 }
1976                                 /* Update the index and continue checking after the tag */
1977                                 i = (str2[k] == '<' || str2[k] == '\0')? k - 1: k;
1978                                 continue;
1979                         }
1980                 }
1981                 else if (cdata_close_tag)
1982                 {
1983                         continue;
1984                 }
1985                 else if (!g_ascii_isspace(str2[i]))
1986                 {
1987                         visible = TRUE;
1988                 }
1989
1990                 if (str2[i] == '&' && (ent = purple_markup_unescape_entity(str2 + i, &entlen)) != NULL)
1991                 {
1992                         while (*ent)
1993                                 str2[j++] = *ent++;
1994                         i += entlen - 1;
1995                         continue;
1996                 }
1997
1998                 if (visible)
1999                         str2[j++] = g_ascii_isspace(str2[i])? ' ': str2[i];
2000         }
2001
2002         g_free(href);
2003
2004         str2[j] = '\0';
2005
2006         return str2;
2007 }
2008
2009 static gboolean
2010 badchar(char c)
2011 {
2012         switch (c) {
2013         case ' ':
2014         case ',':
2015         case '\0':
2016         case '\n':
2017         case '\r':
2018         case '<':
2019         case '>':
2020         case '"':
2021         case '\'':
2022                 return TRUE;
2023         default:
2024                 return FALSE;
2025         }
2026 }
2027
2028 static gboolean
2029 badentity(const char *c)
2030 {
2031         if (!g_ascii_strncasecmp(c, "&lt;", 4) ||
2032                 !g_ascii_strncasecmp(c, "&gt;", 4) ||
2033                 !g_ascii_strncasecmp(c, "&quot;", 6)) {
2034                 return TRUE;
2035         }
2036         return FALSE;
2037 }
2038
2039 char *
2040 purple_markup_linkify(const char *text)
2041 {
2042         const char *c, *t, *q = NULL;
2043         char *tmpurlbuf, *url_buf;
2044         gunichar g;
2045         gboolean inside_html = FALSE;
2046         int inside_paren = 0;
2047         GString *ret;
2048
2049         if (text == NULL)
2050                 return NULL;
2051
2052         ret = g_string_new("");
2053
2054         c = text;
2055         while (*c) {
2056
2057                 if(*c == '(' && !inside_html) {
2058                         inside_paren++;
2059                         ret = g_string_append_c(ret, *c);
2060                         c++;
2061                 }
2062
2063                 if(inside_html) {
2064                         if(*c == '>') {
2065                                 inside_html = FALSE;
2066                         } else if(!q && (*c == '\"' || *c == '\'')) {
2067                                 q = c;
2068                         } else if(q) {
2069                                 if(*c == *q)
2070                                         q = NULL;
2071                         }
2072                 } else if(*c == '<') {
2073                         inside_html = TRUE;
2074                         if (!g_ascii_strncasecmp(c, "<A", 2)) {
2075                                 while (1) {
2076                                         if (!g_ascii_strncasecmp(c, "/A>", 3)) {
2077                                                 inside_html = FALSE;
2078                                                 break;
2079                                         }
2080                                         ret = g_string_append_c(ret, *c);
2081                                         c++;
2082                                         if (!(*c))
2083                                                 break;
2084                                 }
2085                         }
2086                 } else if ((*c=='h') && (!g_ascii_strncasecmp(c, "http://", 7) ||
2087                                         (!g_ascii_strncasecmp(c, "https://", 8)))) {
2088                         t = c;
2089                         while (1) {
2090                                 if (badchar(*t) || badentity(t)) {
2091
2092                                         if ((!g_ascii_strncasecmp(c, "http://", 7) && (t - c == 7)) ||
2093                                                 (!g_ascii_strncasecmp(c, "https://", 8) && (t - c == 8))) {
2094                                                 break;
2095                                         }
2096
2097                                         if (*(t) == ',' && (*(t + 1) != ' ')) {
2098                                                 t++;
2099                                                 continue;
2100                                         }
2101
2102                                         if (*(t - 1) == '.')
2103                                                 t--;
2104                                         if ((*(t - 1) == ')' && (inside_paren > 0))) {
2105                                                 t--;
2106                                         }
2107
2108                                         url_buf = g_strndup(c, t - c);
2109                                         tmpurlbuf = purple_unescape_html(url_buf);
2110                                         g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
2111                                                         tmpurlbuf, url_buf);
2112                                         g_free(url_buf);
2113                                         g_free(tmpurlbuf);
2114                                         c = t;
2115                                         break;
2116                                 }
2117                                 t++;
2118
2119                         }
2120                 } else if (!g_ascii_strncasecmp(c, "www.", 4) && (c == text || badchar(c[-1]) || badentity(c-1))) {
2121                         if (c[4] != '.') {
2122                                 t = c;
2123                                 while (1) {
2124                                         if (badchar(*t) || badentity(t)) {
2125                                                 if (t - c == 4) {
2126                                                         break;
2127                                                 }
2128
2129                                                 if (*(t) == ',' && (*(t + 1) != ' ')) {
2130                                                         t++;
2131                                                         continue;
2132                                                 }
2133
2134                                                 if (*(t - 1) == '.')
2135                                                         t--;
2136                                                 if ((*(t - 1) == ')' && (inside_paren > 0))) {
2137                                                         t--;
2138                                                 }
2139                                                 url_buf = g_strndup(c, t - c);
2140                                                 tmpurlbuf = purple_unescape_html(url_buf);
2141                                                 g_string_append_printf(ret,
2142                                                                 "<A HREF=\"http://%s\">%s</A>", tmpurlbuf,
2143                                                                 url_buf);
2144                                                 g_free(url_buf);
2145                                                 g_free(tmpurlbuf);
2146                                                 c = t;
2147                                                 break;
2148                                         }
2149                                         t++;
2150                                 }
2151                         }
2152                 } else if (!g_ascii_strncasecmp(c, "ftp://", 6) || !g_ascii_strncasecmp(c, "sftp://", 7)) {
2153                         t = c;
2154                         while (1) {
2155                                 if (badchar(*t) || badentity(t)) {
2156
2157                                         if ((!g_ascii_strncasecmp(c, "ftp://", 6) && (t - c == 6)) ||
2158                                                 (!g_ascii_strncasecmp(c, "sftp://", 7) && (t - c == 7))) {
2159                                                 break;
2160                                         }
2161
2162                                         if (*(t - 1) == '.')
2163                                                 t--;
2164                                         if ((*(t - 1) == ')' && (inside_paren > 0))) {
2165                                                 t--;
2166                                         }
2167                                         url_buf = g_strndup(c, t - c);
2168                                         tmpurlbuf = purple_unescape_html(url_buf);
2169                                         g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
2170                                                         tmpurlbuf, url_buf);
2171                                         g_free(url_buf);
2172                                         g_free(tmpurlbuf);
2173                                         c = t;
2174                                         break;
2175                                 }
2176                                 if (!t)
2177                                         break;
2178                                 t++;
2179
2180                         }
2181                 } else if (!g_ascii_strncasecmp(c, "ftp.", 4) && (c == text || badchar(c[-1]) || badentity(c-1))) {
2182                         if (c[4] != '.') {
2183                                 t = c;
2184                                 while (1) {
2185                                         if (badchar(*t) || badentity(t)) {
2186                                                 if (t - c == 4) {
2187                                                         break;
2188                                                 }
2189                                                 if (*(t - 1) == '.')
2190                                                         t--;
2191                                                 if ((*(t - 1) == ')' && (inside_paren > 0))) {
2192                                                         t--;
2193                                                 }
2194                                                 url_buf = g_strndup(c, t - c);
2195                                                 tmpurlbuf = purple_unescape_html(url_buf);
2196                                                 g_string_append_printf(ret,
2197                                                                 "<A HREF=\"ftp://%s\">%s</A>", tmpurlbuf,
2198                                                                 url_buf);
2199                                                 g_free(url_buf);
2200                                                 g_free(tmpurlbuf);
2201                                                 c = t;
2202                                                 break;
2203                                         }
2204                                         if (!t)
2205                                                 break;
2206                                         t++;
2207                                 }
2208                         }
2209                 } else if (!g_ascii_strncasecmp(c, "mailto:", 7)) {
2210                         t = c;
2211                         while (1) {
2212                                 if (badchar(*t) || badentity(t)) {
2213                                         char *d;
2214                                         if (t - c == 7) {
2215                                                 break;
2216                                         }
2217                                         if (*(t - 1) == '.')
2218                                                 t--;
2219                                         if ((d = strstr(c + 7, "?")) != NULL && d < t)
2220                                                 url_buf = g_strndup(c + 7, d - c - 7);
2221                                         else
2222                                                 url_buf = g_strndup(c + 7, t - c - 7);
2223                                         if (!purple_email_is_valid(url_buf)) {
2224                                                 g_free(url_buf);
2225                                                 break;
2226                                         }
2227                                         g_free(url_buf);
2228                                         url_buf = g_strndup(c, t - c);
2229                                         tmpurlbuf = purple_unescape_html(url_buf);
2230                                         g_string_append_printf(ret, "<A HREF=\"%s\">%s</A>",
2231                                                           tmpurlbuf, url_buf);
2232                                         g_free(url_buf);
2233                                         g_free(tmpurlbuf);
2234                                         c = t;
2235                                         break;
2236                                 }
2237                                 if (!t)
2238                                         break;
2239                                 t++;
2240
2241                         }
2242                 } else if ((*c=='x') && (!g_ascii_strncasecmp(c, "xmpp:", 5)) &&
2243                                    (c == text || badchar(c[-1]) || badentity(c-1))) {
2244                         t = c;
2245                         while (1) {
2246                                 if (badchar(*t) || badentity(t)) {
2247
2248                                         if (t - c == 5) {
2249                                                 break;
2250                     &nbs