| 97 | | static CHARSET *charset_new (size_t hash_size) |
| 98 | | { |
| 99 | | CHARSET *cp = safe_malloc (sizeof (CHARSET)); |
| 100 | | size_t i; |
| 101 | | |
| 102 | | cp->n_symb = 256; |
| 103 | | cp->u_symb = 0; |
| 104 | | cp->multbyte = 1; |
| 105 | | cp->symb_to_repr = hash_create (hash_size); |
| 106 | | cp->description = safe_malloc (cp->n_symb * sizeof (CHARDESC *)); |
| 107 | | |
| 108 | | for (i = 0; i < cp->n_symb; i++) |
| 109 | | cp->description[i] = NULL; |
| 110 | | |
| 111 | | return cp; |
| 112 | | } |
| 113 | | |
| 114 | | static void charset_free (CHARSET ** csp) |
| 115 | | { |
| 116 | | CHARSET *cs = *csp; |
| 117 | | size_t i; |
| 118 | | |
| 119 | | for (i = 0; i < cs->n_symb; i++) |
| 120 | | chardesc_free (&cs->description[i]); |
| 121 | | |
| 122 | | safe_free ((void **) &cs->description); |
| 123 | | |
| 124 | | hash_destroy (&cs->symb_to_repr, NULL); |
| 125 | | safe_free ((void **) csp); |
| 126 | | } |
| 127 | | |
| 128 | | static CHARMAP *charmap_new (void) |
| 129 | | { |
| 130 | | CHARMAP *m = safe_malloc (sizeof (CHARMAP)); |
| 131 | | |
| 132 | | m->charset = NULL; |
| 133 | | m->escape_char = '\\'; |
| 134 | | m->comment_char = '#'; |
| 135 | | m->multbyte = 1; |
| 136 | | m->aliases = NULL; |
| 137 | | |
| 138 | | return m; |
| 139 | | } |
| 140 | | |
| 141 | | static void charmap_free (CHARMAP ** cp) |
| 142 | | { |
| 143 | | if (!cp || !*cp) |
| 144 | | return; |
| 145 | | |
| 146 | | mutt_free_list (&(*cp)->aliases); |
| 147 | | safe_free ((void **) &(*cp)->charset); |
| 148 | | safe_free ((void **) cp); |
| 149 | | |
| 150 | | return; |
| 151 | | } |
| 152 | | |
| 153 | | static CHARDESC *chardesc_new (void) |
| 154 | | { |
| 155 | | CHARDESC *p = safe_malloc (sizeof (CHARDESC)); |
| 156 | | |
| 157 | | p->symbol = NULL; |
| 158 | | p->repr = -1; |
| 159 | | |
| 160 | | return p; |
| 161 | | } |
| 162 | | |
| 163 | | static void chardesc_free (CHARDESC ** cdp) |
| 164 | | { |
| 165 | | if (!cdp || !*cdp) |
| 166 | | return; |
| 167 | | |
| 168 | | |
| 169 | | safe_free ((void **) &(*cdp)->symbol); |
| 170 | | safe_free ((void **) cdp); |
| 171 | | |
| 172 | | return; |
| 173 | | } |
| 174 | | |
| 175 | | static CHARMAP *parse_charmap_header (FILE * fp) |
| 176 | | { |
| 177 | | char buffer[1024]; |
| 178 | | char *t, *u; |
| 179 | | CHARMAP *m = charmap_new (); |
| 180 | | |
| 181 | | while (fgets (buffer, sizeof (buffer), fp)) |
| 182 | | { |
| 183 | | if ((t = strchr (buffer, '\n'))) |
| 184 | | *t = '\0'; |
| 185 | | else |
| 186 | | { |
| 187 | | charmap_free (&m); |
| 188 | | return NULL; |
| 189 | | } |
| 190 | | |
| 191 | | if (!strncmp (buffer, "CHARMAP", 7)) |
| 192 | | break; |
| 193 | | |
| 194 | | if (*buffer == m->comment_char) |
| 195 | | { |
| 196 | | if ((t = strtok (buffer + 1, "\t ")) && !strcasecmp (t, "alias")) |
/*
 * Return non-zero when the charset name s canonicalises to "utf-8".
 * A NULL name yields 0.
 */
int mutt_is_utf8 (const char *s)
{
  char canon[8];

  if (s == NULL)
    return 0;

  mutt_canonical_charset (canon, sizeof (canon), s);
  return mutt_strcmp (canon, "utf-8") == 0;
}
| | 72 | |
| | 73 | |
| | 74 | /* |
| | 75 | * Like iconv_open, but canonicalises the charsets |
| | 76 | */ |
| | 77 | |
| | 78 | iconv_t mutt_iconv_open (const char *tocode, const char *fromcode) |
| | 79 | { |
| | 80 | char tocode1[SHORT_STRING]; |
| | 81 | char fromcode1[SHORT_STRING]; |
| | 82 | |
| | 83 | mutt_canonical_charset (tocode1, sizeof (tocode1), tocode); |
| | 84 | mutt_canonical_charset (fromcode1, sizeof (fromcode1), fromcode); |
| | 85 | return iconv_open (tocode1, fromcode1); |
| | 86 | } |
| | 87 | |
| | 88 | |
| | 89 | /* |
| | 90 | * Like iconv, but keeps going even when the input is invalid |
| | 91 | * If you're supplying inrepls, the source charset should be stateless; |
| | 92 | * if you're supplying an outrepl, the target charset should be. |
| | 93 | */ |
| | 94 | |
| | 95 | size_t mutt_iconv (iconv_t cd, const char **inbuf, size_t *inbytesleft, |
| | 96 | char **outbuf, size_t *outbytesleft, |
| | 97 | const char **inrepls, const char *outrepl) |
| | 98 | { |
| | 99 | size_t ret = 0, ret1; |
| | 100 | const char *ib = *inbuf; |
| | 101 | size_t ibl = *inbytesleft; |
| | 102 | char *ob = *outbuf; |
| | 103 | size_t obl = *outbytesleft; |
| | 104 | |
| | 105 | for (;;) |
| | 106 | { |
| | 107 | ret1 = iconv (cd, &ib, &ibl, &ob, &obl); |
| | 108 | if (ret1 != (size_t)-1) |
| | 109 | ret += ret1; |
| | 110 | if (ibl && obl && errno == EILSEQ) |
| | 111 | { |
| | 112 | if (inrepls) |
| 205 | | continue; |
| 206 | | } |
| 207 | | |
| 208 | | if (!(t = strtok (buffer, "\t "))) |
| 209 | | continue; |
| 210 | | |
| 211 | | if (!(u = strtok (NULL, "\t "))) |
| 212 | | { |
| 213 | | charmap_free (&m); |
| 214 | | return NULL; |
| 215 | | } |
| 216 | | |
| 217 | | if (!strcmp (t, "<code_set_name>")) |
| 218 | | { |
| 219 | | safe_free ((void **) &m->charset); |
| 220 | | canonical_charset (u, strlen (u) + 1, u); |
| 221 | | m->charset = safe_strdup (u); |
| 222 | | } |
| 223 | | else if (!strcmp (t, "<comment_char>")) |
| 224 | | { |
| 225 | | m->comment_char = *u; |
| 226 | | } |
| 227 | | else if (!strcmp (t, "<escape_char>")) |
| 228 | | { |
| 229 | | m->escape_char = *u; |
| 230 | | } |
| 231 | | else if (!strcmp (t, "<mb_cur_max>")) |
| 232 | | { |
| 233 | | m->multbyte = strtol (u, NULL, 0); |
| 234 | | } |
| 235 | | } |
| 236 | | |
| 237 | | return m; |
| 238 | | } |
| 239 | | |
| 240 | | /* Properly handle escape characters within a symbol. */ |
| 241 | | |
| 242 | | static void fix_symbol (char *symbol, CHARMAP * m) |
| 243 | | { |
| 244 | | char *s, *d; |
| 245 | | |
| 246 | | for (s = symbol, d = symbol; *s; *d++ = *s++) |
| 247 | | { |
| 248 | | if (*s == m->escape_char && !*++s) |
| 249 | | break; |
| 250 | | } |
| 251 | | |
| 252 | | *d = *s; |
| 253 | | } |
| 254 | | |
/* Result codes of parse_charmap_line(). */
enum
{
  CL_DESCR = 0,   /* the line held a character description */
  CL_END = 1,     /* the line was "END CHARMAP" */
  CL_COMMENT = 2, /* the line started with the comment character */
  CL_ERROR = 3    /* the line could not be parsed */
};
| 262 | | |
| 263 | | static int parse_charmap_line (char *line, CHARMAP * m, CHARDESC ** descrp) |
| 264 | | { |
| 265 | | char *t, *u; |
| 266 | | short n; |
| 267 | | CHARDESC *descr; |
| 268 | | |
| 269 | | if (*line == m->comment_char) |
| 270 | | return CL_COMMENT; |
| 271 | | |
| 272 | | descr = *descrp = chardesc_new (); |
| 273 | | |
| 274 | | if (!strncmp (line, "END CHARMAP", 11)) |
| 275 | | { |
| 276 | | chardesc_free (descrp); |
| 277 | | return CL_END; |
| 278 | | } |
| 279 | | |
| 280 | | for (t = line; *t && isspace ((unsigned char) *t); t++) |
| 281 | | ; |
| 282 | | |
| 283 | | if (*t++ != '<') |
| 284 | | { |
| 285 | | chardesc_free (descrp); |
| 286 | | return CL_ERROR; |
| 287 | | } |
| 288 | | |
| 289 | | for (u = t; *u && *u != '>'; u++) |
| 290 | | { |
| 291 | | if (*u == m->escape_char && u[1]) |
| 292 | | u++; |
| 293 | | } |
| 294 | | |
| 295 | | if (*u != '>') |
| 296 | | { |
| 297 | | chardesc_free (descrp); |
| 298 | | return CL_ERROR; |
| 299 | | } |
| 300 | | |
| 301 | | *u++ = '\0'; |
| 302 | | descr->symbol = safe_strdup (t); |
| 303 | | fix_symbol (descr->symbol, m); |
| 304 | | |
| 305 | | for (t = u; *t && isspace ((unsigned char) *t); t++) |
| 306 | | ; |
| 307 | | |
| 308 | | for (u = t; *u && !isspace ((unsigned char) *u); u++) |
| 309 | | ; |
| 310 | | |
| 311 | | *u++ = 0; |
| 312 | | descr->repr = 0; |
| 313 | | |
| 314 | | for (n = 0; *t == m->escape_char && n < m->multbyte; n++) |
| 315 | | { |
| 316 | | switch (*++t) |
| 317 | | { |
| 318 | | case 'x': |
| 319 | | descr->repr = descr->repr * 256 + strtol (++t, &t, 16); |
| 320 | | break; |
| 321 | | case 'd': |
| 322 | | descr->repr = descr->repr * 256 + strtol (++t, &t, 10); |
| 323 | | break; |
| 324 | | case '0': |
| 325 | | case '1': |
| 326 | | case '2': |
| 327 | | case '3': |
| 328 | | case '4': |
| 329 | | case '5': |
| 330 | | case '6': |
| 331 | | case '7': |
| 332 | | descr->repr = descr->repr * 256 + strtol (t, &t, 8); |
| 333 | | break; |
| 334 | | default: |
| 335 | | chardesc_free (descrp); |
| 336 | | return CL_ERROR; |
| 337 | | } |
| 338 | | } |
| 339 | | |
| 340 | | if (!n) |
| 341 | | { |
| 342 | | chardesc_free (descrp); |
| 343 | | return CL_ERROR; |
| 344 | | } |
| 345 | | |
| 346 | | return CL_DESCR; |
| 347 | | } |
| 348 | | |
| 349 | | static int _cd_compar (const void *a, const void *b) |
| 350 | | { |
| 351 | | const CHARDESC *ap, *bp; |
| 352 | | int i; |
| 353 | | |
| 354 | | ap = * (CHARDESC **) a; |
| 355 | | bp = * (CHARDESC **) b; |
| 356 | | |
| 357 | | i = ap->repr - bp->repr; |
| 358 | | |
| 359 | | dprint (98, (debugfile, "_cd_compar: { %x, %s }, { %x, %s } -> %d\n", |
| 360 | | ap->repr, ap->symbol, bp->repr, bp->symbol, i)); |
| 361 | | |
| 362 | | return i; |
| 363 | | } |
| 364 | | |
| 365 | | /* |
| 366 | | * Load a character set description into memory. |
| 367 | | * |
| 368 | | * The multibyte parameter tells us whether we are going |
| 369 | | * to accept multibyte character sets. |
| 370 | | */ |
| 371 | | |
| 372 | | static int load_charset (const char *filename, CHARSET ** csp, short multbyte) |
| 373 | | { |
| 374 | | CHARDESC *cd = NULL; |
| 375 | | CHARSET *cs = NULL; |
| 376 | | CHARMAP *m = NULL; |
| 377 | | FILE *fp; |
| 378 | | char buffer[1024]; |
| 379 | | int i; |
| 380 | | int rv = -1; |
| 381 | | |
| 382 | | cs = *csp = charset_new (multbyte ? 1031 : 257); |
| 383 | | |
| 384 | | dprint (2, (debugfile, "load_charset: Trying to open: %s\n", filename)); |
| 385 | | |
| 386 | | if ((fp = fopen (filename, "r")) == NULL) |
| 387 | | { |
| 388 | | char _filename[_POSIX_PATH_MAX]; |
| 389 | | |
| 390 | | snprintf (_filename, sizeof (_filename), "%s/%s", CHARMAPS_DIR, filename); |
| 391 | | dprint (2, (debugfile, "load_charset: Trying to open: %s\n", _filename)); |
| 392 | | |
| 393 | | if ((fp = fopen (_filename, "r")) == NULL) |
| 394 | | { |
| 395 | | dprint (2, (debugfile, "load_charset: Failed.\n")); |
| 396 | | goto bail; |
| 397 | | } |
| 398 | | } |
| 399 | | |
| 400 | | if ((m = parse_charmap_header (fp)) == NULL) |
| 401 | | goto bail; |
| 402 | | |
| 403 | | /* Don't handle multibyte character sets unless explicitly requested |
| 404 | | * to do so. |
| 405 | | */ |
| 406 | | |
| 407 | | if (m->multbyte > 1 && !multbyte) |
| 408 | | { |
| 409 | | dprint (2, (debugfile, "load_charset: m->multbyte == %d\n", |
| 410 | | (int) m->multbyte)); |
| 411 | | goto bail; |
| 412 | | } |
| 413 | | |
| 414 | | cs->multbyte = m->multbyte; |
| 415 | | |
| 416 | | while (fgets (buffer, sizeof (buffer), fp) != NULL) |
| 417 | | { |
| 418 | | i = parse_charmap_line (buffer, m, &cd); |
| 419 | | |
| 420 | | if (i == CL_END) |
| 421 | | break; |
| 422 | | else if (i == CL_DESCR) |
| 423 | | { |
| 424 | | dprint (5, (debugfile, "load_charset: Got character description: <%s> -> %x\n", |
| 425 | | cd->symbol, cd->repr)); |
| 426 | | |
| 427 | | if (!multbyte) |
| 428 | | { |
| 429 | | if (0 <= cd->repr && cd->repr < 256) |
| 430 | | { |
| 431 | | hash_delete (cs->symb_to_repr, cd->symbol, NULL, NULL); |
| 432 | | hash_insert (cs->symb_to_repr, cd->symbol, cd, 0); |
| 433 | | |
| 434 | | /* note: we intentionally leak some memory here. */ |
| 435 | | if (!cs->description[cd->repr]) |
| 436 | | cs->u_symb++; |
| 437 | | |
| 438 | | cs->description[cd->repr] = cd; |
| 439 | | cd = NULL; |
| 440 | | } |
| 441 | | } |
| 442 | | else |
| 443 | | { |
| 444 | | if (cs->u_symb == cs->n_symb) |
| 445 | | { |
| 446 | | size_t new_size = cs->n_symb + 256; |
| 447 | | size_t i; |
| 448 | | |
| 449 | | safe_realloc ((void **) &cs->description, new_size * sizeof (CHARDESC *)); |
| 450 | | for (i = cs->u_symb; i < new_size; i++) |
| 451 | | cs->description[i] = NULL; |
| 452 | | cs->n_symb = new_size; |
| 453 | | } |
| 454 | | |
| 455 | | hash_delete (cs->symb_to_repr, cd->symbol, NULL, NULL); |
| 456 | | hash_insert (cs->symb_to_repr, cd->symbol, cd, 0); |
| 457 | | |
| 458 | | cs->description[cs->u_symb++] = cd; |
| 459 | | cd = NULL; |
| 460 | | } |
| 461 | | } |
| 462 | | |
| 463 | | if (cd) |
| 464 | | { |
| 465 | | dprint (5, (debugfile, "load_charset: character description still present: <%s>->%x\n", |
| 466 | | cd->symbol, cd->repr)); |
| 467 | | } |
| 468 | | chardesc_free (&cd); |
| 469 | | } |
| 470 | | |
| 471 | | if (multbyte) |
| 472 | | qsort (cs->description, cs->u_symb, sizeof (CHARDESC *), _cd_compar); |
| 473 | | |
| 474 | | rv = 0; |
| 475 | | |
| 476 | | bail: |
| 477 | | charmap_free (&m); |
| 478 | | if (fp) |
| 479 | | fclose (fp); |
| 480 | | if (rv) |
| 481 | | charset_free (csp); |
| 482 | | |
| 483 | | return rv; |
| 484 | | } |
| 485 | | |
| 486 | | static CHARDESC *repr2descr (int repr, CHARSET * cs) |
| 487 | | { |
| 488 | | CHARDESC *key; |
| 489 | | CHARDESC **r; |
| 490 | | |
| 491 | | if (!cs || repr < 0) |
| 492 | | return NULL; |
| 493 | | |
| 494 | | if (cs->multbyte == 1) |
| 495 | | { |
| 496 | | if (repr < 256) |
| 497 | | return cs->description[repr]; |
| 498 | | else |
| 499 | | return NULL; |
| 500 | | } |
| 501 | | |
| 502 | | key = safe_malloc (sizeof(CHARDESC)); |
| 503 | | key->repr = repr; |
| 504 | | key->symbol = "<unknown>"; /* otherwise, the |
| 505 | | * debug code may |
| 506 | | * segfault. ouch. |
| 507 | | */ |
| 508 | | |
| 509 | | r = bsearch (&key, cs->description, cs->u_symb, |
| 510 | | sizeof (CHARDESC *), _cd_compar); |
| 511 | | |
| 512 | | safe_free ((void **) &key); |
| 513 | | |
| 514 | | if (r) return *r; |
| 515 | | |
| 516 | | return NULL; |
| 517 | | } |
| 518 | | |
| 519 | | /* Build a translation table. If a character cannot be |
| 520 | | * translated correctly, we try to find an approximation |
| 521 | | * from the portable character set. |
| 522 | | * |
| 523 | | * Note that this implies the assumption that the portable |
| 524 | | * character set can be used without any conversion. |
| 525 | | * |
| 526 | | * Should be safe on POSIX systems. |
| 527 | | */ |
| 528 | | |
| 529 | | static char translate_character (CHARSET * to, const char *symbol) |
| 530 | | { |
| 531 | | CHARDESC *cdt; |
| 532 | | |
| 533 | | if ((cdt = hash_find (to->symb_to_repr, symbol))) |
| 534 | | return (char) cdt->repr; |
| 535 | | else |
| 536 | | return *symbol; |
| 537 | | } |
| 538 | | |
| 539 | | static CHARSET_MAP *build_translation (CHARSET * from, CHARSET * to) |
| 540 | | { |
| 541 | | int i; |
| 542 | | CHARSET_MAP *map; |
| 543 | | CHARDESC *cd; |
| 544 | | |
| 545 | | /* This is for 8-bit character sets. */ |
| 546 | | |
| 547 | | if (!from || !to || from->multbyte > 1 || to->multbyte > 1) |
| 548 | | return NULL; |
| 549 | | |
| 550 | | map = safe_malloc (sizeof (CHARSET_MAP)); |
| 551 | | for (i = 0; i < 256; i++) |
| 552 | | { |
| 553 | | if (!(cd = repr2descr (i, from))) |
| 554 | | (*map)[i] = '?'; |
| 555 | | else |
| 556 | | (*map)[i] = translate_character (to, cd->symbol); |
| 557 | | } |
| 558 | | |
| 559 | | return map; |
| 560 | | } |
| 561 | | |
| 562 | | /* Currently, just scan the various charset definition files. |
| 563 | | * In the long run, we should cache this stuff in a file. |
| 564 | | */ |
| 565 | | |
| 566 | | static HASH *load_charset_aliases (void) |
| 567 | | { |
| 568 | | HASH *charset_aliases; |
| 569 | | CHARMAP *m; |
| 570 | | DIR *dp; |
| 571 | | FILE *fp; |
| 572 | | struct dirent *de; |
| 573 | | |
| 574 | | if ((dp = opendir (CHARMAPS_DIR)) == NULL) |
| 575 | | return NULL; |
| 576 | | |
| 577 | | charset_aliases = hash_create(127); |
| 578 | | |
| 579 | | while ((de = readdir (dp))) |
| 580 | | { |
| 581 | | char fnbuff[_POSIX_PATH_MAX]; |
| 582 | | |
| 583 | | if (*de->d_name == '.') |
| 584 | | continue; |
| 585 | | |
| 586 | | snprintf (fnbuff, sizeof (fnbuff), "%s/%s", CHARMAPS_DIR, de->d_name); |
| 587 | | dprint (2, (debugfile, "load_charset_aliases: Opening %s\n", fnbuff)); |
| 588 | | if ((fp = fopen (fnbuff, "r")) == NULL) |
| 589 | | continue; |
| 590 | | |
| 591 | | if ((m = parse_charmap_header (fp)) != NULL) |
| 592 | | { |
| 593 | | LIST *lp; |
| 594 | | char buffer[LONG_STRING]; |
| 595 | | |
| 596 | | canonical_charset (buffer, sizeof (buffer), de->d_name); |
| 597 | | m->aliases = mutt_add_list (m->aliases, buffer); |
| 598 | | |
| 599 | | if (m->charset) |
| 600 | | m->aliases = mutt_add_list (m->aliases, m->charset); |
| 601 | | |
| 602 | | for (lp = m->aliases; lp; lp = lp->next) |
| 603 | | { |
| 604 | | if (lp->data) |
| 605 | | { |
| 606 | | dprint (2, (debugfile, "load_charset_aliases: %s -> %s\n", |
| 607 | | lp->data, de->d_name)); |
| 608 | | if (hash_find (charset_aliases, lp->data)) |
| 609 | | { |
| 610 | | dprint (2, (debugfile, "load_charset_aliases: %s already mapped.\n", |
| 611 | | lp->data)); |
| 612 | | } |
| 613 | | else |
| 614 | | hash_insert (charset_aliases, safe_strdup (lp->data), safe_strdup (de->d_name), 0); |
| 615 | | } |
| 616 | | } |
| 617 | | |
| 618 | | charmap_free (&m); |
| 619 | | } |
| 620 | | |
| 621 | | fclose (fp); |
| 622 | | } |
| 623 | | |
| 624 | | closedir (dp); |
| 625 | | return charset_aliases; |
| 626 | | } |
| 627 | | |
| 628 | | static void init_charsets () |
| 629 | | { |
| 630 | | if (Charsets) return; |
| 631 | | |
| 632 | | Charsets = hash_create (127); |
| 633 | | Translations = hash_create (127); |
| 634 | | CharsetAliases = load_charset_aliases (); |
| 635 | | } |
| 636 | | |
| 637 | | CHARSET *mutt_get_charset (const char *name) |
| 638 | | { |
| 639 | | CHARSET *charset; |
| 640 | | char buffer[SHORT_STRING]; |
| 641 | | char *real_charset; |
| 642 | | char *hooked; |
| 643 | | |
| 644 | | if (!name || !*name) |
| 645 | | return (NULL); |
| 646 | | |
| 647 | | init_charsets(); |
| 648 | | canonical_charset (buffer, sizeof(buffer), name); |
| 649 | | |
| 650 | | /* needs to be documented */ |
| 651 | | |
| 652 | | if ((hooked = mutt_charset_hook (buffer))) |
| 653 | | canonical_charset (buffer, sizeof (buffer), hooked); |
| 654 | | |
| 655 | | dprint (2, (debugfile, "mutt_get_charset: Looking for %s\n", buffer)); |
| 656 | | |
| 657 | | if(!CharsetAliases || !(real_charset = hash_find(CharsetAliases, buffer))) |
| 658 | | real_charset = buffer; |
| 659 | | |
| 660 | | dprint (2, (debugfile, "mutt_get_charset: maps to: %s\n", real_charset)); |
| 661 | | |
| 662 | | if(!(charset = hash_find (Charsets, real_charset))) |
| 663 | | { |
| 664 | | dprint (2, (debugfile, "mutt_get_charset: Need to load.\n")); |
| 665 | | if (load_charset(real_charset, &charset, 0) == 0) |
| 666 | | hash_insert(Charsets, safe_strdup(real_charset), charset, 1); |
| 667 | | else |
| 668 | | charset = NULL; |
| 669 | | } |
| 670 | | return charset; |
| 671 | | } |
| 672 | | |
| 673 | | CHARSET_MAP *mutt_get_translation(const char *_from, const char *_to) |
| 674 | | { |
| 675 | | char from_canon[SHORT_STRING]; |
| 676 | | char to_canon[SHORT_STRING]; |
| 677 | | char key[SHORT_STRING]; |
| 678 | | char *from, *to; |
| 679 | | CHARSET *from_cs, *to_cs; |
| 680 | | CHARSET_MAP *map; |
| 681 | | |
| 682 | | if(!_from || !_to) |
| 683 | | return NULL; |
| 684 | | |
| 685 | | canonical_charset(from_canon, sizeof(from_canon), _from); |
| 686 | | canonical_charset(to_canon, sizeof(to_canon), _to); |
| 687 | | |
| 688 | | /* quick check for some trivial cases. Doing this before |
| 689 | | * we actually call the initialization routine delays character |
| 690 | | * set loading until it's _really_ needed. |
| 691 | | */ |
| 692 | | |
| 693 | | if(!strcmp(from_canon, to_canon) |
| 694 | | || (!strcmp (from_canon, "us-ascii") && !strncmp (to_canon, "iso-8859", 8))) |
| 695 | | return NULL; |
| 696 | | |
| 697 | | init_charsets(); |
| 698 | | |
| 699 | | if(!CharsetAliases || !(from = hash_find(CharsetAliases, from_canon))) |
| 700 | | from = from_canon; |
| 701 | | if(!CharsetAliases || !(to = hash_find(CharsetAliases, to_canon))) |
| 702 | | to = to_canon; |
| 703 | | |
| 704 | | /* quick check for the identity mapping */ |
| 705 | | if((from == to) || !mutt_strcmp(from, to)) |
| 706 | | return NULL; |
| 707 | | |
| 708 | | snprintf(key, sizeof(key), "%s %s", from, to); |
| 709 | | if((map = hash_find(Translations, key)) == NULL) |
| 710 | | { |
| 711 | | from_cs = mutt_get_charset(from); |
| 712 | | to_cs = mutt_get_charset(to); |
| 713 | | |
| 714 | | if((map = build_translation(from_cs, to_cs))) |
| 715 | | hash_insert(Translations, safe_strdup(key), map, 1); |
| 716 | | } |
| 717 | | return map; |
| 718 | | } |
| 719 | | |
| 720 | | unsigned char mutt_display_char(unsigned char ch, CHARSET_MAP *map) |
| 721 | | { |
| 722 | | if (!map || !ch) |
| 723 | | return ch; |
| 724 | | |
| 725 | | return (unsigned char) (*map)[ch]; |
| 726 | | } |
| 727 | | |
| 728 | | int mutt_display_string(char *str, CHARSET_MAP *map) |
| 729 | | { |
| 730 | | if(!map) |
| 731 | | return -1; |
| 732 | | |
| 733 | | while ((*str = mutt_display_char((unsigned char)*str, map))) |
| 734 | | str++; |
| 735 | | |
| 736 | | return 0; |
| 737 | | } |
| 738 | | |
| 739 | | /*************************************************************/ |
| 740 | | /* UTF-8 support */ |
| 741 | | |
| 742 | | int mutt_is_utf8(const char *s) |
| 743 | | { |
| 744 | | char buffer[SHORT_STRING]; |
| 745 | | |
| 746 | | if(!s) |
| 747 | | return 0; |
| 748 | | |
| 749 | | canonical_charset(buffer, sizeof(buffer), s); |
| 750 | | return !mutt_strcmp(buffer, "utf-8"); |
| 751 | | } |
| 752 | | |
/*
 * Bit masks for one byte.  Each name spells out its bit pattern,
 * most significant bit first: 'I' is a set bit, 'O' a clear bit.
 */

#define IOOOOOOO 0x80 /* 1000 0000 */
#define IIOOOOOO 0xc0 /* 1100 0000 */
#define IIIOOOOO 0xe0 /* 1110 0000 */
#define IIIIOOOO 0xf0 /* 1111 0000 */
#define IIIIIOOO 0xf8 /* 1111 1000 */
#define IIIIIIOO 0xfc /* 1111 1100 */
#define IIIIIIIO 0xfe /* 1111 1110 */
#define IIIIIIII 0xff /* 1111 1111 */
| 763 | | |
| 764 | | static struct unicode_mask |
| 765 | | { |
| 766 | | int mask; |
| 767 | | int value; |
| 768 | | short len; |
| 769 | | } |
| 770 | | unicode_masks[] = |
| 771 | | { |
| 772 | | { IOOOOOOO, 0, 1 }, |
| 773 | | { IIIOOOOO, IIOOOOOO, 2 }, |
| 774 | | { IIIIOOOO, IIIOOOOO, 3 }, |
| 775 | | { IIIIIOOO, IIIIOOOO, 4 }, |
| 776 | | { IIIIIIOO, IIIIIOOO, 5 }, |