| 31 | | static CHARSET *mutt_new_charset(void) |
| 32 | | { |
| 33 | | CHARSET *chs; |
| 34 | | |
| 35 | | chs = safe_malloc(sizeof(CHARSET)); |
| 36 | | chs->map = NULL; |
| 37 | | |
| 38 | | return chs; |
| 39 | | } |
| 40 | | |
| 41 | | #if 0 |
| 42 | | |
| 43 | | static void mutt_free_charset(CHARSET **chsp) |
| 44 | | { |
| 45 | | CHARSET *chs = *chsp; |
| 46 | | |
| 47 | | safe_free((void **) &chs->map); |
| 48 | | safe_free((void **) chsp); |
| 49 | | } |
| 50 | | |
| 51 | | #endif |
| 52 | | |
| 53 | | static void canonical_charset(char *dest, size_t dlen, const char *name) |
| | 61 | /* Function Prototypes */ |
| | 62 | |
| | 63 | static CHARDESC *chardesc_new (void); |
| | 64 | static CHARDESC *repr2descr (int repr, CHARSET * cs); |
| | 65 | |
| | 66 | static CHARMAP *charmap_new (void); |
| | 67 | static CHARMAP *parse_charmap_header (FILE * fp); |
| | 68 | static CHARSET *charset_new (size_t hash_size); |
| | 69 | |
| | 70 | static CHARSET_MAP *build_translation (CHARSET * from, CHARSET * to); |
| | 71 | |
| | 72 | static char translate_character (CHARSET * to, const char *symbol); |
| | 73 | |
| | 74 | static int load_charset (const char *filename, CHARSET ** csp, short multbyte); |
| | 75 | static int parse_charmap_line (char *line, CHARMAP * m, CHARDESC ** descrp); |
| | 76 | static int _cd_compar (const void *a, const void *b); |
| | 77 | |
| | 78 | static void canonical_charset (char *dest, size_t dlen, const char *name); |
| | 79 | static void chardesc_free (CHARDESC ** cdp); |
| | 80 | static void charmap_free (CHARMAP ** cp); |
| | 81 | static void charset_free (CHARSET ** csp); |
| | 82 | static void fix_symbol (char *symbol, CHARMAP * m); |
| | 83 | |
| | 84 | static void canonical_charset (char *dest, size_t dlen, const char *name) |
| 71 | | static CHARSET *load_charset(const char *name) |
| 72 | | { |
| 73 | | char path[_POSIX_PATH_MAX]; |
| 74 | | char buffer[SHORT_STRING]; |
| 75 | | CHARSET *chs; |
| 76 | | FILE *fp = NULL; |
| | 102 | static CHARSET *charset_new (size_t hash_size) |
| | 103 | { |
| | 104 | CHARSET *cp = safe_malloc (sizeof (CHARSET)); |
| | 105 | short i; |
| | 106 | |
| | 107 | cp->n_symb = 256; |
| | 108 | cp->u_symb = 0; |
| | 109 | cp->multbyte = 1; |
| | 110 | cp->symb_to_repr = hash_create (hash_size); |
| | 111 | cp->description = safe_malloc (cp->n_symb * sizeof (CHARDESC *)); |
| | 112 | |
| | 113 | for (i = 0; i < cp->n_symb; i++) |
| | 114 | cp->description[i] = NULL; |
| | 115 | |
| | 116 | return cp; |
| | 117 | } |
| | 118 | |
| | 119 | static void charset_free (CHARSET ** csp) |
| | 120 | { |
| | 121 | CHARSET *cs = *csp; |
| | 122 | size_t i; |
| | 123 | |
| | 124 | for (i = 0; i < cs->n_symb; i++) |
| | 125 | chardesc_free (&cs->description[i]); |
| | 126 | |
| | 127 | safe_free ((void **) &cs->description); |
| | 128 | |
| | 129 | hash_destroy (&cs->symb_to_repr, NULL); |
| | 130 | safe_free ((void **) csp); |
| | 131 | } |
| | 132 | |
| | 133 | static CHARMAP *charmap_new (void) |
| | 134 | { |
| | 135 | CHARMAP *m = safe_malloc (sizeof (CHARMAP)); |
| | 136 | |
| | 137 | m->charset = NULL; |
| | 138 | m->escape_char = '\\'; |
| | 139 | m->comment_char = '#'; |
| | 140 | m->multbyte = 1; |
| | 141 | m->aliases = NULL; |
| | 142 | |
| | 143 | return m; |
| | 144 | } |
| | 145 | |
| | 146 | static void charmap_free (CHARMAP ** cp) |
| | 147 | { |
| | 148 | if (!cp || !*cp) |
| | 149 | return; |
| | 150 | |
| | 151 | mutt_free_list (&(*cp)->aliases); |
| | 152 | safe_free ((void **) &(*cp)->charset); |
| | 153 | safe_free ((void **) cp); |
| | 154 | |
| | 155 | return; |
| | 156 | } |
| | 157 | |
| | 158 | static CHARDESC *chardesc_new (void) |
| | 159 | { |
| | 160 | CHARDESC *p = safe_malloc (sizeof (CHARDESC)); |
| | 161 | |
| | 162 | p->symbol = NULL; |
| | 163 | p->repr = -1; |
| | 164 | |
| | 165 | return p; |
| | 166 | } |
| | 167 | |
| | 168 | static void chardesc_free (CHARDESC ** cdp) |
| | 169 | { |
| | 170 | if (!cdp || !*cdp) |
| | 171 | return; |
| | 172 | |
| | 173 | |
| | 174 | safe_free ((void **) &(*cdp)->symbol); |
| | 175 | safe_free ((void **) cdp); |
| | 176 | |
| | 177 | return; |
| | 178 | } |
| | 179 | |
| | 180 | static CHARMAP *parse_charmap_header (FILE * fp) |
| | 181 | { |
| | 182 | char buffer[1024]; |
| | 183 | char *t, *u; |
| | 184 | CHARMAP *m = charmap_new (); |
| | 185 | |
| | 186 | while (fgets (buffer, sizeof (buffer), fp)) |
| | 187 | { |
| | 188 | if ((t = strchr (buffer, '\n'))) |
| | 189 | *t = '\0'; |
| | 190 | else |
| | 191 | { |
| | 192 | charmap_free (&m); |
| | 193 | return NULL; |
| | 194 | } |
| | 195 | |
| | 196 | if (!strncmp (buffer, "CHARMAP", 7)) |
| | 197 | break; |
| | 198 | |
| | 199 | if (*buffer == m->comment_char) |
| | 200 | { |
| | 201 | if ((t = strtok (buffer + 1, "\t ")) && !strcasecmp (t, "alias")) |
| | 202 | { |
| | 203 | char _tmp[SHORT_STRING]; |
| | 204 | while ((t = strtok(NULL, "\t, "))) |
| | 205 | { |
| | 206 | canonical_charset (_tmp, sizeof (_tmp), t); |
| | 207 | m->aliases = mutt_add_list (m->aliases, _tmp); |
| | 208 | } |
| | 209 | } |
| | 210 | continue; |
| | 211 | } |
| | 212 | |
| | 213 | if (!(t = strtok (buffer, "\t "))) |
| | 214 | continue; |
| | 215 | |
| | 216 | if (!(u = strtok (NULL, "\t "))) |
| | 217 | { |
| | 218 | charmap_free (&m); |
| | 219 | return NULL; |
| | 220 | } |
| | 221 | |
| | 222 | if (!strcmp (t, "<code_set_name>")) |
| | 223 | { |
| | 224 | safe_free ((void **) &m->charset); |
| | 225 | canonical_charset (u, strlen (u) + 1, u); |
| | 226 | m->charset = safe_strdup (u); |
| | 227 | } |
| | 228 | else if (!strcmp (t, "<comment_char>")) |
| | 229 | { |
| | 230 | m->comment_char = *u; |
| | 231 | } |
| | 232 | else if (!strcmp (t, "<escape_char>")) |
| | 233 | { |
| | 234 | m->escape_char = *u; |
| | 235 | } |
| | 236 | else if (!strcmp (t, "<mb_cur_max>")) |
| | 237 | { |
| | 238 | m->multbyte = strtol (u, NULL, 0); |
| | 239 | } |
| | 240 | } |
| | 241 | |
| | 242 | return m; |
| | 243 | } |
| | 244 | |
| | 245 | /* Properly handle escape characters within a symbol. */ |
| | 246 | |
| | 247 | static void fix_symbol (char *symbol, CHARMAP * m) |
| | 248 | { |
| | 249 | char *s, *d; |
| | 250 | |
| | 251 | for (s = symbol, d = symbol; *s; *d++ = *s++) |
| | 252 | { |
| | 253 | if (*s == m->escape_char) |
| | 254 | s++; |
| | 255 | } |
| | 256 | |
| | 257 | *d = *s; |
| | 258 | } |
| | 259 | |
/* Classification of a single charmap body line, as returned
 * by parse_charmap_line().
 */
enum
{
  CL_DESCR = 0,   /* a character description was parsed */
  CL_END = 1,     /* the "END CHARMAP" line */
  CL_COMMENT = 2, /* line starts with the comment character */
  CL_ERROR = 3    /* line could not be parsed */
};
| | 267 | |
| | 268 | static int parse_charmap_line (char *line, CHARMAP * m, CHARDESC ** descrp) |
| | 269 | { |
| | 270 | char *t, *u; |
| | 271 | short n; |
| | 272 | CHARDESC *descr; |
| | 273 | |
| | 274 | if (*line == m->comment_char) |
| | 275 | return CL_COMMENT; |
| | 276 | |
| | 277 | descr = *descrp = chardesc_new (); |
| | 278 | |
| | 279 | if (!strncmp (line, "END CHARMAP", 11)) |
| | 280 | { |
| | 281 | chardesc_free (descrp); |
| | 282 | return CL_END; |
| | 283 | } |
| | 284 | |
| | 285 | for (t = line; *t && isspace (*t); t++) |
| | 286 | ; |
| | 287 | |
| | 288 | if (*t++ != '<') |
| | 289 | { |
| | 290 | chardesc_free (descrp); |
| | 291 | return CL_ERROR; |
| | 292 | } |
| | 293 | |
| | 294 | for (u = t; *u && *u != '>'; u++) |
| | 295 | { |
| | 296 | if (*u == m->escape_char && u[1]) |
| | 297 | u++; |
| | 298 | } |
| | 299 | |
| | 300 | if (*u != '>') |
| | 301 | { |
| | 302 | chardesc_free (descrp); |
| | 303 | return CL_ERROR; |
| | 304 | } |
| | 305 | |
| | 306 | *u++ = '\0'; |
| | 307 | descr->symbol = safe_strdup (t); |
| | 308 | fix_symbol (descr->symbol, m); |
| | 309 | |
| | 310 | for (t = u; *t && isspace (*t); t++) |
| | 311 | ; |
| | 312 | |
| | 313 | for (u = t; *u && !isspace (*u); u++) |
| | 314 | ; |
| | 315 | |
| | 316 | *u++ = 0; |
| | 317 | descr->repr = 0; |
| | 318 | |
| | 319 | for (n = 0; *t == m->escape_char && n < m->multbyte; n++) |
| | 320 | { |
| | 321 | switch (*++t) |
| | 322 | { |
| | 323 | case 'x': |
| | 324 | descr->repr = descr->repr * 256 + strtol (++t, &t, 16); |
| | 325 | break; |
| | 326 | case 'd': |
| | 327 | descr->repr = descr->repr * 256 + strtol (++t, &t, 10); |
| | 328 | break; |
| | 329 | case '0': |
| | 330 | case '1': |
| | 331 | case '2': |
| | 332 | case '3': |
| | 333 | case '4': |
| | 334 | case '5': |
| | 335 | case '6': |
| | 336 | case '7': |
| | 337 | descr->repr = descr->repr * 256 + strtol (t, &t, 8); |
| | 338 | break; |
| | 339 | default: |
| | 340 | chardesc_free (descrp); |
| | 341 | return CL_ERROR; |
| | 342 | } |
| | 343 | } |
| | 344 | |
| | 345 | if (!n) |
| | 346 | { |
| | 347 | chardesc_free (descrp); |
| | 348 | return CL_ERROR; |
| | 349 | } |
| | 350 | |
| | 351 | return CL_DESCR; |
| | 352 | } |
| | 353 | |
| | 354 | static int _cd_compar (const void *a, const void *b) |
| | 355 | { |
| | 356 | const CHARDESC *ap, *bp; |
| 79 | | chs = mutt_new_charset(); |
| 80 | | |
| 81 | | snprintf(path, sizeof(path), "%s/charsets/%s", SHAREDIR, name); |
| 82 | | if((fp = fopen(path, "r")) == NULL) |
| | 359 | ap = * (CHARDESC **) a; |
| | 360 | bp = * (CHARDESC **) b; |
| | 361 | |
| | 362 | i = ap->repr - bp->repr; |
| | 363 | |
| | 364 | dprint (98, (debugfile, "_cd_compar: { %x, %s }, { %x, %s } -> %d\n", |
| | 365 | ap->repr, ap->symbol, bp->repr, bp->symbol, i)); |
| | 366 | |
| | 367 | return i; |
| | 368 | } |
| | 369 | |
| | 370 | /* |
| | 371 | * Load a character set description into memory. |
| | 372 | * |
| | 373 | * The multibyte parameter tells us whether we are going |
| | 374 | * to accept multibyte character sets. |
| | 375 | */ |
| | 376 | |
| | 377 | static int load_charset (const char *filename, CHARSET ** csp, short multbyte) |
| | 378 | { |
| | 379 | CHARDESC *cd = NULL; |
| | 380 | CHARSET *cs = NULL; |
| | 381 | CHARMAP *m = NULL; |
| | 382 | FILE *fp; |
| | 383 | char buffer[1024]; |
| | 384 | int i; |
| | 385 | int rv = -1; |
| | 386 | |
| | 387 | cs = *csp = charset_new (multbyte ? 1031 : 257); |
| | 388 | |
| | 389 | dprint (2, (debugfile, "load_charset: Trying to open: %s\n", filename)); |
| | 390 | |
| | 391 | if ((fp = fopen (filename, "r")) == NULL) |
| | 392 | { |
| | 393 | char _filename[_POSIX_PATH_MAX]; |
| | 394 | |
| | 395 | snprintf (_filename, sizeof (_filename), "%s/%s", CHARMAPS_DIR, filename); |
| | 396 | dprint (2, (debugfile, "load_charset: Trying to open: %s\n", _filename)); |
| | 397 | |
| | 398 | if ((fp = fopen (_filename, "r")) == NULL) |
| | 399 | { |
| | 400 | dprint (2, (debugfile, "load_charset: Failed.\n")); |
| | 401 | goto bail; |
| | 402 | } |
| | 403 | } |
| | 404 | |
| | 405 | if ((m = parse_charmap_header (fp)) == NULL) |
| 99 | | } |
| 100 | | } |
| 101 | | |
| 102 | | bail: |
| 103 | | |
| 104 | | if(fp) fclose(fp); |
| 105 | | return chs; |
| 106 | | } |
| 107 | | |
| 108 | | static HASH *load_charset_aliases(void) |
| 109 | | { |
| | 427 | else if (i == CL_DESCR) |
| | 428 | { |
| | 429 | dprint (5, (debugfile, "load_charset: Got character description: < %s > -> %x\n", |
| | 430 | cd->symbol, cd->repr)); |
| | 431 | hash_delete (cs->symb_to_repr, cd->symbol, NULL, NULL); |
| | 432 | hash_insert (cs->symb_to_repr, cd->symbol, cd, 0); |
| | 433 | |
| | 434 | if (!multbyte) |
| | 435 | { |
| | 436 | if (0 <= cd->repr && cd->repr < 256) |
| | 437 | { |
| | 438 | if (cs->description[cd->repr]) |
| | 439 | chardesc_free (&cs->description[cd->repr]); |
| | 440 | else |
| | 441 | cs->u_symb++; |
| | 442 | |
| | 443 | cs->description[cd->repr] = cd; |
| | 444 | cd = NULL; |
| | 445 | } |
| | 446 | } |
| | 447 | else |
| | 448 | { |
| | 449 | if (cs->u_symb == cs->n_symb) |
| | 450 | { |
| | 451 | size_t new_size = cs->n_symb + 256; |
| | 452 | size_t i; |
| | 453 | |
| | 454 | safe_realloc ((void **) &cs->description, new_size * sizeof (CHARDESC *)); |
| | 455 | for (i = cs->u_symb; i < new_size; i++) |
| | 456 | cs->description[i] = NULL; |
| | 457 | cs->n_symb = new_size; |
| | 458 | } |
| | 459 | |
| | 460 | cs->description[cs->u_symb++] = cd; |
| | 461 | cd = NULL; |
| | 462 | } |
| | 463 | } |
| | 464 | |
| | 465 | chardesc_free (&cd); |
| | 466 | } |
| | 467 | |
| | 468 | if (multbyte) |
| | 469 | qsort (cs->description, cs->u_symb, sizeof (CHARDESC *), _cd_compar); |
| | 470 | |
| | 471 | rv = 0; |
| | 472 | |
| | 473 | bail: |
| | 474 | charmap_free (&m); |
| | 475 | if (fp) |
| | 476 | fclose (fp); |
| | 477 | if (rv) |
| | 478 | charset_free (csp); |
| | 479 | |
| | 480 | return rv; |
| | 481 | } |
| | 482 | |
| | 483 | static CHARDESC *repr2descr (int repr, CHARSET * cs) |
| | 484 | { |
| | 485 | size_t a, b, c; |
| | 486 | short found; |
| | 487 | |
| | 488 | if (!cs || repr < 0) |
| | 489 | return NULL; |
| | 490 | |
| | 491 | if (cs->multbyte == 1) |
| | 492 | { |
| | 493 | if (repr < 256) |
| | 494 | return cs->description[repr]; |
| | 495 | else |
| | 496 | return NULL; |
| | 497 | } |
| | 498 | |
| | 499 | /* So we have a multibyte mapping, i.e., Unicode. */ |
| | 500 | |
| | 501 | /* binary search for the proper description */ |
| | 502 | a = 0; |
| | 503 | b = cs->u_symb - 1; |
| | 504 | c = 0; /* shut up the compiler. */ |
| | 505 | found = 0; |
| | 506 | |
| | 507 | while (!found && b - a > 1) |
| | 508 | { |
| | 509 | c = (a + b) / 2; |
| | 510 | |
| | 511 | if (cs->description[c]->repr == repr) |
| | 512 | { |
| | 513 | found = 1; |
| | 514 | break; |
| | 515 | } |
| | 516 | else if (cs->description[c]->repr < repr) |
| | 517 | a = c; |
| | 518 | else if (cs->description[c]->repr > repr) |
| | 519 | b = c; |
| | 520 | } |
| | 521 | |
| | 522 | if (!found) |
| | 523 | { |
| | 524 | if (cs->description[(c = a)]->repr == repr) |
| | 525 | found = 1; |
| | 526 | else if (cs->description[(c = b)]->repr == repr) |
| | 527 | found = 1; |
| | 528 | } |
| | 529 | |
| | 530 | if (found) |
| | 531 | { |
| | 532 | dprint (5, (debugfile, "repr2descr: %x -> { %x, %s }\n", |
| | 533 | repr, cs->description[c]->repr, cs->description[c]->symbol)); |
| | 534 | return cs->description[c]; |
| | 535 | } |
| | 536 | else |
| | 537 | dprint (5, (debugfile, "Couldn't file a symbol for %x\n", |
| | 538 | repr)); |
| | 539 | |
| | 540 | return NULL; |
| | 541 | } |
| | 542 | |
| | 543 | /* Build a translation table. If a character cannot be |
| | 544 | * translated correctly, we try to find an approximation |
| | 545 | * from the portable character set. |
| | 546 | * |
| | 547 | * Note that this implies the assumption that the portable |
| | 548 | * character set can be used without any conversion. |
| | 549 | * |
| | 550 | * Should be safe on POSIX systems. |
| | 551 | */ |
| | 552 | |
| | 553 | static char translate_character (CHARSET * to, const char *symbol) |
| | 554 | { |
| | 555 | CHARDESC *cdt; |
| | 556 | |
| | 557 | if ((cdt = hash_find (to->symb_to_repr, symbol))) |
| | 558 | return (char) cdt->repr; |
| | 559 | else |
| | 560 | return *symbol; |
| | 561 | } |
| | 562 | |
| | 563 | static CHARSET_MAP *build_translation (CHARSET * from, CHARSET * to) |
| | 564 | { |
| | 565 | int i; |
| | 566 | CHARSET_MAP *map; |
| | 567 | CHARDESC *cd; |
| | 568 | |
| | 569 | /* This is for 8-bit character sets. */ |
| | 570 | |
| | 571 | if (!from || !to || from->multbyte > 1 || to->multbyte > 1) |
| | 572 | return NULL; |
| | 573 | |
| | 574 | map = safe_malloc (sizeof (CHARSET_MAP)); |
| | 575 | for (i = 0; i < 256; i++) |
| | 576 | { |
| | 577 | if (!(cd = repr2descr (i, from))) |
| | 578 | (*map)[i] = '?'; |
| | 579 | else |
| | 580 | (*map)[i] = translate_character (to, cd->symbol); |
| | 581 | } |
| | 582 | |
| | 583 | return map; |
| | 584 | } |
| | 585 | |
| | 586 | /* Currently, just scan the various charset definition files. |
| | 587 | * On the long run, we should cache this stuff in a file. |
| | 588 | */ |
| | 589 | |
| | 590 | static HASH *load_charset_aliases (void) |
| | 591 | { |
| | 592 | HASH *charset_aliases; |
| | 593 | CHARMAP *m; |
| | 594 | DIR *dp; |
| 129 | | *t++ = '\0'; |
| 130 | | hash_insert(charset_aliases, safe_strdup(buffer), safe_strdup(t), 1); |
| 131 | | } |
| 132 | | fclose(fp); |
| | 615 | if ((m = parse_charmap_header (fp)) != NULL) |
| | 616 | { |
| | 617 | LIST *lp; |
| | 618 | char buffer[LONG_STRING]; |
| | 619 | |
| | 620 | canonical_charset (buffer, sizeof (buffer), de->d_name); |
| | 621 | m->aliases = mutt_add_list (m->aliases, buffer); |
| | 622 | |
| | 623 | if (m->charset) |
| | 624 | m->aliases = mutt_add_list (m->aliases, m->charset); |
| | 625 | |
| | 626 | for (lp = m->aliases; lp; lp = lp->next) |
| | 627 | { |
| | 628 | if (lp->data) |
| | 629 | { |
| | 630 | dprint (2, (debugfile, "load_charset_aliases: %s -> %s\n", |
| | 631 | lp->data, de->d_name)); |
| | 632 | if (hash_find (charset_aliases, lp->data)) |
| | 633 | { |
| | 634 | dprint (2, (debugfile, "load_charset_aliases: %s already mapped.\n", |
| | 635 | lp->data)); |
| | 636 | } |
| | 637 | else |
| | 638 | hash_insert (charset_aliases, safe_strdup (lp->data), safe_strdup (de->d_name), 0); |
| | 639 | } |
| | 640 | } |
| | 641 | |
| | 642 | charmap_free (&m); |
| | 643 | } |
| | 644 | |
| | 645 | fclose (fp); |
| | 646 | } |
| | 647 | |
| | 648 | closedir (dp); |