| 847 | | /* internal use only */ |
| 848 | | |
/*
 * Accumulator for UTF-8 bytes that have been collected but not yet
 * converted and written out.
 *
 *   buffer - heap storage holding the pending bytes
 *   blen   - size bookkeeping for buffer (grown on demand by the writer)
 *   bp     - number of bytes currently stored, i.e. the next write index
 */
struct utf8_state
{
  char *buffer;
  size_t blen, bp;
};
| 855 | | |
| 856 | | static struct utf8_state *new_utf8_state (void) |
| 857 | | { |
| 858 | | return safe_calloc (1, sizeof (struct utf8_state)); |
| 859 | | } |
| 860 | | |
| 861 | | static void free_utf8_state (struct utf8_state **sp) |
| 862 | | { |
| 863 | | if (!sp || !*sp) return; |
| 864 | | safe_free ((void **) &(*sp)->buffer); |
| 865 | | safe_free ((void **) sp); |
| 866 | | } |
| 867 | | |
| 868 | | static void _state_utf8_flush(STATE *s, CHARSET *chs, struct utf8_state *sfu) |
| 869 | | { |
| 870 | | char *t; |
| 871 | | if(!sfu->buffer || !sfu->bp) |
| | 861 | |
| | 862 | |
| | 863 | |
| | 864 | /************************************************************* |
| | 865 | * General decoder framework |
| | 866 | */ |
| | 867 | |
| | 868 | |
| | 869 | |
| | 870 | #define MIN(a,b) (((a) <= (b)) ? (a): (b)) |
| | 871 | |
| | 872 | DECODER *mutt_open_decoder (const char *src, const char *dest) |
| | 873 | { |
| | 874 | DECODER *d = safe_calloc (1, sizeof (DECODER));; |
| | 875 | |
| | 876 | d->in.size = DECODER_BUFFSIZE; |
| | 877 | d->out.size = DECODER_BUFFSIZE; |
| | 878 | |
| | 879 | if (!src || !dest || mutt_is_utf8 (dest)) |
| | 880 | { |
| | 881 | d->just_take_id = 1; |
| | 882 | return d; |
| | 883 | } |
| | 884 | |
| | 885 | if (mutt_is_utf8 (src)) |
| | 886 | { |
| | 887 | if (!(d->chs = mutt_get_charset (dest)) || unicode_init () == -1) |
| | 888 | { |
| | 889 | d->just_take_id = 1; |
| | 890 | return d; |
| | 891 | } |
| | 892 | |
| | 893 | d->src_is_utf8 = 1; |
| | 894 | return d; |
| | 895 | } |
| | 896 | |
| | 897 | if (!(d->chm = mutt_get_translation (src, dest))) |
| | 898 | d->just_take_id = 1; |
| | 899 | |
| | 900 | return d; |
| | 901 | } |
| | 902 | |
| | 903 | void mutt_free_decoder (DECODER **dpp) |
| | 904 | { |
| | 905 | safe_free ((void **) dpp); |
| | 906 | } |
| | 907 | |
| | 908 | static void _process_data (DECODER *, short); |
| | 909 | |
| | 910 | void mutt_decoder_push (DECODER *d, void *_buff, size_t blen, size_t *taken) |
| | 911 | { |
| | 912 | if (!_buff || !blen) |
| | 913 | { |
| | 914 | _process_data (d, 1); |
| 873 | | |
| 874 | | sfu->buffer[sfu->bp] = '\0'; |
| 875 | | |
| 876 | | mutt_decode_utf8_string(sfu->buffer, chs); |
| 877 | | for(t = sfu->buffer; *t; t++) |
| 878 | | { |
| 879 | | /* This may lead to funny-looking output if |
| 880 | | * there are embedded CRs, NLs or similar things |
| 881 | | * - but these would constitute illegal |
| 882 | | * UTF8 encoding anyways, so we don't care. |
| 883 | | */ |
| 884 | | |
| 885 | | state_prefix_putc(*t, s); |
| 886 | | } |
| 887 | | sfu->bp = 0; |
| 888 | | } |
| | 916 | } |
| | 917 | |
| | 918 | if ((*taken = MIN(blen, d->in.size - d->in.used))) |
| | 919 | { |
| | 920 | memcpy (d->in.buff + d->in.used, _buff, *taken); |
| | 921 | d->in.used += *taken; |
| | 922 | } |
| | 923 | } |
| | 924 | |
| | 925 | |
| | 926 | void mutt_decoder_pop (DECODER *d, void *_buff, size_t blen, size_t *popped) |
| | 927 | { |
| | 928 | unsigned char *buff = _buff; |
| | 929 | |
| | 930 | _process_data (d, 0); |
| | 931 | |
| | 932 | if ((*popped = MIN (blen, d->out.used))) |
| | 933 | { |
| | 934 | memcpy (buff, d->out.buff, *popped); |
| | 935 | memmove (d->out.buff, d->out.buff + *popped, d->out.used - *popped); |
| | 936 | d->out.used -= *popped; |
| | 937 | } |
| | 938 | } |
| | 939 | |
| | 940 | void mutt_decoder_pop_to_state (DECODER *d, STATE *s) |
| | 941 | { |
| | 942 | char tmp[DECODER_BUFFSIZE]; |
| | 943 | size_t i, l; |
| | 944 | |
| | 945 | do |
| | 946 | { |
| | 947 | mutt_decoder_pop (d, tmp, sizeof (tmp), &l); |
| | 948 | for (i = 0; i < l; i++) |
| | 949 | state_prefix_putc (tmp[i], s); |
| | 950 | } |
| | 951 | while (l > 0); |
| | 952 | } |
| | 953 | |
| | 954 | /* this is where things actually happen */ |
| | 955 | |
| | 956 | static void _process_data_8bit (DECODER *d) |
| | 957 | { |
| | 958 | size_t i; |
| | 959 | |
| | 960 | for (i = 0; i < d->in.used && d->out.used < d->out.size; i++) |
| | 961 | d->out.buff[d->out.used++] = mutt_display_char (d->in.buff[i], d->chm); |
| | 962 | |
| | 963 | memmove (d->in.buff, d->in.buff + i, d->in.used - i); |
| | 964 | d->in.used -= i; |
| | 965 | } |
| | 966 | |
| | 967 | static void _process_data_utf8 (DECODER *d) |
| | 968 | { |
| | 969 | size_t i, j; |
| | 970 | CHARDESC *cd; |
| | 971 | |
| | 972 | for (i = 0, j = 0; i < d->in.used && d->out.used < d->out.size;) |
| | 973 | { |
| | 974 | while (((d->in.buff[j] & 0x80) == 0) && (j < d->in.used) && (d->out.used < d->out.size)) |
| | 975 | d->out.buff[d->out.used++] = d->in.buff[j++]; |
| | 976 | i = j; |
| | 977 | |
| | 978 | while ((d->in.buff[j] & 0x80) && j < d->in.used && |
| | 979 | (d->forced || j + 6 < d->in.used) && d->out.used < d->out.size) |
| | 980 | { |
| | 981 | int ch; |
| | 982 | char *c = utf_to_unicode (&ch, &d->in.buff[j]); |
| | 983 | |
| | 984 | j = c - d->in.buff; |
| | 985 | |
| | 986 | if (0 <= ch && ch < 128) |
| | 987 | d->out.buff[d->out.used] = ch; |
| | 988 | else if ((cd = repr2descr (ch, Unicode)) && (ch = translate_character (d->chs, cd->symbol)) != -1) |
| | 989 | d->out.buff[d->out.used] = ch; |
| | 990 | else |
| | 991 | d->out.buff[d->out.used] = '?'; |
| | 992 | |
| | 993 | if(!d->out.buff[d->out.used]) |
| | 994 | d->out.buff[d->out.used] = '?'; |
| | 995 | |
| | 996 | d->out.used++; |
| | 997 | } |
| 890 | | static void state_fput_utf8(STATE *st, char u, CHARSET *chs, struct utf8_state *sfu) |
| 891 | | { |
| 892 | | if((u & 0x80) == 0 || (sfu->bp && (u & IIOOOOOO) != IOOOOOOO)) |
| 893 | | _state_utf8_flush(st, chs, sfu); |
| 894 | | |
| 895 | | if((u & 0x80) == 0) |
| 896 | | { |
| 897 | | if(u) state_prefix_putc(u, st); |
| 898 | | } |
| | 999 | i = j; |
| | 1000 | |
| | 1001 | if (d->in.buff[j] & 0x80) |
| | 1002 | break; |
| | 1003 | } |
| | 1004 | |
| | 1005 | memmove (d->in.buff, d->in.buff + i, d->in.used - i); |
| | 1006 | d->in.used -= i; |
| | 1007 | } |
| | 1008 | |
| | 1009 | static void _process_data (DECODER *d, short force) |
| | 1010 | { |
| | 1011 | if (force) d->forced = 1; |
| | 1012 | |
| | 1013 | if (d->just_take_id) |
| | 1014 | { |
| | 1015 | size_t l = MIN (d->out.size - d->out.used, d->in.used); |
| | 1016 | memmove (d->out.buff + d->out.used, d->in.buff, l); |
| | 1017 | memmove (d->in.buff, d->in.buff + l, d->in.used - l); |
| | 1018 | d->in.used -= l; |
| | 1019 | d->out.used += l; |
| | 1020 | } |
| | 1021 | else if (d->src_is_utf8) |
| | 1022 | _process_data_utf8 (d); |
| 900 | | { |
| 901 | | if(sfu->bp + 1 >= sfu->blen) |
| 902 | | { |
| 903 | | sfu->blen = (sfu->blen + 80) * 2; |
| 904 | | safe_realloc((void **) &sfu->buffer, sfu->blen + 1); |
| 905 | | } |
| 906 | | sfu->buffer[sfu->bp++] = u; |
| 907 | | } |
| 908 | | } |
| 909 | | |
| 910 | | /* a nicer interface for decoding */ |
| 911 | | |
| 912 | | DECODER *mutt_open_decoder (STATE *s, BODY *b, int istext) |
| 913 | | { |
| 914 | | DECODER *dp = safe_calloc (1, sizeof (DECODER)); |
| 915 | | |
| 916 | | dp->s = s; |
| 917 | | |
| 918 | | if (istext && (s->flags & M_CHARCONV)) |
| 919 | | { |
| 920 | | char *charset = mutt_get_parameter ("charset", b->parameter); |
| 921 | | dp->is_utf8 = mutt_is_utf8 (charset) && !mutt_is_utf8 (Charset); |
| 922 | | |
| 923 | | if (dp->is_utf8) |
| 924 | | { |
| 925 | | dp->sfu = new_utf8_state (); |
| 926 | | dp->chs = mutt_get_charset (Charset); |
| 927 | | } |
| 928 | | else |
| 929 | | dp->map = mutt_get_translation (charset, Charset); |
| 930 | | } |
| 931 | | |
| 932 | | return dp; |
| 933 | | } |
| 934 | | |
| 935 | | void mutt_close_decoder (DECODER **dpp) |
| 936 | | { |
| 937 | | if (!dpp || !*dpp) |
| 938 | | return; |
| 939 | | |
| 940 | | if ((*dpp)->is_utf8) |
| 941 | | { |
| 942 | | _state_utf8_flush ((*dpp)->s, (*dpp)->chs, (*dpp)->sfu); |
| 943 | | free_utf8_state (&(*dpp)->sfu); |
| 944 | | } |
| 945 | | |
| 946 | | safe_free ((void **) dpp); |
| 947 | | } |
| 948 | | |
| 949 | | void mutt_decoder_putc (DECODER *dp, char c) |
| 950 | | { |
| 951 | | if (dp->is_utf8) |
| 952 | | state_fput_utf8 (dp->s, c, dp->chs, dp->sfu); |
| 953 | | else |
| 954 | | state_prefix_putc (mutt_display_char ((unsigned char) c, dp->map), dp->s); |
| 955 | | } |
| 956 | | |
| 957 | | /* FIXME: utf-8 support */ |
| | 1024 | _process_data_8bit (d); |
| | 1025 | } |
| | 1026 | |
| | 1027 | /* This one is currently lacking utf-8 support */ |
| 988 | | map = mutt_get_translation (src, dest); |
| 989 | | |
| 990 | | while ((c = fgetc (fp)) != EOF) |
| 991 | | if (fputc (mutt_display_char ((unsigned char) c, map), tmpfp) == EOF) |
| 992 | | goto bail; |
| | 1062 | dec = mutt_open_decoder (src, dest); |
| | 1063 | |
| | 1064 | while ((lf = fread (buffer, 1, sizeof (buffer), fp)) > 0) |
| | 1065 | { |
| | 1066 | for (t = buffer; lf; t += lpu) |
| | 1067 | { |
| | 1068 | mutt_decoder_push (dec, t, lf, &lpu); |
| | 1069 | lf -= lpu; |
| | 1070 | |
| | 1071 | do |
| | 1072 | { |
| | 1073 | mutt_decoder_pop (dec, tmp, sizeof (tmp), &lpo); |
| | 1074 | if (lpo) |
| | 1075 | fwrite (tmp, lpo, 1, tmpfp); |
| | 1076 | } |
| | 1077 | while (lpo); |
| | 1078 | } |
| | 1079 | } |
| | 1080 | |
| | 1081 | mutt_decoder_push (dec, NULL, 0, NULL); |
| | 1082 | do |
| | 1083 | { |
| | 1084 | mutt_decoder_pop (dec, tmp, sizeof (tmp), &lpo); |
| | 1085 | if (lpo) fwrite (tmp, lpo, 1, tmpfp); |
| | 1086 | } |
| | 1087 | while (lpo); |
| | 1088 | |
| | 1089 | mutt_free_decoder (&dec); |