From: Hisashi Gotoh Date: Thu Apr 5 04:50:38 JST 2001 << hypermail 2.1.0 に対する日本語対応パッチ >> charset=iso2022jp なメールに対応。 インストール: 1. http://prdownloads.sourceforge.net/hypermail/hypermail-2.1.0.tar.gz をとってくる 2. このパッチをあてる 3. ./configure する 4. make; make install する 5. 動かす 日本語メールを処理するには iso2022jp=1 を指定する。 (例: MH フォルダの場合) $ cd ~/Mail/inbox $ for i in `ls -1 [0-9]*`; do > hypermail/src/hypermail -u -d /tmp/htdocs -m $i -o iso2022jp=1 > done $ TODO: - 添付ファイル名が日本語の場合の対処方法 添付ファイルインデックス(attachment.html) に限って、アーカイブファイル名からファイル名を取得してしまう仕様なので... -- 久 Index: hypermail/src/getname.c diff -u hypermail/src/getname.c:1.1.1.1 hypermail/src/getname.c:1.5 --- hypermail/src/getname.c:1.1.1.1 Tue Mar 27 00:17:11 2001 +++ hypermail/src/getname.c Fri Mar 30 03:16:15 2001 @@ -1,5 +1,6 @@ #include "hypermail.h" #include "getname.h" +#include "setup.h" extern char *set_domainaddr; @@ -105,7 +106,7 @@ /* EMail Processing First: ** First, is there an '@' sign we can use as an anchor ? */ - if ((c = strchr(line, '@')) == NULL) { + if ((c = hm_strchr(line, '@')) == NULL) { /* ** No '@' sign here so ... */ @@ -196,8 +197,8 @@ comment_fnd = 1; } - else if (strchr(line, '(')) { - c = strchr(line, '(') + 1; + else if (hm_strchr(line, '(')) { + c = hm_strchr(line, '(') + 1; if (*c == '"') /* is there a comment in the comment ? 
*/ c++; } @@ -250,14 +251,30 @@ } if (!comment_fnd) { + int in_ascii = TRUE, esclen = 0; for (i = 0, len = NAMESTRLEN - 1; *c && *c != '<' && *c != '\"' && *c != ')' && *c != '(' && *c != '\n' && i < len; c++) - name[i++] = *c; - + { + if (set_iso2022jp) { + iso2022_state(c, &in_ascii, &esclen); + if (esclen) { + for (; esclen; esclen--, c++) name[i++] = *c; + for (; in_ascii == FALSE && i < len; + c++, iso2022_state(c, &in_ascii, &esclen)) { + name[i++] = *c; + } + c--; + } else { + name[i++] = *c; + } + } else { + name[i++] = *c; + } + } } - if (*c == '<' || *c == '(') + if (name[i-1] == ' ' && *c == '<' || *c == '(') name[--i] = '\0'; else name[i] = '\0'; Index: hypermail/src/mprintf.c diff -u hypermail/src/mprintf.c:1.1.1.1 hypermail/src/mprintf.c:1.4 --- hypermail/src/mprintf.c:1.1.1.1 Tue Mar 27 00:17:12 2001 +++ hypermail/src/mprintf.c Thu Mar 29 05:43:47 2001 @@ -116,6 +116,7 @@ #endif #include "mprintf.h" +#include "setup.h" #define BUFFSIZE 256 /* buffer for long-to-str and float-to-str calcs */ #define MAX_PARAMETERS 128 /* lame static limit */ @@ -1044,6 +1045,19 @@ return -1; } +static int +hm_isprint(int ch) +{ + if (set_iso2022jp) { + if (isprint(ch) || isspace(ch) || ch == '\033') + return(1); + } else { + if (isprint(ch) || isspace(ch)) + return(1); + } + return(0); +} + /* fputc() look-alike */ static int addbyter(int output, FILE *data) { @@ -1051,7 +1065,7 @@ if(infop->length < infop->max) { /* only do this if we haven't reached max length yet */ - if (isprint(output) || isspace(output)) + if (hm_isprint(output)) { infop->buffer[0] = (char)output; /* store */ infop->buffer++; /* increase pointer */ Index: hypermail/src/printfile.c diff -u hypermail/src/printfile.c:1.1.1.1 hypermail/src/printfile.c:1.3 --- hypermail/src/printfile.c:1.1.1.1 Tue Mar 27 00:17:12 2001 +++ hypermail/src/printfile.c Thu Mar 29 05:43:48 2001 @@ -205,7 +205,7 @@ title = maprintf("%s: %s", label, rp = convchars(subject)); free(rp); - if (strlen(title) > TITLESTRLEN) { + 
if (!set_iso2022jp && strlen(title) > TITLESTRLEN) { rp = title + (TITLESTRLEN - 1); *rp-- = '\0'; } Index: hypermail/src/proto.h diff -u hypermail/src/proto.h:1.1.1.2 hypermail/src/proto.h:1.5 --- hypermail/src/proto.h:1.1.1.2 Thu Apr 5 03:12:33 2001 +++ hypermail/src/proto.h Thu Apr 5 03:27:39 2001 @@ -101,6 +101,9 @@ char *parseemail(char *, char *, char *); char *parseurl(char *); +char *hm_strchr(const char *, int); +void iso2022_state(const char *str, int *state, int *esc); + /* ** quotes.c */ Index: hypermail/src/setup.c diff -u hypermail/src/setup.c:1.1.1.2 hypermail/src/setup.c:1.5 --- hypermail/src/setup.c:1.1.1.2 Thu Apr 5 03:12:33 2001 +++ hypermail/src/setup.c Thu Apr 5 04:12:52 2001 @@ -101,6 +101,8 @@ char *set_describe_folder; int set_msgsperfolder; +bool set_iso2022jp; + struct Config cfg[] = { {"language", &set_language, LANGUAGE, CFG_STRING, "# A two-letter string specifying the language to use!\n" @@ -492,6 +494,9 @@ "# a symbolic link by this name to the most recently created\n" "# subdirectory. 
Note that many web servers are configured to\n" "# not follow symbolic links for security reasons.\n"}, + + {"iso2022jp", &set_iso2022jp, BFALSE, CFG_SWITCH, + "# Set this to On to support ISO-2022-JP messages.\n"}, }; /* ---------------------------------------------------------------- */ @@ -757,6 +762,7 @@ { FILE *f; char line[MAXLINE]; + int r = TRUE; PreConfig(); @@ -772,12 +778,12 @@ fclose(f); } else - return FALSE; + r = FALSE; } PostConfig(); - return TRUE; + return r; } void ConfigCleanup(void) Index: hypermail/src/setup.h diff -u hypermail/src/setup.h:1.1.1.2 hypermail/src/setup.h:1.4 --- hypermail/src/setup.h:1.1.1.2 Thu Apr 5 03:12:34 2001 +++ hypermail/src/setup.h Thu Apr 5 03:33:23 2001 @@ -125,4 +125,6 @@ extern int set_msgsperfolder; extern char *set_describe_folder; +extern bool set_iso2022jp; + #endif Index: hypermail/src/string.c diff -u hypermail/src/string.c:1.1.1.2 hypermail/src/string.c:1.6 --- hypermail/src/string.c:1.1.1.2 Thu Apr 5 03:12:34 2001 +++ hypermail/src/string.c Thu Apr 5 03:27:39 2001 @@ -387,12 +387,26 @@ char *convchars(char *line) { struct Push buff; + int in_ascii = TRUE, esclen = 0; INIT_PUSH(buff); /* init macro */ /* avoid strlen() for speed */ for (; *line; line++) { + + if (set_iso2022jp) { + iso2022_state(line, &in_ascii, &esclen); + if (esclen && in_ascii == FALSE) { + for (; in_ascii == FALSE && *line; line++) { + PushByte(&buff, *line); + iso2022_state(line, &in_ascii, &esclen); + } + line--; + continue; + } + } + switch (*line) { case '<': PushString(&buff, "<"); @@ -458,9 +472,22 @@ static void translatechars(char *start, char *end, struct Push *buff) { char *p; + int in_ascii = TRUE, esclen = 0; for (p = start; p <= end; p++) { + if (set_iso2022jp) { + iso2022_state(p, &in_ascii, &esclen); + if (esclen && in_ascii == FALSE) { + for (; in_ascii == FALSE && p <= end; p++) { + PushByte(buff, *p); + iso2022_state(p, &in_ascii, &esclen); + } + p--; + continue; + } + } + switch (*p) { case '<': PushString(buff, "<"); 
@@ -540,11 +567,13 @@ char *replacechar(char *string, char old, char *new) { struct Push buff; + int in_ascii = TRUE, esclen = 0; INIT_PUSH(buff); for (; *string; string++) { - if (*string == old) { + if (set_iso2022jp) iso2022_state(string, &in_ascii, &esclen); + if (in_ascii == TRUE && *string == old) { PushString(&buff, new); } else @@ -640,6 +669,8 @@ char *at; + int in_ascii = TRUE, esclen = 0; + if(set_spamprotect) at="_at_"; else @@ -657,6 +688,16 @@ #define VALID_IN_EMAIL_USERNAME "a-zA-Z0-9_.%-" #define VALID_IN_EMAIL_DOMAINNAME "a-zA-Z0-9.-" + if (set_iso2022jp) { + for (; ptr > input; input++) { + iso2022_state(input, &in_ascii, &esclen); + if (!esclen) continue; + input += esclen; + if (in_ascii == TRUE) + backoff = ptr - input; + } + } + /* check left side */ while (backoff) { if (sscanf @@ -826,6 +867,7 @@ if (leftmost) { /* we found at least one protocol prefix */ int accepted = FALSE; + int urlscan = FALSE; /* * all the charaters between the position where we started @@ -836,7 +878,11 @@ translatechars(inputp, leftmost-1, &buff); inputp = leftmost + strlen(thisprotocol); - if (sscanf(inputp, "%255[^] )>\"\'\n[\t\\]", urlbuff)) { + if (set_iso2022jp) + urlscan = sscanf(inputp, "%255[^] \033)>\"\'\n[\t\\]", urlbuff); + else + urlscan = sscanf(inputp, "%255[^] )>\"\'\n[\t\\]", urlbuff); + if (urlscan) { char *r; /* @@ -883,3 +929,93 @@ } RETURN_PUSH(buff); } /* end parseurl() */ + +/* + * Support RFC1468 (and RFC1554, 94 character sets) + * + * reference + * - RFC1468: Japanese Character Encoding for Internet Messages (ISO-2022-JP) + * - RFC1554: ISO-2022-JP-2: Multilingual Extension of ISO-2022-JP + * - RFC1557: Korean Character Encoding for Internet Messages + * - RFC2237: Japanese Character Encoding for Internet Messages (ISO-2022-JP-1) + */ + +/* + * state + * TRUE: ascii (default) + * FALSE: non-ascii + * esclen + * n: escape sequence length + */ +void +iso2022_state(const char *str, int *state, int *esclen) +{ + if (*state != TRUE && *state != FALSE) + *state = 
TRUE; + + if (*str != '\033') { + *esclen = 0; + return; + } + + switch (*(str+1)) { + case '$': + if (*(str+2) == 'B' || *(str+2) == '@' || *(str+2) == 'A') { + /* + * ESC $ B JIS X 0208-1983 to G0 + * ESC $ @ JIS X 0208-1976 to G0 + * ESC $ A GB2312-1980 to G0 + */ + *state = FALSE; + *esclen = 3; + } else if ((*(str+2) == '(' && *(str+3) == 'C') || + (*(str+2) == '(' && *(str+3) == 'D')) { + /* + * ESC $ ( C KSC 5601-1987 to G0 + * ESC $ ( D JIS X 0212-1990 to G0 + */ + *state = FALSE; + *esclen = 4; + } else { + /* keep state */ + *esclen = 1; + } + break; + case '(': + if (*(str+2) == 'B' || *(str+2) == 'J') { + /* + * ESC ( B ASCII to G0 + * ESC ( J JIS X 0201-Roman to G0 + */ + *state = TRUE; + *esclen = 3; + } else { + /* keep state */ + *esclen = 1; + } + break; + default: + /* keep state */ + *esclen = 1; + } +} + +char * +hm_strchr(const char *str, int ch) +{ + if (!set_iso2022jp) { + return(strchr(str, ch)); + } else { + int in_ascii = TRUE, esclen = 0; + + for (; *str; str++) { + iso2022_state(str, &in_ascii, &esclen); + if (esclen) str += esclen; + if (in_ascii == TRUE) { + if (*str == ch) + return((char *)str); + } + } + return((char *)NULL); + } +}