To: gotoh@horae.dti.ne.jp From: Hisashi Gotoh (=?iso-2022-jp?B?GyRCOGVGIxsoQiAbJEI1VxsoQg==?=) Date: Sun, 01 Apr 2001 04:30:30 +0900 Subject: Hypermail =?iso-2022-jp?B?GyRCJE5GfEtcOGwyPRsoQg==?= Message-Id: X-Mailer: Mew version 1.94.2 on Emacs 20.7 / Mule 4.0 (HANANOEN) Mime-Version: 1.0 Content-Type: Multipart/Mixed; boundary="--Next_Part(Sun_Apr__1_04:55:50_2001_997)--" Content-Transfer-Encoding: 7bit ----Next_Part(Sun_Apr__1_04:55:50_2001_997)-- Content-Type: Text/Plain; charset=iso-2022-jp Content-Transfer-Encoding: 7bit こんにちは。 Hypermail 2b30 で日本語のメールを処理するためのパッチです。 以下の行は URL を含んでいます。 パッチはhttp://www.horae.dti.ne.jp/~gotoh/dist/hypermail/に置いてあり ます。 以下の行はメールアドレスを含んでいます。 僕のメールアドレスはgotoh@horae.dti.ne.jpです。 -- 久 ----Next_Part(Sun_Apr__1_04:55:50_2001_997)-- Content-Type: Text/Plain; charset=iso-2022-jp Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="=?iso-2022-jp?B?GyRCJVElQyVBGyhCLnR4dA==?=" From: Hisashi Gotoh Date: Fri Mar 30 17:43:50 JST 2001 << hypermail 2b30 に対する日本語対応パッチ >> charset=iso2022jp なメールに対応。 インストール: 1. http://www.hypermail.org/dist/hypermail-2b30.tar.gz とってくる 2. このパッチをあてる 3. ./configure する 4. make する 5. make install は、してもしなくても動く 6. 動かす 日本語メールを処理するには iso2022jp=1 を指定する。 (例: MH フォルダの場合) $ cd ~/Mail/inbox $ for i in `ls -1 [0-9]*`; do > hypermail/src/hypermail -u -d /tmp/htdocs -m $i -o iso2022jp=1 > done $ -- 久 Index: hypermail/src/getname.c diff -u hypermail/src/getname.c:1.1.1.1 hypermail/src/getname.c:1.5 --- hypermail/src/getname.c:1.1.1.1 Tue Mar 27 00:17:11 2001 +++ hypermail/src/getname.c Fri Mar 30 03:16:15 2001 @@ -1,5 +1,6 @@ #include "hypermail.h" #include "getname.h" +#include "setup.h" extern char *set_domainaddr; @@ -105,7 +106,7 @@ /* EMail Processing First: ** First, is there an '@' sign we can use as an anchor ? */ - if ((c = strchr(line, '@')) == NULL) { + if ((c = hm_strchr(line, '@')) == NULL) { /* ** No '@' sign here so ... 
*/ @@ -196,8 +197,8 @@ comment_fnd = 1; } - else if (strchr(line, '(')) { - c = strchr(line, '(') + 1; + else if (hm_strchr(line, '(')) { + c = hm_strchr(line, '(') + 1; if (*c == '"') /* is there a comment in the comment ? */ c++; } @@ -250,14 +251,30 @@ } if (!comment_fnd) { + int in_ascii = TRUE, esclen = 0; for (i = 0, len = NAMESTRLEN - 1; *c && *c != '<' && *c != '\"' && *c != ')' && *c != '(' && *c != '\n' && i < len; c++) - name[i++] = *c; - + { + if (set_iso2022jp) { + iso2022_state(c, &in_ascii, &esclen); + if (esclen) { + for (; esclen; esclen--, c++) name[i++] = *c; + for (; in_ascii == FALSE && i < len; + c++, iso2022_state(c, &in_ascii, &esclen)) { + name[i++] = *c; + } + c--; + } else { + name[i++] = *c; + } + } else { + name[i++] = *c; + } + } } - if (*c == '<' || *c == '(') + if (name[i-1] == ' ' && *c == '<' || *c == '(') name[--i] = '\0'; else name[i] = '\0'; Index: hypermail/src/mprintf.c diff -u hypermail/src/mprintf.c:1.1.1.1 hypermail/src/mprintf.c:1.4 --- hypermail/src/mprintf.c:1.1.1.1 Tue Mar 27 00:17:12 2001 +++ hypermail/src/mprintf.c Thu Mar 29 05:43:47 2001 @@ -116,6 +116,7 @@ #endif #include "mprintf.h" +#include "setup.h" #define BUFFSIZE 256 /* buffer for long-to-str and float-to-str calcs */ #define MAX_PARAMETERS 128 /* lame static limit */ @@ -1044,6 +1045,19 @@ return -1; } +static int +hm_isprint(int ch) +{ + if (set_iso2022jp) { + if (isprint(ch) || isspace(ch) || ch == '\033') + return(1); + } else { + if (isprint(ch) || isspace(ch)) + return(1); + } + return(0); +} + /* fputc() look-alike */ static int addbyter(int output, FILE *data) { @@ -1051,7 +1065,7 @@ if(infop->length < infop->max) { /* only do this if we haven't reached max length yet */ - if (isprint(output) || isspace(output)) + if (hm_isprint(output)) { infop->buffer[0] = (char)output; /* store */ infop->buffer++; /* increase pointer */ Index: hypermail/src/parse.c diff -u hypermail/src/parse.c:1.1.1.1 hypermail/src/parse.c:1.4 --- hypermail/src/parse.c:1.1.1.1 
Tue Mar 27 00:17:12 2001 +++ hypermail/src/parse.c Thu Mar 29 05:43:47 2001 @@ -284,11 +284,41 @@ return (NULL); } +static void +safe_filename_iso2022(char *name) +{ + register char *np; + int in_ascii = TRUE, esclen = 0; + + /* for multibyte char. ++ is multibyte strings. + ++++ -> ____ + ++aa -> __aa + ++.txt -> __.txt + ++aa.doc -> __aa.doc + */ + np = name; + for (; *np; np++) { + iso2022_state(np, &in_ascii, &esclen); + if (esclen) + while (esclen--) np++; + if (in_ascii == TRUE) { + *name = *np; + name++; + } else { + *name = REPLACEMENT_CHAR; + name++; + } + } + *name = '\0'; +} + char *safe_filename(char *name) { register char *sp; register char *np; + if (set_iso2022jp) safe_filename_iso2022(name); + np = name; while (*np && (*np == ' ' || *np == '\t')) np++; @@ -777,7 +807,10 @@ /* base64 decoding */ int len; base64Decode(ptr, output, &len); - output += len - 1; + if (strcasecmp(charset, "iso-2022-jp") == 0) + output += len; + else + output += len - 1; } else { /* unsupported encoding type */ @@ -1067,6 +1100,7 @@ FileStatus file_created = NO_FILE; /* for attachments */ char attachname[129]; /* for attachment file names */ + char attachname_keep[129]; /* for attachment file names (use href body) */ char inline_force = FALSE; /* show a attachment in-line, regardles of the content_disposition */ char *description = NULL; /* user-supplied description for an attachment */ @@ -1265,6 +1299,7 @@ *jp++ = *np++; } *jp = '\0'; + strncpy(attachname_keep, attachname, sizeof(attachname_keep) - 1); safe_filename(attachname); } else { @@ -1299,6 +1334,7 @@ *jp++ = *np++; } *jp = '\0'; + strncpy(attachname_keep, attachname, sizeof(attachname_keep) - 1); safe_filename(attachname); } else { @@ -1449,6 +1485,7 @@ if ('\"' == *fname) fname++; sscanf(fname, "%128[^\"]", attachname); + strncpy(attachname_keep, attachname, sizeof(attachname_keep) - 1); safe_filename(attachname); } else { @@ -2075,11 +2112,11 @@ desc = description; else if (inline_force || inlinecontent(type)) 
desc = - attachname[0] ? attachname : + attachname_keep[0] ? attachname_keep : "picture"; else desc = - attachname[0] ? attachname : + attachname_keep[0] ? attachname_keep : "stored"; if (description) Index: hypermail/src/print.c diff -u hypermail/src/print.c:1.1.1.1 hypermail/src/print.c:1.3 --- hypermail/src/print.c:1.1.1.1 Tue Mar 27 00:17:12 2001 +++ hypermail/src/print.c Tue Mar 27 06:15:31 2001 @@ -504,6 +504,11 @@ pre = FALSE; } fprintf(fp, "

\n"); + } else { + if (!pre) { + fprintf(fp, "

\n");
+				pre = TRUE;
+			}
 		}
 		inheader = FALSE;
 	    }
Index: hypermail/src/printfile.c
diff -u hypermail/src/printfile.c:1.1.1.1 hypermail/src/printfile.c:1.3
--- hypermail/src/printfile.c:1.1.1.1	Tue Mar 27 00:17:12 2001
+++ hypermail/src/printfile.c	Thu Mar 29 05:43:48 2001
@@ -205,7 +205,7 @@
     title = maprintf("%s: %s", label, rp = convchars(subject));
     free(rp);
 
-    if (strlen(title) > TITLESTRLEN) {
+    if (!set_iso2022jp && strlen(title) > TITLESTRLEN) {
 	rp = title + (TITLESTRLEN - 1);
 	*rp-- = '\0';
     }
Index: hypermail/src/proto.h
diff -u hypermail/src/proto.h:1.1.1.1 hypermail/src/proto.h:1.4
--- hypermail/src/proto.h:1.1.1.1	Tue Mar 27 00:17:12 2001
+++ hypermail/src/proto.h	Thu Mar 29 05:43:48 2001
@@ -94,6 +94,9 @@
 char *parseemail(char *, char *, char *);
 char *parseurl(char *);
 
+char *hm_strchr(const char *, int);
+void iso2022_state(const char *str, int *state, int *esc);
+
 #ifdef lint
 int isspace(int);
 int isalpha(int);
Index: hypermail/src/setup.c
diff -u hypermail/src/setup.c:1.1.1.1 hypermail/src/setup.c:1.2
--- hypermail/src/setup.c:1.1.1.1	Tue Mar 27 00:17:12 2001
+++ hypermail/src/setup.c	Thu Mar 29 05:43:48 2001
@@ -39,6 +39,7 @@
 bool set_uselock;
 bool set_ietf_mbox;
 bool set_spamprotect;
+bool set_iso2022jp;
 
 int set_thrdlevels;
 int set_dirmode;
@@ -310,8 +311,10 @@
 
     {"spamprotect", &set_spamprotect, BFALSE, CFG_SWITCH,
      "# Set this to On to make hypermail not output real email addresses\n"
-     "# in the output HTML but instead it will obfuscate them a little.\n"}
+     "# in the output HTML but instead it will obfuscate them a little.\n"},
 
+    {"iso2022jp", &set_iso2022jp, BFALSE, CFG_SWITCH,
+     "# Set this to On to support ISO-2022-JP messages.\n"}
 
 };
 
Index: hypermail/src/setup.h
diff -u hypermail/src/setup.h:1.1.1.1 hypermail/src/setup.h:1.2
--- hypermail/src/setup.h:1.1.1.1	Tue Mar 27 00:17:12 2001
+++ hypermail/src/setup.h	Thu Mar 29 05:43:48 2001
@@ -64,6 +64,7 @@
 extern bool set_uselock;
 extern bool set_ietf_mbox;
 extern bool set_spamprotect;
+extern bool set_iso2022jp;
 
 extern int set_thrdlevels;
 extern int set_dirmode;
Index: hypermail/src/string.c
diff -u hypermail/src/string.c:1.1.1.1 hypermail/src/string.c:1.5
--- hypermail/src/string.c:1.1.1.1	Tue Mar 27 00:17:12 2001
+++ hypermail/src/string.c	Sun Apr  1 04:31:17 2001
@@ -382,12 +382,26 @@
 char *convchars(char *line)
 {
     struct Push buff;
+    int in_ascii = TRUE, esclen = 0;
 
     INIT_PUSH(buff);		/* init macro */
 
     /* avoid strlen() for speed */
 
     for (; *line; line++) {
+
+	if (set_iso2022jp) {
+		iso2022_state(line, &in_ascii, &esclen);
+		if (esclen && in_ascii == FALSE) {
+			for (; in_ascii == FALSE && *line; line++) {
+				PushByte(&buff, *line);
+				iso2022_state(line, &in_ascii, &esclen);
+			}
+			line--;
+			continue;
+		}
+	}
+
 	switch (*line) {
 	case '<':
 	    PushString(&buff, "<");
@@ -453,9 +467,22 @@
 static void translatechars(char *start, char *end, struct Push *buff)
 {
     char *p;
+    int in_ascii = TRUE, esclen = 0;
 
     for (p = start; p <= end; p++) {
 
+	if (set_iso2022jp) {
+		iso2022_state(p, &in_ascii, &esclen);
+		if (esclen && in_ascii == FALSE) {
+			for (; in_ascii == FALSE && p <= end; p++) {
+				PushByte(buff, *p);
+				iso2022_state(p, &in_ascii, &esclen);
+			}
+			p--;
+			continue;
+		}
+	}
+
 	switch (*p) {
 
 	case '<':
@@ -536,11 +563,13 @@
 char *replacechar(char *string, char old, char *new)
 {
     struct Push buff;
+    int in_ascii = TRUE, esclen = 0;
 
     INIT_PUSH(buff);
 
     for (; *string; string++) {
-	if (*string == old) {
+	if (set_iso2022jp) iso2022_state(string, &in_ascii, &esclen);
+	if (in_ascii == TRUE && *string == old) {
 	    PushString(&buff, new);
 	}
 	else
@@ -636,6 +665,8 @@
     
     char *at;
 
+    int in_ascii = TRUE, esclen = 0;
+
     if(set_spamprotect)
       at="_at_";
     else
@@ -653,6 +684,16 @@
 #define VALID_IN_EMAIL_USERNAME   "a-zA-Z0-9_.%-"
 #define VALID_IN_EMAIL_DOMAINNAME "a-zA-Z0-9.-"
 
+	    if (set_iso2022jp) {
+		    for (; ptr > input; input++) {
+			    iso2022_state(input, &in_ascii, &esclen);
+			    if (!esclen) continue;
+			    input += esclen;
+			    if (in_ascii == TRUE)
+				    backoff = ptr - input;
+		    }
+	    }
+
 	    /* check left side */
 	    while (backoff) {
 		if (sscanf
@@ -823,6 +864,7 @@
 
 	if (leftmost) { /* we found at least one protocol prefix */
 	    int accepted = FALSE;
+	    int urlscan = FALSE;
 
 	    /* 
 	     * all the charaters between the position where we started
@@ -833,7 +875,11 @@
 	    translatechars(inputp, leftmost-1, &buff);
 	    inputp = leftmost + strlen(thisprotocol);
 
-	    if (sscanf(inputp, "%255[^] )>\"\'\n[\t\\]", urlbuff)) {
+	    if (set_iso2022jp)
+		    urlscan = sscanf(inputp, "%255[^] \033)>\"\'\n[\t\\]", urlbuff);
+	    else
+		    urlscan = sscanf(inputp, "%255[^] )>\"\'\n[\t\\]", urlbuff);
+	    if (urlscan) {
 	        char *r;
 	
 		/* 
@@ -879,4 +925,94 @@
 	}
     }
     RETURN_PUSH(buff);
+}
+
+/*
+ * Support RFC1468 (and RFC1554, 94 character sets)
+ *
+ * reference
+ * - RFC1468: Japanese Character Encoding for Internet Messages (ISO-2022-JP)
+ * - RFC1554: ISO-2022-JP-2: Multilingual Extension of ISO-2022-JP
+ * - RFC1557: Korean Character Encoding for Internet Messages
+ * - RFC2237: Japanese Character Encoding for Internet Messages (ISO-2022-JP-1)
+ */
+
+/*
+ * Examine str for an ESC-introduced designation and update the caller's state.
+ *	*state:  TRUE = ASCII (default), FALSE = non-ASCII;
+ *		 left unchanged unless a recognized escape starts at str
+ *	*esclen: 0 if *str is not ESC, 3 or 4 for a recognized escape,
+ *		 1 for an ESC that starts no recognized sequence
+ */
+void
+iso2022_state(const char *str, int *state, int *esclen)
+{
+	if (*state != TRUE && *state != FALSE)	/* normalize a garbage state */
+		*state = TRUE;
+
+	if (*str != '\033') {
+		*esclen = 0;
+		return;
+	}
+
+	switch (*(str+1)) {
+	case '$':	
+		if (*(str+2) == 'B' || *(str+2) == '@' || *(str+2) == 'A') {
+			/*
+			 * ESC $ B	JIS X 0208-1983 to G0
+			 * ESC $ @	JIS X 0208-1976 to G0
+			 * ESC $ A	GB2312-1980 to G0
+			 */
+			*state = FALSE;
+			*esclen = 3;
+		} else if ((*(str+2) == '(' && *(str+3) == 'C') ||
+			   (*(str+2) == '(' && *(str+3) == 'D')) {
+			/*
+			 * ESC $ ( C	KSC 5601-1987 to G0
+			 * ESC $ ( D	JIS X 0212-1990 to G0
+			 */
+			*state = FALSE;
+			*esclen = 4;
+		} else {
+			/* unrecognized ESC $ sequence: keep state, length 1 */
+			*esclen = 1;
+		}
+		break;
+	case '(':
+		if (*(str+2) == 'B' || *(str+2) == 'J') {
+			/*
+			 * ESC ( B	ASCII to G0
+			 * ESC ( J	JIS X 0201-Roman to G0
+			 */
+			*state = TRUE;
+			*esclen = 3;
+		} else {
+			/* unrecognized ESC ( sequence: keep state, length 1 */
+			*esclen = 1;
+		}
+		break;
+	default:
+		/* ESC not followed by '$' or '(': keep state, length 1 */
+		*esclen = 1;
+	}
+}
+
+/* strchr() variant: with set_iso2022jp on, matches ch only in ASCII state and
+ * skips escapes whole (NULL if absent); else plain strchr(). Stops at the NUL. */
+char *
+hm_strchr(const char *str, int ch)
+{
+	int in_ascii = TRUE, esclen = 0;
+	if (!set_iso2022jp) return(strchr(str, ch));
+	while (*str) {
+		iso2022_state(str, &in_ascii, &esclen);
+		if (esclen) {	/* step over the escape, stopping at NUL */
+			while (esclen-- > 0 && *str) str++;
+			continue;	/* next byte may begin another escape */
+		}
+		if (in_ascii == TRUE && *str == ch)
+			return((char *)str);
+		str++;
+	}
+	return((char *)NULL);
 }

----Next_Part(Sun_Apr__1_04:55:50_2001_997)----