ksh emacs editing mode: account for double-width (East Asian Wide/Fullwidth)
characters when tracking the cursor column.

Summary of what this patch does:

  1. Adds unicode.c / unicode.h, providing is_fullwidth() (a range table
     lookup on a code point) and u8_to_cpt() (decodes one UTF-8 sequence
     into a code point and returns its length in bytes, 0 if the lead byte
     is not recognised).
  2. emacs.c: x_size() and x_e_putc() collect the bytes of a UTF-8 sequence
     and report the column width (1 or 2) on its last byte; x_bs() uses the
     new x_size_rev(), which does the same for bytes fed in reverse order.
  3. emacs.c: x_transpose() swaps whole UTF-8 sequences instead of single
     bytes, and the redraw window start (xbp) is moved back so that it never
     points into the middle of a sequence.
  4. Builds with SMALL defined keep the previous logic (x_size_rev is then
     just an alias for x_size and unicode.c compiles to nothing).

Review fixes folded into this revision of the patch:

  a. x_e_putc(): the decoder state was declared under "#ifdef SMALL" but
     used in the non-SMALL branch; it is now under "#ifndef SMALL".
  b. x_size() / x_e_putc(): stray continuation bytes are ignored instead of
     being appended without bound to the 5-byte static buffer.
  c. x_transpose(): a character is limited to 4 bytes so it always fits in
     rune1[]/rune2[]; the backward scans can no longer step below xbuf; the
     early BEL return redraws what was backspaced over; the now-unused
     local "tmp" is removed.
  d. u8_to_cpt(): *cpt is set to 0 when the lead byte is invalid, and the
     char/unsigned char pointer conversions are made explicit.

NOTE(review): tabs in the context lines below were reconstructed by hand;
if a hunk fails to match, retry with "patch -l" (ignore whitespace).

Index: bin/ksh/Makefile
===================================================================
RCS file: /cvs/src/bin/ksh/Makefile,v
diff -u -r1.39 Makefile
--- bin/ksh/Makefile	18 Jun 2018 17:03:58 -0000	1.39
+++ bin/ksh/Makefile	17 Dec 2024 01:36:45 -0000
@@ -7,7 +7,7 @@
 SRCS=	alloc.c c_ksh.c c_sh.c c_test.c c_ulimit.c edit.c emacs.c eval.c \
 	exec.c expr.c history.c io.c jobs.c lex.c mail.c main.c \
 	misc.c path.c shf.c syn.c table.c trap.c tree.c tty.c var.c \
-	version.c vi.c
+	version.c vi.c unicode.c
 
 WARNINGS=yes
 DEFS=	-DEMACS -DVI
Index: bin/ksh/emacs.c
===================================================================
RCS file: /cvs/src/bin/ksh/emacs.c,v
diff -u -r1.90 emacs.c
--- bin/ksh/emacs.c	21 Jun 2023 22:22:08 -0000	1.90
+++ bin/ksh/emacs.c	17 Dec 2024 01:36:45 -0000
@@ -29,6 +29,11 @@
 
 #include "sh.h"
 #include "edit.h"
+#ifndef SMALL
+#include "unicode.h"
+#else
+#define x_size_rev x_size
+#endif
 
 static	Area	aedit;
 #define	AEDIT	&aedit		/* area for kill ring and macro defns */
@@ -126,6 +131,7 @@
 static void	x_goto(char *);
 static void	x_bs(int);
 static int	x_size_str(char *);
+static int	x_size_rev(int);
 static int	x_size(int);
 static void	x_zots(char *);
 static void	x_zotc(int);
@@ -459,7 +465,7 @@
 	if (adj == x_adj_done) {	/* has x_adjust() been called? */
 		/* no */
 		for (cp = xlp; cp > xcp; )
-			x_bs(*--cp);
+			x_bs((unsigned char)*--cp);
 	}
 
 	x_adj_ok = 1;
@@ -552,7 +558,7 @@
 		x_adj_ok = 1;
 		xlp_valid = false;
 		for (cp = x_lastcp(); cp > xcp; )
-			x_bs(*--cp);
+			x_bs((unsigned char)*--cp);
 
 		return;
 	}
@@ -653,7 +659,7 @@
 {
 	int i;
 
-	i = x_size(c);
+	i = x_size_rev(c);
 	while (i--)
 		x_e_putc('\b');
 }
@@ -663,20 +669,95 @@
 {
 	int size = 0;
 	while (*cp)
-		size += x_size(*cp++);
+		size += x_size((unsigned char)*cp++);
 	return size;
 }
 
+#ifndef SMALL
+static int
+x_size_rev(int c)
+{
+	static unsigned char ch[5] = { 0 };
+	static int cnt = 3;
+	unsigned long cpt;
+	int w;
+
+	if (c=='\t')
+		return 4;	/* Kludge, tabs are always four spaces. */
+	if (iscntrl(c))		/* control char */
+		return 2;
+
+	if (!isu8cont(c)) {
+		if (c <= 0x7f) {
+			cnt = 3;
+			return 1;
+		}
+
+		ch[cnt] = c;
+		u8_to_cpt((const char *)(ch + cnt), &cpt);
+		w = is_fullwidth(cpt) ? 2 : 1;
+
+		cnt = 3;
+		memset(ch, 0, 4);
+		return w;
+	} else {
+		if (cnt <= 0)
+			return 0;
+		ch[cnt] = c;
+		cnt--;
+	}
+
+	return 0;
+}
+#endif
 static int
 x_size(int c)
 {
+#ifndef SMALL
+	static unsigned char ch[5] = { 0 };
+	static int len = 0, cnt = 0;
+	unsigned long cpt;
+#endif
 	if (c=='\t')
 		return 4;	/* Kludge, tabs are always four spaces. */
 	if (iscntrl(c))		/* control char */
 		return 2;
+#ifdef SMALL
 	if (isu8cont(c))
 		return 0;
 	return 1;
+#else
+	if (!isu8cont(c)) {
+		if (c <= 0x7f) {
+			len = 0;
+			return 1;
+		}
+
+		if ((c & 0xf8) == 0xf0 && c < 0xf5)
+			len = 3;
+		else if ((c & 0xf0) == 0xe0)
+			len = 2;
+		else if ((c & 0xe0) == 0xc0 && c > 0xc1)
+			len = 1;
+		else {
+			len = 0;
+			return 0;
+		}
+
+		cnt = 0;
+		memset(ch, 0, 5);
+		ch[cnt++] = c;
+	} else {
+		if (len == 0 || cnt > len)
+			return 0;	/* stray continuation byte */
+		ch[cnt++] = c;
+		if (cnt > len) {
+			u8_to_cpt((const char *)ch, &cpt);
+			return is_fullwidth(cpt) ? 2 : 1;
+		}
+	}
+	return 0;
+#endif
 }
 
 static void
@@ -1099,6 +1180,7 @@
 x_transpose(int c)
 {
-	char tmp;
+	char rune1[4], rune2[4];
+	char *p1, *p2, *p;
 
 	/* What transpose is meant to do seems to be up for debate. This
 	 * is a general summary of the options; the text is abcd with the
@@ -1124,25 +1206,58 @@
 		/* Gosling/Unipress emacs style: Swap two characters before the
 		 * cursor, do not change cursor position
 		 */
-		x_bs(xcp[-1]);
-		x_bs(xcp[-2]);
-		x_zotc(xcp[-1]);
-		x_zotc(xcp[-2]);
-		tmp = xcp[-1];
-		xcp[-1] = xcp[-2];
-		xcp[-2] = tmp;
+		p1 = xcp;
+		do {
+			x_bs((unsigned char) *--p1);
+		} while (xbuf < p1 && (xcp - p1) < 4 && isu8cont(*p1));
+
+		if (p1 == xbuf) {
+			/* lone character: undo the backspaces before beeping */
+			for (p = p1; p < xcp; p++)
+				x_zotc(*p);
+			x_e_putc(BEL);
+			return KSTD;
+		}
+
+		p2 = p1;
+		do {
+			x_bs((unsigned char) *--p2);
+		} while (xbuf < p2 && (p1 - p2) < 4 && isu8cont(*p2));
+
+		for (p = p1; p < xcp; p++)
+			x_zotc(*p);
+		for (p = p2; p < p1; p++)
+			x_zotc(*p);
+
+		memcpy(rune1, p1, xcp - p1);
+		memcpy(rune2, p2, p1 - p2);
+		memcpy(p2, rune1, xcp - p1);
+		memcpy(p2 + (xcp - p1), rune2, p1 - p2);
 	} else {
 		/* GNU emacs style: Swap the characters before and under the
 		 * cursor, move cursor position along one.
 		 */
-		x_bs(xcp[-1]);
-		x_zotc(xcp[0]);
-		x_zotc(xcp[-1]);
-		tmp = xcp[-1];
-		xcp[-1] = xcp[0];
-		xcp[0] = tmp;
-		x_bs(xcp[0]);
-		x_goto(xcp + 1);
+		p1 = xcp + 1;
+		while (p1 < xep && *p1 && (p1 - xcp) < 4 && isu8cont(*p1))
+			p1++;
+
+		p2 = xcp;
+		do {
+			x_bs((unsigned char) *--p2);
+		} while (xbuf < p2 && (xcp - p2) < 4 && isu8cont(*p2));
+
+		for (p = xcp; p < p1; p++)
+			x_zotc(*p);
+		for (p = p2; p < xcp; p++)
+			x_zotc(*p);
+
+		memcpy(rune1, xcp, p1 - xcp);
+		memcpy(rune2, p2, xcp - p2);
+		memcpy(p2, rune1, p1 - xcp);
+		memcpy(p2 + (p1 - xcp), rune2, xcp - p2);
+
+		xcp = p1;
+		x_goto(p1);
 	}
 	return KSTD;
 }
@@ -1804,6 +1919,11 @@
 	 */
 	if ((xbp = xcp - (x_displen / 2)) < xbuf)
 		xbp = xbuf;
+	else {
+		/* back up to the first byte of the character xbp landed in */
+		while (xbp > xbuf && isu8cont((unsigned char) *xbp))
+			xbp--;
+	}
 	xlp_valid = false;
 	x_redraw(xx_cols);
 	x_flush();
@@ -1882,8 +2002,16 @@
 }
 
 static void
-x_e_putc(int c)
+x_e_putc(int sc)
 {
+#ifndef SMALL
+	static unsigned char ch[5] = { 0 };
+	static int len = 0, cnt = 0;
+	unsigned long cpt;
+#endif
+	unsigned char c;
+
+	c = sc;
 	if (c == '\r' || c == '\n')
 		x_col = 0;
 	if (x_col < xx_cols) {
@@ -1898,9 +2026,45 @@
 			x_col--;
 			break;
 		default:
+#ifdef SMALL
 			if (!isu8cont(c))
 				x_col++;
 			break;
+#else
+			if (!isu8cont(c)) {
+				if (c <= 0x7f) {
+					x_col++;
+					len = 0;
+					break;
+				}
+
+				if ((c & 0xf8) == 0xf0 && c < 0xf5)
+					len = 3;
+				else if ((c & 0xf0) == 0xe0)
+					len = 2;
+				else if ((c & 0xe0) == 0xc0 && c > 0xc1)
+					len = 1;
+				else {
+					len = 0;
+					break;
+				}
+
+				cnt = 0;
+				memset(ch, 0, 5);
+				ch[cnt++] = c;
+			} else {
+				if (len == 0 || cnt > len)
+					break;	/* stray continuation byte */
+				ch[cnt++] = c;
+				if (cnt > len) {
+					x_col++;
+					u8_to_cpt((const char *)ch, &cpt);
+					if (is_fullwidth(cpt))
+						x_col++;
+				}
+			}
+			break;
+#endif
 		}
 	}
 	if (x_adj_ok && (x_col < 0 || x_col >= (xx_cols - 2)))
Index: bin/ksh/unicode.c
===================================================================
--- bin/ksh/unicode.c	(new file)
+++ bin/ksh/unicode.c	(working copy)
--- /dev/null	2024-12-17 08:35:07.996000076 +0800
+++ bin/ksh/unicode.c	2024-12-17 09:18:58.489692801 +0800
@@ -0,0 +1,164 @@
+#include "unicode.h"
+#include <stdio.h>
+
+#if !defined(SMALL)
+
+/* The following code was generated from EastAsianWidth.txt (Flag: W&F)
+ * Reference: https://www.unicode.org/reports/tr11/tr11-6.html
+ */
+
+int is_fullwidth(unsigned long cpt) {
+	if ((0x1100 <= cpt && cpt <= 0x115f)
+	    || (0x231a <= cpt && cpt <= 0x231b)
+	    || (0x2329 <= cpt && cpt <= 0x232a)
+	    || (0x23e9 <= cpt && cpt <= 0x23ec)
+	    || (cpt == 0x23f0)
+	    || (cpt == 0x23f3)
+	    || (0x25fd <= cpt && cpt <= 0x25fe)
+	    || (0x2614 <= cpt && cpt <= 0x2615)
+	    || (0x2630 <= cpt && cpt <= 0x2637)
+	    || (0x2648 <= cpt && cpt <= 0x2653)
+	    || (cpt == 0x267f)
+	    || (0x268a <= cpt && cpt <= 0x268f)
+	    || (cpt == 0x2693)
+	    || (cpt == 0x26a1)
+	    || (0x26aa <= cpt && cpt <= 0x26ab)
+	    || (0x26bd <= cpt && cpt <= 0x26be)
+	    || (0x26c4 <= cpt && cpt <= 0x26c5)
+	    || (cpt == 0x26ce)
+	    || (cpt == 0x26d4)
+	    || (cpt == 0x26ea)
+	    || (0x26f2 <= cpt && cpt <= 0x26f3)
+	    || (cpt == 0x26f5)
+	    || (cpt == 0x26fa)
+	    || (cpt == 0x26fd)
+	    || (cpt == 0x2705)
+	    || (0x270a <= cpt && cpt <= 0x270b)
+	    || (cpt == 0x2728)
+	    || (cpt == 0x274c)
+	    || (cpt == 0x274e)
+	    || (0x2753 <= cpt && cpt <= 0x2755)
+	    || (cpt == 0x2757)
+	    || (0x2795 <= cpt && cpt <= 0x2797)
+	    || (cpt == 0x27b0)
+	    || (cpt == 0x27bf)
+	    || (0x2b1b <= cpt && cpt <= 0x2b1c)
+	    || (cpt == 0x2b50)
+	    || (cpt == 0x2b55)
+	    || (0x2e80 <= cpt && cpt <= 0x2e99)
+	    || (0x2e9b <= cpt && cpt <= 0x2ef3)
+	    || (0x2f00 <= cpt && cpt <= 0x2fd5)
+	    || (0x2ff0 <= cpt && cpt <= 0x303e)
+	    || (0x3041 <= cpt && cpt <= 0x3096)
+	    || (0x3099 <= cpt && cpt <= 0x30ff)
+	    || (0x3105 <= cpt && cpt <= 0x312f)
+	    || (0x3131 <= cpt && cpt <= 0x318e)
+	    || (0x3190 <= cpt && cpt <= 0x31e5)
+	    || (0x31ef <= cpt && cpt <= 0x321e)
+	    || (0x3220 <= cpt && cpt <= 0x3247)
+	    || (0x3250 <= cpt && cpt <= 0xa48c)
+	    || (0xa490 <= cpt && cpt <= 0xa4c6)
+	    || (0xa960 <= cpt && cpt <= 0xa97c)
+	    || (0xac00 <= cpt && cpt <= 0xd7a3)
+	    || (0xf900 <= cpt && cpt <= 0xfaff)
+	    || (0xfe10 <= cpt && cpt <= 0xfe19)
+	    || (0xfe30 <= cpt && cpt <= 0xfe52)
+	    || (0xfe54 <= cpt && cpt <= 0xfe66)
+	    || (0xfe68 <= cpt && cpt <= 0xfe6b)
+	    || (0xff01 <= cpt && cpt <= 0xff60)
+	    || (0xffe0 <= cpt && cpt <= 0xffe6)
+	    || (0x16fe0 <= cpt && cpt <= 0x16fe4)
+	    || (0x16ff0 <= cpt && cpt <= 0x16ff1)
+	    || (0x17000 <= cpt && cpt <= 0x187f7)
+	    || (0x18800 <= cpt && cpt <= 0x18cd5)
+	    || (0x18cff <= cpt && cpt <= 0x18d08)
+	    || (0x1aff0 <= cpt && cpt <= 0x1aff3)
+	    || (0x1aff5 <= cpt && cpt <= 0x1affb)
+	    || (0x1affd <= cpt && cpt <= 0x1affe)
+	    || (0x1b000 <= cpt && cpt <= 0x1b122)
+	    || (cpt == 0x1b132)
+	    || (0x1b150 <= cpt && cpt <= 0x1b152)
+	    || (cpt == 0x1b155)
+	    || (0x1b164 <= cpt && cpt <= 0x1b167)
+	    || (0x1b170 <= cpt && cpt <= 0x1b2fb)
+	    || (0x1d300 <= cpt && cpt <= 0x1d356)
+	    || (0x1d360 <= cpt && cpt <= 0x1d376)
+	    || (cpt == 0x1f004)
+	    || (cpt == 0x1f0cf)
+	    || (cpt == 0x1f18e)
+	    || (0x1f191 <= cpt && cpt <= 0x1f19a)
+	    || (0x1f200 <= cpt && cpt <= 0x1f202)
+	    || (0x1f210 <= cpt && cpt <= 0x1f23b)
+	    || (0x1f240 <= cpt && cpt <= 0x1f248)
+	    || (0x1f250 <= cpt && cpt <= 0x1f251)
+	    || (0x1f260 <= cpt && cpt <= 0x1f265)
+	    || (0x1f300 <= cpt && cpt <= 0x1f320)
+	    || (0x1f32d <= cpt && cpt <= 0x1f335)
+	    || (0x1f337 <= cpt && cpt <= 0x1f37c)
+	    || (0x1f37e <= cpt && cpt <= 0x1f393)
+	    || (0x1f3a0 <= cpt && cpt <= 0x1f3ca)
+	    || (0x1f3cf <= cpt && cpt <= 0x1f3d3)
+	    || (0x1f3e0 <= cpt && cpt <= 0x1f3f0)
+	    || (cpt == 0x1f3f4)
+	    || (0x1f3f8 <= cpt && cpt <= 0x1f43e)
+	    || (cpt == 0x1f440)
+	    || (0x1f442 <= cpt && cpt <= 0x1f4fc)
+	    || (0x1f4ff <= cpt && cpt <= 0x1f53d)
+	    || (0x1f54b <= cpt && cpt <= 0x1f54e)
+	    || (0x1f550 <= cpt && cpt <= 0x1f567)
+	    || (cpt == 0x1f57a)
+	    || (0x1f595 <= cpt && cpt <= 0x1f596)
+	    || (cpt == 0x1f5a4)
+	    || (0x1f5fb <= cpt && cpt <= 0x1f64f)
+	    || (0x1f680 <= cpt && cpt <= 0x1f6c5)
+	    || (cpt == 0x1f6cc)
+	    || (0x1f6d0 <= cpt && cpt <= 0x1f6d2)
+	    || (0x1f6d5 <= cpt && cpt <= 0x1f6d7)
+	    || (0x1f6dc <= cpt && cpt <= 0x1f6df)
+	    || (0x1f6eb <= cpt && cpt <= 0x1f6ec)
+	    || (0x1f6f4 <= cpt && cpt <= 0x1f6fc)
+	    || (0x1f7e0 <= cpt && cpt <= 0x1f7eb)
+	    || (cpt == 0x1f7f0)
+	    || (0x1f90c <= cpt && cpt <= 0x1f93a)
+	    || (0x1f93c <= cpt && cpt <= 0x1f945)
+	    || (0x1f947 <= cpt && cpt <= 0x1f9ff)
+	    || (0x1fa70 <= cpt && cpt <= 0x1fa7c)
+	    || (0x1fa80 <= cpt && cpt <= 0x1fa89)
+	    || (0x1fa8f <= cpt && cpt <= 0x1fac6)
+	    || (0x1face <= cpt && cpt <= 0x1fadc)
+	    || (0x1fadf <= cpt && cpt <= 0x1fae9)
+	    || (0x1faf0 <= cpt && cpt <= 0x1faf8)
+	    || (0x20000 <= cpt && cpt <= 0x2fffd)
+	    || (0x30000 <= cpt && cpt <= 0x3fffd))
+		return 1;
+
+	return 0;
+}
+
+int u8_to_cpt(const char *buf, unsigned long *cpt) {
+	const unsigned char *ubuf = (const unsigned char *)buf;
+
+	if (ubuf[0] <= 0x7F) {
+		*cpt = ubuf[0];
+		return 1;
+	} else if ((ubuf[0] & 0xE0) == 0xC0) {
+		*cpt = ((ubuf[0] & 0x1F) << 6) | (ubuf[1] & 0x3F);
+		return 2;
+	} else if ((ubuf[0] & 0xF0) == 0xE0) {
+		*cpt = ((ubuf[0] & 0x0F) << 12)
+		    | ((ubuf[1] & 0x3F) << 6)
+		    | (ubuf[2] & 0x3F);
+		return 3;
+	} else if ((ubuf[0] & 0xF8) == 0xF0) {
+		*cpt = ((ubuf[0] & 0x07) << 18)
+		    | ((ubuf[1] & 0x3F) << 12)
+		    | ((ubuf[2] & 0x3F) << 6)
+		    | (ubuf[3] & 0x3F);
+		return 4;
+	}
+
+	*cpt = 0;	/* invalid lead byte: never leave *cpt unset */
+	return 0;
+}
+
+#endif
Index: bin/ksh/unicode.h
===================================================================
--- bin/ksh/unicode.h	(new file)
+++ bin/ksh/unicode.h	(working copy)
--- /dev/null	2024-12-17 08:35:07.996000076 +0800
+++ bin/ksh/unicode.h	2024-12-17 09:19:00.521730569 +0800
@@ -0,0 +1,7 @@
+#ifndef UNICODE_H
+#define UNICODE_H
+
+int is_fullwidth(unsigned long);
+int u8_to_cpt(const char *, unsigned long *);
+
+#endif /* UNICODE_H */