輸出GB 18030 2005字元編碼(Lua)

2021-08-30 11:39:25 字數 3761 閱讀 5059

按單位元組、雙位元組、四位元組及編碼順序輸出 GB 18030-2005 中規定的中文圖形字元二進位制編碼,不包括保留區字元和使用者自定義區字元。

考慮到最近《資訊科技 中文編碼字符集》修訂版送審稿通過專家審查,採用 Lua 指令碼實現以方便修改。

-- Writes GB 18030-2005 code tables to standard output: single-byte codes
-- first, then double-byte codes, then four-byte codes, in ascending code
-- order. Double-byte user-defined codes and four-byte reserved/user-defined
-- codes (as tested by the predicates below) are skipped.
--
-- Requires Lua 5.3 or newer: the script relies on the integer bitwise
-- operators (<<, >>, &, |).
--
-- NOTE(review): the padding strings and column headers below are kept exactly
-- as found. Their spacing may not line up with the printed cells on a real
-- terminal -- confirm against the intended layout before relying on it.

--- Tell whether a double-byte code lies in a user-defined area.
-- @param ch1 first byte of the code
-- @param ch2 second byte of the code
-- @return true if (ch1, ch2) falls in one of the three ranges below,
--         false otherwise
function isindoublebytesuserdefinedarea(ch1, ch2)
  -- ch1 = [0xaa, 0xaf], ch2 = [0xa1, 0xfe]
  if (0xaa <= ch1 and ch1 <= 0xaf and 0xa1 <= ch2 and ch2 <= 0xfe) then
    return true
  end

  -- ch1 = [0xf8, 0xfe], ch2 = [0xa1, 0xfe]
  if (0xf8 <= ch1 and ch1 <= 0xfe and 0xa1 <= ch2 and ch2 <= 0xfe) then
    return true
  end

  -- ch1 = [0xa1, 0xa7], ch2 = [0x40, 0x7e] or ch2 = [0x80, 0xa0]
  if (0xa1 <= ch1 and ch1 <= 0xa7 and
      ((0x40 <= ch2 and ch2 <= 0x7e) or (0x80 <= ch2 and ch2 <= 0xa0))) then
    return true
  end

  return false
end

--- Tell whether a four-byte code lies in one of the reserved ranges.
-- The four bytes are packed big-endian into one integer and compared
-- numerically against the range bounds.
-- NOTE(review): only the three ranges below are treated as reserved; verify
-- against the standard whether further unassigned ranges should be skipped.
-- @param ch1 first byte
-- @param ch2 second byte
-- @param ch3 third byte
-- @param ch4 fourth byte
-- @return true if the packed code is inside a listed range, false otherwise
function isinquadbytesreservedarea(ch1, ch2, ch3, ch4)
  local ch = (ch1 << 24) | (ch2 << 16) | (ch3 << 8) | ch4

  if (0x85308130 <= ch and ch <= 0x8539fe39) then
    return true
  end

  if (0x86308130 <= ch and ch <= 0x8f39fe39) then
    return true
  end

  if (0xe4308130 <= ch and ch <= 0xfc39fe39) then
    return true
  end

  return false
end

--- Tell whether a four-byte code lies in the user-defined range
-- 0xfd308130 .. 0xfe39fe39 (bytes packed big-endian).
-- @param ch1 first byte
-- @param ch2 second byte
-- @param ch3 third byte
-- @param ch4 fourth byte
-- @return true if the packed code is inside the range, false otherwise
function isinquadbytesuserdefinedarea(ch1, ch2, ch3, ch4)
  local ch = (ch1 << 24) | (ch2 << 16) | (ch3 << 8) | ch4

  if (0xfd308130 <= ch and ch <= 0xfe39fe39) then
    return true
  end

  return false
end

--- Write one single-byte code as a table cell.
-- A new row, labelled with the high nibble, is started whenever the low
-- nibble is 0. The byte itself is written raw via %c, so control codes
-- (0x00-0x1f, 0x7f) reach the output unescaped.
-- @param ch byte value to print
function printsinglebyte(ch)
  if ((ch & 0x0f) == 0) then
    io.write(string.format("\n %1x", ch >> 4))
  end

  io.write(string.format(" %c", ch))
end

--- Write one double-byte code as a table cell.
-- Starts a new table (header line) when the first byte changes, a new row
-- when the high nibble of the second byte changes, and pads over any codes
-- skipped since the previously printed one.
-- @param ch1 first byte
-- @param ch2 second byte
-- @param last_ch value returned by the previous call (0 before the first)
-- @return the 16-bit code (ch1 << 8) | ch2, to be passed as the next last_ch
function printdoublebytes(ch1, ch2, last_ch)
  local ch = (ch1 << 8) | ch2
  -- Declared local so the flag no longer leaks into the global environment.
  local bnewline = false

  if ((last_ch >> 8) ~= ch1) then
    io.write(string.format("\n\n%02x 0 1 2 3 4 5 6 7 8 9 a b c d e f", ch1))
    bnewline = true
  end

  if (bnewline or ((last_ch & 0xf0) ~= (ch2 & 0xf0))) then
    io.write(string.format("\n %1x", ch2 >> 4))
    -- New row: pad over the columns before this code's low nibble.
    for i = 1, ch2 & 0x0f, 1 do
      io.write(" ")
    end
  else
    -- Same row: pad over the codes skipped since the last printed one.
    for i = 1, (ch2 & 0x0f) - (last_ch & 0x0f) - 1, 1 do
      io.write(" ")
    end
  end

  io.write(string.format(" %c%c", ch1, ch2))

  return ch
end

--- Write one four-byte code as a table cell.
-- Starts a new table when the leading two bytes change, a new row when the
-- third byte changes, and pads over any codes skipped since the previously
-- printed one. Columns correspond to the fourth byte, 0x30 .. 0x39.
-- @param ch1 first byte
-- @param ch2 second byte
-- @param ch3 third byte
-- @param ch4 fourth byte
-- @param last_ch value returned by the previous call
-- @return the 32-bit packed code, to be passed as the next last_ch
function printquadbytes(ch1, ch2, ch3, ch4, last_ch)
  local leading2bytes = (ch1 << 8) | ch2
  local bnewline = false

  if ((last_ch >> 16) ~= leading2bytes) then
    io.write(string.format("\n\n%04x\n 30 31 32 33 34 35 36 37 38 39", leading2bytes))
    bnewline = true
  end

  local ch = (leading2bytes << 16) | (ch3 << 8) | ch4

  if (bnewline or (((last_ch >> 8) & 0xff) ~= ch3)) then
    io.write(string.format("\n%02x", ch3))
    -- New row: pad over the columns before this code's fourth byte.
    for i = 1, ch4 - 0x30, 1 do
      io.write(" ")
    end
  else
    -- Same row: pad over the codes skipped since the last printed one.
    for i = 1, ch4 - (last_ch & 0xff) - 1, 1 do
      io.write(" ")
    end
  end

  io.write(string.format(" %c%c%c%c", ch1, ch2, ch3, ch4))

  return ch
end

-- main

-- Single-byte table: 0x00 .. 0x7f.
io.write(" 0 1 2 3 4 5 6 7 8 9 a b c d e f")

for ch = 0, 0x7f, 1 do
  printsinglebyte(ch)
end

-- Last code printed so far; 0 forces a header on the first double-byte code.
local last_ch = 0

-- Double-byte tables: first byte 0x81 .. 0xfe; second byte 0x40 .. 0x7e and
-- 0x80 .. 0xfe (0x7f is never a second byte).
for ch1 = 0x81, 0xfe, 1 do
  for ch2 = 0x40, 0x7e, 1 do
    if (not isindoublebytesuserdefinedarea(ch1, ch2)) then
      last_ch = printdoublebytes(ch1, ch2, last_ch)
    end
  end

  for ch2 = 0x80, 0xfe, 1 do
    if (not isindoublebytesuserdefinedarea(ch1, ch2)) then
      last_ch = printdoublebytes(ch1, ch2, last_ch)
    end
  end
end

-- Four-byte tables. last_ch still holds a 16-bit double-byte code here, so
-- its upper 16 bits are 0 and the first four-byte code always gets a header.
for ch1 = 0x81, 0xfe, 1 do
  for ch2 = 0x30, 0x39, 1 do
    for ch3 = 0x81, 0xfe, 1 do
      for ch4 = 0x30, 0x39, 1 do
        if (not isinquadbytesreservedarea(ch1, ch2, ch3, ch4)
            and not isinquadbytesuserdefinedarea(ch1, ch2, ch3, ch4)) then
          last_ch = printquadbytes(ch1, ch2, ch3, ch4, last_ch)
        end
      end
    end
  end
end

GB2312字符集顯示

最近需要做一下 12×12 點陣的 GBK 字符集,但是網上只找到了 GB2312 的字符集。一大堆說法,陷身於網上的大海洋中,看了一下資料,有點明白漢字在螢幕上如何顯示了。比如顯示「漢字」兩個字,因為漢字是兩個位元組顯示的,比如 ab 兩個位元組,第一個位元組是高位元組,第二個位元組是低位元組,在 GB2312 中,字...

安裝GB2312字符集

Linux 下安裝 GB2312 的示例。Step 1:到 Linux 字符集的安裝包目錄下:cd /usr/share/i18n/charmaps;Step 2:解壓該目錄下的 gb2312.gz:gunzip gb2312.gz;Step 3:安裝字符集:localedef -f gb2312 -i zh_CN ...

11 3 字串輸出

目錄作業 c有3個標準庫函式 用於列印字串 put fputs 和 printf include include 判斷空白字元標頭檔案 define len 80 char getword char str,int a 定義儲存字元的函式 int main void printf done.n re...