漢字處理的工具

2021-08-09 07:09:03 字數 4120 閱讀 1622

#!/usr/bin/env python

# -*- coding:utf-8 -*-

"""漢字處理的工具:判斷unicode是否是漢字,數字,英文,或者其他字元。全形符號轉半形符號。"""

def is_chinese(uchar):
    """Return whether ``uchar`` is a Chinese character.

    A character counts as Chinese when it lies in the inclusive range
    U+4E00..U+9FA5; nothing outside that range is recognised.

    Args:
        uchar: The unicode character to classify.

    Returns:
        bool: True if ``uchar`` is inside the range, False otherwise.
    """
    return u'\u4e00' <= uchar <= u'\u9fa5'

def is_chinese_all(ustring):
    """Return whether every character of ``ustring`` is a Chinese character.

    Args:
        ustring: The unicode string to inspect.

    Returns:
        bool: False as soon as one character fails ``is_chinese``; True
        otherwise (including for an empty string, which has no failing
        character).
    """
    return all(is_chinese(uchar) for uchar in ustring)

def is_number(uchar, but=u''):
    """Return whether ``uchar`` is an ASCII digit or the exempted value.

    Args:
        uchar: The unicode character to classify.
        but: A value that is accepted in addition to the digits
            ``0``-``9``. With the default ``u''`` an empty ``uchar``
            compares equal to it and is therefore accepted.

    Returns:
        bool: True if ``uchar == but`` or ``uchar`` lies in U+0030..U+0039.
    """
    if uchar == but:
        return True
    return u'\u0030' <= uchar <= u'\u0039'

def is_number_all(ustring, but=u''):
    """Return whether every character of ``ustring`` passes ``is_number``.

    Args:
        ustring: The unicode string to inspect.
        but: Exempted value forwarded unchanged to ``is_number``.

    Returns:
        bool: False as soon as one character is rejected; True otherwise
        (including for an empty string).
    """
    return all(is_number(uchar, but) for uchar in ustring)

def is_alphabet(uchar, but=u''):
    """Return whether ``uchar`` is an ASCII letter or the exempted value.

    Args:
        uchar: The unicode character to classify.
        but: A value that is accepted in addition to ``a``-``z`` and
            ``A``-``Z``. With the default ``u''`` an empty ``uchar``
            compares equal to it and is therefore accepted.

    Returns:
        bool: True if ``uchar == but`` or ``uchar`` lies in
        U+0061..U+007A or U+0041..U+005A.
    """
    if uchar == but:
        return True
    is_lower = u'\u0061' <= uchar <= u'\u007a'
    is_upper = u'\u0041' <= uchar <= u'\u005a'
    return is_lower or is_upper

def is_alphabet_all(ustring, but=u''):
    """Return whether every character of ``ustring`` passes ``is_alphabet``.

    Args:
        ustring: The unicode string to inspect.
        but: Exempted value forwarded unchanged to ``is_alphabet``.

    Returns:
        bool: False as soon as one character is rejected; True otherwise
        (including for an empty string).
    """
    return all(is_alphabet(uchar, but) for uchar in ustring)

def is_alphanum(uchar, but=u''):
    """Return whether ``uchar`` is an English letter or a digit.

    Args:
        uchar: The unicode character to classify.
        but: Exempted value forwarded to both ``is_number`` and
            ``is_alphabet``.

    Returns:
        bool: True if either ``is_number`` or ``is_alphabet`` accepts
        ``uchar``; False otherwise.
    """
    return bool(is_number(uchar, but) or is_alphabet(uchar, but))

def is_alpha_or_num_all(ustring, but=u''):
    """Return whether ``ustring`` consists only of English letters or digits.

    Args:
        ustring: The unicode string to inspect.
        but: Exempted value forwarded unchanged to ``is_alphanum``.

    Returns:
        bool: False as soon as one character fails ``is_alphanum``; True
        otherwise (including for an empty string).
    """
    return all(is_alphanum(uchar, but) for uchar in ustring)

def is_alpha_and_num_all(ustring, but=u''):
    """Return whether ``ustring`` mixes English letters and digits only.

    The string must contain at least one character accepted by
    ``is_alphabet`` and at least one accepted by ``is_number``, and no
    character rejected by both.

    Args:
        ustring: The unicode string to inspect.
        but: Exempted value forwarded to ``is_alphabet`` and ``is_number``.

    Returns:
        bool: True only when both kinds of character occur and nothing
        else does; False otherwise (including for an empty string).
    """
    seen_letter = False
    seen_digit = False
    for uchar in ustring:
        # The letter test runs first, so a character that both helpers
        # accept is recorded as a letter, not as a digit.
        if is_alphabet(uchar, but):
            seen_letter = True
        elif is_number(uchar, but):
            seen_digit = True
        else:
            return False
    return seen_letter and seen_digit

def is_other(uchar, but=u''):
    """Return whether ``uchar`` is neither Chinese, a digit, nor a letter.

    Args:
        uchar: The unicode character to classify.
        but: Exempted value forwarded to ``is_number`` and ``is_alphabet``
            (``is_chinese`` takes no such argument).

    Returns:
        bool: True when ``is_chinese``, ``is_number`` and ``is_alphabet``
        all reject ``uchar``; False otherwise.
    """
    if is_chinese(uchar):
        return False
    if is_number(uchar, but):
        return False
    if is_alphabet(uchar, but):
        return False
    return True

def is_other_all(ustring, but=u''):
    """Return whether ``ustring`` has no Chinese, digit or letter characters.

    Args:
        ustring: The unicode string to inspect.
        but: Exempted value forwarded unchanged to ``is_other``.

    Returns:
        bool: False as soon as one character fails ``is_other``; True
        otherwise (including for an empty string).
    """
    return all(is_other(uchar, but) for uchar in ustring)

def exist_chinese(ustring):
    """Return whether ``ustring`` contains at least one Chinese character.

    Args:
        ustring: The unicode string to inspect.

    Returns:
        bool: True as soon as one character passes ``is_chinese``; False
        otherwise (including for an empty string).
    """
    return any(is_chinese(uchar) for uchar in ustring)

def exist_number(ustring):
    """Return whether ``ustring`` contains at least one digit.

    ``is_number`` is called with its default ``but`` argument.

    Args:
        ustring: The unicode string to inspect.

    Returns:
        bool: True as soon as one character passes ``is_number``; False
        otherwise (including for an empty string).
    """
    return any(is_number(uchar) for uchar in ustring)

def exist_alphabet(ustring):
    """Return whether ``ustring`` contains at least one English letter.

    ``is_alphabet`` is called with its default ``but`` argument.

    Args:
        ustring: The unicode string to inspect.

    Returns:
        bool: True as soon as one character passes ``is_alphabet``; False
        otherwise (including for an empty string).
    """
    return any(is_alphabet(uchar) for uchar in ustring)

def exist_other(ustring, but=u''):
    """Return whether ``ustring`` contains a non-Chinese/digit/letter character.

    Args:
        ustring: The unicode string to inspect.
        but: Exempted value forwarded unchanged to ``is_other``.

    Returns:
        bool: True as soon as one character passes ``is_other``; False
        otherwise (including for an empty string).
    """
    return any(is_other(uchar, but) for uchar in ustring)

def b2q(uchar):
    """Convert a half-width character to its full-width counterpart.

    Args:
        uchar: A single unicode character (it is passed to ``ord``).

    Returns:
        The full-width character for code points 0x20..0x7E; ``uchar``
        itself for anything outside that range.
    """
    try:
        to_unichar = unichr  # Python 2 spelling
    except NameError:
        to_unichar = chr  # Python 3: unichr no longer exists

    code = ord(uchar)
    # Not a half-width character: hand the input back untouched.
    if code < 0x0020 or code > 0x7e:
        return uchar
    # The space is the one exception; every other character follows
    # full-width = half-width + 0xFEE0.
    if code == 0x0020:
        return to_unichar(0x3000)
    return to_unichar(code + 0xfee0)

def q2b(uchar):
    """Convert a full-width character to its half-width counterpart.

    Args:
        uchar: A single unicode character (it is passed to ``ord``).

    Returns:
        The half-width character when the converted code point lands in
        0x20..0x7E; ``uchar`` itself otherwise.
    """
    try:
        to_unichar = unichr  # Python 2 spelling
    except NameError:
        to_unichar = chr  # Python 3: unichr no longer exists

    # Special case handled before the arithmetic below.
    # NOTE(review): this maps U+300F to an apostrophe; possibly a different
    # quote character was intended -- confirm against the upstream source.
    if uchar == u'』':
        return u'\''

    code = ord(uchar)
    if code == 0x3000:
        # The full-width space does not follow the 0xFEE0 offset rule.
        code = 0x0020
    else:
        code -= 0xfee0
    # The result is not a half-width character: hand the input back untouched.
    if code < 0x0020 or code > 0x7e:
        return uchar
    return to_unichar(code)

def stringq2b(ustring):
    """Convert every full-width character of ``ustring`` to half-width.

    Args:
        ustring: The unicode string to convert.

    Returns:
        A new string built by applying ``q2b`` to each character in order.
    """
    return "".join(q2b(uchar) for uchar in ustring)

def uniform(ustring):
    """Normalise ``ustring``: full-width to half-width, then lowercase.

    Args:
        ustring: The unicode string to normalise.

    Returns:
        The result of ``stringq2b(ustring)`` converted to lowercase.
    """
    half_width = stringq2b(ustring)
    return half_width.lower()

def string2list(ustring):
    """Split ``ustring`` into runs of Chinese characters, letters and digits.

    Characters accepted by ``is_other`` act as separators and are dropped;
    consecutive non-separator characters are joined into one segment.

    NOTE(review): the list initialisers and append statements were missing
    from this block and have been reconstructed from the surviving control
    flow -- confirm against the upstream source.

    Args:
        ustring: The unicode string to split.

    Returns:
        list: The segments in order of appearance; empty when ``ustring``
        has no non-separator characters.
    """
    retlist = []
    utmp = []
    for uchar in ustring:
        if is_other(uchar):
            # A separator closes the current segment, if one is open.
            if utmp:
                retlist.append("".join(utmp))
                utmp = []
        else:
            utmp.append(uchar)
    # Flush the trailing segment that no separator terminated.
    if utmp:
        retlist.append("".join(utmp))
    return retlist

漢字處理元件

有時候專案中會根據使用者姓名的拼音檢索資料,微軟專門提供了一個元件安裝包來處理非英文的特殊語言,名稱為 預設的安裝路徑為 c program files x86 microsoft visual studio international pack simplified chinese pin yin...

把16進制編碼得到其中的漢字處理方法

name 中國 print name.encode utf8 結果輸出b xe4 xb8 xad xe5 x9b xbd b xe4 xb8 xad xe5 x9b xbd decode utf8 中國 注釋,也就是說只有字串才能進行編碼處理,位元組流進行解碼處理 所以要想知道16進制下的內容就要...

jsp的分頁技術與訪問資料庫的漢字處理

class.forname com.microsoft.sqlserver.jdbc.sqlserverdriver string url jdbc sqlserver localhost 1433 databasename bookstore string user bookstore strin...