Python 的第一個小程式

2021-06-04 06:09:19 字數 3535 閱讀 3650

#-*-coding:utf-8-*-

import sys

from htmlparser import htmlparser

# Python 2 idiom: reload(sys) is called first so that
# sys.setdefaultencoding can be used below.
reload(sys)

# Kept as a module-level name, as in the original listing.
encoding = sys.getdefaultencoding()

# Switch the interpreter-wide default codec to UTF-8 unless it already is.
if encoding != 'utf-8':
    sys.setdefaultencoding('utf-8')

class contentparser(htmlparser):
    """Collect paragraph text from an HTML page.

    Text is kept while ``is_content`` is set.  That flag is raised by a
    ``<p>`` start tag that has no ``align`` attribute and is not inside an
    ``<li>`` carrying an ``id`` attribute (tracked by ``is_comment``); it is
    cleared by ``</p>`` and by any ``<a>`` start tag.  Text chunks containing
    '稱呼' or '內容' are dropped.  Every kept chunk is appended to
    ``self.text`` with a leading newline.

    NOTE(review): the base class name ``htmlparser`` is whatever the file's
    top-level import provides; the standard-library class is spelled
    ``HTMLParser`` -- confirm the import line.
    """

    def __init__(self):
        htmlparser.__init__(self)
        self.text = ''          # accumulated output, one '\n'-prefixed chunk per kept text node
        self.is_comment = 0     # 1 while inside an <li> that has an id attribute
        self.is_content = 0     # 1 while inside a <p> whose text should be kept

    def handle_starttag(self, tag, attr):
        """Update the comment/content flags for an opening tag.

        ``attr`` is the list of ``(name, value)`` pairs supplied by the
        parser; only the attribute names matter here.
        """
        if tag == 'li' and any(name == 'id' for name, _ in attr):
            self.is_comment = 1

        # Text inside links is never collected, even within a paragraph.
        if tag == 'a':
            self.is_content = 0

        if tag == 'p':
            has_align = any(name == 'align' for name, _ in attr)
            if not has_align and not self.is_comment:
                self.is_content = 1

    def handle_endtag(self, tag):
        """Clear the flag that belongs to the closing tag."""
        if tag == 'li':
            self.is_comment = 0
        if tag == 'p':
            self.is_content = 0

    def handle_data(self, text):
        """Append ``text`` to the result if it is wanted paragraph content."""
        if not self.is_content:
            return
        # Chunks containing either marker are skipped.
        if '稱呼' in text or '內容' in text:
            return
        self.text += '\n' + text

    def get_text(self):
        """Return everything collected so far as one string."""
        return self.text

if __name__ == '__main__':
    # Parse the HTML file named by the first command-line argument.
    # The extracted text is not printed here; it stays available through
    # cp.get_text().
    cp = contentparser()
    # `with` closes the file even when read() or feed() raises.
    with open(sys.argv[1]) as fd:
        cp.feed(fd.read())

將上面的程式碼儲存成 progress.py;下面的第二個腳本會從 progress 匯入 contentparser。

import sys,urllib2,time

from progress import contentparser

from htmlparser import htmlparser

reload(sys)

sys.setdefaultencoding('utf-8')

class linkparser(htmlparser):
    """Walk an index page and hand every chapter link to ``progressing``.

    Flags driven by the tag stream:

    * ``is_mulu``  -- set by ``<div class="mulu">``; cleared by a ``</div>``
      seen while a heading is stored in ``self.mulu``.
    * ``has_mulu`` -- set by a ``<td>`` with a ``colspan`` attribute inside
      the region; text seen while it is set becomes ``self.mulu``.
    * ``is_href``  -- set by an ``<a>`` inside the region; the tag's
      ``href`` is stored in ``self.link`` and each text chunk inside the
      link triggers ``progressing(link, mulu, text)``.

    ``self.mulu`` is passed to ``progressing`` as its ``filename`` argument.
    ``start_time``/``end_time`` bracket one heading and the elapsed seconds
    are printed when the region closes.

    NOTE(review): the base class name ``htmlparser`` is whatever the file's
    top-level import provides; the standard-library class is spelled
    ``HTMLParser`` -- confirm the import line.
    """

    def __init__(self):
        htmlparser.__init__(self)
        self.link = ''        # href of the most recent <a> inside the region
        self.content = ''     # text of the most recent link
        self.mulu = ''        # current heading text
        self.has_mulu = 0
        self.is_mulu = 0
        self.is_href = 0
        self.start_time = 0
        self.end_time = 0

    def handle_starttag(self, tag, attr):
        """Track entry into the index region, its links and heading cells."""
        if tag == 'div' and any(k == 'class' and v == 'mulu' for k, v in attr):
            self.is_mulu = 1

        # Links and heading cells only matter inside the index region.
        if not self.is_mulu:
            return

        if tag == 'a':
            self.is_href = 1
            for k, v in attr:
                if k == 'href':
                    self.link = v

        if tag == 'td' and any(k == 'colspan' for k, _ in attr):
            self.has_mulu = 1

    def handle_endtag(self, tag):
        """Close whichever piece of state the end tag terminates."""
        if tag == 'div' and self.is_mulu and self.mulu:
            self.is_mulu = 0
            # %-formatting keeps the output identical to the former
            # `print 'end', self.mulu` and parses on Python 2 and 3 alike.
            print('end %s' % (self.mulu,))
            self.mulu = ''
            self.end_time = time.time()
            print('time :  %s' % (self.end_time - self.start_time,))

        if tag == 'a':
            self.is_href = 0

        if tag == 'td' and self.is_mulu and self.has_mulu:
            self.has_mulu = 0

    def handle_data(self, text):
        """Download a chapter for link text, or record a new heading."""
        if self.is_mulu and self.is_href:
            self.content = text
            progressing(self.link, self.mulu, self.content)
            return

        if self.has_mulu:
            self.mulu = text
            print('begin %s' % (self.mulu,))
            self.start_time = time.time()

def progressing(url, filename, chaptername):
    """Fetch one chapter and append it to ``filename``.

    Args:
        url: address passed to ``get_chapter_text``.
        filename: file opened in append mode.
        chaptername: title written immediately before the chapter body.

    The former ``'\\n'.format(chapter_text)`` had no placeholder, so only a
    newline reached the file and the downloaded text was discarded.  The
    body is now written, followed by a terminating newline.
    """
    chapter_text = get_chapter_text(url)
    # `with` guarantees the file is closed even if a write fails.
    with open(filename, 'a') as fd:
        fd.write(chaptername)
        fd.write(chapter_text)
        fd.write('\n')

def get_chapter_text(url):
    """Download ``url`` and return the text ``contentparser`` extracts.

    A parse error is printed and whatever text was collected before the
    error is still returned.  Network errors from ``urllib2`` are not
    caught here.

    NOTE(review): this listing spells stdlib names in lowercase
    (``urllib2.request``, ``htmlparseerror``); Python 2 spells them
    ``urllib2.Request`` and ``HTMLParser.HTMLParseError`` -- confirm the
    file's top-level imports match.
    """
    # Imported locally: the script's top-level imports never bring the
    # exception class into scope, so the handler below would otherwise
    # raise NameError instead of catching the parse failure.
    from HTMLParser import HTMLParseError

    fd = urllib2.urlopen(urllib2.Request(url))
    try:
        page = fd.read()
    finally:
        # The response was previously left open.
        fd.close()

    cp = contentparser()
    try:
        cp.feed(page)
    except HTMLParseError as msg:
        print(msg)
    return cp.get_text()

if __name__ == '__main__':
    # Imported locally: the script's top-level imports never bring the
    # exception class into scope.
    from HTMLParser import HTMLParseError

    # The index URL is an empty literal in this listing.  Accept it as the
    # first command-line argument; with no argument the original empty
    # string is used, as before.
    index_url = sys.argv[1] if len(sys.argv) > 1 else ''

    fd = urllib2.urlopen(urllib2.Request(index_url))
    try:
        page = fd.read()
    finally:
        # The response was previously left open.
        fd.close()

    lp = linkparser()
    try:
        lp.feed(page)
    except HTMLParseError as msg:
        print(msg)

第一個小程式

2.建立pages目錄檔案 作用是放各個頁面的 3.建立頁面 給頁面起名字,並建立四個檔案 1 js 邏輯的實現 2 json 負責標題欄和一些狀態列 3 wxml 頁面文字 4 wxss 頁面樣式 4.把內容元素封裝在view內部,寫法 內容 5.這節課需要用到三個元件 文字 按鈕 1 2 文字 ...

Python 的第一個小程式:Hello World

程式的編寫方式有兩種:編譯式和互動式。黑視窗的方式是互動式,互動式執行程式的方法如下:進入環境——在終端輸入 python 就進入了 Python 直譯器;輸入 exit() 退出 Python 環境。互動式的缺點是寫一行執行一行,無法儲存;編譯式可以統一程式設計,可以儲存和維護程式,所以採用編譯式進行程式編寫。編譯式...

第一個 Python 程式

有人在論壇 上問 將日誌格式化的方法,剛好學python,就拿這個練手了 09 55 54 error1 tmp error log.3 50 times mon jun 28 00 00 53 2009 09 55 54 error1 tmp error log.3 50 times 09 56 ...