Subject: Should we consider opening a Python board? (A desktop image downloader, roughly 400 images per minute)
Python is very popular right now and looks set to become the unified scripting language of the future, especially on Unix platforms. I think pfun should open a Python board: the language is extremely powerful, easy to pick up, and very convenient for writing system-administration scripts.
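To give a flavor of the system-administration point, here is a throwaway sketch I wrote just for this post (nothing to do with the downloader below; the 10 MB threshold and the default path are arbitrary): it walks a directory tree and reports oversized files.
[code=python]
#!/usr/bin/python
# toy admin script: report files larger than 10 MB under a directory tree
import os
import sys

root = "."
if len(sys.argv) > 1:
    root = sys.argv[1]

LIMIT = 10 * 1024 * 1024  # 10 MB
for dirpath, dirnames, filenames in os.walk(root):
    for fn in filenames:
        path = os.path.join(dirpath, fn)
        try:
            size = os.path.getsize(path)
        except OSError:
            continue  # skip files we can't stat
        if size > LIMIT:
            print "%10d  %s" % (size, path)
[/code]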
Here is a simple script (it pages through image.baidu.com search results automatically and downloads the images).
I can't attach the packaged exe because of the 1 MB attachment size limit. Anyone who wants it can email me at cjcse@sohu.com; that way people without Python installed can still use it.
[code=python]
#!/usr/bin/python
# filename: getbaidupic.py
# description: get images from image.baidu.com
# author: cjcse
# version: v 0.21
import urllib
import htmllib
import formatter
import os
import time
import thread


class Parser(htmllib.HTMLParser):
    # collects a dictionary mapping anchor texts to lists of hyperlinks
    def __init__(self, verbose=0):
        self.anchors = {}
        f = formatter.NullFormatter()
        htmllib.HTMLParser.__init__(self, f, verbose)

    def anchor_bgn(self, href, name, type):
        self.save_bgn()
        self.anchor = href

    def anchor_end(self):
        text = self.save_end().strip()
        if self.anchor and text:
            self.anchors[text] = self.anchors.get(text, []) + [self.anchor]


def GetJpg(url):
    # download one image into the save directory, renaming on collision;
    # note: the counters are updated from several threads without a lock
    global total, succeeded, failed
    try:
        total += 1
        # use the last path component of the URL as the local file name
        name = os.path.join(save, url.split("/")[-1])
        # if the name is taken, append _1, _2, ... before the extension
        base, ext = os.path.splitext(name)
        i = 1
        while os.path.exists(name):
            name = base + "_" + str(i) + ext
            i += 1
        fp = urllib.urlopen(url)
        op = open(name, "wb")
        # copy the image in 8 KB chunks
        while 1:
            s = fp.read(8192)
            if not s:
                break
            op.write(s)
        op.close()
        fp.close()
        succeeded += 1
        print url + "\t[OK]"
    except Exception:
        failed += 1
        print url + "\t[Failed]"


def GetBaiduNextPage(base):
    # build the URL of the next result page from the base URL
    # (building from the base each time avoids appending rn/pn/ln twice)
    global pn
    page = base + "&rn=" + str(rn) + "&pn=" + str(pn) + "&ln=" + str(ln)
    pn += 18
    return page


def GetAllJpg(page_url):
    # parse one result page and download every .jpg it links to
    html = urllib.urlopen(page_url).read()
    p = Parser()
    p.feed(html)
    p.close()
    for text, hrefs in p.anchors.items():
        for uri in hrefs:
            if uri.find(".jpg") == -1:
                continue
            # the real image URL hides inside a query parameter,
            # e.g. /i?...&objurl=http://.../foo.jpg&...
            for param in uri.split("&"):
                for part in param.split("="):
                    part = part.lower()
                    if part.find("http://") != -1 and part.find(".jpg") != -1:
                        try:
                            GetJpg(part)
                        except Exception:
                            continue


print "---------------------------------------------------------------------"
print "Description: Get images from image.baidu.com."
print "Author: cjcse from CU."
print "Version: v 0.21."
print "---------------------------------------------------------------------"

keyword = raw_input("Input your keywords: ")
while len(keyword) == 0:
    keyword = raw_input("Keyword: ")

# quote the keyword so non-ASCII searches survive in the URL
base_url = ("http://image.baidu.com/i?ct=201326592&cl=2&lm=-1"
            "&tn=baiduimage&pv=&word=" + urllib.quote(keyword) + "&z=5")

save = "c:\\image_baidu"
try:
    if not os.path.exists(save):
        os.mkdir(save)
except OSError:
    print "Failed to create directory on drive c:"
    raise SystemExit

print "The images will be stored in folder \"" + save + "\"."

pages = 50    # number of result pages to fetch
rn = 21       # results per page
pn = 18       # offset of the first result on the next page
ln = 2000
total = 0
succeeded = 0
failed = 0

url = base_url
for i in range(pages):
    thread.start_new_thread(GetAllJpg, (url,))
    url = GetBaiduNextPage(base_url)

# thread.start_new_thread offers no join, so keep the main thread
# alive while the workers run (Ctrl-C to stop)
while True:
    time.sleep(1)
[/code]
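If you want to see exactly what the Parser class collects before wiring it into the downloader, here is a standalone check (the class is copied from the script above; the HTML snippet is made up by me for illustration):
[code=python]
#!/usr/bin/python
# quick check of the Parser class on a hand-written HTML snippet
import htmllib
import formatter

class Parser(htmllib.HTMLParser):
    def __init__(self, verbose=0):
        self.anchors = {}
        f = formatter.NullFormatter()
        htmllib.HTMLParser.__init__(self, f, verbose)
    def anchor_bgn(self, href, name, type):
        self.save_bgn()
        self.anchor = href
    def anchor_end(self):
        text = self.save_end().strip()
        if self.anchor and text:
            self.anchors[text] = self.anchors.get(text, []) + [self.anchor]

html = ('<a href="/i?objurl=http://example.com/a.jpg">pic1</a>'
        '<a href="/i?objurl=http://example.com/b.jpg">pic2</a>')
p = Parser()
p.feed(html)
p.close()
print p.anchors
# prints something like:
# {'pic1': ['/i?objurl=http://example.com/a.jpg'],
#  'pic2': ['/i?objurl=http://example.com/b.jpg']}
[/code]
GetAllJpg then splits each collected href on "&" and "=" to dig the real image URL out of the objurl-style query parameter.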