Demo entry 6632295

py

   

Submitted by anonymous on Jul 24, 2017 at 14:17
Language: Python 3. Code size: 791 Bytes.

import urllib.request, socket, re, sys, os

# Directory that downloaded files are written into. A raw string keeps the
# Windows backslash literal instead of relying on "\W" being an unrecognised
# (and deprecated) escape sequence; the resulting value is unchanged.
targetPath = r"C:\WebCrawlerExample"


def saveFile(path):
    """Return the local destination path for a downloaded URL.

    The destination is ``targetPath`` joined with everything that follows
    the last ``/`` in *path*. ``targetPath`` is created first if it does not
    exist yet.

    Args:
        path: URL (or any ``/``-separated string) whose final segment is
            used as the file name.

    Returns:
        The joined path as a string. Nothing is written to that path here.

    Raises:
        ValueError: If *path* contains no ``/``.
        OSError: If ``targetPath`` cannot be created.
    """
    # makedirs(exist_ok=True) creates missing parent directories as well and
    # avoids the window between an "is it there?" check and the creation.
    os.makedirs(targetPath, exist_ok=True)
    pos = path.rindex('/')
    return os.path.join(targetPath, path[pos + 1:])

# Fetch one Douban photo-listing page and download every jpg/png/gif image
# URL found in its markup.
url = "https://movie.douban.com/subject/26235354/all_photos"
# The page request is sent with a desktop-browser User-Agent string.
headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11'}
req = urllib.request.Request(url=url, headers=headers)
# The context manager closes the HTTP response even if reading fails.
with urllib.request.urlopen(req) as res:
    data = res.read()

# Search the decoded text rather than the repr of the bytes object.
# errors='replace' keeps a stray undecodable byte from aborting the run.
html = data.decode('utf-8', errors='replace')

# A link runs from "https://" up to the first .jpg/.png/.gif extension and
# must not cross whitespace, quotes or angle brackets. The previous class
# "[^s]" excluded the letter "s" instead of whitespace.
image_link_re = re.compile(r'https://[^\s"\'<>]+?\.(?:jpg|png|gif)')

# set() drops duplicate links; sorted() makes the download order repeatable.
for link in sorted(set(image_link_re.findall(html))):
    print(link)
    try:
        urllib.request.urlretrieve(link, saveFile(link))
    except Exception:
        # Best effort: report the failed link and move on to the next one.
        # Catching Exception (not a bare except) lets Ctrl-C and SystemExit
        # still stop the script.
        print('Fail')

This snippet took 0.01 seconds to highlight.

Back to the Entry List or Home.

Delete this entry (admin only).