背景:备份简书所有的文章
目前简书提供了文章打包下载功能。但文章中的图片是以链接的形式存在的,并未下载到本地。
因此用 Python 写了一个脚本来下载文章中的图片(把下面的代码保存成 .py 文件,并将脚本中的 dirRoot 路径改成文章压缩包解压后的目录即可)。
- V1: 支持简书内部链接
import re
from os import listdir, mkdir, path
from os.path import isfile, join, isdir
from pathlib import Path
from urllib import request
from urllib.parse import urlsplit
# Change this to the directory where the Jianshu export archive was extracted.
dirRoot = "C:\\Users\\ATLgo\\Downloads\\user-726742-1555140684"

# Name of the per-notebook sub-directory that receives the downloaded images.
IMAGES_DIR_NAME = "images"

# Markdown image syntax ``![alt](target)``; group 1 is the alt text and
# group 2 is the link target.
IMAGE_LINK_RE = re.compile(r"!\[(.*?)\]\((.*?)\)")

# Images hosted by Jianshu itself.  Only these links are rewritten to local
# relative paths; images from other hosts are downloaded but keep their link.
JIANSHU_IMAGE_RE = re.compile(
    r"https?://upload-images\.jianshu\.io/upload_images/", re.IGNORECASE
)


def image_file_name(url):
    """Return the local file name for a remote image URL.

    The name is the last path segment of the URL, with the query string and
    fragment dropped, so any image extension is supported (not only ``.png``).

    Returns ``None`` when ``url`` is not an http(s) URL (for example a link
    that was already rewritten to ``images/...``) or has no usable file name.
    """
    try:
        parts = urlsplit(url)
    except ValueError:
        # Malformed URL (e.g. unbalanced IPv6 brackets): nothing to download.
        return None
    if parts.scheme not in ("http", "https"):
        return None
    name = parts.path.rsplit("/", 1)[-1]
    if name in ("", ".", ".."):
        return None
    return name


def localize_images(text, images_dir, fetch=request.urlretrieve):
    """Download the images referenced in ``text`` and point links at them.

    Args:
        text: Markdown source of one article.
        images_dir: Directory that receives the downloaded files; it is
            created on demand.
        fetch: Callable ``fetch(url, destination)`` that performs the
            download.  Defaults to ``urllib.request.urlretrieve``.

    Returns:
        The article text in which every successfully downloaded,
        Jianshu-hosted image link has been replaced by ``images/<file name>``.
        Links whose download failed are left untouched so that the original
        URL is not lost.
    """

    def localize(match):
        original = match.group(0)
        target = match.group(2)
        if not target.strip():
            return original
        # A target may carry an optional title: ``url "title"``.
        url = target.split(None, 1)[0]
        name = image_file_name(url)
        if name is None:
            return original
        try:
            Path(images_dir).mkdir(exist_ok=True)
            fetch(url, join(images_dir, name))
        except (OSError, ValueError) as error:
            # URLError/HTTPError are OSError subclasses.  One broken image
            # must not abort the backup of everything else.
            print("Could not download", url, "->", error)
            return original
        if not JIANSHU_IMAGE_RE.match(url):
            return original
        local_target = target.replace(url, IMAGES_DIR_NAME + "/" + name, 1)
        return "![" + match.group(1) + "](" + local_target + ")"

    return IMAGE_LINK_RE.sub(localize, text)


def backup_images(root, fetch=request.urlretrieve):
    """Download the images of every article below ``root``.

    ``root`` is expected to contain one sub-directory per notebook, each
    holding the article files.  Every file is read as UTF-8, its images are
    stored in ``<notebook>/images`` and the file is written back only when
    at least one link changed.
    """
    notebook_names = [d for d in listdir(root) if isdir(join(root, d))]
    print(notebook_names)
    for notebook_name in notebook_names:
        notebook_dir = join(root, notebook_name)
        article_names = [
            f for f in listdir(notebook_dir) if isfile(join(notebook_dir, f))
        ]
        print(article_names)
        images_dir = join(notebook_dir, IMAGES_DIR_NAME)
        for article_name in article_names:
            article = Path(notebook_dir, article_name)
            try:
                text = article.read_text(encoding='utf-8')
            except UnicodeDecodeError:
                print("Skipping file that is not UTF-8 text:", article)
                continue
            updated = localize_images(text, images_dir, fetch)
            if updated != text:
                article.write_text(updated, encoding='utf-8')


if __name__ == "__main__":
    backup_images(dirRoot)