下面的 Python 脚本用于解析网站的 sitemap.xml 文件，并将其中的页面 URL 逐行写入 urls.txt：
import os

import requests
from lxml import etree

SITEMAP_URL = "https://lucumt.info/sitemap.xml"
OUTPUT_PATH = r"e:\urls.txt"
# Pages whose URL contains one of these substrings are left out of the output.
EXCLUDED_KEYWORDS = ("tags", "categories")
REQUEST_TIMEOUT = 30  # seconds; without a timeout requests can block forever


def collect_page_urls(root, excluded=EXCLUDED_KEYWORDS):
    """Return the page URLs listed in a parsed sitemap.

    Args:
        root: Root element of the sitemap document. Each child is treated as
            one entry, and the URL is read from the entry's ``<loc>`` child
            (matched by local name, so the sitemap namespace is ignored).
        excluded: Substrings that cause a URL to be skipped.

    Returns:
        A list of whitespace-stripped URLs in document order. Entries with no
        ``<loc>`` child, with empty text, or matching ``excluded`` are skipped.
    """
    urls = []
    for entry in root:
        for child in entry:
            # Comments and processing instructions have a non-string tag.
            if isinstance(child.tag, str) and child.tag.rpartition("}")[2] == "loc":
                url = (child.text or "").strip()
                break
        else:
            # Entry has no <loc> child at all (e.g. a comment or empty node).
            continue
        if not url or any(keyword in url for keyword in excluded):
            continue
        urls.append(url)
    return urls


def main():
    """Download the sitemap and write the filtered URLs to ``OUTPUT_PATH``.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    # Fetch and parse before touching the output file, so a failed download
    # does not destroy the result of a previous run.
    response = requests.get(SITEMAP_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    urls = collect_page_urls(etree.fromstring(response.content))

    # Mode "w" truncates any existing file, replacing the remove-then-append
    # sequence with a single step.
    with open(OUTPUT_PATH, "w", encoding="utf-8") as url_file:
        url_file.writelines(f"{url}\n" for url in urls)

    print(f"Url files write success,total count {len(urls)}")


if __name__ == "__main__":
    main()
使用生成的 urls.txt 文件，通过 curl 命令将其中的 URL 提交给百度收录：
# POST the contents of urls.txt to the Baidu URL-submission endpoint.
# Run this from the directory that contains urls.txt: "@urls.txt" is resolved
# relative to the current working directory.
# NOTE: "token=xxx" looks like a placeholder - substitute the site's real token.
curl -H 'Content-Type:text/plain' --data-binary @urls.txt "http://data.zz.baidu.com/urls?site=https://lucumt.info&token=xxx"