#!/usr/bin/env python3
# NeurIPS proceedings scraper: collect per-paper PDF links into url.txt,
# then (optionally) download the PDFs.
import wget

from bs4 import BeautifulSoup
from urllib import request


def getUrl(url, year='2020'):
    """Scrape a NeurIPS proceedings index page and write paper records to ./url.txt.

    Each output record has the form ``<n>-<title>[]<pdf-url>\n`` — the
    literal ``[]`` separates the title from the URL (downFile splits on it).

    Args:
        url: proceedings index page, e.g.
            'https://proceedings.neurips.cc/paper/2020'.
        year: proceedings year used to build each per-paper PDF URL.
            Defaults to '2020'; should match the year embedded in ``url``.
    """
    html = request.urlopen(url)
    soup = BeautifulSoup(html, 'html.parser')
    anchors = soup.find_all('a')
    # 'with' guarantees the output file is closed even if scraping fails
    # (the original leaked the handle on any exception).
    with open('./url.txt', mode='w', encoding='utf-8') as out:
        i = 1  # running record number
        for item in anchors:
            # Skip anchors with no text, javascript stubs, and placeholder links.
            if item.string is not None and item['href'] != 'javascript:;' and item['href'] != '#':
                if not item['href'] == '/' and 'proceedings' not in item['href']:
                    # '?' is dropped from titles (illegal in Windows file names,
                    # since downFile reuses the title as a file name).
                    out.write((str(i) + '-' + item.string + '[]').replace('?', ''))
                    # Paper hash = first '-'-separated token of the last path
                    # component, e.g. 'abc123' from '.../abc123-Abstract.html'.
                    href = item['href'].split('/')[-1].split('-')[0]
                    pdf_url = ('https://proceedings.neurips.cc/paper/'
                               + year + '/file/' + href + '-Paper.pdf')
                    out.write(pdf_url + '\n')
                    i += 1
def downFile(filepath):
    """Download every PDF listed in the URL file produced by getUrl().

    Each line of the file has the form ``<n>-<title>[]<pdf-url>``; the part
    before ``[]`` becomes the local file name, the part after it is the URL.

    Args:
        filepath: path to the URL list file (e.g. './url.txt').
    """
    # Bug fix: the original ignored `filepath` and always opened 'url.txt';
    # also use a context manager so the handle is closed on error.
    with open(filepath, mode='r', encoding='utf-8') as listing:
        for line in listing:
            title = line.split('[]')[0]
            local_path = title + '.pdf'
            # Bug fix: strip the trailing newline so the URL passed to
            # wget is valid.
            pdf_url = line.split('[]')[-1].strip()
            print(local_path)
            print(pdf_url)
            # NOTE(review): this message prints before the download runs.
            print("已下载。")
            wget.download(pdf_url, local_path)  # third-party `wget` package
if __name__ == '__main__':
    # TODO: change the year here
    index_url = 'https://proceedings.neurips.cc/paper/2020'
    getUrl(index_url)
    # downFile('./url.txt')