import wget
from bs4 import BeautifulSoup
from urllib import request


def getUrl(url):
    """Scrape a NeurIPS proceedings listing page and write a url list file.

    Each qualifying <a> tag produces one line in ./url.txt of the form
    ``<index>-<title>[]<pdf_url>\\n`` (the ``[]`` separates the target file
    name from the PDF url; ``downFile`` splits on it).

    url: proceedings listing page, e.g.
         'https://proceedings.neurips.cc/paper/2020'.  The PDF links are
         built from this url, so changing the year here is sufficient
         (previously the year was hard-coded a second time below).
    """
    html = request.urlopen(url)
    soup = BeautifulSoup(html, 'html.parser')
    anchors = soup.find_all('a')
    # Derive the file-download base from the listing url instead of
    # hard-coding the year again.
    base = url.rstrip('/') + '/file/'
    with open('./url.txt', mode='w', encoding='utf-8') as out:
        index = 1
        for item in anchors:
            # Skip anchors without text and pure-navigation links.
            if item.string is None or item['href'] in ('javascript:;', '#'):
                continue
            if item['href'] == '/' or 'proceedings' in item['href']:
                continue
            # '?' is stripped because it is illegal in Windows file names.
            title_part = (str(index) + '-' + item.string + '[]').replace('?', '')
            paper_id = item['href'].split('/')[-1].split('-')[0]
            pdf_url = base + paper_id + '-Paper.pdf'
            out.write(title_part + pdf_url + '\n')
            index += 1


def downFile(filepath):
    """Download every PDF listed in the file produced by ``getUrl``.

    filepath: path to the url list file, e.g. './url.txt'.
              (Bug fix: the original ignored this parameter and always
              opened the hard-coded 'url.txt'.)
    """
    with open(filepath, mode='r', encoding='utf-8') as listing:
        for line in listing:
            name, _, pdf_url = line.partition('[]')
            # Strip the trailing newline so wget receives a clean url
            # (the original passed the raw '...\n' string).
            pdf_url = pdf_url.strip()
            file_name = name + '.pdf'
            print(file_name)
            print(pdf_url)
            wget.download(pdf_url, file_name)
            # Report success only after the download actually finished
            # (the original printed this before downloading).
            print("已下载。")


if __name__ == '__main__':
    # Change the year here; getUrl derives the PDF links from this url.
    url = 'https://proceedings.neurips.cc/paper/2020'
    getUrl(url)
    # downFile('./url.txt')