# Imports required by this section
import datetime
import io
import time

import mysql.connector
import pandas as pd
import requests

if recrawling_data == 'N' or recrawling_data == 'n':
    # Date setup
    # Today's date (Taiwan)
    today_date = datetime.datetime.today().strftime('%Y-%m-%d')
    # Last trading date (US market)
    df_last_date = (datetime.date.today() - datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    # Start date for data crawling
    start_date = (datetime.date.today() - datetime.timedelta(days=10)).strftime('%Y-%m-%d')
    # Day before the start date (set so the matching rows can be deleted from the database later)
    bf_st_date = (datetime.date.today() - datetime.timedelta(days=11)).strftime('%Y-%m-%d')
    # Convert the dates to Unix timestamps
    st_unix = str(int(time.mktime(datetime.datetime.strptime(start_date, '%Y-%m-%d').timetuple())))
    ed_unix = str(int(time.mktime(datetime.datetime.strptime(today_date, '%Y-%m-%d').timetuple())))
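    # Note: time.mktime() interprets the parsed date in the machine's local time zone,
    # while Yahoo's period1/period2 parameters are UTC epoch seconds. For daily bars the
    # offset rarely matters, but a UTC-based conversion (a sketch, not from the original
    # script) would look like:
    # st_unix = str(int(datetime.datetime.strptime(start_date, '%Y-%m-%d')
    #                   .replace(tzinfo=datetime.timezone.utc).timestamp()))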
    # Crawler setup
    s = requests.Session()
    # Request headers and cookies
    cookies = dict(B='2dlj1k5dn0vg0&b=3&s=g3')
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36'}
    # Fetch benchmark data (S&P 500) to check whether the market was open today
    bench_url = ('https://query1.finance.yahoo.com/v7/finance/download/%5EGSPC?period1=' + st_unix +
                 '&period2=' + ed_unix + '&interval=1d&events=history&crumb=xxy94FXoLdp')
    r = s.get(bench_url, cookies=cookies, headers=headers).content
    bench_data = pd.read_csv(io.StringIO(r.decode('utf-8')))
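    # Note (assumption): the cookie value and the crumb embedded in the URLs above are
    # hard-coded and will eventually expire. A commonly used workaround is to request a
    # Yahoo page first to refresh the session cookie and then call the undocumented
    # getcrumb endpoint; that endpoint is not guaranteed to keep working, so treat the
    # sketch below as an assumption rather than part of the original script:
    # s.get('https://fc.yahoo.com', headers=headers)
    # crumb = s.get('https://query1.finance.yahoo.com/v1/test/getcrumb', headers=headers).text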
    error_ticker = []
    # If the market was open today, start updating the data
    if bench_data['Date'].iloc[-1] == df_last_date:
        # Load the list of ETF ticker codes
        with open('ETF_code.txt') as code_list:
            ticker = [line.strip() for line in code_list.readlines()]

        # Start crawling
        frames = []
        for idx, i in enumerate(ticker):
            print("Running to : ", idx, "/", (len(ticker) - 1), "\n")
            url = ('https://query1.finance.yahoo.com/v7/finance/download/' + i + '?period1=' + st_unix +
                   '&period2=' + ed_unix + '&interval=1d&events=history&crumb=xxy94FXoLdp')
            r = s.get(url, cookies=cookies, headers=headers).content
            # Record delisted tickers (a short response means Yahoo returned an error page instead of CSV data)
            if len(r) < 500:
                error_ticker.append(i)
                continue
            # Collect this ETF's price data (gathered into a list and concatenated once,
            # since DataFrame.append was removed in pandas 2.x)
            df = pd.read_csv(io.StringIO(r.decode('utf-8')))
            df['code'] = i
            frames.append(df)
            # time.sleep(5)

        # Merge all ETF price data and rename the columns
        data = pd.concat(frames, ignore_index=True)
        data.columns = ['date', 'open', 'high', 'low', 'close', 'adjusted', 'volume', 'code']
        # Reformat the date so it can be loaded into the SQL database
        data['date'] = data['date'].str.replace("-", "")
        data.to_csv('ETF_Data_new.csv', sep=',', encoding='utf-8', index=False)
        print("Saving Complete!")
    # If the market was not open today, do not update the data
    else:
        print("Today is holiday!")
# Connect to MySQL
etfdb = mysql.connector.connect(
    host="xxx.xxx.xx.xxx",
    user="xxxxx",
    passwd="xxxxxxxxxxxxxx"
)
etfcursor = etfdb.cursor()
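# Note (assumption): recent versions of mysql-connector-python reject LOAD DATA LOCAL
# INFILE unless the connection is opened with allow_local_infile=True and the MySQL
# server has local_infile enabled. If the load below fails, adding that argument to
# mysql.connector.connect() above is the usual fix, e.g.:
# etfdb = mysql.connector.connect(host="...", user="...", passwd="...", allow_local_infile=True)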
# Delete the rows in the date range that is about to be re-inserted
bf_st_date = bf_st_date.replace("-", "")
df_last_date = df_last_date.replace("-", "")
# Send the SQL statement that deletes the data
query = "DELETE FROM etf.historical_data WHERE date >= " + bf_st_date + " and date <= " + df_last_date + ";"
etfcursor.execute(query)
etfdb.commit()
print("Data Deleted!")
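# Note: building the DELETE statement by string concatenation works here because both
# bounds come from strftime, but a parameterized query is the safer, more idiomatic
# pattern with mysql-connector-python (a sketch, not part of the original script):
# etfcursor.execute("DELETE FROM etf.historical_data WHERE date >= %s AND date <= %s;",
#                   (bf_st_date, df_last_date))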
# Send the SQL statement that loads the CSV into the database
query = "LOAD DATA LOCAL INFILE 'E:/NSYSU/昭文團隊/ETF/ETF_Data_new.csv' INTO TABLE etf.historical_data FIELDS TERMINATED BY ',' LINES TERMINATED BY '\r\n' IGNORE 1 ROWS;"
etfcursor.execute(query)
etfdb.commit()
print("Data Inserted!")

# Close the cursor and connection
etfcursor.close()
etfdb.close()