這篇文章主要介紹如何利用Jupyter Notebook做初步分析,文中介紹得非常詳細,具有一定的參考價值,感興趣的小伙伴們一定要看完!
創新互聯公司主要從事成都做網站、網站設計、網頁設計、企業做網站、公司建網站等業務。立足成都服務碾子山,十余年網站建設經驗,價格優惠、服務專業,歡迎來電咨詢建站服務:028-86922220
最近一段時間都是用Jupyter Notebook做策略的最初版本設計,就是行情導入、畫圖一類。
之前做個dataframe做分析容易,這個算是簡化版本。
新建一個DataAnalyzer類,這個簡單很多,支持從csv和MongoDB導入行情數據,以及從1分鐘K線整合出不同分鐘的K線。
下面是導入1分鐘螺紋鋼數據,整合為5分鐘K線。
from pymongo import MongoClient, ASCENDING import pandas as pd import numpy as np from datetime import datetime import talib import matplotlib.pyplot as plt import scipy.stats as st %matplotlib inline %config InlineBackend.figure_format = 'retina' class DataAnalyzer(object): """ """ def __init__(self, exportpath="C:\Project\\", datformat=['datetime', 'high', 'low', 'open', 'close','volume']): self.mongohost = None self.mongoport = None self.db = None self.collection = None self.df = pd.DataFrame() self.exportpath = exportpath self.datformat = datformat self.startBar = 2 self.endBar = 12 self.step = 2 self.pValue = 0.015 def db2df(self, db, collection, start, end, mongohost="localhost", mongoport=27017, export2csv=False): """讀取MongoDB數(shù)據(jù)庫行情記錄,輸出到Dataframe中""" self.mongohost = mongohost self.mongoport = mongoport self.db = db self.collection = collection dbClient = MongoClient(self.mongohost, self.mongoport, connectTimeoutMS=500) db = dbClient[self.db] cursor = db[self.collection].find({'datetime':{'$gte':start, '$lt':end}}).sort("datetime",ASCENDING) self.df = pd.DataFrame(list(cursor)) self.df = self.df[self.datformat] self.df = self.df.reset_index(drop=True) path = self.exportpath + self.collection + ".csv" if export2csv == True: self.df.to_csv(path, index=True, header=True) return self.df def csv2df(self, csvpath, dataname="csv_data", export2csv=False): """讀取csv行情數(shù)據(jù),輸入到Dataframe中""" csv_df = pd.read_csv(csvpath) self.df = csv_df[self.datformat] self.df["datetime"] = pd.to_datetime(self.df['datetime']) # self.df["high"] = self.df['high'].astype(float) # self.df["low"] = self.df['low'].astype(float) # self.df["open"] = self.df['open'].astype(float) # self.df["close"] = self.df['close'].astype(float) # self.df["volume"] = self.df['volume'].astype(int) self.df = self.df.reset_index(drop=True) path = self.exportpath + dataname + ".csv" if export2csv == True: self.df.to_csv(path, index=True, header=True) return self.df def df2Barmin(self, inputdf, 
barmins, crossmin=1, export2csv=False): """輸入分鐘k線dataframe數(shù)據(jù),合并多多種數(shù)據(jù),例如三分鐘/5分鐘等,如果開始時間是9點1分,crossmin = 0;如果是9點0分,crossmin為1""" dfbarmin = pd.DataFrame() highBarMin = 0 lowBarMin = 0 openBarMin = 0 volumeBarmin = 0 datetime = 0 for i in range(0, len(inputdf) - 1): bar = inputdf.iloc[i, :].to_dict() if openBarMin == 0: openBarmin = bar["open"] if highBarMin == 0: highBarMin = bar["high"] else: highBarMin = max(bar["high"], highBarMin) if lowBarMin == 0: lowBarMin = bar["low"] else: lowBarMin = min(bar["low"], lowBarMin) closeBarMin = bar["close"] datetime = bar["datetime"] volumeBarmin += int(bar["volume"]) # X分鐘已經(jīng)走完 if not (bar["datetime"].minute + crossmin) % barmins: # 可以用X整除 # 生成上一X分鐘K線的時間戳 barMin = {'datetime': datetime, 'high': highBarMin, 'low': lowBarMin, 'open': openBarmin, 'close': closeBarMin, 'volume' : volumeBarmin} dfbarmin = dfbarmin.append(barMin, ignore_index=True) highBarMin = 0 lowBarMin = 0 openBarMin = 0 volumeBarmin = 0 if export2csv == True: dfbarmin.to_csv(self.exportpath + "bar" + str(barmins)+ str(self.collection) + ".csv", index=True, header=True) return dfbarmin exportpath = "C:\\Project\\" DA = DataAnalyzer(exportpath) #數(shù)據(jù)庫導(dǎo)入 start = datetime.strptime("20190920", '%Y%m%d') end = datetime.now() dfrb8888 = DA.db2df(db="VnTrader_1Min_Db", collection="rb8888", start = start, end = end,export2csv=True) dfrb5min = DA.df2Barmin(dfrb8888,5,crossmin=1, export2csv=True) dfrb5min.tail()
2. 計算5分鐘K線的參照指標,包括標準差、RSI,以及基於5分鐘K線的5周期均線和40周期均線
# Indicator table for the 5-minute bars: one row per bar in dfrb5min.
# TA-Lib functions only accept float64 ("double") arrays, so convert the close
# column once with an explicit dtype and reuse it for every indicator.
close_5min = np.asarray(dfrb5min['close'], dtype=float)

logdata = pd.DataFrame()
logdata['close'] = dfrb5min['close']
# Disabled: these ATR columns are computed from the 1-minute frame dfrb8888,
# whose length differs from the 5-minute rows held in logdata.
# logdata['tr'] = talib.ATR(np.array(dfrb8888['high']), np.array(dfrb8888['low']), np.array(dfrb8888['close']) ,1)
# logdata['atr'] = talib.ATR(np.array(dfrb8888['high']), np.array(dfrb8888['low']), np.array(dfrb8888['close']) ,20)
logdata['std20'] = talib.STDDEV(close_5min, 20)  # 20-bar standard deviation
logdata['rsi30'] = talib.RSI(close_5min, 30)     # 30-bar RSI
logdata['sma5'] = talib.SMA(close_5min, 5)       # fast average: 5 bars
logdata['sma40'] = talib.SMA(close_5min, 40)     # slow average: 40 bars
logdata.plot(subplots=True, figsize=(18, 16))
3. 使用快慢均線策略,顯示買入賣出點
# Fast/slow moving-average crossover: mark the bars where sma5 crosses sma40.
closeArray = np.array(logdata['close'])

fast = logdata['sma5']
slow = logdata['sma40']
prev_fast = fast.shift(1)
prev_slow = slow.shift(1)

# Comparisons against NaN are False, so the first row (no previous bar) and
# the indicator warm-up rows never produce a signal.
crossed_up = (fast > slow) & (prev_fast < prev_slow)
crossed_down = (fast < slow) & (prev_fast > prev_slow)

# Row positions of the signal bars; logdata carries a 0..n-1 index here, so
# positions and labels coincide.
listup = np.flatnonzero(crossed_up.values).tolist()
listdown = np.flatnonzero(crossed_down.values).tolist()

fig = plt.figure(figsize=(18, 6))
plt.plot(closeArray, color='y', lw=2.)
plt.plot(closeArray, '^', markersize=5, color='r', label='UP signal', markevery=listup)
plt.plot(closeArray, 'v', markersize=5, color='g', label='DOWN signal', markevery=listdown)
plt.legend()
plt.show()
以上是“如何利用Jupyter Notebook做初步分析”這篇文章的所有內容,感謝各位的閱讀!希望分享的內容對大家有幫助,更多相關知識,歡迎關注創新互聯行業資訊頻道!