import matplotlib
import matplotlib.dates as mdate
from matplotlib import style
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
from matplotlib import cbook
from matplotlib.colors import LightSource
from matplotlib.colors import BoundaryNorm
import matplotlib.ticker as mticker
import matplotlib.gridspec as gridspec
#from matplotlib.dates import bytespdate2num  # removed in matplotlib >= 3.3
import matplotlib.image as mpimg
import matplotlib.transforms as transforms
from matplotlib.offsetbox import TextArea, DrawingArea, OffsetImage, AnnotationBbox
import operator
import plotly.graph_objects as go

#import tensorflow as tf
#from tensorflow import keras
#from tensorflow.keras import layers
#from tensorflow.keras.layers.experimental import preprocessing

import numpy as np
import sys
import warnings
warnings.filterwarnings("ignore")
import timeit
import multiprocessing
import time
import matplotlib.cm as cm

import csv, codecs, datetime, math, pickle
import itertools
import os
from random import shuffle
import matplotlib.dates as mdates
from sklearn import preprocessing, svm, neighbors
#from statsmodels import robust
#from mpl_finance import candlestick2_ohlc, candlestick_ohlc, v_overlay

from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import normalize
from sklearn.model_selection import ShuffleSplit
from sklearn.model_selection import KFold
from sklearn.model_selection import RepeatedKFold
from sklearn.utils import shuffle  # shadows random.shuffle; the code below uses this one
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge

from matplotlib import colors, ticker, cm
import pandas as pd
from pandas import Series
from datetime import datetime, date, timedelta
import datetime as dt
import pylab as plot
from numpy import genfromtxt
from mpl_toolkits.axes_grid1 import ImageGrid
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
from numpy.random import uniform, seed
from mpl_toolkits.axes_grid1 import make_axes_locatable

from mpl_toolkits.mplot3d import Axes3D

from array import array
from math import log10

matplotlib.rcParams['axes.unicode_minus'] = False
import random

def open_File(data1, startDate, endDate):
    # Load one instrument's CSV, clamp the date range, and drop weekends.
    if endDate > date.today():
        endDate = date.today()
    # Move the start date forward to the first Monday.
    while startDate.weekday() != 0:
        startDate = startDate + dt.timedelta(days=1)

    #colnames = ['Date','PX_OPEN','PX_HIGH','PX_LOW','PX_LAST','PX_VOLUME']
    colnames = ['Date', 'PX_LAST', 'PX_HIGH', 'PX_LOW', 'PX_OPEN', 'PX_VOLUME']
    df = pd.read_csv(data1, encoding='utf-8', delimiter=",", decimal=".", parse_dates=True)
    #df = pd.read_excel(data1)

    df.columns = colnames[0:df.shape[1]]
    if 'REL_PE' in df.columns:
        if not np.isnan(df['REL_PE'].mean()):
            df['REL_PE'] = df['REL_PE'].replace(np.nan, df['REL_PE'].mean())
        else:
            df['REL_PE'] = df['REL_PE'].replace(np.nan, 0)
    if 'PX_VOLUME' in df.columns:
        if not np.isnan(df['PX_VOLUME'].mean()):
            df['PX_VOLUME'] = df['PX_VOLUME'].replace(np.nan, df['PX_VOLUME'].mean())
        else:
            df['PX_VOLUME'] = df['PX_VOLUME'].replace(np.nan, 0)
    d = [dt.datetime.strptime(dd, '%Y-%m-%d').date() for dd in df['Date']]

    df = df.reset_index(drop=True)
    # Drop weekend rows and anything outside [startDate, endDate].
    for j in range(0, len(d)):
        if d[j].weekday() == 5 or d[j].weekday() == 6 or d[j] < startDate or d[j] > endDate:
            df.drop([j], inplace=True, axis=0)

    df = df.sort_values(['Date'])
    df.drop_duplicates(subset="Date", keep='last', inplace=True)
    df = df.dropna()
    df = df.reset_index()
    return df, len(df)

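# Hedged usage sketch for open_File (an added example, not part of the
# original pipeline; the _demo_* name, file name and values are illustrative
# assumptions). It writes a tiny CSV in the column order the loader assumes,
# then reads it back; call it manually to sanity-check the loader.
def _demo_open_File(path='_demo_prices.csv'):
    sample = pd.DataFrame({
        'Date': ['2020-01-06', '2020-01-07', '2020-01-08'],  # Mon-Wed
        'PX_LAST': [100.0, 101.5, 100.8],
        'PX_HIGH': [101.0, 102.0, 101.2],
        'PX_LOW': [99.5, 100.9, 100.1],
        'PX_OPEN': [99.8, 101.0, 101.1],
        'PX_VOLUME': [1.0e6, 1.2e6, 0.9e6],
    })
    sample.to_csv(path, index=False)
    df, n = open_File(path, dt.date(2020, 1, 1), dt.date(2020, 2, 1))
    print(n, df[['Date', 'PX_LAST']])
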
def retrieve_name(var):
    # Return the caller-side variable name(s) bound to this object.
    import inspect
    callers_local_vars = inspect.currentframe().f_back.f_locals.items()
    return [var_name for var_name, var_val in callers_local_vars if var_val is var]

def find_filenames(path_to_dir, suffix):
    from os import listdir
    filenames = listdir(path_to_dir)
    return [filename for filename in filenames if filename.endswith(suffix)]

def my_norm(y):
    # L1-normalise: divide every entry by the sum of absolute values.
    y1 = np.sum(np.array([abs(x) for x in y]))
    y = [x / y1 for x in y]
    return y

def H_L(o, h, l, c):
    # hl: signed ratio of the wicks above/below the body midpoint.
    # coR: ratio of today's close-open move to yesterday's.
    hl = np.zeros(len(c))
    coR = np.zeros(len(c))
    hl[0] = 1
    coR[0] = 1
    for d in range(1, len(c)):
        if c[d-1] != o[d-1]:
            coR[d] = (c[d] - o[d]) / (c[d-1] - o[d-1])
        else:
            coR[d] = np.sign(c[d] - o[d]) * 100.0

        mid = (o[d] + c[d]) / 2
        if h[d] - mid > mid - l[d] and mid - l[d] != 0:
            hl[d] = -(h[d] - mid) / (mid - l[d])
        elif h[d] - mid != 0:
            hl[d] = (mid - l[d]) / (h[d] - mid)
        else:
            hl[d] = 1
    return hl, coR

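# Minimal sketch of H_L on two hand-made bars (an added example with
# illustrative numbers): for the second bar the upper wick (1.5) exceeds the
# lower wick (1.0), so hl[1] = -1.5, and coR[1] = (11-10)/(10.5-10) = 2.0.
def _demo_H_L():
    o = np.array([10.0, 10.0])
    h = np.array([11.0, 12.0])
    l = np.array([9.0, 9.5])
    c = np.array([10.5, 11.0])
    print(H_L(o, h, l, c))  # (array([ 1. , -1.5]), array([1., 2.]))
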
def correct_darsad(pred, y):
    # Fraction of nonzero predictions whose sign matches the outcome.
    count = 0.0
    zeros = 0.0
    for j in range(0, len(y)):
        if pred[j] == 0:
            zeros += 1
        elif pred[j] * y[j] > 0:
            count += 1
    if (len(y) - zeros) == 0:
        return 0
    else:
        return float(count) / (len(y) - zeros)

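# Sketch of correct_darsad with toy values (the helper below is an added
# example, not original code): of the three nonzero predictions, two match
# the sign of the outcome, so the hit rate is 2/3.
def _demo_correct_darsad():
    preds = [0.2, -0.1, 0.0, 0.3]
    actual = [0.5, 0.2, 0.1, 0.1]
    print(correct_darsad(preds, actual))  # 0.666...
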
def psar(barsdata, iaf=0.02, maxaf=0.2):
    # Parabolic SAR (Wilder): trail a stop that accelerates toward price and
    # flips side when the stop is pierced.
    length = len(barsdata)
    dates = list(barsdata['Date'])
    high = list(barsdata['PX_HIGH'])
    low = list(barsdata['PX_LOW'])
    close = list(barsdata['PX_LAST'])
    psar = close[0:len(close)]
    psarbull = [None] * length
    psarbear = [None] * length
    bull = True
    af = iaf          # acceleration factor
    ep = low[0]       # extreme point
    hp = high[0]
    lp = low[0]

    for i in range(2, length):
        if bull:
            psar[i] = psar[i-1] + af * (hp - psar[i-1])
        else:
            psar[i] = psar[i-1] + af * (lp - psar[i-1])

        reverse = False
        if bull:
            if low[i] < psar[i]:
                bull = False
                reverse = True
                psar[i] = hp
                lp = low[i]
                af = iaf
        else:
            if high[i] > psar[i]:
                bull = True
                reverse = True
                psar[i] = lp
                hp = high[i]
                af = iaf

        if not reverse:
            if bull:
                if high[i] > hp:
                    hp = high[i]
                    af = min(af + iaf, maxaf)
                if low[i-1] < psar[i]:
                    psar[i] = low[i-1]
                if low[i-2] < psar[i]:
                    psar[i] = low[i-2]
            else:
                if low[i] < lp:
                    lp = low[i]
                    af = min(af + iaf, maxaf)
                if high[i-1] > psar[i]:
                    psar[i] = high[i-1]
                if high[i-2] > psar[i]:
                    psar[i] = high[i-2]

        if bull:
            psarbull[i] = psar[i]
        else:
            psarbear[i] = psar[i]

    return np.array(psar)  #pd.DataFrame({"Dates":dates, "high":high, "low":low, "close":close, "psar":psar, "psarbear":psarbear, "psarbull":psarbull})

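# Hedged sketch: run the parabolic SAR on a few synthetic bars shaped like a
# rise, a dip and a recovery. Column names follow the project's
# Bloomberg-style schema; the data and the _demo_* name are illustrative.
def _demo_psar():
    bars = pd.DataFrame({
        'Date': pd.date_range('2020-01-01', periods=10),
        'PX_HIGH': [10, 11, 12, 13, 12, 11, 10, 11, 12, 13.0],
        'PX_LOW':  [9, 10, 11, 12, 11, 10, 9, 10, 11, 12.0],
        'PX_LAST': [9.5, 10.5, 11.5, 12.5, 11.5, 10.5, 9.5, 10.5, 11.5, 12.5],
    })
    print(psar(bars, iaf=0.02, maxaf=0.2))
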
def ISTha(pul, had, sahm, kharfor, sTarikh, eTarikh, paein, bala):
    # For each name, size a position and set stop/limit levels from the
    # distribution of adverse 4-week moves.
    bakhtha = []    # stop levels
    bordha = []     # limit levels
    gheimatha = []  # entry prices
    arzesh = []     # position values
    faalha = []     # active flags
    for i, esm in enumerate(sahm):
        df, Nf = open_File(dadehFold + esm, sTarikh, eTarikh)
        df['Date'] = pd.to_datetime(df['Date'], format='%Y-%m-%d')
        if df.loc[len(df)-1, 'Date'].date() == eTarikh:
            gheimat = list(df['PX_LAST'])[-1]
            gheimatha.append(gheimat)
            df.set_index('Date', inplace=True)
            df.sort_index(inplace=True)
            logic = {'PX_HIGH': 'max',
                     'PX_LOW': 'min',
                     'PX_OPEN': 'first',
                     'PX_LAST': 'last',
                     'PX_VOLUME': 'sum'}
            # Aggregate to 4-week bars.
            df = df.resample('4W').apply(logic)
            o = np.array(df['PX_OPEN'])
            h = np.array(df['PX_HIGH'])
            l = np.array(df['PX_LOW'])
            c = np.array(df['PX_LAST'])
            Ni = 0
            Nf = len(df)
            co = np.zeros(Nf - Ni - 1)
            HIGH = np.zeros(Nf - Ni - 1)
            LOW = np.zeros(Nf - Ni - 1)
            gain = np.zeros(Nf - Ni - 1)
            lost = np.zeros(Nf - Ni - 1)
            for k in range(Ni, Nf - 1):
                co[k-Ni] = abs(c[k] / o[k] - 1)
                # HIGH/LOW are the favourable/adverse excursions relative to
                # the open, depending on the bar's direction.
                if c[k] - o[k] > 0:
                    HIGH[k-Ni] = abs(h[k] / o[k] - 1)
                    LOW[k-Ni] = abs(l[k] / o[k] - 1)
                elif c[k] - o[k] < 0:
                    HIGH[k-Ni] = abs(l[k] / o[k] - 1)
                    LOW[k-Ni] = abs(h[k] / o[k] - 1)

            maxLose = np.percentile(LOW, paein) + 0.05 / 100
            maxGain = 3 * maxLose
            if kharfor[i] > 0:
                bakhtha.append(gheimat * (1 - maxLose))
                bordha.append(gheimat * (1 + maxGain))
            elif kharfor[i] < 0:
                bakhtha.append(gheimat * (1 + maxLose))
                bordha.append(gheimat * (1 - maxGain))
            vazn = had * 1  #(1.5-float(i)/19.0)
            if pul >= vazn:
                arzesh.append(vazn)
                pul = pul - arzesh[-1]
                faalha.append(1)
            else:
                arzesh.append(0)
                faalha.append(0)
        else:
            print(esm, "This date does not have data")
            bordha.append(0)
            bakhtha.append(0)
            gheimatha.append(0)
            arzesh.append(0)
            faalha.append(0)

    return (bordha, bakhtha, gheimatha, arzesh, faalha, pul)

def take_first(array_like):
    return array_like[0]


def take_last(array_like):
    return array_like[-1]

def daily_week_month(df):
    # Resample a daily OHLCV frame to weekly and monthly bars.
    df['Date'] = pd.to_datetime(df['Date'], format='%d.%m.%Y')
    df.set_index('Date', inplace=True)
    df.sort_index(inplace=True)
    logic = {'PX_HIGH': 'max',
             'PX_LOW': 'min',
             'PX_OPEN': 'first',
             'PX_LAST': 'last',
             'PX_VOLUME': 'sum'}

    week = df.resample('W').apply(logic)
    #df.to_csv('daily1.csv',index=True)
    #week.to_csv('weekly.csv',index=True)

    month = df.resample('M').apply(logic)
    #month.to_csv('monthly.csv',index=True)

    week = week.reset_index()
    month = month.reset_index()
    return week, month

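# Sketch of the resampler on 40 synthetic daily bars; note the function
# expects 'Date' as 'dd.mm.YYYY' strings. The data and the _demo_* name are
# illustrative additions.
def _demo_daily_week_month():
    idx = pd.date_range('2020-01-01', periods=40, freq='D')
    df = pd.DataFrame({
        'Date': idx.strftime('%d.%m.%Y'),
        'PX_OPEN': np.linspace(100.0, 110.0, 40),
        'PX_HIGH': np.linspace(101.0, 111.0, 40),
        'PX_LOW': np.linspace(99.0, 109.0, 40),
        'PX_LAST': np.linspace(100.5, 110.5, 40),
        'PX_VOLUME': np.full(40, 1.0e5),
    })
    week, month = daily_week_month(df)
    print(week.head())
    print(month.head())
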
def tarikh_tasadofi(start, end, format, prop):
    # Random date between start and end: prop in [0, 1] picks the point on
    # the line between the two timestamps.
    stime = time.mktime(time.strptime(start, format))
    etime = time.mktime(time.strptime(end, format))
    ptime = stime + prop * (etime - stime)
    return time.strftime(format, time.localtime(ptime))

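# Added example: prop=0.5 lands near the middle of the interval; in the
# pipeline prop is typically random.random().
def _demo_tarikh_tasadofi():
    print(tarikh_tasadofi('2020-01-01', '2020-12-31', '%Y-%m-%d', 0.5))
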
def beshmar_manmos(a):
    # Count negative (man) and positive (mos) entries.
    man = 0
    mos = 0
    for i in a:
        if i > 0:
            mos += 1
        elif i < 0:
            man += 1
    return (man, mos)

def plot_Ranges(name):
    # Note: Probability_dist, here and visFolder are defined elsewhere.
    fig = plt.figure(figsize=(16, 5))
    #df = open_File(here+name+'.csv',dt.date(2000,1,1),dt.date(2021,1,1))
    df, fullName = open_File(here + name + '.xlsx', dt.date(2000, 1, 1), dt.date(2021, 1, 1))
    df1 = df.copy()
    Probability_dist(name, 'Daily', df, fig, 1)

    df_W, df_M = daily_week_month(df1)
    Probability_dist(name, 'Weekly', df_W, fig, 2)
    Probability_dist(name, 'Monthly', df_M, fig, 3)
    plt.tight_layout()
    plt.savefig(visFolder + 'range_' + name + ".png", bbox_inches='tight', dpi=150)

def mAverage(values, window):
    # Simple moving average; the warm-up region is back-filled.
    weights = np.repeat(1.0, window) / window
    smas = np.convolve(values, weights)[:len(values)]
    smas[:window] = smas[window]
    return smas

def ExpMovAverage(values, window):
    # Exponentially weighted moving average via convolution.
    weights = np.exp(np.linspace(-1, 0, window))
    weights /= weights.sum()
    a = np.convolve(values, weights)[:len(values)]
    a[:window] = a[window]
    return a

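# Added sketch comparing the two smoothers on a short ramp (window=3 is an
# arbitrary illustrative choice).
def _demo_moving_averages():
    prices = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0])
    print(mAverage(prices, 3))       # equal weights across the window
    print(ExpMovAverage(prices, 3))  # recent points weighted more heavily
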
def RSI(v, wRSI):
    # Relative Strength Index over window wRSI, plus its 9-period average.
    Up = np.zeros(len(v))
    Down = np.zeros(len(v))
    RSI_dir = np.zeros(len(v))

    for d in range(1, len(v)):
        if v[d] > v[d-1]:
            Up[d] = v[d] - v[d-1]
            Down[d] = 0
        elif v[d] < v[d-1]:
            Up[d] = 0
            Down[d] = v[d-1] - v[d]
    a = mAverage(Down, wRSI)
    # Avoid division by zero where there were no down moves in the window.
    for d in range(1, len(a)):
        if a[d] == 0:
            a[d] = a[d-1]
    RS1 = mAverage(Up, wRSI) / a

    RS1 = 100 - 100 / (1 + RS1)
    RS1_mv = mAverage(RS1, 9)
    for d in range(1, len(v)):
        RSI_dir[d] = np.sign(RS1[d] - RS1_mv[d]) * RS1[d]
    return RS1, RS1_mv

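# Hedged sketch: RSI(14) needs a series comfortably longer than its window,
# so feed it 40 points of a seeded noisy upward drift (synthetic data).
def _demo_RSI():
    rng = np.random.default_rng(0)
    prices = 100 + np.cumsum(rng.normal(0.1, 1.0, 40))
    rsi, rsi_ma = RSI(prices, 14)
    print(rsi[-1], rsi_ma[-1])
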
def SOR(o, h, l, v, wSO):
    # Stochastic oscillator: %K over a 5-bar window, %D as its smoothing.
    K5 = np.zeros(len(v))
    D3 = np.zeros(len(v))
    SOR = np.zeros(len(v))
    for d in range(5, len(v)):
        K5[d] = (v[d] - np.min(l[d-4:d+1])) / (np.max(h[d-4:d+1]) - np.min(l[d-4:d+1])) * 100.0
    D3 = ExpMovAverage(K5, wSO)
    # Flip the signal on %K/%D crossovers inside overbought/oversold zones.
    for d in range(5, len(v)):
        if (K5[d-1] - D3[d-1]) * (K5[d] - D3[d]) <= 0 and D3[d] > 80:
            SOR[d] = -1
        elif (K5[d-1] - D3[d-1]) * (K5[d] - D3[d]) <= 0 and D3[d] < 20:
            SOR[d] = 1
        else:
            SOR[d] = SOR[d-1]

    return K5, K5 - D3

def MACD_M(v, slow, fast):
    # MACD line: fast EMA minus slow EMA.
    a26 = ExpMovAverage(v, slow)
    a12 = ExpMovAverage(v, fast)
    MACD = a12 - a26
    return MACD

def bolinger(df1, w):
    # Bollinger bands at 1 and 2 standard deviations; returns the distances
    # of price from each band rather than the bands themselves.
    df = pd.DataFrame()
    df['middle'] = df1.rolling(w).mean()
    df['ENHERAF'] = df1.rolling(w).std()  # ENHERAF = standard deviation
    df['UpUp'] = df['middle'] + (df['ENHERAF'] * 2)
    df['LowLow'] = df['middle'] - (df['ENHERAF'] * 2)
    df['Up'] = df['middle'] + (df['ENHERAF'] * 1)
    df['Low'] = df['middle'] - (df['ENHERAF'] * 1)

    return df1 - df['middle'], df1 - df['UpUp'], df1 - df['LowLow'], df1 - df['Up'], df1 - df['Low']

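# Added sketch: Bollinger distances on a seeded random walk. With w=20 the
# first 19 rows are NaN, so the series is made longer than the window.
def _demo_bolinger():
    close = pd.Series(100 + np.cumsum(np.random.default_rng(1).normal(0.0, 1.0, 60)))
    mid, upup, lowlow, up, low = bolinger(close, 20)
    print(mid.iloc[-1], upup.iloc[-1], lowlow.iloc[-1])
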
def Ikim(ohcl_df):
    # Ichimoku components, each returned as a distance from the close.
    tenkan_window = 9
    kijun_window = 26
    senkou_span_b_window = 52
    cloud_displacement = 26
    chikou_shift = -26

    # Dates are floats in mdates like 736740.0; the period is the difference
    # of the last two dates. A commented-out variant extended the frame by
    # cloud_displacement periods before shifting:
    #last_date = ohcl_df["Date"].iloc[-1]
    #period = last_date - ohcl_df["Date"].iloc[-2]
    #ext_beginning = decimal.Decimal(last_date+period)
    #ext_end = decimal.Decimal(last_date + ((period*cloud_displacement)+period))
    #dates_ext = list(self.drange(ext_beginning, ext_end, str(period)))
    #dates_ext_df = pd.DataFrame({"Date": dates_ext})
    #ohcl_df = ohcl_df.append(dates_ext_df)

    # Tenkan
    tenkan_sen_high = ohcl_df['PX_HIGH'].rolling(window=tenkan_window).max()
    tenkan_sen_low = ohcl_df['PX_LOW'].rolling(window=tenkan_window).min()
    ohcl_df['tenkan_sen'] = (tenkan_sen_high + tenkan_sen_low) / 2
    # Kijun
    kijun_sen_high = ohcl_df['PX_HIGH'].rolling(window=kijun_window).max()
    kijun_sen_low = ohcl_df['PX_LOW'].rolling(window=kijun_window).min()
    ohcl_df['kijun_sen'] = (kijun_sen_high + kijun_sen_low) / 2
    # Senkou Span A
    ohcl_df['senkou_span_a'] = ((ohcl_df['tenkan_sen'] + ohcl_df['kijun_sen']) / 2).shift(cloud_displacement)
    # Senkou Span B
    senkou_span_b_high = ohcl_df['PX_HIGH'].rolling(window=senkou_span_b_window).max()
    senkou_span_b_low = ohcl_df['PX_LOW'].rolling(window=senkou_span_b_window).min()
    ohcl_df['senkou_span_b'] = ((senkou_span_b_high + senkou_span_b_low) / 2).shift(cloud_displacement)
    # Chikou
    ohcl_df['chikou_span'] = ohcl_df['PX_LAST'].shift(chikou_shift)

    ohcl_df = ohcl_df.fillna(method='bfill')
    ohcl_df = ohcl_df.fillna(method='ffill')
    return (ohcl_df['tenkan_sen'] - ohcl_df['PX_LAST'], ohcl_df['kijun_sen'] - ohcl_df['PX_LAST'],
            ohcl_df['senkou_span_a'] - ohcl_df['PX_LAST'], ohcl_df['senkou_span_b'] - ohcl_df['PX_LAST'],
            ohcl_df['chikou_span'] - ohcl_df['PX_LAST'])

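# Added sketch of the Ichimoku helper on synthetic bars: it needs at least
# the 52-bar senkou_span_b window plus the 26-bar displacement, hence 120
# rows of seeded random-walk data.
def _demo_Ikim():
    n = 120
    rng = np.random.default_rng(2)
    close = 100 + np.cumsum(rng.normal(0.0, 1.0, n))
    df = pd.DataFrame({
        'PX_LAST': close,
        'PX_HIGH': close + rng.uniform(0.0, 1.0, n),
        'PX_LOW': close - rng.uniform(0.0, 1.0, n),
        'PX_OPEN': close,
    })
    tenkan, kijun, span_a, span_b, chikou = Ikim(df)
    print(tenkan.iloc[-1], kijun.iloc[-1], span_a.iloc[-1])
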
def smoothMovAverage(values, window):
    # Wilder-style smoothed moving average.
    smv = np.zeros(len(values))
    smv[0:window].fill(np.mean(values[0:window]))
    for d in range(window, len(values)):
        smv[d] = ((window - 1) * smv[d-1] + values[d]) / window
    return smv

def TR(o, h, l, v):
    # True range against the previous close.
    TR = np.zeros(len(v))
    for d in range(1, len(v)):
        TR[d] = np.max([h[d] - l[d], abs(h[d] - v[d-1]), abs(l[d] - v[d-1])])
    TR[0] = TR[1]
    return TR

def ATR(o, h, l, v, window):
    return smoothMovAverage(TR(o, h, l, v), window)

def ADX(o, h, l, v, wADX):
    # Average directional index from smoothed +DM/-DM over ATR.
    upDAX = np.zeros(len(v))
    downDAX = np.zeros(len(v))
    pDM = np.zeros(len(v))
    nDM = np.zeros(len(v))
    for d in range(1, len(v)):
        upDAX[d] = h[d] - h[d-1]
        downDAX[d] = l[d-1] - l[d]

        if upDAX[d] > downDAX[d] and upDAX[d] > 0:
            pDM[d] = upDAX[d]
        else:
            pDM[d] = 0
        if downDAX[d] > upDAX[d] and downDAX[d] > 0:
            nDM[d] = downDAX[d]
        else:
            nDM[d] = 0

    pDI = 100 * smoothMovAverage(pDM, wADX) / ATR(o, h, l, v, wADX)
    nDI = 100 * smoothMovAverage(nDM, wADX) / ATR(o, h, l, v, wADX)
    ADX = 100 * ExpMovAverage(abs(pDI - nDI) / (pDI + nDI), wADX)
    return ADX

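# Added sketch: true range, ATR and ADX on synthetic bars; a 20-bar window
# needs well over 20 bars of history to mean anything, hence 80 rows.
def _demo_trend_indicators():
    n = 80
    rng = np.random.default_rng(3)
    c = 100 + np.cumsum(rng.normal(0.0, 1.0, n))
    h, l, o = c + 1.0, c - 1.0, c.copy()
    print(TR(o, h, l, c)[-1])
    print(ATR(o, h, l, c, 20)[-1])
    print(ADX(o, h, l, c, 20)[-1])
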
def enheraf_meyar(df, w):
    # Rolling standard deviation over window w.
    df1 = df.rolling(w).std()
    return df1

def weekdays_count(fromdate, todate):
    # Number of weekdays in (fromdate, todate].
    daygenerator = (fromdate + timedelta(x + 1) for x in range((todate - fromdate).days))
    return sum(1 for day in daygenerator if day.weekday() < 5)

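# Added quick check: the week of Mon 2020-01-06 through Sun 2020-01-12
# contains exactly five weekdays; the count excludes fromdate and includes
# todate.
def _demo_weekdays_count():
    assert weekdays_count(date(2020, 1, 5), date(2020, 1, 12)) == 5
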
def objective(co, num):
    # Forward-looking target: sum of the next num returns, scaled by their
    # standard deviation.
    obj = np.zeros(len(co))
    obj[0:len(co)-num] = [sum(co[i+1:i+num+1]) / np.std(co[i+1:i+num+1]) for i in range(0, len(co) - num)]
    return obj

def remove_small_candle(X, y, size):
    # Drop samples whose |label| falls below the given percentile.
    i = 0
    size = np.percentile(abs(np.array(y)), size)
    while i < len(y):
        if abs(y[i]) < size:
            X = np.delete(X, i, 0)
            y = np.delete(y, i, 0)
        else:
            i += 1

    return (X, y)

def remove_big_candle(X, y, size):
    # Drop samples whose |label| exceeds the given percentile.
    i = 0
    size = np.percentile(abs(np.array(y)), size)
    while i < len(y):
        if abs(y[i]) > size:
            X = np.delete(X, i, 0)
            y = np.delete(y, i, 0)
        else:
            i += 1

    return (X, y)

def find_Neighbours(X, y, point, darsad):
    # Keep the fraction darsad of samples closest to point in feature space.
    size = len(X)
    z = np.zeros((size, 1))
    y = y.reshape(size, 1)
    X = np.append(X, y, axis=1)  # carry labels along while sorting
    X = np.append(X, z, axis=1)  # last column holds squared distances

    for i in range(0, size):
        X[i, -1] = np.sum([(X[i, j] - point[j])**2 for j in range(0, len(point))])
    X = X[np.argsort(X[:, -1])]
    y = X[:, -2]
    X, y = X[:int(darsad*size), :-2], y[:int(darsad*size)]

    return (X, y)

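# Added sketch: keep the closest half (darsad=0.5) of four samples to a
# query point; X rows are feature vectors, y the matching labels. Toy values.
def _demo_find_Neighbours():
    X = np.array([[0.0, 0.0], [1.0, 1.0], [5.0, 5.0], [6.0, 6.0]])
    y = np.array([1.0, 2.0, 3.0, 4.0])
    Xn, yn = find_Neighbours(X.copy(), y, point=[0.5, 0.5], darsad=0.5)
    print(Xn)  # the two rows nearest (0.5, 0.5)
    print(yn)
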
def nomreE(pishbini):
    # Net count of positive minus negative predictions.
    score = 0
    length = len(pishbini)
    for i in range(0, length):
        if pishbini[i] > 0:
            score += 1
        elif pishbini[i] < 0:
            score -= 1
    return score

def nomreU(pishbini):
    # Score six predictions against an alternating expected sign pattern
    # (-, +, -, +, -, +): agreement adds a point, disagreement subtracts one.
    score = 0
    for sign, p in zip([-1, 1, -1, 1, -1, 1], pishbini[:6]):
        if sign * p > 0:
            score += 1
        elif sign * p < 0:
            score -= 1
    return score

def nomre_ruz(pishbini):
    # Combine the two sub-scores into a -1/0/+1 daily call.
    nomre1 = nomreE(pishbini[0:7])
    nomre2 = nomreU(pishbini[7:13])
    if nomre1 > nomre2 + 2:
        return 1
    elif nomre1 < nomre2 - 2:
        return -1
    else:
        return 0

def nonZeroLlen(a):
    # Count of nonzero entries.
    return sum([1 if x != 0 else 0 for x in a])

def date_range(start, end):
    # Inclusive list of dates from start to end.
    r = (end + dt.timedelta(days=1) - start).days
    return [start + dt.timedelta(days=i) for i in range(r)]

def kamineTarikh(neshane):
    # Return the earliest and latest dates available in a feature file.
    df = pd.read_csv(neshane, encoding='utf-8', delimiter=",", decimal=".", parse_dates=True)
    df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d").dt.date
    df = df.sort_values(['Date'])
    df = df.reset_index(drop=True)
    return (df['Date'].min(), df['Date'].max())

def Afarinesh(esm, sDate, edate, monFri):
    # Build the feature table for one instrument: technical indicators,
    # calendar features, lags, and the 4-week-ahead return label.
    df, fullName = open_File(dadehFold + esm + '.csv', sDate, edate)
    df = df.dropna()
    if len(df) < 200:
        return 1

    o = np.array(df['PX_OPEN'])
    h = np.array(df['PX_HIGH'])
    l = np.array(df['PX_LOW'])
    v = np.array(df['PX_LAST'])
    v0 = np.array(df['PX_LAST'].shift(+1))

    df['co'] = (v / v0 - 1)
    df['hl'], df['coR'] = H_L(o, h, l, v)
    df['RSI'], df['RSI_mv'] = RSI(v, 14)
    df['mv10'] = ExpMovAverage(v, 10) - v
    df['mv20'] = ExpMovAverage(v, 20) - v
    df['mv50'] = ExpMovAverage(v, 50) - v
    df['mv100'] = ExpMovAverage(v, 100) - v
    #df['rel_co'] = df['co']/mAverage(abs(df['co']),20)
    df['ADX'] = ADX(o, h, l, v, 20)
    df['TR'] = TR(o, h, l, v)
    df['MACD'] = MACD_M(v, 26, 12)
    df['middle'], df['UpUp'], df['LowLow'], df['Up'], df['Low'] = bolinger(df['PX_LAST'], 20)
    df['psar'] = psar(df) - v
    df['KK'], df['DD'] = SOR(o, h, l, v, 14)
    df['en_mey'] = enheraf_meyar(df['PX_LAST'], 20)
    df['month'] = [datetime.strptime(i, "%Y-%m-%d").month for i in list(df['Date'])]
    df['Ikim1'], df['Ikim2'], df['Ikim3'], df['Ikim4'], df['Ikim5'] = Ikim(df[['PX_LAST', 'PX_HIGH', 'PX_LOW', 'PX_OPEN']])
    df['weekday'] = [datetime.strptime(i, "%Y-%m-%d").weekday() for i in list(df['Date'])]
    df.dropna(inplace=True)
    # Label: return over the next 20 trading days (about four weeks).
    df['co_4W'] = df['PX_LAST'].shift(-20) / df['PX_LAST'] - 1
    df = df.reset_index(drop=True)

    # Lagged features for the previous three days.
    for j in range(1, 4):
        df['hl' + str(j)] = df['hl'].shift(+j)
        df['PX_LAST' + str(j)] = df['PX_LAST'].shift(+j)

    for j in range(1, 4):
        df.drop(j - 1, inplace=True, axis=0)

    forecast = 'co_4W'
    forecast_out = 0
    df = df.reset_index(drop=True)
    df['label'] = df[forecast].shift(-forecast_out)

    df = df.reset_index(drop=True)
    # Drop leading rows until the first nonzero label.
    k = 0
    beband = False
    while beband == False:
        try:
            value = df.loc[k, 'label']
            df.drop([k], inplace=True, axis=0)
            k += 1
            if value != 0:
                beband = True
        except:
            k += 1
            continue
    df = df.reset_index(drop=True)

    d = [dt.datetime.strptime(dd, '%Y-%m-%d').date() for dd in df['Date']]
    if monFri == 0:
        # Optionally drop Thursdays and Fridays.
        for j in range(0, len(d)):
            if d[j].weekday() == 3 or d[j].weekday() == 4:
                df.drop([j], inplace=True, axis=0)

    return df

def khalgh(index):
    # Create or refresh the feature files for every ISIN in the given baskets.
    from os import listdir

    ll = tarkib(dadehFold, index)
    start = timeit.default_timer()
    a = list(ll['ISIN'])

    for s, i in enumerate(a):
        # File names cannot contain '/' or '*'.
        if "/" in i:
            i = i.replace("/", "_")
            print(i)
        elif "*" in i:
            i = i.replace("*", "_")
        try:
            mtime = os.path.getmtime(folder + 'featuUS/' + i.rstrip('.csv') + '_features.csv')
            try:
                df = Afarinesh(i, dt.date(2000, 1, 1), date.today(), 1)
                print('done')
                df.to_csv(folder + 'featuUS/' + i.rstrip('.csv') + '_features.csv', index=False)
            except:
                print("feature build failed for: ", i)
        except:
            print(i, "does not exist")
            try:
                df = Afarinesh(i, dt.date(2000, 1, 1), date.today(), 1)
                df.to_csv(folder + 'featuUS/' + i.rstrip('.csv') + '_features.csv', index=False)
            except:
                print("feature build failed for: ", i)
        print(float(s) / len(a) * 100)

    stop = timeit.default_timer()
    print((stop - start) / 3600.0)
    return 1

""" try:
|
|
df = pd.read_csv(folder+'features/'+i.rstrip('.csv')+'_features.csv',encoding='utf-8',delimiter=",", decimal=".",parse_dates=True)
|
|
df['Date'] = pd.to_datetime(df['Date'], format="%Y-%m-%d").dt.date
|
|
df = df.reset_index(drop=True)
|
|
#print(df)
|
|
#print(i,df)
|
|
df2 = Afarinesh(i,df.loc[len(df)-1,'Date']-dt.timedelta(days=300),date.today(),1)
|
|
#print(i,df2)
|
|
ta = pd.concat([df,df2])
|
|
ta = ta.drop_duplicates(subset = 'Ticker',keep='last')
|
|
ta.to_csv(folder+'features/'+i.rstrip('.csv')+'_features.csv',index = False)
|
|
except
|
|
"""
|
|
def tarb_az(name, tarikh_az, biboed, shru_ruz, payan_ruz):
    # Split a feature file into training rows and the rows whose dates fall
    # in tarikh_az (the evaluation dates), plus the newest feature row.
    df = pd.read_csv(name, encoding='utf-8', delimiter=",", decimal=".", parse_dates=True)
    df = df.reset_index(drop=True)
    df['date1'] = pd.to_datetime(df['Date'])
    mask = (df['date1'] >= shru_ruz) & (df['date1'] <= payan_ruz)
    df = df.loc[mask]
    d = [dt.datetime.strptime(dd, '%Y-%m-%d').date() for dd in df['Date']]
    for j in range(0, len(d)):
        if d[j].weekday() == 5 or d[j].weekday() == 6:
            df.drop([j], inplace=True, axis=0)

    df_az = df['Date'].isin(tarikh_az)
    df_az = df_az.index[df_az].tolist()

    df_tar = ~df['Date'].isin(tarikh_az)
    df_tar = df_tar.index[df_tar].tolist()

    #X = np.array(df.drop(['Date','date1','PX_OPEN','PX_HIGH','PX_LOW','co_4W','label'], axis=1))
    X = np.array(df.drop(['Date', 'date1', 'PX_LAST', 'PX_HIGH', 'PX_OPEN', 'co_4W', 'label'], axis=1))
    X = normalize(X, axis=1, norm=biboed)
    y = list(df['label'])

    # The last 20 rows have no complete 4-week-ahead label yet.
    bb = 20
    y1 = y
    y = y[:-bb]
    y = my_norm(y)
    X_tar = X[df_tar[:-bb]]
    X_az = X[df_az]
    y_tar = [y[i] for i in df_tar[:-bb]]
    y_az = [y1[i] for i in df_az]

    x_new = X[-1:]
    y_new = y1[-1]

    return X_tar, y_tar, X_az, y_az, x_new, y_new, df['Date']

def For_khar_esm(sahmha, pishbin, natij, tedad):
    # Rank by prediction and split the top tedad names into sell (For,
    # negative prediction) and buy (Khar, positive prediction) lists.
    motlagh = [abs(x) for x in pishbin]
    df = pd.DataFrame()
    df['sahmha'] = sahmha
    df['pishbin'] = pishbin
    df['motlagh'] = motlagh
    df['natij'] = natij
    df = df.sort_values(by=['pishbin'], ascending=False)
    df = df[:tedad]
    For = df[df['pishbin'] < 0]
    Khar = df[df['pishbin'] > 0]
    return list(For['sahmha']), list(For['pishbin']), list(For['natij']), list(Khar['sahmha']), list(Khar['pishbin']), list(Khar['natij'])

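# Added sketch of the long/short split on toy values: the top-3 by
# prediction are A (0.5), C (0.1) and B (-0.3); positives go to the buy
# (Khar) lists and negatives to the sell (For) lists.
def _demo_For_khar_esm():
    names = ['A', 'B', 'C', 'D']
    preds = [0.5, -0.3, 0.1, -0.8]
    outcomes = [0.02, -0.01, 0.0, -0.03]
    print(For_khar_esm(names, preds, outcomes, 3))
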
def pishbini_ruzaneh(aa, tn, alpha):
    # Fit a Ridge model per instrument on history before tn and predict the
    # newest sample.
    polis = []
    pred_kharid = []
    pred_forush = []
    base = tn
    tarikh_az = [base - dt.timedelta(days=x) for x in range(1, 121)]
    tarikh_az = [x for x in tarikh_az if x.weekday() not in [5, 6]]
    tarikh_az = [date_obj.strftime('%Y-%m-%d') for date_obj in tarikh_az]

    pishbini = []
    ys = []
    model = Ridge(alpha=alpha)  #LinearRegression()
    #clf = svm.SVC(C = 10.0**c, gamma=10.0**gamma, kernel='rbf', probability=True)

    ref = None  # evaluation-set length of the first instrument that loads
    for k, i in enumerate(aa):
        try:
            X_tar, y_tar, X_az, y_az, x_new, y_new, tarikh = tarb_az(folder + 'featuUS/' + i, tarikh_az, 'l2', dt.date(2000, 1, 1), tn)
        except:
            pishbini.append(0)
            ys.append(0)
            continue
        if ref is None:
            ref = len(y_az)
        if len(y_az) != ref:
            pishbini.append(0)
            ys.append(0)
            continue

        X_tar, y_tar = shuffle(X_tar, y_tar, random_state=int(time.time()))
        #X_tar, y_tar = remove_small_candle(X_tar, y_tar, 10)

        x_poly = X_tar  #polynomial_features.fit_transform(X_tar)
        clf = model.fit(x_poly, y_tar)

        x_poly_az = X_az  #polynomial_features.fit_transform(X_az)
        coef = 1
        ys.append(y_new)

        if coef == 1:
            # Refit on the full split and predict the most recent feature row.
            X_tar, y_tar, X_az, y_az, x_new, y_new, tarikh = tarb_az(folder + 'featuUS/' + i, tarikh_az, 'l2', dt.date(2000, 1, 1), tn)
            X_tar, y_tar = shuffle(X_tar, y_tar, random_state=int(time.time()))
            clf = model.fit(X_tar, y_tar)
            pishbini.append(clf.predict(x_new))

        if i in ['SPX Index_features.csv', 'SXXP Index_features.csv', 'NKY Index_features.csv', 'HSI Index_features.csv']:
            polis.append(pishbini[-1][0])

    return np.array(pishbini)

def entekhab_ruzaneh(pul, had, tarikh, sahmha, pishbin, boro):
    # Turn the day's predictions into a ranked buy list with price, stop and
    # limit levels, sized by the available cash.
    motlagh = [abs(x) for x in pishbin]
    df = pd.DataFrame()
    df['ticker'] = sahmha
    df['pishbin'] = pishbin
    df['motlagh'] = motlagh

    man, mos = beshmar_manmos(pishbin)
    nesbat = float(mos + man) / len(pishbin)
    ab = float(mos)
    man = max([man, 1])
    mos = max([mos, 1])
    man_ave = df[df['pishbin'] < 0]['pishbin'].sum() / man
    pos_ave = df[df['pishbin'] > 0]['pishbin'].sum() / mos

    # Keep only positive predictions, best first, capped at boro names.
    df1 = df.sort_values(by=['pishbin'], ascending=False)
    df1 = df1[df1['pishbin'] > 0]
    df = df1[:boro]

    df['date'] = tarikh
    df['limit'], df['stop'], df['price'], df['value'], df['active'], pul = ISTha(
        pul, had, df['ticker'], list(df['pishbin']),
        tarikh - dt.timedelta(days=7 * 121), tarikh, 98, 100)

    sans_csv = [str(x.replace(".csv", "")) for x in df['ticker']]

    new_dataframe = pd.DataFrame()
    new_dataframe["ticker"] = sans_csv
    new_dataframe["price"] = df["price"]

    df["ticker2"] = sans_csv

    print(new_dataframe[['ticker', 'price']])
    print(df[['ticker2', 'price']])

    df2 = df

    del df['ticker']
    df.to_excel("USRidge.xlsx", sheet_name="Ridge")

    # The returned frame uses the suffix-free names kept in 'ticker2'.
    df = df.rename(columns={'ticker2': 'ticker'})
    return df[['date', 'ticker', 'price', 'stop', 'limit']]

def baze_begir(sahm, tarikh):
    # Previous close plus the latest high/low/close for one instrument.
    # The suffix is trimmed with replace(); rstrip('_features.csv') would
    # strip a character set, not the literal suffix.
    name = sahm.replace('_features.csv', '')
    df, fullName = open_File(dadehFold + name + '.csv', tarikh - dt.timedelta(days=10), tarikh)

    h = np.array(df['PX_HIGH'])
    l = np.array(df['PX_LOW'])
    v = np.array(df['PX_LAST'])
    return (v[-2], h[-1], l[-1], v[-1])

def pish_test(tarikh_new, alpha):
    # Backtest: walk over past dates, fit per-instrument Ridge models, pick
    # the top names each day, and track a rolling hit rate over dore days.
    from os import listdir
    aa = find_filenames(folder + 'featuUS/', '.csv')
    # Iterate over a copy: removing from the list being iterated skips items.
    for i in list(aa):
        x1, x2 = kamineTarikh(folder + 'featuUS/' + i)
        print(x1, x2)
        if x1 > dt.date(2009, 1, 1) or x2 < dt.date(2020, 1, 21):
            aa.remove(i)

    random.seed(time.time())
    tarikh_new = [x for x in tarikh_new if x.weekday() not in [5, 6]]
    soFar = 0
    tedad = 0
    dore = 20
    for j, tn in enumerate(tarikh_new):
        if np.mod(j, dore) == 0:
            soFar = 0
            tedad = 0
        pred_kharid = []
        pred_forush = []
        base = tn
        tarikh_az = [base - dt.timedelta(days=x) for x in range(1, 121)]
        tarikh_az = [x for x in tarikh_az if x.weekday() not in [5, 6]]
        tarikh_az = [date_obj.strftime('%Y-%m-%d') for date_obj in tarikh_az]

        pishbini = []
        ys = []
        model = Ridge(alpha=alpha)  #LinearRegression()
        #polynomial_features = PolynomialFeatures(degree=2)
        ref = None
        for k, i in enumerate(aa):
            try:
                X_tar, y_tar, X_az, y_az, x_new, y_new, tarikh = tarb_az(folder + 'featuUS/' + i, tarikh_az, 'l2', dt.date(2000, 1, 1), tn)
            except:
                pishbini.append(0)
                ys.append(0)
                continue
            if ref is None:
                ref = len(y_az)
            if len(y_az) != ref:
                pishbini.append(0)
                ys.append(0)
                continue

            X_tar, y_tar = shuffle(X_tar, y_tar, random_state=int(time.time()))
            #X_tar, y_tar = remove_small_candle(X_tar, y_tar, 10)

            x_poly = X_tar  #polynomial_features.fit_transform(X_tar)
            clf = model.fit(x_poly, y_tar)

            x_poly_az = X_az  #polynomial_features.fit_transform(X_az)
            coef = 1
            ys.append(y_new)
            if coef == 1:
                X_tar, y_tar, X_az, y_az, x_new, y_new, tarikh = tarb_az(folder + 'featuUS/' + i, tarikh_az, 'l2', dt.date(2000, 1, 1), tn)
                X_tar, y_tar = shuffle(X_tar, y_tar, random_state=int(time.time()))
                clf = model.fit(X_tar, y_tar)
                pishbini.append(clf.predict(x_new))
            else:
                pishbini.append(0)

        pishbini = np.array(pishbini)

        for_esm, for_pish, for_natij, khar_esm, khar_pish, khar_natij = For_khar_esm(aa, pishbini, ys, 10)

        for i in range(0, len(khar_esm)):
            tedad += 1
            soFar += np.sign(khar_natij[i] * khar_pish[i])
        for i in range(0, len(for_esm)):
            tedad += 1
            soFar += np.sign(for_natij[i] * for_pish[i])

        if np.mod(j, dore) == dore - 1:
            print(tn, 'Buy:', khar_esm, 'sell:', for_esm, 'tedad:', tedad, 'soFar', 0.5 + 0.5 * soFar / tedad)

def tarkib(folder, baskets):
    # Union of the basket Excel files, de-duplicated on ISIN.
    ta = pd.DataFrame(columns=['ISIN', 'Name', 'Sector'])
    for bas in baskets:
        l = pd.read_excel(folder + bas)
        ta = pd.concat([ta, l])
    ta = ta.drop_duplicates(subset='ISIN', keep='last')
    return ta

def shabihsazi(alpha1, sabad, pishday):
    # Monte-Carlo style simulation: repeatedly sample random historical
    # evaluation dates, fit per-instrument Ridge models and accumulate
    # buy/sell hit rates.
    ll = tarkib(dadehFold, sabad)
    sahmList = list(ll['ISIN'])

    aa = [i + '_features.csv' for i in sahmList]
    print(len(aa))
    aa1 = np.array(aa)
    for k, i in enumerate(aa1):
        try:
            x1, x2 = kamineTarikh(folder + 'featuUS/' + i)
        except:
            aa.remove(i)
            continue

        if x1 > dt.date(2014, 1, 1) or x2 < dt.date(2020, 1, 30):
            aa.remove(i)

    print(pishday)

    j = 0
    random.seed(time.time())
    pred_kharid = []
    pred_forush = []
    SUM_kHARID = 0
    SUM_FORUSH = 0
    SUM_0 = 0

    while j < 100:
        # Sample random dates, keep only the requested weekday (pishday).
        tarikh_az = [dt.datetime.strptime(tarikh_tasadofi("2010-1-1", "2020-1-20", "%Y-%m-%d", random.random()), "%Y-%m-%d") for i in range(0, 121 * 7)]
        tarikh_az = [x for x in tarikh_az if x.weekday() == pishday]
        tarikh_az = [date_obj.strftime('%Y-%m-%d') for date_obj in tarikh_az]

        pishbini = []
        natijeh = []
        model = Ridge(alpha=alpha1)  #LinearRegression()
        polynomial_features = PolynomialFeatures(degree=2)
        for k, i in enumerate(aa):
            try:
                X_tar, y_tar, X_az, y_az, x_new, y_new, tarikh = tarb_az(folder + 'featuUS/' + i, tarikh_az, 'l2', dt.date(2000, 1, 1), dt.date(2100, 3, 10))
            except:
                # Assumes the first instrument loaded, so ref is set.
                pishbini.append([0 for m in range(ref)])
                natijeh.append([0 for m in range(ref)])
                continue
            if k == 0:
                ref = len(y_az)
            if len(y_az) != ref:
                pishbini.append([0 for m in range(ref)])
                natijeh.append([0 for m in range(ref)])
                continue

            X_tar, y_tar = shuffle(X_tar, y_tar, random_state=int(time.time()))
            #X_tar, y_tar = remove_small_candle(X_tar, y_tar, 10)

            x_poly = X_tar  #polynomial_features.fit_transform(X_tar)
            clf = model.fit(x_poly, y_tar)

            x_poly_az = X_az  #polynomial_features.fit_transform(X_az)

            pishbini.append(clf.predict(x_poly_az))
            natijeh.append(y_az)

        pishbini = np.asarray(pishbini)
        natijeh = np.asarray(natijeh)

        kharid_ruz = []
        natijeh_kharid = []
        forush_ruz = []
        natijeh_forush = []

        for i in range(0, pishbini.shape[1]):
            for_esm, for_pish, for_natij, khar_esm, khar_pish, khar_natij = For_khar_esm(aa, pishbini[:, i], natijeh[:, i], 20)
            kharid_ruz = kharid_ruz + khar_pish
            natijeh_kharid = natijeh_kharid + khar_natij

            forush_ruz = forush_ruz + for_pish
            natijeh_forush = natijeh_forush + for_natij

        a = correct_darsad(kharid_ruz, natijeh_kharid)
        pred_kharid.append(a * nonZeroLlen(kharid_ruz) if a > 0 else np.nan)
        SUM_kHARID += nonZeroLlen(kharid_ruz)

        a = correct_darsad(forush_ruz, natijeh_forush)
        pred_forush.append(a * nonZeroLlen(forush_ruz) if a > 0 else np.nan)
        SUM_FORUSH += nonZeroLlen(forush_ruz)

        print(alpha1, np.nansum(pred_kharid) / SUM_kHARID, SUM_kHARID, np.nansum(pred_forush) / SUM_FORUSH, SUM_FORUSH)
        del pishbini, natijeh, kharid_ruz, forush_ruz, natijeh_kharid, natijeh_forush
        j += 1
    print(alpha1, np.nansum(pred_kharid) / SUM_kHARID, SUM_kHARID,
          np.nansum(pred_forush) / SUM_FORUSH, SUM_FORUSH)

def begir_esmha(sabad):
    # Resolve the baskets to the list of feature files with enough history.
    ll = tarkib(dadehFold, sabad)
    sahmList = list(ll['ISIN'])

    aa = [i + '_features.csv' for i in sahmList]
    print(len(aa))

    aa1 = np.array(aa)
    for k, i in enumerate(aa1):
        try:
            x1, x2 = kamineTarikh(folder + 'featuUS/' + i)
        except:
            aa.remove(i)
            continue

        if x1 > dt.date(2014, 1, 1) or x2 < dt.date(2020, 4, 30):
            aa.remove(i)

    return aa

def pish_emruz(tarikh_new, alpha, pak, puljadid, basket, pasvand, pul, had, ent_kol, ent_az):
    # Production run: for each Friday in tarikh_new, predict, rank, and
    # append the day's buy list to CSV/Excel.
    from os import listdir

    ll = tarkib(dadehFold, basket)
    sahmList = list(ll['ISIN'])

    aa = [str(i) + '_features.csv' for i in sahmList]
    print(len(aa))

    for i in range(0, len(aa)):
        if "/" in aa[i]:
            aa[i] = aa[i].replace("/", "_")
        elif "*" in aa[i]:
            aa[i] = aa[i].replace("*", "_")

    aa1 = np.array(aa)
    for k, i in enumerate(aa1):
        try:
            x1, x2 = kamineTarikh(folder + 'featuUS/' + i)
        except:
            aa.remove(i)
            continue

        if x1 > dt.date(2010, 1, 1) or x2 < dt.date(2020, 6, 12):
            aa.remove(i)

    start = timeit.default_timer()
    random.seed(time.time())
    tarikh_new = [x for x in tarikh_new if x.weekday() not in [5, 6] and x < date.today()]
    # Trim leading dates until the list starts on a Friday.
    while tarikh_new[0].weekday() != 4:
        tarikh_new = tarikh_new[1:]

    soFar = 0
    tedad = 0
    bazar = 0
    boro = 10
    print(len(aa))

    for j, tn in enumerate(tarikh_new):
        pul1 = pul
        aa1 = [x.replace('_features.csv', '.csv') for x in aa]
        if tn.weekday() == 4:
            pishbini = pishbini_ruzaneh(aa, tn, alpha)
            entekhab_ruz = entekhab_ruzaneh(pul1, had, tn, aa1, pishbini, ent_kol)

            entekhab_ruz = entekhab_ruz.reset_index(drop=True)

            print('KHARID:' + pasvand)  # KHARID = buy list
            kharid = entekhab_ruz
            print(kharid.to_string())
            kharid.to_csv(pasvand + '_khar.csv', mode='a', index=False)

        stop = timeit.default_timer()

# Data locations (site-specific paths).
dadehFold = "/home/tenzin/Desktop/RIvaGet/data/databasesp500/"
folder = '/home/tenzin/Desktop/RIvaGe/data/' + '/'

#vijegi = ['PX_OPEN', "PX_HIGH", "PX_LOW", "PX_LAST", "PX_VOLUME"]
vijegi = ['PX_LAST', 'PX_HIGH', 'PX_LOW', 'PX_OPEN', 'PX_VOLUME']
US = ['1 INDU.xlsx', '1 SPX500.xlsx']  #'1 IBOV.xlsx','1 SPTSX.xlsx','1 MEXBOL.xlsx',
US_IS = ['ISIN Universes US SP500.xlsx']
EUROPE = ['1 DAX.xlsx', '1 UKX.xlsx', '1 SXXP.xlsx', '1 EURO50.xlsx']  #'1 IBEX.xlsx',
ASIA = ['1 SP_ASIA.xlsx', '1 HANG SENG.xlsx', '1 NIKKEI.xlsx']
OTHERS = ['1 IBOV.xlsx', '1 SPTSX.xlsx', '1 MEXBOL.xlsx']
ALL = US + EUROPE + ASIA + OTHERS

# khalgh(US_IS)

targetDate = dt.date(2022, 4, 29)

#pish_emruz([targetDate - dt.timedelta(days=x) for x in range(0,5)], 0, True, 0, ASIA, 'A', 1e6, 20833, 20, 20)
#pish_emruz([targetDate - dt.timedelta(days=x) for x in range(0,5)], 0, True, 0, EUROPE, 'E', 1e6, 20833, 20, 20)
pish_emruz([targetDate - dt.timedelta(days=x) for x in range(0, 5)], 0, True, 0, US, 'U', 1e6, 20833, 20, 20)