I have multiple (approximately 11) DataFrames that look like this:
Energy
Date
2020-09-14 42
2020-09-11 0
2020-09-10 0
2020-09-09 11
2020-09-08 0
2020-09-04 23
2020-09-03 11
2020-09-02 11
2020-09-01 19
2020-08-31 23
2020-08-28 69
2020-08-27 30
2020-08-26 15
2020-08-25 53
2020-08-24 57
2020-08-21 0
2020-08-20 0
2020-08-19 0
2020-08-18 0
2020-08-17 0
Materials
Date
2020-09-14 100
2020-09-11 89
2020-09-10 28
2020-09-09 42
2020-09-08 0
2020-09-04 50
2020-09-03 46
2020-09-02 100
2020-09-01 92
2020-08-31 17
2020-08-28 85
2020-08-27 78
2020-08-26 82
2020-08-25 78
2020-08-24 82
2020-08-21 17
2020-08-20 0
2020-08-19 0
2020-08-18 0
2020-08-17 0
How can I merge them into one big DataFrame that looks like this:
Energy Consumer Staples Consumer Discretionary ...
Date
2020-09-14 42 20 ..
2020-09-11 0 .. ..
2020-09-10 0 .. ..
2020-09-09 11 .. ..
2020-09-08 0
2020-09-04 23
2020-09-03 11
2020-09-02 11
2020-09-01 19
2020-08-31 23
2020-08-28 69
2020-08-27 30
2020-08-26 15
2020-08-25 53
2020-08-24 57
2020-08-21 0
2020-08-20 0
2020-08-19 0
2020-08-18 0
2020-08-17 0
I was thinking of using a for loop to repeatedly append or concatenate them into a new DataFrame, but the Date column goes missing that way. So I wonder how to create a complete DataFrame with the date in the leftmost column while the rest of the data and the column names remain the same. All 11 DataFrames are indexed by Date; I would like the result to have a Date column and 11 data columns with their column names.
My code is:
from collections import OrderedDict
import pandas as pd
import datetime as dt
import pandas_datareader as web
#====================================================
# Display options: print every column and never wrap or truncate cell text.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
# None means "no limit". The legacy -1 sentinel was deprecated in pandas 1.0
# and is rejected by newer releases.
pd.set_option('display.max_colwidth', None)
cmaps = OrderedDict()
print(type(cmaps.items()))
#############
# Download window: the last `prev` calendar days, ending today.
prev = 70
endDate = dt.datetime.today().date()
sDate = endDate - pd.to_timedelta(prev, unit='d')
#############
#def get_price(tickers): #input is a list or Series
#result=pd.DataFrame()
#for i in tickers:
#df=pd.DataFrame()
#df['Adj Close']=web.DataReader(i,'yahoo',sDate,endDate)['Adj Close']
#df['MA']=df['Adj Close'].rolling(5).mean()
#df.sort_values(ascending=False,inplace=True,by="Date")
#df['Higher?']=df['Adj Close']>df['MA']
#df['Higher?']=df['Higher?'].astype(int)
#result['{}'.format(i)]=df['Higher?']
#return result
#--------------------------------------------------------------code from stackoverflow
def get_price(tickers, roll_num=20):  # input must be a named Series (``tickers.name`` is read)
    """Return the percentage of tickers closing above their moving average.

    For every ticker, the 'Adj Close' prices between the module-level
    ``sDate`` and ``endDate`` are downloaded from Yahoo through
    ``pandas_datareader``, compared against their ``roll_num``-day rolling
    mean, and turned into a 0/1 flag. The flags are then averaged across
    tickers for each date and expressed as an integer percentage.

    Parameters
    ----------
    tickers : pandas.Series
        Ticker symbols. Its ``name`` becomes the name of the single output
        column (and of the ``name`` attribute set on the returned frame).
    roll_num : int, default 20
        Window length of the rolling mean.

    Returns
    -------
    pandas.DataFrame
        One column named ``tickers.name``, indexed by date with the newest
        date first. The oldest ``roll_num - 1`` rows are dropped because the
        rolling mean is undefined there.

    Notes
    -----
    A ticker whose download or processing fails is reported on stdout and
    left out of the result; it does not abort the whole run.
    """
    result = pd.DataFrame()
    pic = pd.DataFrame()
    for i in tickers:
        try:
            df = pd.DataFrame()
            df['Adj Close'] = web.DataReader(i, 'yahoo', sDate, endDate)['Adj Close']
            df['MA'] = df['Adj Close'].rolling(roll_num).mean()
            # Sort on the index itself rather than by the label "Date":
            # this works whatever the downloaded index happens to be named.
            df.sort_index(ascending=False, inplace=True)
            # Rows where the moving average is still NaN compare as False -> 0.
            df['Higher?'] = (df['Adj Close'] > df['MA']).astype(int)
            result[str(i)] = df['Higher?']
        except Exception as ex:  # best effort: report the ticker and carry on
            print('Ticker', i, 'ERROR', ex)
            print(df)
    # Share of successfully processed tickers above their average, in percent.
    pic[tickers.name] = (result.sum(axis=1) / len(result.columns) * 100).astype(int)
    pic.name = tickers.name
    # Rows are newest-first, so the tail holds the oldest dates, i.e. the ones
    # for which the rolling window was not yet full.
    pic.drop(pic.tail(roll_num - 1).index, inplace=True)
    return pic
#--------------------------------------------------------------
# Quick smoke test of get_price on a handful of tickers.
test = pd.Series(['A', 'TSLA', 'KO', 'T', 'aapl', 'nke'])
# regex=False makes the dot a literal character. The default of `regex`
# differs between pandas versions, and as a regex '.' matches any character.
test = test.str.replace('.', '-', regex=False)
test.name = 'I am test'
a = get_price(test)
print(a)
#=============================================================================
# Build one column per SPDR sector ETF and join them into a single frame.
base_url = "http://www.sectorspdr.com/sectorspdr/IDCO.Client.Spdrs.Holdings/Export/ExportExcel?symbol="
data = {
    'Ticker': ['XLC', 'XLY', 'XLP', 'XLE', 'XLF', 'XLV', 'XLI', 'XLB', 'XLRE', 'XLK', 'XLU'],
    'Name': ['Communication Services', 'Consumer Discretionary', 'Consumer Staples', 'Energy', 'Financials', 'Health Care', 'Industrials', 'Materials', 'Real Estate', 'Technology', 'Utilities'],
}
spdr_df = pd.DataFrame(data)
print(spdr_df)

# One single-column, date-indexed frame per sector is collected here.
sector_frames = []
for i, row in spdr_df.iterrows():
    url = base_url + row['Ticker']
    df_url = pd.read_excel(url)
    # The first data row of the export is used as the header row.
    header = df_url.iloc[0]
    # Work on a copy so relabelling the columns does not touch a slice of
    # df_url, and assign the labels directly instead of using the `inplace`
    # keyword of set_axis, which newer pandas no longer accepts.
    holdings_df = df_url[1:].copy()
    holdings_df.columns = header
    # regex=False: replace the literal dot only.
    holdings_df = holdings_df['Symbol'].str.replace('.', '-', regex=False)
    holdings_df.name = row['Name']
    b = get_price(holdings_df)
    print(b)
    sector_frames.append(b)

# Align all sector columns side by side on their shared date index.
# Dates missing for a sector become NaN. Keep the newest date first.
# Use final_product.reset_index() if Date is needed as a regular column.
final_product = pd.concat(sector_frames, axis=1).sort_index(ascending=False)
print(final_product)