I am trying to plot a time series from a python data frame. The code is below.
import requests
from bs4 import BeautifulSoup
import pandas as pd
import datetime
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.dates import DateFormatter, YearLocator, MonthLocator
plt.style.use('ggplot')
def plot(df, filename, heading=None):
fig, ax = plt.subplots(figsize=(8, 4))
min_date = None
max_date = None
for col_name in df.columns.values:
# plot the column
col = df[col_name]
col = col[col.notnull()] # drop NAs
dates = [zzz.to_timestamp().date() for zzz in col.index]
ax.plot_date(x=dates, y=col, fmt='-', label=col_name,
tz=None, xdate=True, ydate=False, linewidth=1.5)
# establish the date range for the data
if min_date:
min_date = min(min_date, min(dates))
else:
min_date = min(dates)
if max_date:
max_date = max(max_date, max(dates))
else:
max_date = max(dates)
# give a bit of space at each end of the plot - aesthetics
span = max_date - min_date
extra = int(span.days * 0.03) * datetime.timedelta(days=1)
ax.set_xlim([min_date - extra, max_date + extra])
# format the x tick marks
ax.xaxis.set_major_formatter(DateFormatter('%Y'))
ax.xaxis.set_minor_formatter(DateFormatter('\n%b'))
ax.xaxis.set_major_locator(YearLocator())
ax.xaxis.set_minor_locator(MonthLocator(bymonthday=1, interval=2))
# grid, legend and yLabel
ax.grid(True)
ax.legend(loc='best', prop={'size':'x-small'})
ax.set_ylabel('Percent')
# heading
if heading:
fig.suptitle(heading, fontsize=12)
fig.tight_layout(pad=1.5)
# footnote
fig.text(0.99, 0.01, 'nse-timeseries-plot', ha='right',
va='bottom', fontsize=8, color='#999999')
# save to file
fig.savefig(filename, dpi=125)
url = "https://www.google.com/finance/historical?cid=207437&startdate=Jan%201%2C%201971&enddate=Jul%201%2C%202017&start={0}&num=30"
how_many_pages=138
start=0
for i in range(how_many_pages):
new_url = url.format(start)
page = requests.get(new_url)
soup = BeautifulSoup(page.content, "lxml")
table = soup.find_all('table', class_='gf-table historical_price')[0]
columns_header = [th.getText() for th in table.findAll('tr')[0].findAll('th')]
data_rows=table.findAll('tr')[1:]
data=[[td.getText() for td in data_rows[i].findAll(['td'])] for i in range(len(data_rows))]
if start == 0:
final_df = pd.DataFrame(data, columns=columns_header)
else:
df = pd.DataFrame(data, columns=columns_header)
final_df = pd.concat([final_df, df],axis=0)
start += 30
final_df.to_csv('nse_data.csv', sep='\t', encoding='utf-8')
plot(final_df,'nsetsplot')
When I run the code I get the error
AttributeError: 'numpy.int64' object has no attribute 'to_timestamp'
when I do
dates = [zzz.to_timestamp().date() for zzz in col.index]
I am using Anaconda 64-bit on Windows 7 (x86_64)
to_timestamp()?numpy,scipy,pandas?to_timedelta()andto_datetime()inpandasbut noto_timestamp(). Maybe you're just calling the wrong method or from the wrong object/scope.http://markthegraph.blogspot.com.au/2015/05/plotting-time-series-dataframes-in.html