1

I have data in this format. I am able to get a bar plot out of it but not a scatter plot because the x-axis has strings. I looked into a couple of other posts from where I couldn't gather much info.

  import pandas as pd
    import numpy as np

    df = pd.DataFrame({"id":["ssa", "ssb", "ssc", "xxa", "xxb", "xxc"], "mean":[1.3,1.5,5.2,3.1,2.1,3.2], "sd":[0.9,0.5,0.3,0.1,0.2,0.3]})
    df

I get a bar plot with error bars using the following command:

import matplotlib.pyplot as plt
ax = plt.figure()
ax = df.plot(kind='bar',x='id', y='mean',figsize=[15,6], yerr='sd')
ax.set_xlabel("id")
ax.set_ylabel("mean")
ax = plt.tight_layout()
ax = plt.show()

enter image description here

But I get an error when I try to do a scatter plot of the same df.

ax = plt.figure()
ax = df.plot(kind='scatter',x='id', y='mean',figsize=[15,6], yerr='sd')
ax.set_xlabel("id")
ax.set_ylabel("mean")
ax = plt.tight_layout()
ax = plt.show()

Error traceback:

---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-10-b3ab7237d4f1> in <module>()
      1 ax = plt.figure()
----> 2 ax = df.plot(kind='scatter',x='id', y='mean',figsize=[15,6], yerr='sd', style='.')
      3 ax.set_xlabel("id")
      4 ax.set_ylabel("mean")
      5 ax = plt.tight_layout()

C:\Users\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\plotting\_core.pyc in __call__(self, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   2618                           fontsize=fontsize, colormap=colormap, table=table,
   2619                           yerr=yerr, xerr=xerr, secondary_y=secondary_y,
-> 2620                           sort_columns=sort_columns, **kwds)
   2621     __call__.__doc__ = plot_frame.__doc__
   2622 

C:\Users\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\plotting\_core.pyc in plot_frame(data, x, y, kind, ax, subplots, sharex, sharey, layout, figsize, use_index, title, grid, legend, style, logx, logy, loglog, xticks, yticks, xlim, ylim, rot, fontsize, colormap, table, yerr, xerr, secondary_y, sort_columns, **kwds)
   1855                  yerr=yerr, xerr=xerr,
   1856                  secondary_y=secondary_y, sort_columns=sort_columns,
-> 1857                  **kwds)
   1858 
   1859 

C:\Users\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\plotting\_core.pyc in _plot(data, x, y, subplots, ax, kind, **kwds)
   1680         plot_obj = klass(data, subplots=subplots, ax=ax, kind=kind, **kwds)
   1681 
-> 1682     plot_obj.generate()
   1683     plot_obj.draw()
   1684     return plot_obj.result

C:\Users\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\plotting\_core.pyc in generate(self)
    236         self._compute_plot_data()
    237         self._setup_subplots()
--> 238         self._make_plot()
    239         self._add_table()
    240         self._make_legend()

C:\Users\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\plotting\_core.pyc in _make_plot(self)
    829         else:
    830             label = None
--> 831         scatter = ax.scatter(data[x].values, data[y].values, c=c_values,
    832                              label=label, cmap=cmap, **self.kwds)
    833         if cb:

C:\Users\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\frame.pyc in __getitem__(self, key)
   2060             return self._getitem_multilevel(key)
   2061         else:
-> 2062             return self._getitem_column(key)
   2063 
   2064     def _getitem_column(self, key):

C:\Users\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\frame.pyc in _getitem_column(self, key)
   2067         # get column
   2068         if self.columns.is_unique:
-> 2069             return self._get_item_cache(key)
   2070 
   2071         # duplicate columns & possible reduce dimensionality

C:\Users\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\generic.pyc in _get_item_cache(self, item)
   1532         res = cache.get(item)
   1533         if res is None:
-> 1534             values = self._data.get(item)
   1535             res = self._box_item_values(item, values)
   1536             cache[item] = res

C:\Users\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\internals.pyc in get(self, item, fastpath)
   3588 
   3589             if not isnull(item):
-> 3590                 loc = self.items.get_loc(item)
   3591             else:
   3592                 indexer = np.arange(len(self.items))[isnull(self.items)]

C:\Users\AppData\Local\Continuum\Anaconda2\lib\site-packages\pandas\core\indexes\base.pyc in get_loc(self, key, method, tolerance)
   2393                 return self._engine.get_loc(key)
   2394             except KeyError:
-> 2395                 return self._engine.get_loc(self._maybe_cast_indexer(key))
   2396 
   2397         indexer = self.get_indexer([key], method=method, tolerance=tolerance)

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5239)()

pandas\_libs\index.pyx in pandas._libs.index.IndexEngine.get_loc (pandas\_libs\index.c:5085)()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20405)()

pandas\_libs\hashtable_class_helper.pxi in pandas._libs.hashtable.PyObjectHashTable.get_item (pandas\_libs\hashtable.c:20359)()

KeyError: 'id'

So, instead I used seaborn to plot and it plots perfectly. However, I am not sure how to pass "sd" column to plot error bars in it.

fig, ax = plt.subplots(figsize=(5,3))
ax = sns.pointplot(x="id", y="mean",  data=df, join=False)
ax = plt.xticks(rotation=90)
ax = plt.tight_layout()
ax = plt.show()

enter image description here

fig, ax = plt.subplots(figsize=(25,5))
ax = sns.pointplot(x="id", y="mean", data=df, join=False)
ax.map(plt.errorbar, "id", "mean", "sd", marker="o")
ax = plt.xticks(rotation=90)
ax = plt.tight_layout()
ax = plt.show()

The above code throws the following error:

AttributeError                            Traceback (most recent call last)
<ipython-input-21-18652e3e8b12> in <module>()
      1 fig, ax = plt.subplots(figsize=(25,5))
      2 ax = sns.pointplot(x="id", y="mean", data=df, join=False)
----> 3 ax.map(plt.errorbar, "id", "mean", "sd", marker="o")
      4 ax = plt.xticks(rotation=90)
      5 ax = plt.tight_layout()

AttributeError: 'AxesSubplot' object has no attribute 'map'

What I would ideally like to have is a plot similar to the pointplot but with each point having a different size (as specified by its corresponding sd) or with each point having an error bar (given by sd). Can someone tell me how to do this?

1 Answer 1

3

Just use matplotlib.axes.Axes.errorbar() as shown in this example by adding this line:

ax.errorbar(np.arange(len(df['id'])), df['mean'], yerr=df['sd'], ls='None')

But I don't think you have to use seaborn:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

df = pd.DataFrame({"id":["ssa", "ssb", "ssc", "xxa", "xxb", "xxc"], 
                   "mean":[1.3,1.5,5.2,3.1,2.1,3.2], 
                   "sd":[0.9,0.5,0.3,0.1,0.2,0.3]})
plt.errorbar(np.arange(len(df['id'])), df['mean'], yerr=df['sd'], ls='None', marker='o')
ax = plt.gca()
ax.xaxis.set_ticks(np.arange(len(df['id'])))
ax.xaxis.set_ticklabels(df['id'], rotation=90)
plt.xlabel("id")
plt.ylabel("mean")

plt.show()

enter image description here

Edit: OP now wants "to remove error bars and draw your point with size proportional to sd".

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

df = pd.DataFrame({"id":["ssa", "ssb", "ssc", "xxa", "xxb", "xxc"], 
                   "mean":[1.3,1.5,5.2,3.1,2.1,3.2], 
                   "sd":[0.9,0.5,0.3,0.1,0.2,0.3]})
fig, ax = plt.subplots()
size_scaler = 300 # Your points will be too small if you just use sd
ax.scatter(np.arange(len(df['id'])), df['mean'], s=df['sd']*size_scaler, marker='o')
ax.xaxis.set_ticks(np.arange(len(df['id'])))
ax.xaxis.set_ticklabels(df['id'], rotation=90)
plt.xlabel("id")
plt.ylabel("mean")

plt.show()

enter image description here

Sign up to request clarification or add additional context in comments.

5 Comments

@Y Luo: Is there a way I can make the points look bigger based on sd instead of error bars as such?
@Gingerbread Not sure what you mean by "bigger based on sd". Please give more specifics. Are you trying to remove error bars and draw your point with size proportional to sd?
@Y Luo: Yes, exactly what you said.
@Gingerbread Thank you for clarification. Please check my edits and let me know if that's what you want.
@Y Luo: Yes, this is exactly what I wanted! Thank you so much :D

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.