KeyError: 0L when running packaged code

https://stackoverflow.com/questions/22757906

24-06-2023
|

Question

I have script called rra.py that has the following skelton structure:

usn=""
usnl=[]
from bs4 import BeautifulSoup 
import re
import asys
import glob
import os
import sys
import sys 

def inputIndex():
    #some processsing

def getval():
    #some processing

def parsehtml():
    #some processsing   

def ret():
    #some processing

def main():
    inputIndex()
    ret()
    parsehtml()
    getval()
    print "watsuup"
    asys.Compavg()
    asys.compSub()
    print "nearly done"
    return 0


if __name__ == '__main__':
    inputIndex()
    main()
    asys.Compavg()
        asys.compSub()

and I have another script asys.py which I have imported in the above script rra.py which has the following content.

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
import statsmodels.api as sm
from numpy.random import randn
import matplotlib as mpl
import seaborn as sns

sns.set_color_palette("deep", desat=.6)
mpl.rc("figure", figsize=(8, 4))

def Compavg():
    data=Total()
    markMax=[]
    markAvg=[]
    N = 5
    ind = np.arange(N)    
    width = 0.35 
    fig = plt.figure()
    ax = fig.add_subplot(111)      
    markMax.extend((data["Total"].max(),data["Total.1"].max(),data["Total.2"].max(),data["Total.3"].max(),data["Total.4"].max()))
    markAvg.extend((data["Total"].mean(),data["Total.1"].mean(),data["Total.2"].mean(),data["Total.3"].mean(),data["Total.4"].mean()))
    rects1 = ax.bar(ind, markMax, width, color='black')
    rects2 = ax.bar(ind+width, markAvg, width, color='green')
    ax.set_xlim(-width,len(ind)+width)
    ax.set_ylim(0,120)
    ax.set_ylabel('Marks')
    ax.set_title('Max, Mean and Your Marks')
    xTickMarks = ['Subject'+str(i) for i in range(1,6)]
    ax.set_xticks(ind+width)
    xtickNames = ax.set_xticklabels(xTickMarks)
    plt.setp(xtickNames, rotation=10, fontsize=10)
    ax.legend( (rects1[0], rects2[0]), ('Max', 'Mean') )
    plt.show()
    print "finished reading 1"

def compSub():
    #max_data = np.r_[data["Total"]].max()
    #bins = np.linspace(0, max_data, max_data + 1)
    data=Total()
        print data['Total']
    plt.hist(data['Total'],linewidth=0, alpha=.7)
    plt.hist(data['Total.1'],linewidth=0,alpha=.7)
    plt.hist(data['Total.2'],linewidth=0,alpha=.7)
    plt.hist(data['Total.3'],linewidth=0,alpha=.7)
    plt.hist(data['Total.4'],linewidth=0,alpha=.7)
    plt.title("Total marks Histogram")
    plt.xlabel("Value")
    plt.ylabel("Frequency")
    plt.show()
    print "finished reading 2"

def Total():
    print "going to read"
    data=pd.read_csv("output11cs.csv")
    df3=data[['Total','Total.1','Total.2','Total.3','Total.4','Total.5','Total.6','Total.7']]
    data["Main Total"]=df3.sum(axis=1)
    data = data.dropna()
    data.reset_index(drop=True)
    return data
    #compSub()
    #Compavg()

When I execute rra.py outside virtualenv(even inside virtualenv) by using python rra.py it executes perfectly and plots the graph and histogram as given in asys.py.

Then I created a 'setup.py' file to package it which is given below:

from ez_setup import use_setuptools
use_setuptools()
from setuptools import setup, find_packages
from setuptools.command.test import test as TestCommand
import vturra
import sys
class PyTest(TestCommand):
    def finalize_options(self):
        TestCommand.finalize_options(self)
        self.test_args = []
        self.test_suite = True

    def run_tests(self):
        import pytest
        errcode = pytest.main(self.test_args)
        sys.exit(errcode)

setup(name="vturra",
    version=vturra.__version__,
    license='Apache Software License',
    tests_require=['pytest'],
    install_requires=['matplotlib>=1.2.1',
                      'pandas',
                      'numpy',
                      'scipy',
                      'beautifulsoup',
                      'requests',
                      'patsy',
                      'seaborn==0.2.1',
                      'BeautifulSoup4'
                     ],
    cmdclass={'test': PyTest},
    description="Downloads results from VTU website and analyzes the result",
    long_description=open('README.md').read(),
    author="Muhammed Thaha",
    author_email='mthaha1989@gmail.com',
    download_url='https://github.com/stormvirux/vturra',
    packages=find_packages(exclude='tests'),
    package_data={'vturra': ['data/*.xml']},
    include_package_data=True,
    platforms='any',
    test_suite='vturra.test.test_vturra',
    classifiers = [
        'Programming Language :: Python',
        'Development Status :: 1 - Beta',
        'Natural Language :: English',
        'Environment :: Web Environment',
        'Intended Audience :: Developers',
        'License :: OSI Approved :: Apache Software License',
        'Operating System :: OS Independent',
        'Topic :: Internet :: WWW/HTTP :: Dynamic Content',
        ],
    extras_require={
        'testing': ['pytest'],
    },
    entry_points={
        "console_scripts": [
            "vturra=vturra.rra:main"
            #"pip%s=pip:main" % sys.version[:1],
            #"pip%s=pip:main" % sys.version[:3],
        ],
    }
)

I installed the package using python setup.py install in virtualenv and everything went fine with installation. Now when I execute the package vturra I get the following error:

Traceback (most recent call last):
  File "asys.py", line 65, in <module>
    compSub()
  File "asys.py", line 46, in compSub
    plt.hist(data['Total'],linewidth=0, alpha=.7)
  File "/home/storm/Documents/lol/local/lib/python2.7/site-packages/matplotlib/pyplot.py", line 2827, in hist
    stacked=stacked, **kwargs)
  File "/home/storm/Documents/lol/local/lib/python2.7/site-packages/matplotlib/axes.py", line 8249, in hist
    if isinstance(x, np.ndarray) or not iterable(x[0]):
  File "/home/storm/Documents/lol/local/lib/python2.7/site-packages/pandas/core/series.py", line 491, in __getitem__
    result = self.index.get_value(self, key)
  File "/home/storm/Documents/lol/local/lib/python2.7/site-packages/pandas/core/index.py", line 1032, in get_value
    return self._engine.get_value(s, k)
  File "index.pyx", line 97, in pandas.index.IndexEngine.get_value (pandas/index.c:2661)
  File "index.pyx", line 105, in pandas.index.IndexEngine.get_value (pandas/index.c:2476)
  File "index.pyx", line 149, in pandas.index.IndexEngine.get_loc (pandas/index.c:3215)
  File "hashtable.pyx", line 382, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6450)
  File "hashtable.pyx", line 388, in pandas.hashtable.Int64HashTable.get_item (pandas/hashtable.c:6394)
KeyError: 0L

But if I comment out the function call asys.compSub() in main() it works without any error.

UPDATE The function works perfectly when executed as a standalone script outside virtualenv but inside virtualenv you get the above error. I beleive it is an issue with virtualenv then?

Solution

Finally solved it. It was an issue between latest version ofpandas and matplotlib. Global System pandas version was 0.10 compared to virtualenv that had 0.13.0. So I downgraded to 0.10.0in virtualenv.

Now its working perfectly.

Basically its an API incompatibility with matplotlib and pandas >= 0.13. plt.hist with a Series currently will face this problem.

So either use data['Total.1'].hist()

or pass in the values plt.hist(data['Total.1'].values,....)

or downgrade pandas to a version lesser than 0.13.

Licensed under: CC-BY-SA with attribution

Not affiliated with StackOverflow