0

I have the following function:

def get_list_of_university_towns():
    """Parse 'university_towns.txt' into a DataFrame with 'State' and 'RegionName' columns.

    A line containing '[edit]' is treated as a state header; every other
    line is treated as a region belonging to the most recent header.
    After parsing, each value in the 'State' column is replaced by its
    two-letter abbreviation via the inverted ``states`` mapping.
    """
    import re 
    import pandas as pd 
    dataframe = pd.DataFrame(columns=('State','RegionName'))
    with open('university_towns.txt',"r") as f_in:
        lines = f_in.readlines()
        i = 0 
        for line in lines: 
            if '[edit]' in line:
                 # Header line: keep the text before the first '(' or '['.
                 states = re.search(r'^([^(\[]+)', line).group(1)
            else:
                # Region line: despite the name, `countries` holds the
                # text that ends up in the 'RegionName' column.
                countries = re.search(r'^([^(\[]+)', line).group(1)
                # Append one row at the next integer label.
                dataframe.loc[i] = [states,countries] 
                i += 1 
        listed = []
        # NOTE: `states` is rebound here from the last parsed header string
        # to the abbreviation -> full-name lookup table.
        states = {'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa', 'NV': 'Nevada', 'WY': 'Wyoming', 'NA': 'National', 'AL': 'Alabama', 'MD': 'Maryland', 'AK': 'Alaska', 'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana', 'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia', 'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas', 'ME': 'Maine', 'WA': 'Washington', 'HI': 'Hawaii', 'WI': 'Wisconsin', 'MI': 'Michigan', 'IN': 'Indiana', 'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam', 'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina', 'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands', 'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut', 'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana', 'KS': 'Kansas', 'NY': 'New York', 'NE': 'Nebraska', 'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California', 'CO': 'Colorado', 'PA': 'Pennsylvania', 'DE': 'Delaware', 'NM': 'New Mexico', 'RI': 'Rhode Island', 'MN': 'Minnesota', 'VI': 'Virgin Islands', 'NH': 'New Hampshire', 'MA': 'Massachusetts', 'GA': 'Georgia', 'ND': 'North Dakota', 'VA': 'Virginia'} 
        # Inverted table: full state name -> abbreviation.
        statesinverse = {v: k for k, v in states.items()}
        # Collect the abbreviation for every 'State' value found in the
        # inverted table (e.g. 'Alabama' -> 'AL').
        # NOTE: values missing from the table are skipped, so `listed`
        # can end up shorter than the column it is assigned to below.
        for i in dataframe['State']:
            if i in statesinverse.keys():
                value = statesinverse.get(i)
                listed.append(value)
        # Overwrite the parsed state names with the collected abbreviations.
        dataframe['State'] = listed
    return dataframe
get_list_of_university_towns()

However, this prints an output in the form:

State   RegionName
0   AL  Auburn
1   AL  Florence
2   AL  Jacksonville

I was hoping for state to appear as 'Alabama' rather than 'AL'.

Thus, I would like the keys of the dictionary 'statesinverse' (the full state names) to appear in the 'State' column, not its values (the abbreviations).

Would anybody be able to give me a helping hand?

1
  • 2
    you reversed the keys, you don't need to do that Commented May 20, 2020 at 14:41

2 Answers 2

3

You don't need to reverse the dictionary. dict.get(<key>) works like dict[<key>] and returns the value stored under the given key; the difference is that get() returns None instead of raising a KeyError when the key is missing. Make sure the 'State' column of the dataframe you are using actually contains the state code, e.g. 'AL':

def get_list_of_university_towns():
    """Return a DataFrame of university towns with full state names.

    Reads ``university_towns.txt`` from the current working directory.
    A line containing ``[edit]`` starts a new state; every other
    non-blank line is a town belonging to the most recent state.  Only
    the text before the first ``(`` or ``[`` on a line is kept, with
    surrounding whitespace removed.

    State headers may be written either as full names ("Alabama") or as
    two-letter codes ("AL"); codes are expanded to full names, and any
    header that is not a known code is kept as written.  Town lines
    that appear before the first state header are ignored.

    Returns:
        pandas.DataFrame: columns ``State`` and ``RegionName``, one row
        per town, in file order.

    Raises:
        OSError: if ``university_towns.txt`` cannot be opened.
    """
    import re
    import pandas as pd

    # Abbreviation -> full state name.
    states = {
        'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa', 'NV': 'Nevada',
        'WY': 'Wyoming', 'NA': 'National', 'AL': 'Alabama', 'MD': 'Maryland',
        'AK': 'Alaska', 'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana',
        'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia',
        'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas', 'ME': 'Maine',
        'WA': 'Washington', 'HI': 'Hawaii', 'WI': 'Wisconsin', 'MI': 'Michigan',
        'IN': 'Indiana', 'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam',
        'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina',
        'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands',
        'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut',
        'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana',
        'KS': 'Kansas', 'NY': 'New York', 'NE': 'Nebraska', 'OK': 'Oklahoma',
        'FL': 'Florida', 'CA': 'California', 'CO': 'Colorado',
        'PA': 'Pennsylvania', 'DE': 'Delaware', 'NM': 'New Mexico',
        'RI': 'Rhode Island', 'MN': 'Minnesota', 'VI': 'Virgin Islands',
        'NH': 'New Hampshire', 'MA': 'Massachusetts', 'GA': 'Georgia',
        'ND': 'North Dakota', 'VA': 'Virginia',
    }

    # Leading text of a line, up to (not including) the first '(' or '['.
    leading_text = re.compile(r'^([^(\[]+)')

    rows = []
    current_state = None
    with open('university_towns.txt', "r") as f_in:
        for line in f_in:
            match = leading_text.search(line)
            name = match.group(1).strip() if match else ''
            if not name:
                # Blank line, or a line that starts with '(' / '['.
                continue
            if '[edit]' in line:
                # dict.get with a default expands a code such as 'AL' and
                # leaves an already-full (or unknown) name untouched, so
                # no row is ever dropped and the column lengths always agree.
                current_state = states.get(name, name)
            elif current_state is not None:
                rows.append((current_state, name))

    # Build the frame once instead of growing it row by row with .loc.
    return pd.DataFrame(rows, columns=['State', 'RegionName'])
get_list_of_university_towns()

Test it:

# Lookup table keyed by two-letter code; each value is the full name.
states = {'OH': 'Ohio', 'KY': 'Kentucky', 'AS': 'American Samoa', 'NV': 'Nevada', 'WY': 'Wyoming', 'NA': 'National', 'AL': 'Alabama', 'MD': 'Maryland', 'AK': 'Alaska', 'UT': 'Utah', 'OR': 'Oregon', 'MT': 'Montana', 'IL': 'Illinois', 'TN': 'Tennessee', 'DC': 'District of Columbia', 'VT': 'Vermont', 'ID': 'Idaho', 'AR': 'Arkansas', 'ME': 'Maine', 'WA': 'Washington', 'HI': 'Hawaii', 'WI': 'Wisconsin', 'MI': 'Michigan', 'IN': 'Indiana', 'NJ': 'New Jersey', 'AZ': 'Arizona', 'GU': 'Guam', 'MS': 'Mississippi', 'PR': 'Puerto Rico', 'NC': 'North Carolina', 'TX': 'Texas', 'SD': 'South Dakota', 'MP': 'Northern Mariana Islands', 'IA': 'Iowa', 'MO': 'Missouri', 'CT': 'Connecticut', 'WV': 'West Virginia', 'SC': 'South Carolina', 'LA': 'Louisiana', 'KS': 'Kansas', 'NY': 'New York', 'NE': 'Nebraska', 'OK': 'Oklahoma', 'FL': 'Florida', 'CA': 'California', 'CO': 'Colorado', 'PA': 'Pennsylvania', 'DE': 'Delaware', 'NM': 'New Mexico', 'RI': 'Rhode Island', 'MN': 'Minnesota', 'VI': 'Virgin Islands', 'NH': 'New Hampshire', 'MA': 'Massachusetts', 'GA': 'Georgia', 'ND': 'North Dakota', 'VA': 'Virginia'}
# Looking up a code returns the matching full name.
states.get('AL')

Output:

'Alabama'
Sign up to request clarification or add additional context in comments.

Comments

1

This part of the code replaces full state names with abbreviations:

listed = []
states = {'OH': 'Ohio', 'KY': 'Kentucky', ...} 
statesinverse = {v: k for k, v in states.items()}
for i in dataframe['State']:
    if i in statesinverse.keys():
        value = statesinverse.get(i)
        listed.append(value)
dataframe['State'] = listed

Try removing this code and see the output.

1 Comment

thanks so much for your help! My issue has been solved

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.