0

I need help or guidance with the code below, to see whether I am doing something wrong or what I need to add. I am trying to create three tables using joins in pandas. Can anyone help me out with the code below? Right now it raises the error `ValueError: All arrays must be of the same length`, which seems to come from the last VARCHAR2 data type of table1. Thanks for the help. Here is my code.

   #folder_location = '/content/0_Test/' #You need to mount google drive and change this directory string
folder_location = '/content/drive/MyDrive/ITA HRP Data Conversion Materials/Compensation Extractor/0_Test/'
# Load the compensation worksheet from the folder above.
df = pandas.read_excel(folder_location+'Compensation Worksheet.xlsx')#,dtype=str)
# Keep only the columns that the rest of the script reads.
df = df[['Row ID',
         'Existing P Level',
         'Existing P Code',
         'Existing P Variation Code',
         'Considered "Adds to Rate"? (Y/N)',
         'Calc Method',
         'Calculation Sequence',
         'MOU',
         'Frequency',
         'Rate or Flat Amount',
         'Percent',
         'FMS Dept.',
         'Class Code',
         'Reference ID',
         'Comp Mapping Tab'
         ]
        ]
#The main filter is to avoid all Blanks and N/A on the Reference ID columns
df = df.dropna(subset=['Reference ID'])

# Assign the filtered frame back to df. Previously the mask was built, reduced
# with .any() and thrown away, so rows whose Reference ID contains "N/A" were
# never removed. astype(str) keeps the mask valid for non-string cell values.
is_not_applicable = df['Reference ID'].astype(str).str.contains(
    'N/A', case=False, regex=False, na=False)
df = df[~is_not_applicable]
df = df.fillna("") #make sure NaNs are simply empty strings

def basic_formatting(x):
  """Return ``str(x)`` stripped, with list separators normalised to ';'.

  The substitutions are applied in order: the literal ' IN ' is removed,
  newlines and commas become ';', one pass collapses ';;' into ';', and the
  figure dash is replaced by an ASCII hyphen.
  """
  text = str(x).strip()
  # Order matters: commas/newlines must become ';' before ';;' is collapsed.
  substitutions = (
      (' IN ', ''),
      ("\n", ";"),
      (',', ';'),
      (';;', ';'),
      ('‒', '-'),
  )
  for old, new in substitutions:
    text = text.replace(old, new)
  return text
  
def specific_formatting(x):
  """Upper-case ``str(x)`` and rewrite its list/exclusion markers.

  After stripping and upper-casing:
    * every occurrence of 'AND' and '&' becomes ';' (plain substring
      replacement, so 'AND' inside a longer word is replaced as well),
    * 'MOUS' and then 'MOU' are removed,
    * a leading 'ALL EXCEPT ' (any amount of whitespace between the two
      words, one whitespace character after) is replaced by 'NOT'.

  Returns the rewritten string.
  """
  # Upper-case once; the replacements below never introduce lower-case text.
  str_x = str(x).strip().upper()
  str_x = str_x.replace('AND', ';').replace('&', ';')
  # 'MOUS' must be removed before 'MOU', otherwise a stray 'S' is left behind.
  str_x = str_x.replace('MOUS', '').replace('MOU', '')
  # Raw string: '\s' in a normal string literal is an invalid escape sequence.
  return re.sub(r'^ALL\s*EXCEPT\s', 'NOT', str_x)

def class_formatting(x):
  """Return ``str(x)`` stripped of surrounding whitespace, without em dashes.

  The original pattern ``u'\\2014'`` was an octal escape (the character
  U+0081 followed by the digit '4'), so em dashes were never removed. The
  em dash is U+2014 and is matched here with a ``\\u`` escape.
  """
  return str(x).strip().replace('\u2014', '')

# Run the formatters over the multi-valued text columns. The order is kept:
# basic_formatting first, then class_formatting, then specific_formatting.
basic_columns = ['MOU','FMS Dept.','Class Code','Existing P Variation Code','Existing P Code','Existing P Level']
df[basic_columns] = df[basic_columns].applymap(basic_formatting)

class_columns = ['Class Code']
df[class_columns] = df[class_columns].applymap(class_formatting)

specific_columns = ['MOU','FMS Dept.','Class Code','Existing P Variation Code']
df[specific_columns] = df[specific_columns].applymap(specific_formatting)

# Each wd_m_* table is a list of rows; the first row holds the column names.
# Every table starts with the same two key columns.
_key_columns = ['Reference ID', 'Row ID']

wd_m_compensation_information = [_key_columns + [
    'Comp Mapping Tab',
    'Considered "Adds to Rate"? (Y/N)',
    'Calc Method',
    'Calculation Sequence',
    'Frequency',
    'Rate or Flat Amount',
    'Percent',
]]
wd_m_compensation_p_codes_and_levels = [_key_columns + ['P Bonus Code', 'P Level']]

wd_m_compensation_p_varcodes = [_key_columns + ['P Varcode']]

# Inclusion and exclusion tables share a header layout but must stay separate
# list objects; list concatenation builds a fresh header row for each of them.
wd_m_compensation_mou_inclusions = [_key_columns + ['MOU']]
wd_m_compensation_mou_exclusions = [_key_columns + ['MOU']]

wd_m_compensation_fms_inclusions = [_key_columns + ['FMS']]
wd_m_compensation_fms_exclusions = [_key_columns + ['FMS']]

wd_m_compensation_class_inclusions = [_key_columns + ['CLASS']]
wd_m_compensation_class_exclusions = [_key_columns + ['CLASS']]

# Explode every worksheet row into the wd_m_* row lists: one row in the comp
# information table, plus one row per individual P code/level combination,
# variation code, MOU, FMS department and class code.
for index, row in df.iterrows():
  ref_id = row['Reference ID']
  row_id = row['Row ID']

  #Comp information table
  r_wd_m_compensation_information = [ref_id,
                                     row_id,
                                     row['Comp Mapping Tab'],
                                     row['Considered "Adds to Rate"? (Y/N)'],
                                     row['Calc Method'],
                                     row['Calculation Sequence'],
                                     row['Frequency'],
                                     row['Rate or Flat Amount'],
                                     row['Percent']
  ]
  wd_m_compensation_information.append(r_wd_m_compensation_information)

  #P codes x P levels
  # The columns are named 'Existing P Code' / 'Existing P Level' with a capital
  # P; the lowercase spelling used before raised KeyError on the first row.
  # Values are stripped so "A; B" yields 'B' rather than ' B', matching how the
  # other multi-valued columns below are handled.
  l_p_codes = [code.strip() for code in row['Existing P Code'].split(';')]
  l_p_levels = [level.strip() for level in row['Existing P Level'].split(';')]
  for code in l_p_codes:
    for level in l_p_levels:
      # Emit every non-empty (code, level) combination.
      if code and level:
        wd_m_compensation_p_codes_and_levels.append([ref_id, row_id, code, level])

  #Varcodes
  for varcode in row['Existing P Variation Code'].split(';'):
    varcode = varcode.strip()
    if varcode:
      wd_m_compensation_p_varcodes.append([ref_id, row_id, varcode])

  #MOUs
  for mou in row['MOU'].split(';'):
    mou = mou.strip()
    if not mou:
      continue
    # An entry containing 'NOT' is an exclusion. Only the digits of the entry
    # are stored, which also drops the 'NOT' marker itself.
    mou_rows = wd_m_compensation_mou_exclusions if 'NOT' in mou else wd_m_compensation_mou_inclusions
    mou_rows.append([ref_id, row_id, re.sub(r"\D+", '', mou)])

  #FMSs
  for fms in row['FMS Dept.'].split(';'):
    fms = fms.strip()
    if not fms:
      continue
    # Same rule as for MOUs: 'NOT' selects the exclusion table, digits are kept.
    fms_rows = wd_m_compensation_fms_exclusions if 'NOT' in fms else wd_m_compensation_fms_inclusions
    fms_rows.append([ref_id, row_id, re.sub(r"\D+", '', fms)])

  #Class Codes
  for cCode in row['Class Code'].split(';'):
    cCode = cCode.strip()
    if not cCode:
      continue
    if 'NOT' not in cCode:
      wd_m_compensation_class_inclusions.append([ref_id, row_id, cCode])
    else:
      # Class codes are kept verbatim (not reduced to digits); strip again so
      # "NOT 1234" is stored as '1234' rather than ' 1234'.
      cCode = cCode.replace('NOT', '').strip()
      wd_m_compensation_class_exclusions.append([ref_id, row_id, cCode])


 
9
  • Your tables are not valid pandas DataFrames. What should table1 look like? Commented Apr 7, 2022 at 15:07
  • table1 should just look like a normal table as I specify in my code with its column names and the table name. Commented Apr 7, 2022 at 15:10
  • You're mixing SQL and pandas. You don't have the right syntax for pandas. Commented Apr 7, 2022 at 15:12
  • Ok. Can you help me out to get started and the right path ? Commented Apr 7, 2022 at 15:12
  • No, because it's very unclear what you want to do. Post what your tables should look like and your expected output. Commented Apr 7, 2022 at 15:13

2 Answers 2

1
import pandas as pd


# Column layout shared by the three schema-description frames below.
schema_columns = ['COLUMN_NAME', 'DATA_TYPE']

# WD_W_F41_BONUS_MAPPING: 15 columns.
data1 = {
    'COLUMN_NAME': [
        'P_CODE', 'NOT_JOB_CLASS', 'MOU', 'NOT_MOU', 'FMS',
        'NOT_FMS', 'CLASS_CODE', 'JOB_CLASS', 'COMP_PLAN_REF_ID', 'BONUS_PERCENT',
        'BONUS_AMOUNT', 'CALC_METHOD', 'FREQUENCY', 'NOT_CLASS_CODE', 'P_LEVEL',
    ],
    'DATA_TYPE': ['VARCHAR2'] * 10 + ['NUMBER'] * 2 + ['VARCHAR2'] * 3,
}
table1 = pd.DataFrame(data1, columns=schema_columns).assign(
    TABLE_NAME='WD_W_F41_BONUS_MAPPING')

# WD_W_F41_LEGACY_PLAN: 5 columns.
data2 = {
    'COLUMN_NAME': ['CALC_METHOD', 'CALC_SEQUENCE', 'COMP_PLAN_REF_ID',
                    'ADD_RATE_OR_PAY', 'FREQUENCY'],
    'DATA_TYPE': ['VARCHAR2', 'NUMBER'] + ['VARCHAR2'] * 3,
}
table2 = pd.DataFrame(data2, columns=schema_columns).assign(
    TABLE_NAME='WD_W_F41_LEGACY_PLAN')

# WD_W_VAR_CODE_REF_ID: 11 rows, the last seven all named FREQUENCY.
data3 = {
    'COLUMN_NAME': ['CALC_METHOD', 'CALC_SEQUENCE', 'COMP_PLAN_REF_ID',
                    'ADD_RATE_OR_PAY'] + ['FREQUENCY'] * 7,
    'DATA_TYPE': ['VARCHAR2', 'NUMBER', 'NUMBER'] + ['VARCHAR2'] * 8,
}
table3 = pd.DataFrame(data3, columns=schema_columns).assign(
    TABLE_NAME='WD_W_VAR_CODE_REF_ID')

# Stack the three frames; each keeps its own 0-based index labels.
df = pd.concat((table1, table2, table3))

Output:

print(df)
         COLUMN_NAME DATA_TYPE              TABLE_NAME
0             P_CODE  VARCHAR2  WD_W_F41_BONUS_MAPPING
1      NOT_JOB_CLASS  VARCHAR2  WD_W_F41_BONUS_MAPPING
2                MOU  VARCHAR2  WD_W_F41_BONUS_MAPPING
3            NOT_MOU  VARCHAR2  WD_W_F41_BONUS_MAPPING
4                FMS  VARCHAR2  WD_W_F41_BONUS_MAPPING
5            NOT_FMS  VARCHAR2  WD_W_F41_BONUS_MAPPING
6         CLASS_CODE  VARCHAR2  WD_W_F41_BONUS_MAPPING
7          JOB_CLASS  VARCHAR2  WD_W_F41_BONUS_MAPPING
8   COMP_PLAN_REF_ID  VARCHAR2  WD_W_F41_BONUS_MAPPING
9      BONUS_PERCENT  VARCHAR2  WD_W_F41_BONUS_MAPPING
10      BONUS_AMOUNT    NUMBER  WD_W_F41_BONUS_MAPPING
11       CALC_METHOD    NUMBER  WD_W_F41_BONUS_MAPPING
12         FREQUENCY  VARCHAR2  WD_W_F41_BONUS_MAPPING
13    NOT_CLASS_CODE  VARCHAR2  WD_W_F41_BONUS_MAPPING
14           P_LEVEL  VARCHAR2  WD_W_F41_BONUS_MAPPING
0        CALC_METHOD  VARCHAR2    WD_W_F41_LEGACY_PLAN
1      CALC_SEQUENCE    NUMBER    WD_W_F41_LEGACY_PLAN
2   COMP_PLAN_REF_ID  VARCHAR2    WD_W_F41_LEGACY_PLAN
3    ADD_RATE_OR_PAY  VARCHAR2    WD_W_F41_LEGACY_PLAN
4          FREQUENCY  VARCHAR2    WD_W_F41_LEGACY_PLAN
0        CALC_METHOD  VARCHAR2    WD_W_VAR_CODE_REF_ID
1      CALC_SEQUENCE    NUMBER    WD_W_VAR_CODE_REF_ID
2   COMP_PLAN_REF_ID    NUMBER    WD_W_VAR_CODE_REF_ID
3    ADD_RATE_OR_PAY  VARCHAR2    WD_W_VAR_CODE_REF_ID
4          FREQUENCY  VARCHAR2    WD_W_VAR_CODE_REF_ID
5          FREQUENCY  VARCHAR2    WD_W_VAR_CODE_REF_ID
6          FREQUENCY  VARCHAR2    WD_W_VAR_CODE_REF_ID
7          FREQUENCY  VARCHAR2    WD_W_VAR_CODE_REF_ID
8          FREQUENCY  VARCHAR2    WD_W_VAR_CODE_REF_ID
9          FREQUENCY  VARCHAR2    WD_W_VAR_CODE_REF_ID
10         FREQUENCY  VARCHAR2    WD_W_VAR_CODE_REF_ID
Sign up to request clarification or add additional context in comments.

2 Comments

I looked at my code and my own requirements again, and my goal isn't to copy the values 1:1 from my xlsx file; that is of no use to me. My xlsx file simply describes the table structure that we want to achieve. That is, when I query `select * from p.WD_W_F41_BONUS_MAPPING`, it should output data with the columns 'P_CODE', 'NOT_JOB_CLASS', ... Can you please update your code above?
Thank you once again for helping out. However, I have made a slight update to my code: I have created the data frame as shown above, and I now want to use the wd_m_compensation_p_codes_and_levels data frame/variables by joining them with join(). Could you show me an implementation of how I can do that with my code above? Thank you once again!
0

Because of "TABLE_NAME", the dataframes aren't the same size. I don't know what your data structure should look like, so I'm not sure whether this is a solution to your question, but maybe my answer will at least help you:

import pandas as pd

def _schema_frame(pairs):
    """Build a COLUMN_NAME / DATA_TYPE frame from (name, type) pairs."""
    names, types = zip(*pairs)
    return pd.DataFrame({
        'COLUMN_NAME': list(names),
        'DATA_TYPE': list(types),
    })


# 15 rows.
table1 = _schema_frame([
    ('P_CODE', 'VARCHAR2'),
    ('NOT_JOB_CLASS', 'VARCHAR2'),
    ('MOU', 'VARCHAR2'),
    ('NOT_MOU', 'VARCHAR2'),
    ('FMS', 'VARCHAR2'),
    ('NOT_FMS', 'VARCHAR2'),
    ('CLASS_CODE', 'VARCHAR2'),
    ('JOB_CLASS', 'VARCHAR2'),
    ('COMP_PLAN_REF_ID', 'VARCHAR2'),
    ('BONUS_PERCENT', 'VARCHAR2'),
    ('BONUS_AMOUNT', 'NUMBER'),
    ('CALC_METHOD', 'NUMBER'),
    ('FREQUENCY', 'VARCHAR2'),
    ('NOT_CLASS_CODE', 'VARCHAR2'),
    ('P_LEVEL', 'VARCHAR2'),
])

# 5 rows.
table2 = _schema_frame([
    ('CALC_METHOD', 'VARCHAR2'),
    ('CALC_SEQUENCE', 'NUMBER'),
    ('COMP_PLAN_REF_ID', 'VARCHAR2'),
    ('ADD_RATE_OR_PAY', 'VARCHAR2'),
    ('FREQUENCY', 'VARCHAR2'),
])

# 11 rows; the trailing seven are all FREQUENCY / VARCHAR2.
table3 = _schema_frame([
    ('CALC_METHOD', 'VARCHAR2'),
    ('CALC_SEQUENCE', 'NUMBER'),
    ('COMP_PLAN_REF_ID', 'NUMBER'),
    ('ADD_RATE_OR_PAY', 'VARCHAR2'),
] + [('FREQUENCY', 'VARCHAR2')] * 7)

# Bare expression: the stacked frame is built but not bound to a name.
pd.concat([table1, table2, table3])

That's the result, did you mean it?

It's result

3 Comments

Thank you! Are you using joins,right?
@user18573205 I'm using pd.concat([...dataframes]), as in the code. If you mean join as for strings, yes, it works the same way.
Thank you once again for helping out. However, I have made a slight update to my code: I have created the data frame as shown above, and I now want to use the wd_m_compensation_p_codes_and_levels data frame/variables by joining them with join(). Could you show me an implementation of how I can do that with my code above? Thank you once again!

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.