This is pretty horrible code, but it works. I don't normally code in Bash 😭; I usually code in Python.
This is a learning exercise for me, which is why I'm using Bash and why I'm not using any libraries.
I obviously don't expect anyone to correct the whole thing, but any pointers on how to make this more idiomatic Bash would be greatly appreciated! 🙏
Sample input:
1,3,John smith,"BA, Economics",,Economics
2,4,Brian Jones,Master of Science,,Economics
3,5,Bill jones,"MSc, Biology",,Biology
Sample output:
1,3,John Smith,"BA, Economics",jsmith@ucla.edu,Economics
2,4,Brian Jones,Master of Science,bjones4@ucla.edu,Economics
3,5,Bill Jones,"Msc, Biology",bjones5@ucla.edu,Biology
Script:
#!/bin/bash
# Reads a student CSV, capitalises each name and fills in the email column.
# Usage: <script> <input_file>
# The result is written to students_new.csv in the current directory.

if [[ $# -ne 1 ]]; then
  # Diagnostics belong on stderr so stdout stays clean for callers.
  printf 'Usage: %s <input_file>\n' "$0" >&2
  exit 1
fi

input_file="$1"

# Fail early with a clear message instead of a redirection error deep inside
# one of the processing functions.
if [[ ! -r "$input_file" || -d "$input_file" ]]; then
  printf '%s: cannot read input file: %s\n' "$0" "$input_file" >&2
  exit 1
fi

output_file="students_new.csv"

# email prefix -> number of rows in the input that produce that prefix.
declare -A email_count

# Fields of the most recently parsed CSV line (filled by parse_line).
parsed_line=()
#######################################
# Upper-cases the first character of every whitespace-separated word in a
# name (despite the function name, all words are processed, not only the
# surname). The remaining characters of each word are left untouched and
# runs of whitespace collapse to a single space.
# Arguments: $1 - full name, e.g. "john smith"
# Outputs:   the corrected name on stdout, e.g. "John Smith"
#######################################
get_corrected_surname() {
  local full_name=$1
  local -a words=()
  local word
  local result=""

  # Split on whitespace without forking; -r keeps backslashes literal.
  IFS=$' \t\n' read -r -a words <<< "$full_name"

  for word in "${words[@]}"; do
    # ${word^} upper-cases only the first character (bash 4+).
    result+="${result:+ }${word^}"
  done

  # printf, not echo: a name such as "-n" must be printed, not parsed as an
  # option.
  printf '%s\n' "$result"
}
#######################################
# Builds the local part of an email address from a full name: the first
# character of the first word followed by the whole second word, all in
# lower case. Words after the second are ignored; a one-word name yields
# just its initial.
# Arguments: $1 - full name, e.g. "John smith"
# Outputs:   the prefix on stdout, e.g. "jsmith"
#######################################
get_email_prefix() {
  # ${1,,} lower-cases the whole string (bash 4+) without spawning tr.
  local lowered=${1,,}
  local first_name="" surname="" _rest=""

  # _rest soaks up any further words so surname holds only the second one.
  IFS=$' \t\n' read -r first_name surname _rest <<< "$lowered"

  printf '%s\n' "${first_name:0:1}${surname}"
}
#######################################
# Produces the email address for one student. When the prefix derived from
# the name occurs more than once in email_count, the second argument is
# appended to the prefix to make the address distinct.
# Globals:   email_count (read)
# Arguments: $1 - full name
#            $2 - id appended to the prefix when the prefix is shared
# Outputs:   "<prefix>@ucla.edu" on stdout
#######################################
get_email() {
  local name=$1
  local location_id=$2
  local prefix
  local count=0

  prefix=$(get_email_prefix "$name")

  # Look the count up via parameter expansion rather than inside (( )):
  # in arithmetic context the subscript text can be expanded a second time,
  # which would let data from the CSV run as shell code. An empty key is
  # skipped because bash rejects it as a "bad array subscript".
  if [[ -n "$prefix" ]]; then
    count=${email_count["$prefix"]:-0}
  fi

  if (( count > 1 )); then
    prefix+=$location_id
  fi

  printf '%s\n' "${prefix}@ucla.edu"
}
#######################################
# First pass over the input: counts how many rows map to each email prefix
# so that duplicates can be detected later.
# The line is split on plain commas, which is only reliable for the first
# three columns as long as they contain no quoted commas (assumed here;
# the quoted column in the sample data comes after the name).
# Globals:   input_file (read), email_count (written)
# Arguments: none
#######################################
prep_email_count_array() {
  local id location_id full_name _rest prefix

  # The "|| [[ -n ... ]]" part keeps a final line that lacks a trailing
  # newline: read fills the variables but returns non-zero in that case.
  while IFS=',' read -r id location_id full_name _rest ||
    [[ -n "${id}${location_id}${full_name}" ]]; do
    # Skip the header row.
    if [[ "$id" == "id" ]]; then
      continue
    fi

    prefix=$(get_email_prefix "$full_name")

    # Blank lines or rows without a name yield an empty prefix, which is
    # not a valid associative-array key.
    if [[ -z "$prefix" ]]; then
      continue
    fi

    email_count["$prefix"]=$(( ${email_count["$prefix"]:-0} + 1 ))
  done < "$input_file"
}
#######################################
# Splits one CSV line into fields and stores them in the global array
# parsed_line. Commas inside double-quoted sections do not split. Fields
# keep their surrounding quotes, so they can be written back unchanged.
# A doubled quote ("") inside a quoted field is preserved as-is, and a
# trailing empty field (line ending in a comma) is kept.
# An empty line produces an empty array.
# Globals:   parsed_line (written)
# Arguments: $1 - one line of CSV text
#######################################
parse_line() {
  local line=$1
  local field=""
  local char
  local in_quotes=false
  local -i i
  local -i len=${#line}

  parsed_line=()

  if (( len == 0 )); then
    return 0
  fi

  for (( i = 0; i < len; i++ )); do
    char=${line:i:1}
    if [[ "$char" == '"' ]]; then
      # Every quote flips the state. An escaped quote ("") flips it twice,
      # so the parser is still "inside quotes" afterwards.
      if [[ "$in_quotes" == true ]]; then
        in_quotes=false
      else
        in_quotes=true
      fi
      field+=$char
    elif [[ "$char" == ',' && "$in_quotes" == false ]]; then
      parsed_line+=("$field")
      field=""
    else
      field+=$char
    fi
  done

  # Always emit the last field, even when it is empty.
  parsed_line+=("$field")
}
#######################################
# Second pass over the input: writes a header plus one corrected row per
# data line to the output file. The name column is capitalised and the
# email column is replaced with a generated address; the email value found
# in the input is ignored.
# Globals:   input_file (read), output_file (read), parsed_line (read,
#            filled by parse_line)
# Arguments: none
#######################################
parse_and_write() {
  local line id class_id full_name degree department
  local corrected_full_name email

  # One redirection around the whole group opens the output file once
  # instead of re-opening it for every row.
  {
    printf '%s\n' "id,class_id,name,degree,email,department"

    # "|| [[ -n ... ]]" keeps a last line that has no trailing newline.
    while IFS= read -r line || [[ -n "$line" ]]; do
      # Blank lines would otherwise turn into rows of empty fields.
      if [[ -z "$line" ]]; then
        continue
      fi

      parse_line "$line"
      id="${parsed_line[0]}"

      # Skip the header row of the input.
      if [[ "$id" == "id" ]]; then
        continue
      fi

      class_id="${parsed_line[1]}"
      full_name="${parsed_line[2]}"
      degree="${parsed_line[3]}"
      department="${parsed_line[5]}"

      corrected_full_name=$(get_corrected_surname "$full_name")
      email=$(get_email "$full_name" "$class_id")

      printf '%s,%s,%s,%s,%s,%s\n' \
        "$id" "$class_id" "$corrected_full_name" \
        "$degree" "$email" "$department"
    done < "$input_file"
  } > "$output_file"
}
# Pass 1: count how often each email prefix occurs in the input, so that
# duplicates are known before any address is generated.
prep_email_count_array
# Pass 2: read the input again and write the corrected rows to "$output_file".
parse_and_write