For sample, input below one will work, but unsorted
One-liner
# using two array ( recommended )
awk 'BEGIN{FS=OFS="|"}!seen[$1,$2]++{a[$1] = ($1 in a ? a[$1] ":" : "") $2}END{for(i in a)print i,a[i]}' infile
# using regexp
awk 'BEGIN{FS=OFS="|"}{ a[$1] = $1 in a ? ( a[$1] ~ ("(^|:)"$2"(:|$)") ? a[$1] : a[$1]":"$2 ) : $2}END{for(i in a)print i,a[i]}' infile
Test Results:
$ cat infile
"id-1"|"A"
"id-2"|"C"
"id-1"|"B"
"id-1"|"D"
"id-2"|"B"
"id-3"|"A"
"id-3"|"A"
"id-1"|"B"
$ awk 'BEGIN{FS=OFS="|"}!seen[$1,$2]++{a[$1] = ($1 in a ? a[$1] ":" : "") $2}END{for(i in a)print i,a[i]}' infile
"id-1"|"A":"B":"D"
"id-2"|"C":"B"
"id-3"|"A"
$ awk 'BEGIN{FS=OFS="|"}{ a[$1] = $1 in a ? ( a[$1] ~ ("(^|:)"$2"(:|$)") ? a[$1] : a[$1]":"$2 ) : $2}END{for(i in a)print i,a[i]}' infile
"id-1"|"A":"B":"D"
"id-2"|"C":"B"
"id-3"|"A"
Better Readable:
Using regexp
awk 'BEGIN{
FS=OFS="|"
}
{
a[$1] =$1 in a ?(a[$1] ~ ("(^|:)"$2"(:|$)") ? a[$1] : a[$1]":"$2):$2
}
END{
for(i in a)
print i,a[i]
}
' infile
Using two array
awk 'BEGIN{
FS=OFS="|"
}
!seen[$1,$2]++{
a[$1] = ($1 in a ? a[$1] ":" : "") $2
}
END{
for(i in a)
print i,a[i]
}' infile
Note: you can also use !seen[$0]++, it will use entire line as index, but in case in your real data, if
you want to prefer some other column, you may prefer !seen[$1,$2]++,
here column1 and column2 are used as index
"id-2"|"B:C"instead of"id-2"|"C:B"in output whenCvalue comes first.inoperator (e.g.for (i in array)) unless it's gawk and setssorted_inwill not produce sorted output - if the output looks like it IS sorted, that's pure coincidence with your specific data set and you can be sure it will not be with other input.