1

I have this input, and I would like to learn how to save the string after = into a variable and use it in the output filename and in the first line of the output, which should start with "#".

c ROIysiz= 28
c column1= HJD
c RedNumDa= 18262
c column3= ERROR
c column2= FLUX
c end header ---------------------------------------------------------------------------------------
2.458375368952875026e+06 -8.420548421860798386e-04 7.020812100561693928e-03
2.458375579737625085e+06 -5.579159672996818198e-03 1.285380720081348528e-03
2.458376278315599542e+06 -7.634101850411220518e-03 2.481065693991901019e-03
2.458376347386624664e+06 7.223482191697593166e-04 2.319993894372075760e-03
2.458376416108166799e+06 5.238757879614985152e-03 1.389030320490110878e-03
2.458376485913363751e+06 6.777606553373448882e-03 8.887787066666734273e-04
2.458377048675692175e+06 1.950435173388009522e-02 3.242344477396308117e-03
2.458377185153110884e+06 1.885754079806525874e-02 2.090836971653367571e-03

Desired output, for instance:

name: FLUX28ERROR

content:

# FLUX 18262
2.458375368952875026e+06 -8.420548421860798386e-04 7.020812100561693928e-03
2.458375579737625085e+06 -5.579159672996818198e-03 1.285380720081348528e-03
2.458376278315599542e+06 -7.634101850411220518e-03 2.481065693991901019e-03
2.458376347386624664e+06 7.223482191697593166e-04 2.319993894372075760e-03
2.458376416108166799e+06 5.238757879614985152e-03 1.389030320490110878e-03
2.458376485913363751e+06 6.777606553373448882e-03 8.887787066666734273e-04
2.458377048675692175e+06 1.950435173388009522e-02 3.242344477396308117e-03
2.458377185153110884e+06 1.885754079806525874e-02 2.090836971653367571e-03

I have a script:

# Copies the data records of inputname into a file named from the header.
#   - line matching ROIysiz    : last field is kept in second_out
#   - line matching column2    : the current out_file is closed, found and
#                                count are cleared, and out_file becomes
#                                <last field><second_out>
#   - line matching end header : found is set, so later records are written
#   - every later record goes to out_file; the first one is prefixed with "#"
#     (no separating space), the rest are written unchanged.
awk '
/ROIysiz/{
second_out=$NF
}
/column2/{
close(out_file)
found=count=""
out_file=$NF second_out 
next
}
/end header/{
found=1
next
}
found && out_file{
if(++count==1){
print "#" $0 > (out_file)
}
else{
print > (out_file)
}
}
' inputname

That gives a file FLUX28:

#2.458375368952875026e+06 -8.420548421860798386e-04 7.020812100561693928e-03
2.458375579737625085e+06 -5.579159672996818198e-03 1.285380720081348528e-03
2.458376278315599542e+06 -7.634101850411220518e-03 2.481065693991901019e-03
2.458376347386624664e+06 7.223482191697593166e-04 2.319993894372075760e-03
2.458376416108166799e+06 5.238757879614985152e-03 1.389030320490110878e-03
2.458376485913363751e+06 6.777606553373448882e-03 8.887787066666734273e-04
2.458377048675692175e+06 1.950435173388009522e-02 3.242344477396308117e-03
2.458377185153110884e+06 1.885754079806525874e-02 2.090836971653367571e-03

I would like to know how to add another string to filename and how to print the first line of the output:

# FLUX 18262

I tried it by:

# Second attempt: same structure as the first script, extended with a third
# file-name part (third_part) and a custom first output line.
#   - line matching ROIysiz    : last field is kept in second_out
#   - line matching "column 3" : last field is kept in third_part
#   - line matching column2    : out_file becomes
#                                <last field><second_out><third_part>
#   - after "end header"       : the first record triggers the "#" line, the
#                                following records are written unchanged.
# NOTE(review): the pattern /column 3/ contains a space, while the header line
# in the sample input reads "c column3= ERROR", so that rule never fires and
# third_part stays empty in both the file name and the "#" line.
# NOTE(review): the count==1 branch prints only "#" second_out third_part and
# not $0, so the first data record is not written to the output file.
awk '
/ROIysiz/{
second_out=$NF
}
/column 3/{
third_part=$NF
}
/column2/{
close(out_file)
found=count=""
out_file=$NF second_out third_part
next
}
/end header/{
found=1
next
}
found && out_file{
if(++count==1){
print "#" second_out third_part > (out_file)
}
else{
print > (out_file)
}
}
' inputname

that gives output filename FLUX28 and contains:

#28
2.458375579737625085e+06 -5.579159672996818198e-03 1.285380720081348528e-03
2.458376278315599542e+06 -7.634101850411220518e-03 2.481065693991901019e-03
2.458376347386624664e+06 7.223482191697593166e-04 2.319993894372075760e-03
2.458376416108166799e+06 5.238757879614985152e-03 1.389030320490110878e-03
2.458376485913363751e+06 6.777606553373448882e-03 8.887787066666734273e-04
2.458377048675692175e+06 1.950435173388009522e-02 3.242344477396308117e-03
2.458377185153110884e+06 1.885754079806525874e-02 2.090836971653367571e-03
2.458377252462999895e+06 2.159254025049928832e-02 2.315911471112144012e-03
2.458377462405352853e+06 1.721511461149537181e-02 1.687658552459528729e-03
2.458377602279778104e+06 1.744415665326638776e-02 3.041609691486800784e-03
2.458377956590285990e+06 8.597543276201942419e-03 3.490433838852374532e-03
2.458378025015166495e+06 6.127180820289755692e-03 2.437530774283428858e-03

Why is the variable third_part not printed in the output filename and on the first row? Thank you.

1
  • 1
    Indentation and white space help a lot to make any program easier to read/understand. Commented Feb 25, 2020 at 18:00

2 Answers 2

2

Untested code follows:

# Header lines ("c key= value") choose the output file name and its first
# "#" line; all remaining lines are copied to that file unchanged.
/^c/ { X[$2] = $3 }   # every line starting with "c": store field 3 under field 2, e.g. X["ROIysiz="] = 28
/^c end/ { outfile = X["column2="] X["ROIysiz="] X["column3="]   # end of header: build the name by concatenating three stored values
           print "#", X["column2="], X["RedNumDa="] > outfile }   # first output line; the "," puts OFS between the items
!/^c/ { print $0 >> outfile }   # lines not starting with "c" are written to outfile as they are

EDIT: The code is now tested:

$ cat x
c ROIysiz= 28
c column1= HJD
c RedNumDa= 18262
c column3= ERROR
c column2= FLUX
c end header ---------------------------------------------------------------------------------------
2.458375368952875026e+06 -8.420548421860798386e-04 7.020812100561693928e-03
2.458375579737625085e+06 -5.579159672996818198e-03 1.285380720081348528e-03
2.458376278315599542e+06 -7.634101850411220518e-03 2.481065693991901019e-03
2.458376347386624664e+06 7.223482191697593166e-04 2.319993894372075760e-03
2.458376416108166799e+06 5.238757879614985152e-03 1.389030320490110878e-03
2.458376485913363751e+06 6.777606553373448882e-03 8.887787066666734273e-04
2.458377048675692175e+06 1.950435173388009522e-02 3.242344477396308117e-03
2.458377185153110884e+06 1.885754079806525874e-02 2.090836971653367571e-03
$ awk '
# Header lines ("c key= value") choose the output file name and its first
# "#" line; all remaining lines are copied to that file unchanged.
/^c/ { X[$2] = $3 }   # every line starting with "c": store field 3 under field 2, e.g. X["ROIysiz="] = 28
/^c end/ { outfile = X["column2="] X["ROIysiz="] X["column3="]   # end of header: build the name by concatenating three stored values
           print "#", X["column2="], X["RedNumDa="] > outfile }   # first output line; the "," puts OFS between the items
!/^c/ { print $0 >> outfile }   # lines not starting with "c" are written to outfile as they are
' x
$ cat FLUX28ERROR
# FLUX 18262
2.458375368952875026e+06 -8.420548421860798386e-04 7.020812100561693928e-03
2.458375579737625085e+06 -5.579159672996818198e-03 1.285380720081348528e-03
2.458376278315599542e+06 -7.634101850411220518e-03 2.481065693991901019e-03
2.458376347386624664e+06 7.223482191697593166e-04 2.319993894372075760e-03
2.458376416108166799e+06 5.238757879614985152e-03 1.389030320490110878e-03
2.458376485913363751e+06 6.777606553373448882e-03 8.887787066666734273e-04
2.458377048675692175e+06 1.950435173388009522e-02 3.242344477396308117e-03
2.458377185153110884e+06 1.885754079806525874e-02 2.090836971653367571e-03

The first line, in the /^c/ pattern, saves the key and value of each header line (the lines that begin with the letter c). The second and third lines, in the /^c end/ pattern, compute the output filename using string concatenation to join the various header values, then write the first line of the output file in a similar manner, using the , operator to separate fields. The fourth line, in the !/^c/ pattern, prints each non-header line, unchanged, to the output file. Associative array X stores each header key as its index and the associated value as its value.

Sign up to request clarification or add additional context in comments.

Comments

2

Could you please try the following. As the very first line it writes a hash followed by the values of the column2 line and the RedNumDa line, and then, starting on a new line, it prints the actual data lines.

# Split Input_file into one output file per header block.
#   file name : <column2><ROIysiz><column3>     e.g. FLUX28ERROR
#   first line: "# <column2> <RedNumDa>"        e.g. # FLUX 18262
#   remainder : the data records, unchanged
awk '
# Header values may appear in any order, so they are only remembered here;
# the file name is assembled once the whole header has been read.
/ROIysiz/{
  second_out=$NF
  next
}
/RedNumDa/{
  first_line_value=$NF
  next
}
/column3/{
  third_part=$NF
  next
}
/column2/{
  first_part=$NF
  next
}
/end header/{
  close(out_file)                      # finish the file of the previous block, if any
  out_file=first_part second_out third_part
  if(out_file!=""){
    # The commas insert OFS, giving "# FLUX 18262" rather than "#FLUX 18262".
    print "#", first_part, first_line_value > (out_file)
  }
  next
}
# Any other header line (they start with "c " in the sample input) must never
# be copied into a data file, e.g. the header of a following block.
/^c[ \t]/{
  next
}
# Data record: copy it unchanged once an output file has been chosen.
out_file!=""{
  print > (out_file)
}
' Input_file

Explanation: Adding detailed explanation for above code.

awk '                                              ##Starting awk program from here.
/ROIysiz/{                                         ##If the line contains the string ROIysiz then do following.
  second_out=$NF                                   ##Save its last field in second_out, the 2nd part of the output file name.
  next                                             ##Header line is handled, skip all further statements.
}
/RedNumDa/{                                        ##If the line contains the string RedNumDa then do following.
  first_line_value=$NF                             ##Save its last field in first_line_value, used on the first output line.
  next                                             ##Header line is handled, skip all further statements.
}
/column3/{                                         ##If the line contains the string column3 then do following.
  third_part=$NF                                   ##Save its last field in third_part, the 3rd part of the output file name.
  next                                             ##Header line is handled, skip all further statements.
}
/column2/{                                         ##If the line contains the string column2 then do following.
  first_part=$NF                                   ##Save its last field in first_part, the 1st part of the output file name.
  next                                             ##Header line is handled, skip all further statements.
}
/end header/{                                      ##If the string end header is found the whole header has been read.
  close(out_file)                                  ##Closing the output file of the previous block, if there was one.
  out_file=first_part second_out third_part        ##Building the file name here, so the order of the header lines does not matter.
  if(out_file!=""){                                ##Writing only when at least one name part was found in the header.
    print "#", first_part, first_line_value > (out_file)  ##First output line; the commas insert OFS, giving "# FLUX 18262".
  }
  next                                             ##next will skip all further statements from here.
}
/^c[ \t]/{                                         ##Any other header line (they start with "c " in the sample input).
  next                                             ##Never copy header lines into a data file.
}
out_file!=""{                                      ##Data line, and an output file name has been SET.
  print > (out_file)                               ##Printing current line unchanged to out_file here.
}
' Input_file                                       ##Mentioning Input_file name here.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.