In [110]: txt = """
...: ID,1,2
...: 5362,0.9,-0.4
...: 485,-0.6,0.5
...: 582,0.0,0.9
...: 99,0.7,0.5
...: 75,-0.4,0.5
...: 474,0.3,0.8
...: 594,-0.2,0.0
...: 597,0.9,-0.3
...: 124,0.7,0.6
...: 635,0.8,0.9
...: """
In [113]: data = np.genfromtxt(txt.splitlines(), delimiter=',',skip_header=2)
In [114]: data
Out[114]:
array([[ 5.362e+03, 9.000e-01, -4.000e-01],
[ 4.850e+02, -6.000e-01, 5.000e-01],
[ 5.820e+02, 0.000e+00, 9.000e-01],
...
[ 6.350e+02, 8.000e-01, 9.000e-01]])
In [118]: data1 = np.concatenate([np.arange(data.shape[0])[:,None],data], axis=1)
In [119]: data1
Out[119]:
array([[ 0.000e+00, 5.362e+03, 9.000e-01, -4.000e-01],
[ 1.000e+00, 4.850e+02, -6.000e-01, 5.000e-01],
[ 2.000e+00, 5.820e+02, 0.000e+00, 9.000e-01],
[ 3.000e+00, 9.900e+01, 7.000e-01, 5.000e-01],
...
[ 9.000e+00, 6.350e+02, 8.000e-01, 9.000e-01]])
Alternatively, create 2 arrays: one of integer IDs, the other of float values:
In [124]: ID = np.genfromtxt(txt.splitlines(), delimiter=',',skip_header=2,usecols=[0],dtype=int)
In [126]: ID
Out[126]: array([5362, 485, 582, 99, 75, 474, 594, 597, 124, 635])
In [127]: np.column_stack([np.arange(ID.shape[0]),ID])
Out[127]:
array([[ 0, 5362],
[ 1, 485],
[ 2, 582],
...
[ 9, 635]])
In [128]: data2 = np.genfromtxt(txt.splitlines(), delimiter=',',skip_header=2,usecols=[1,2])
In [129]: data2
Out[129]:
array([[ 0.9, -0.4],
[-0.6, 0.5],
[ 0. , 0.9],
...
[ 0.8, 0.9]])
Or as a structured array:
In [120]: data2 = np.genfromtxt(txt.splitlines(), delimiter=',',skip_header=1,na
...: mes=True, dtype=None)
In [121]: data2
Out[121]:
array([(5362, 0.9, -0.4), ( 485, -0.6, 0.5), ( 582, 0. , 0.9),
( 99, 0.7, 0.5), ( 75, -0.4, 0.5), ( 474, 0.3, 0.8),
( 594, -0.2, 0. ), ( 597, 0.9, -0.3), ( 124, 0.7, 0.6),
( 635, 0.8, 0.9)],
dtype=[('ID', '<i8'), ('1', '<f8'), ('2', '<f8')])
I could add another id column, and consolidate the float columns, but that can wait.
You can concatenate an `np.arange(10)[:,None]` array onto the data to add an index column, but the result will be all floats. For fast numeric calculations, a numpy array has to have the same dtype throughout. There are ways of mixing dtypes (such as a structured array), but that slows down the calculation. Do those first 2 columns have to be in the same array as the float columns?