;+
; NAME:
; READ_CSV
;
; PURPOSE:
; Reads a comma-separated-variables (CSV) file into a structure.
;
; CATEGORY:
; I/O
;
; CALLING SEQUENCE:
; Result = READ_CSV(Filename)
;
; INPUTS:
; Filename: Name of CSV file. The file must contain a header line
; of the form "# COL1,COL2,COL3...".
;
; KEYWORD PARAMETERS:
; ULON64: A comma-separated list of column names that are to be
; treated as unsigned long 64-bit integers.
;
; STRING: A comma-separated list of column names that are to be
; treated as strings.
;
; DOUBLE: A comma-separated list of column names that are to be
; treated as doubles.
;
; NULL: If there are null fields in the file, they will be assigned
; to the string specified by the NULL keyword before being
; cast to the appropriate type.
;
; OUTPUTS:
; The function returns a structure containing one field for each column
; in the file, named by the entry in the file header. All fields are
; assumed to be doubles unless their name contains the string 'OBJID',
; in which case they are assumed to be unsigned long 64-bit integers,
; or 'NAME', in which case they are assumed to be strings (this comparison
; is case-insensitive). These defaults can all be over-ridden by using the
; ULON64, STRING, and DOUBLE keywords.
; The structure also contains the fields NROWS and NCOLS which contain
; the number of rows and columns respectively.
;
; EXAMPLE:
; If the file 'galaxies.csv' consists of:
;
; # NAME,OBJID,RA,DEC
; M31,493,10.68,41.27
; NGC 1068,92,40.67,-0.01
;
; Then reading it in produces:
;
; IDL> Galstruct = READ_CSV('galaxies.csv')
; IDL> HELP, /STRUCT, Galstruct
; ** Structure <8237564>, 6 tags, length=80, data length=80, refs=1:
; NAME STRING Array[2]
; OBJID ULONG64 Array[2]
; RA DOUBLE Array[2]
; DEC DOUBLE Array[2]
; NROWS LONG 2
; NCOLS LONG 4
;
;
; MODIFICATION HISTORY:
; Modified by: Jeremy Bailin
; 12 August 2008 Use IDL 5.6 FILE_LINES to count number of lines.
; Modified by: Vittorio Brando
; 12 August 2008 added function get_nlines to count lines in not unix OS
; added function valid_tag_name to ensure that the tag names are IDL compliant
;
; Written by: Jeremy Bailin
; 10 June 2008 Public release in JBIU
; jbiu@astroconst.org
;
;-
;------------------------------------------------------------------------
function valid_tag_name,tmp_name
; ensure that the tag names are IDL compliant
; Tag names may not be IDL Reserved Words,
; and must be unique within a given structure
; Structure names and tag names follow the rules of IDL identifiers:
; they must begin with a letter; following characters can be letters,
; digits, or the underscore or dollar sign characters; and case is ignored.
reserved_words=['AND','BEGIN','BREAK','CASE','COMMON','COMPILE_OPT', $
'CONTINUE','DO','ELSE','END','ENDCASE','ENDELSE','ENDFOR', $
'ENDIF','ENDREP','ENDSWITCH','ENDWHILE','EQ','FOR', $
'FORWARD_FUNCTION','FUNCTION','GE','GOTO','GT','IF', $
'INHERITS','LE','LT','MOD','NE','NOT','OF','ON_IOERROR', $
'OR','PRO','REPEAT','SWITCH','THEN','UNTIL','WHILE','XOR']
nonvalid_chars="[]() /|\,.<>!@#%^&*+=-"
test_reserved = where(tmp_name eq reserved_words, test_result)
if test_result ne 0 then tmp_name+="_" ;append underscore
tmp_name=strjoin(STRSPLIT(tmp_name,nonvalid_chars,/extract),"_")
return, tmp_name
end
;------------------------------------------------------------------------
function read_csv, filename, ulon64=ul64str, string=strstr, double=dblstr, $
null=nullstr
on_error, 0
nline=file_lines(filename)
nrows=nline-1
if n_elements(nullstr) eq 0 then nullstr=''
openr, lun, filename, /get_lun
header = string('')
readf, lun, header
header_vars = strsplit(header, ',', /EXTRACT, COUNT=ncols)
if ncols eq 0 then message,'Header contains no columns'
; get rid of initial '# '
header_vars[0] = (stregex(header_vars[0],'(# *)?(.*)',/SUBEXP,/EXTRACT))[2]
if n_elements(ul64str) gt 0 then $
ul64_list = strupcase(strsplit(ul64str, ',', /EXTRACT)) $
else ul64_list=''
if n_elements(strstr) gt 0 then $
str_list = strupcase(strsplit(strstr, ',', /EXTRACT)) $
else str_list=''
if n_elements(dblstr) gt 0 then $
dbl_list = strupcase(strsplit(dblstr, ',', /EXTRACT)) $
else dbl_list=''
upcasehead = strupcase(header_vars)
objidp = bytarr(ncols)
for i=0L,ncols-1 do begin
if stregex(strupcase(header_vars[i]), 'OBJID', /BOOLEAN) then objidp[i]=1 $
else if stregex(strupcase(header_vars[i]), 'NAME', /BOOLEAN) then objidp[i]=2
if total( strupcase(header_vars[i]) eq ul64_list ) gt 0 then objidp[i]=1
if total( strupcase(header_vars[i]) eq str_list ) gt 0 then objidp[i]=2
if total( strupcase(header_vars[i]) eq dbl_list ) gt 0 then objidp[i]=0
endfor
; ensure that the tag names are IDL compliant
for i=0L,ncols-1 do header_vars[i]=valid_tag_name(header_vars[i])
; create structure
case objidp[0] of
0 : outstruct = create_struct(header_vars[0], dblarr(nrows))
1 : outstruct = create_struct(header_vars[0], ulon64arr(nrows))
2 : outstruct = create_struct(header_vars[0], strarr(nrows))
endcase
for i=1L,ncols-1 do begin
case objidp[i] of
0 : outstruct = create_struct(outstruct, header_vars[i], dblarr(nrows))
1 : outstruct = create_struct(outstruct, header_vars[i], ulon64arr(nrows))
2 : outstruct = create_struct(outstruct, header_vars[i], strarr(nrows))
endcase
endfor
outstruct = create_struct(outstruct, 'nrows', nrows, 'ncols', ncols)
for i=0L,nrows-1 do begin
input_line = string('')
readf, lun, input_line
values = strsplit(input_line, ',', /EXTRACT, COUNT=nlinecols)
if nlinecols ne ncols then message,'Incorrect number of columns in line '+ $
string(i+1,format='(I0)')
wherenull = where(values eq 'null', nnull)
if nnull gt 0 then values[wherenull]=nullstr
for j=0,ncols-1 do begin
case objidp[j] of
0 : outstruct.(j)[i] = double(values[j])
1 : outstruct.(j)[i] = ulong64(values[j])
2 : outstruct.(j)[i] = values[j]
endcase
endfor
endfor
close, lun
free_lun, lun
return, outstruct
end