df_read_csv Subroutine

public subroutine df_read_csv(df, filename, has_headers)

Import CSV file into data frame

Reads a CSV file and populates the data frame with automatic type detection. Supports integer, real, logical, and character data types.

@param[in,out] df The data frame to populate (will be initialized)
@param[in] filename Path to the input CSV file
@param[in] has_headers Whether the first row contains column headers

Note

The data frame will be reinitialized (destroyed and recreated)

Note

Type detection occurs on the first non-empty, non-NaN value

Note

Supported NaN representations: NaN, NA, NULL, N/A, -, (empty)

Arguments

Type | Intent | Optional | Attributes | Name
type(data_frame), intent(inout) :: df
character(len=*), intent(in) :: filename
logical, intent(in) :: has_headers

Source Code

    ! Read a CSV file into a data frame.
    !
    ! The file is scanned once to count its lines, then re-read so that every
    ! line is split into fields by parse_csv_line and stored in a character
    ! matrix (one row per data line, one column per field). The data frame is
    ! then (re)created with df%new() and each column is handed to
    ! add_csv_column.
    !
    ! Arguments:
    !   df          - data frame to populate; df%new() is called on it
    !   filename    - path of the CSV file to read
    !   has_headers - .true. if the first line holds column names; that line
    !                 is then stored in headers rather than as a data row
    !
    ! Terminates with error stop if the file cannot be opened, is empty,
    ! cannot be read, or contains a line whose field count differs from the
    ! first line.
    !
    ! Limits visible here: lines are read into a 1000-character buffer and
    ! fields are stored in 100-character strings.
    subroutine df_read_csv(df, filename, has_headers)
        type(data_frame), intent(inout) :: df
        character(len=*), intent(in) :: filename
        logical, intent(in) :: has_headers

        integer :: unit, iostat, num_lines, num_cols, first_row, i, j
        character(len=1000) :: line
        character(len=100), allocatable :: fields(:), headers(:)
        character(len=100), allocatable :: all_data(:, :)

        ! Open file
        open (newunit=unit, file=filename, status='old', action='read', iostat=iostat)
        if (iostat /= 0) error stop "Cannot open CSV file"

        ! Count lines so the storage can be sized before parsing
        num_lines = 0
        do
            read (unit, '(a)', iostat=iostat) line
            if (iostat /= 0) exit
            num_lines = num_lines + 1
        end do
        rewind (unit)

        if (num_lines == 0) then
            close (unit)
            error stop "Empty CSV file"
        end if

        ! Read first line to determine number of columns
        read (unit, '(a)', iostat=iostat) line
        if (iostat /= 0) then
            close (unit)
            error stop "Cannot read CSV file"
        end if

        call parse_csv_line(line, fields)
        num_cols = size(fields)

        ! Allocate storage. The file is now positioned just after line 1, so
        ! the read loop below must start at the first row that is still empty.
        if (has_headers) then
            allocate (headers(num_cols))
            allocate (all_data(num_lines - 1, num_cols))
            headers = fields
            first_row = 1
        else
            allocate (all_data(num_lines, num_cols))
            ! The first line is data: keep it as row 1 and continue from row 2
            ! (starting at row 1 again would overwrite it and leave the last
            ! row unfilled).
            all_data(1, :) = fields
            first_row = 2
        end if

        ! Read remaining data
        do i = first_row, size(all_data, 1)
            read (unit, '(a)', iostat=iostat) line
            if (iostat /= 0) then
                ! Fewer lines than counted above: abort rather than pass
                ! undefined rows on to the data frame.
                close (unit)
                error stop "Cannot read CSV file"
            end if

            call parse_csv_line(line, fields)
            if (size(fields) /= num_cols) then
                close (unit)
                error stop "Inconsistent number of columns in CSV"
            end if
            all_data(i, :) = fields
        end do

        close (unit)

        ! Initialize data frame
        call df % new()

        ! Add columns with automatic type detection
        ! NOTE(review): when has_headers is .false., headers is passed
        ! unallocated; confirm add_csv_column does not reference it then.
        do j = 1, num_cols
            call add_csv_column(df, all_data(:, j), headers, j, has_headers)
        end do

        deallocate (fields, all_data)
        if (allocated(headers)) deallocate (headers)
    end subroutine df_read_csv