df_dropna Function

public function df_dropna(df) result(clean_df)

Remove rows containing NaN values

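Returns a new data frame in which every row that contains NaN in any real or integer column has been removed. Only real and integer columns are checked for NaN; logical, character, and complex columns are copied but never cause a row to be dropped. If no rows remain, the returned data frame is empty and contains no columns.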

@param[in] df The data frame instance

@return New data frame with NaN-containing rows removed

Arguments

Type | Intent | Optional | Attributes | Name
type(data_frame), intent(in) :: df

Return Value type(data_frame)


Source Code

    function df_dropna(df) result(clean_df)
        ! Build a copy of `df` without the rows that contain NaN.
        !
        ! Only REAL_NUM and INTEGER_NUM columns are inspected (through
        ! is_nan_real / is_nan_integer). Logical, character and complex
        ! columns are copied through but never cause a row to be dropped.
        !
        ! Arguments:
        !   df       - source data frame (intent(in)).
        ! Result:
        !   clean_df - new data frame created with df's maximum character
        !              length, holding the surviving rows of every column in
        !              the original column order. Headers are carried over
        !              when df % get_with_headers() is true. When no row
        !              survives, clean_df is returned without any columns.
        type(data_frame), intent(in) :: df
        type(data_frame) :: clean_df

        logical, dimension(:), allocatable :: keep_mask
        logical :: has_headers
        integer :: i, j, dtype, num_rows, num_clean_rows
        integer, dimension(:), allocatable :: clean_indices
        real(rk), dimension(:), allocatable :: real_col, real_clean_col
        integer(ik), dimension(:), allocatable :: int_col, int_clean_col
        logical, dimension(:), allocatable :: log_col, log_clean_col
        character(len=:), allocatable :: char_col(:), char_clean_col(:)
        complex(rk), dimension(:), allocatable :: cmplx_col, cmplx_clean_col
        ! Deferred length so that long header names are not cut off by a
        ! fixed-size buffer.
        character(len=:), allocatable :: header_name

        num_rows = df % nrows()
        allocate (keep_mask(num_rows))
        keep_mask = .true.

        ! Pass 1: flag every row that has a NaN in a real or integer column.
        do i = 1, df % ncols()
            dtype = df % dtype(i)

            select case (dtype)
            case (REAL_NUM)
                real_col = df_get_col_real(df, i)
                do j = 1, size(real_col)
                    if (is_nan_real(real_col(j))) keep_mask(j) = .false.
                end do

            case (INTEGER_NUM)
                int_col = df_get_col_integer(df, i)
                do j = 1, size(int_col)
                    if (is_nan_integer(int_col(j))) keep_mask(j) = .false.
                end do
            end select
        end do

        ! The result is always a freshly initialised frame; columns are only
        ! appended when at least one row survives.
        call clean_df % new(df % get_max_char_len())

        num_clean_rows = count(keep_mask)
        if (num_clean_rows == 0) return

        ! Ascending list of the row numbers that are kept.
        clean_indices = pack([(i, i = 1, num_rows)], keep_mask)

        has_headers = df % get_with_headers()

        ! Pass 2: copy the surviving rows of each column into the new frame.
        do i = 1, df % ncols()
            dtype = df % dtype(i)

            select case (dtype)
            case (REAL_NUM)
                real_col = df_get_col_real(df, i)
                real_clean_col = real_col(clean_indices)
                if (has_headers) then
                    header_name = trim(df % header(i))
                    call df_append_real(clean_df, real_clean_col, header_name)
                else
                    call df_append_real(clean_df, real_clean_col)
                end if

            case (INTEGER_NUM)
                int_col = df_get_col_integer(df, i)
                int_clean_col = int_col(clean_indices)
                if (has_headers) then
                    header_name = trim(df % header(i))
                    call df_append_integer(clean_df, int_clean_col, header_name)
                else
                    call df_append_integer(clean_df, int_clean_col)
                end if

            case (LOGICAL_NUM)
                log_col = df_get_col_logical(df, i)
                log_clean_col = log_col(clean_indices)
                if (has_headers) then
                    header_name = trim(df % header(i))
                    call df_append_logical(clean_df, log_clean_col, header_name)
                else
                    call df_append_logical(clean_df, log_clean_col)
                end if

            case (CHARACTER_NUM)
                char_col = df_get_col_character(df, i)
                ! The element length is taken from the source column, so the
                ! target is allocated explicitly rather than by assignment.
                allocate (character(len=len(char_col)) :: char_clean_col(num_clean_rows))
                char_clean_col(:) = char_col(clean_indices)
                if (has_headers) then
                    header_name = trim(df % header(i))
                    call df_append_character(clean_df, char_clean_col, header_name)
                else
                    call df_append_character(clean_df, char_clean_col)
                end if
                ! Release both buffers so the next character column, whose
                ! element length may differ, starts from an unallocated state.
                deallocate (char_col)
                deallocate (char_clean_col)

            case (COMPLEX_NUM)
                cmplx_col = df_get_col_complex(df, i)
                cmplx_clean_col = cmplx_col(clean_indices)
                if (has_headers) then
                    header_name = trim(df % header(i))
                    call df_append_complex(clean_df, cmplx_clean_col, header_name)
                else
                    call df_append_complex(clean_df, cmplx_clean_col)
                end if
            end select
        end do

        ! Remaining allocatable locals are released automatically on return.
    end function df_dropna