Remove rows containing NaN values
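Returns a new data frame in which every row that holds a NaN in any real or integer column has been removed. Only real and integer columns are checked for NaN values; logical, character, and complex columns are not inspected and are simply filtered with the same set of surviving rows.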
@param[in] df The data frame instance
@return New data frame with NaN-containing rows removed
| Type | Intent | Optional | Attributes | Name |
|---|---|---|---|---|
| type(data_frame) | intent(in) | | | df |
!> Remove rows containing NaN values.
!!
!! Builds a new data frame that holds only those rows of `df` for which no
!! real column is flagged by `is_nan_real` and no integer column is flagged
!! by `is_nan_integer`. Logical, character and complex columns are not
!! inspected; they are filtered with the same row mask. Column headers are
!! carried over when `df` reports that it has headers.
!!
!! If no row survives (including the case of a frame with zero rows), the
!! result is a freshly initialised frame to which no columns were appended.
!!
!! @param[in] df The data frame instance
!! @return New data frame with NaN-containing rows removed
function df_dropna(df) result(clean_df)
    type(data_frame), intent(in) :: df
    type(data_frame) :: clean_df

    logical, dimension(:), allocatable :: keep_mask
    integer :: i, j, num_clean_rows
    logical :: with_headers
    real(rk), dimension(:), allocatable :: real_col, real_clean_col
    integer(ik), dimension(:), allocatable :: int_col, int_clean_col
    logical, dimension(:), allocatable :: log_col, log_clean_col
    character(len=:), allocatable :: char_col(:), char_clean_col(:)
    complex(rk), dimension(:), allocatable :: cmplx_col, cmplx_clean_col
    ! Deferred length so that headers of any length are copied untruncated
    ! (a fixed-size buffer would cut off long headers).
    character(len=:), allocatable :: header_name

    allocate (keep_mask(df % nrows()))
    keep_mask = .true.

    ! Pass 1: flag every row that has a NaN in a real or integer column.
    do i = 1, df % ncols()
        select case (df % dtype(i))
        case (REAL_NUM)
            real_col = df_get_col_real(df, i)
            do j = 1, size(real_col)
                if (is_nan_real(real_col(j))) keep_mask(j) = .false.
            end do
        case (INTEGER_NUM)
            int_col = df_get_col_integer(df, i)
            do j = 1, size(int_col)
                if (is_nan_integer(int_col(j))) keep_mask(j) = .false.
            end do
        end select
    end do

    call clean_df % new(df % get_max_char_len())

    ! Nothing survives: hand back the initialised frame without columns.
    num_clean_rows = count(keep_mask)
    if (num_clean_rows == 0) return

    with_headers = df % get_with_headers()

    ! Pass 2: copy each column, keeping only the rows still marked in keep_mask.
    do i = 1, df % ncols()
        if (with_headers) header_name = df % header(i)

        select case (df % dtype(i))
        case (REAL_NUM)
            real_col = df_get_col_real(df, i)
            real_clean_col = pack(real_col, keep_mask)
            if (with_headers) then
                call df_append_real(clean_df, real_clean_col, trim(header_name))
            else
                call df_append_real(clean_df, real_clean_col)
            end if

        case (INTEGER_NUM)
            int_col = df_get_col_integer(df, i)
            int_clean_col = pack(int_col, keep_mask)
            if (with_headers) then
                call df_append_integer(clean_df, int_clean_col, trim(header_name))
            else
                call df_append_integer(clean_df, int_clean_col)
            end if

        case (LOGICAL_NUM)
            log_col = df_get_col_logical(df, i)
            log_clean_col = pack(log_col, keep_mask)
            if (with_headers) then
                call df_append_logical(clean_df, log_clean_col, trim(header_name))
            else
                call df_append_logical(clean_df, log_clean_col)
            end if

        case (CHARACTER_NUM)
            char_col = df_get_col_character(df, i)
            ! Allocate explicitly with the source element length, then assign
            ! into the existing storage; the buffers are released afterwards
            ! because the element length may differ from column to column.
            allocate (character(len=len(char_col)) :: char_clean_col(num_clean_rows))
            char_clean_col(:) = pack(char_col, keep_mask)
            if (with_headers) then
                call df_append_character(clean_df, char_clean_col, trim(header_name))
            else
                call df_append_character(clean_df, char_clean_col)
            end if
            deallocate (char_col)
            deallocate (char_clean_col)

        case (COMPLEX_NUM)
            cmplx_col = df_get_col_complex(df, i)
            cmplx_clean_col = pack(cmplx_col, keep_mask)
            if (with_headers) then
                call df_append_complex(clean_df, cmplx_clean_col, trim(header_name))
            else
                call df_append_complex(clean_df, cmplx_clean_col)
            end if
        end select
    end do

    ! Remaining local allocatables are released automatically on return.
end function df_dropna