Remove duplicate rows
Returns new data frame with duplicate rows removed (keeps first occurrence)
@param[in] df The data frame instance
@return Data frame with unique rows only
| Type | Intent | Optional | Attributes | | Name | |
|---|---|---|---|---|---|---|
| type(data_frame), | intent(in) | | | :: | df | |
!> Remove duplicate rows from a data frame.
!!
!! Builds a new data frame holding only the rows that `df_duplicated` does not
!! flag as duplicates. Columns are copied in their original order, and each
!! column's header is carried over when `df % get_with_headers()` is true.
!!
!! If no row is kept (this includes an input with zero rows), the result is
!! created with a bare `new()` call and no columns are appended.
!!
!! @param[in] df The data frame instance
!! @return Data frame with unique rows only
function df_drop_duplicates(df) result(unique_df)
    type(data_frame), intent(in) :: df
    type(data_frame) :: unique_df

    logical, dimension(:), allocatable :: is_dup
    logical :: with_headers
    integer :: i, row, num_unique, dtype
    integer, dimension(:), allocatable :: unique_indices
    ! Deferred length: a header of any length is copied without truncation
    ! (a fixed-length buffer would silently cut long headers).
    character(len=:), allocatable :: header_name
    real(rk), dimension(:), allocatable :: real_col, real_unique_col
    integer(ik), dimension(:), allocatable :: int_col, int_unique_col
    logical, dimension(:), allocatable :: log_col, log_unique_col
    character(len=:), allocatable :: char_col(:), char_unique_col(:)
    complex(rk), dimension(:), allocatable :: cmplx_col, cmplx_unique_col

    is_dup = df_duplicated(df)

    ! Row numbers of every row that is NOT flagged as a duplicate, in order.
    unique_indices = pack([(row, row = 1, size(is_dup))], .not. is_dup)
    num_unique = size(unique_indices)

    if (num_unique == 0) then
        call unique_df % new()
        return
    end if

    ! Build result data frame
    call unique_df % new(df % get_max_char_len())
    with_headers = df % get_with_headers()

    do i = 1, df % ncols()
        dtype = df % dtype(i)

        select case (dtype)
        case (REAL_NUM)
            real_col = df_get_col_real(df, i)
            ! Vector subscript gathers the kept rows in one assignment.
            real_unique_col = real_col(unique_indices)
            if (with_headers) then
                header_name = df % header(i)
                call df_append_real(unique_df, real_unique_col, trim(header_name))
            else
                call df_append_real(unique_df, real_unique_col)
            end if

        case (INTEGER_NUM)
            int_col = df_get_col_integer(df, i)
            int_unique_col = int_col(unique_indices)
            if (with_headers) then
                header_name = df % header(i)
                call df_append_integer(unique_df, int_unique_col, trim(header_name))
            else
                call df_append_integer(unique_df, int_unique_col)
            end if

        case (LOGICAL_NUM)
            log_col = df_get_col_logical(df, i)
            log_unique_col = log_col(unique_indices)
            if (with_headers) then
                header_name = df % header(i)
                call df_append_logical(unique_df, log_unique_col, trim(header_name))
            else
                call df_append_logical(unique_df, log_unique_col)
            end if

        case (CHARACTER_NUM)
            char_col = df_get_col_character(df, i)
            ! Allocate explicitly so the element length matches the source
            ! column; it can differ from one character column to the next.
            allocate (character(len=len(char_col)) :: char_unique_col(num_unique))
            char_unique_col(:) = char_col(unique_indices)
            if (with_headers) then
                header_name = df % header(i)
                call df_append_character(unique_df, char_unique_col, trim(header_name))
            else
                call df_append_character(unique_df, char_unique_col)
            end if
            deallocate (char_unique_col)

        case (COMPLEX_NUM)
            cmplx_col = df_get_col_complex(df, i)
            cmplx_unique_col = cmplx_col(unique_indices)
            if (with_headers) then
                header_name = df % header(i)
                call df_append_complex(unique_df, cmplx_unique_col, trim(header_name))
            else
                call df_append_complex(unique_df, cmplx_unique_col)
            end if
        end select
    end do

    ! Local allocatables are released automatically on return.
end function df_drop_duplicates