df_drop_duplicates Function

public function df_drop_duplicates(df) result(unique_df)

Remove duplicate rows

Returns a new data frame with duplicate rows removed (the first occurrence of each row is kept).

@param[in] df The data frame instance
@return Data frame with unique rows only

Arguments

Type Intent Optional Attributes Name
type(data_frame), intent(in) :: df

Return Value type(data_frame)


Source Code

    function df_drop_duplicates(df) result(unique_df)
        ! Build a new data frame holding only the rows of `df` that
        ! df_duplicated does not flag as duplicates. Columns are copied in
        ! their original order; headers are carried over when `df` has them.
        !
        ! Arguments:
        !   df        - source data frame (intent(in), left untouched).
        ! Result:
        !   unique_df - data frame with the retained rows. When no row is
        !               retained it is initialised with `new()` and returned
        !               without any columns being appended.
        !
        ! NOTE(review): assumes df_duplicated returns one flag per row of `df`
        ! and marks every occurrence after the first -- confirm against its
        ! implementation.
        type(data_frame), intent(in) :: df
        type(data_frame) :: unique_df

        logical, dimension(:), allocatable :: keep_mask
        logical :: has_headers
        integer :: i, num_unique, dtype
        integer, dimension(:), allocatable :: unique_indices
        ! Deferred length: the header is copied at whatever length the source
        ! frame provides instead of being cut off by a fixed-size buffer.
        character(len=:), allocatable :: header_name
        real(rk), dimension(:), allocatable :: real_col, real_unique_col
        integer(ik), dimension(:), allocatable :: int_col, int_unique_col
        logical, dimension(:), allocatable :: log_col, log_unique_col
        character(len=:), allocatable :: char_col(:), char_unique_col(:)
        complex(rk), dimension(:), allocatable :: cmplx_col, cmplx_unique_col

        ! .true. for every row that must survive (allocated on assignment).
        keep_mask = .not. df_duplicated(df)
        num_unique = count(keep_mask)

        if (num_unique == 0) then
            call unique_df % new()
            return
        end if

        ! Row numbers of the retained rows, in ascending order.
        unique_indices = pack([(i, i = 1, size(keep_mask))], keep_mask)

        ! Build result data frame with the same character-length setting.
        call unique_df % new(df % get_max_char_len())

        ! Loop-invariant: queried once instead of once per column.
        has_headers = df % get_with_headers()

        do i = 1, df % ncols()
            dtype = df % dtype(i)

            ! Each branch gathers the retained rows of column i through a
            ! vector subscript and appends them to the result. A dtype that
            ! matches none of the cases is skipped.
            select case (dtype)
            case (REAL_NUM)
                real_col = df_get_col_real(df, i)
                real_unique_col = real_col(unique_indices)
                if (has_headers) then
                    header_name = df % header(i)
                    call df_append_real(unique_df, real_unique_col, trim(header_name))
                else
                    call df_append_real(unique_df, real_unique_col)
                end if

            case (INTEGER_NUM)
                int_col = df_get_col_integer(df, i)
                int_unique_col = int_col(unique_indices)
                if (has_headers) then
                    header_name = df % header(i)
                    call df_append_integer(unique_df, int_unique_col, trim(header_name))
                else
                    call df_append_integer(unique_df, int_unique_col)
                end if

            case (LOGICAL_NUM)
                log_col = df_get_col_logical(df, i)
                log_unique_col = log_col(unique_indices)
                if (has_headers) then
                    header_name = df % header(i)
                    call df_append_logical(unique_df, log_unique_col, trim(header_name))
                else
                    call df_append_logical(unique_df, log_unique_col)
                end if

            case (CHARACTER_NUM)
                char_col = df_get_col_character(df, i)
                ! Allocate explicitly so the element length of the copy
                ! matches the element length of the source column.
                allocate (character(len=len(char_col)) :: char_unique_col(num_unique))
                char_unique_col(:) = char_col(unique_indices)
                if (has_headers) then
                    header_name = df % header(i)
                    call df_append_character(unique_df, char_unique_col, trim(header_name))
                else
                    call df_append_character(unique_df, char_unique_col)
                end if
                ! Released here because the next character column may need a
                ! different element length.
                deallocate (char_unique_col)

            case (COMPLEX_NUM)
                cmplx_col = df_get_col_complex(df, i)
                cmplx_unique_col = cmplx_col(unique_indices)
                if (has_headers) then
                    header_name = df % header(i)
                    call df_append_complex(unique_df, cmplx_unique_col, trim(header_name))
                else
                    call df_append_complex(unique_df, cmplx_unique_col)
                end if
            end select
        end do

        ! Local allocatables are released automatically on return.
    end function df_drop_duplicates