df_duplicated Function

public function df_duplicated(df) result(is_dup)

Check which rows are duplicates

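Returns a logical array in which .true. marks a row that duplicates an earlier row; the first occurrence of each row is never marked as a duplicate. Real values are compared with an absolute tolerance of 1.0e-10, and character values are compared after trimming trailing blanks.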

@param[in] df The data frame instance

@return Logical array indicating duplicate rows

Arguments

Type | Intent | Optional | Attributes | Name
type(data_frame), intent(in) :: df

Return Value logical, dimension(:), allocatable


Source Code

    function df_duplicated(df) result(is_dup)
        ! Flag every row of `df` that repeats an earlier row.
        !
        ! is_dup(i) is .true. when row i matches some row j < i in every
        ! column; the first occurrence of a row is therefore never flagged.
        ! A data frame with zero or one row yields an all-.false. result.
        !
        ! Column comparison rules:
        !   - real:      equal when |a - b| < real_tol; two NaNs are treated
        !                as equal, a NaN and a non-NaN value as different
        !   - integer, logical, complex: exact comparison
        !   - character: compared after trimming trailing blanks
        ! Columns whose dtype matches none of the handled cases are ignored.
        use, intrinsic :: ieee_arithmetic, only: ieee_is_nan

        type(data_frame), intent(in) :: df
        logical, dimension(:), allocatable :: is_dup

        ! Absolute tolerance below which two real values count as equal.
        real(rk), parameter :: real_tol = 1.0e-10_rk

        integer :: i, j, k, dtype
        integer :: n_rows, n_cols
        real(rk) :: val_i, val_j
        logical :: rows_match

        ! The frame is intent(in), so its dimensions are loop-invariant.
        n_rows = df % nrows()
        n_cols = df % ncols()

        allocate (is_dup(n_rows))
        is_dup = .false.

        row_loop: do i = 2, n_rows
            earlier_loop: do j = 1, i - 1
                rows_match = .true.

                ! Compare all columns, stopping at the first difference.
                col_loop: do k = 1, n_cols
                    dtype = df % dtype(k)

                    select case (dtype)
                    case (REAL_NUM)
                        val_i = df_get_val_real(df, i, k)
                        val_j = df_get_val_real(df, j, k)
                        ! Any comparison involving NaN is false, so a bare
                        ! tolerance test would let NaN "match" every value.
                        ! Check NaN-ness explicitly: exactly one NaN means
                        ! the cells differ; two NaNs fall through as equal.
                        if ((ieee_is_nan(val_i) .neqv. ieee_is_nan(val_j)) .or. &
                            abs(val_i - val_j) >= real_tol) then
                            rows_match = .false.
                        end if
                    case (INTEGER_NUM)
                        if (df_get_val_integer(df, i, k) /= df_get_val_integer(df, j, k)) then
                            rows_match = .false.
                        end if
                    case (LOGICAL_NUM)
                        if (df_get_val_logical(df, i, k) .neqv. df_get_val_logical(df, j, k)) then
                            rows_match = .false.
                        end if
                    case (CHARACTER_NUM)
                        if (trim(df_get_val_character(df, i, k)) /= trim(df_get_val_character(df, j, k))) then
                            rows_match = .false.
                        end if
                    case (COMPLEX_NUM)
                        ! Complex values are compared exactly (no tolerance).
                        if (df_get_val_complex(df, i, k) /= df_get_val_complex(df, j, k)) then
                            rows_match = .false.
                        end if
                    end select

                    if (.not. rows_match) exit col_loop
                end do col_loop

                ! One matching earlier row is enough; no need to scan further.
                if (rows_match) then
                    is_dup(i) = .true.
                    exit earlier_loop
                end if
            end do earlier_loop
        end do row_loop
    end function df_duplicated