df_outer_join Function

public function df_outer_join(df1, df2, key_col1, key_col2) result(joined_df)

Perform an outer join between two data frames

Returns all rows from both dataframes

@param[in] df1 The left data frame @param[in] df2 The right data frame @param[in] key_col1 Column index for join key in left data frame @param[in] key_col2 Column index for join key in right data frame @return A new data frame with all rows from both tables

Arguments

Type IntentOptional Attributes Name
type(data_frame), intent(in) :: df1
type(data_frame), intent(in) :: df2
integer, intent(in) :: key_col1
integer, intent(in) :: key_col2

Return Value type(data_frame)


Source Code

    function df_outer_join(df1, df2, key_col1, key_col2) result(joined_df)
        type(data_frame), intent(in) :: df1, df2
        integer, intent(in) :: key_col1, key_col2
        type(data_frame) :: joined_df

        integer :: i, j, dtype1, dtype2
        integer, dimension(:), allocatable :: match_indices_this, match_indices_other
        logical, dimension(:), allocatable :: other_matched
        integer :: num_rows
        logical :: found_match

        dtype1 = df1 % dtype(key_col1)
        dtype2 = df2 % dtype(key_col2)

        if (dtype1 /= dtype2) then
            print *, "Error: Key columns must have the same data type"
            call joined_df % new()
            return
        end if

        allocate (other_matched(df2 % nrows()))
        other_matched = .false.

        num_rows = 0
        allocate (match_indices_this((df1 % nrows() + df2 % nrows()) * max(1, max(df1 % nrows(), df2 % nrows()))))
        allocate (match_indices_other((df1 % nrows() + df2 % nrows()) * max(1, max(df1 % nrows(), df2 % nrows()))))

        ! First pass: all rows from 'df1' with matches from 'df2'
        do i = 1, df1 % nrows()
            found_match = .false.

            do j = 1, df2 % nrows()
                select case (dtype1)
                case (INTEGER_NUM)
                    if (df_get_val_integer(df1, i, key_col1) == df_get_val_integer(df2, j, key_col2)) then
                        num_rows = num_rows + 1
                        match_indices_this(num_rows) = i
                        match_indices_other(num_rows) = j
                        other_matched(j) = .true.
                        found_match = .true.
                    end if
                case (REAL_NUM)
                    if (abs(df_get_val_real(df1, i, key_col1) - df_get_val_real(df2, j, key_col2)) < 1.0e-10_rk) then
                        num_rows = num_rows + 1
                        match_indices_this(num_rows) = i
                        match_indices_other(num_rows) = j
                        other_matched(j) = .true.
                        found_match = .true.
                    end if
                case (CHARACTER_NUM)
                  if (trim(df_get_val_character(df1, i, key_col1)) == trim(df_get_val_character(df2, j, key_col2))) then
                        num_rows = num_rows + 1
                        match_indices_this(num_rows) = i
                        match_indices_other(num_rows) = j
                        other_matched(j) = .true.
                        found_match = .true.
                    end if
                end select
            end do

            if (.not. found_match) then
                num_rows = num_rows + 1
                match_indices_this(num_rows) = i
                match_indices_other(num_rows) = -1
            end if
        end do

        ! Second pass: unmatched rows from 'df2'
        do j = 1, df2 % nrows()
            if (.not. other_matched(j)) then
                num_rows = num_rows + 1
                match_indices_this(num_rows) = -1
                match_indices_other(num_rows) = j
            end if
        end do

        call build_joined_dataframe(df1, df2, match_indices_this(1:num_rows), &
                                    match_indices_other(1:num_rows), num_rows, joined_df)

        deallocate (match_indices_this)
        deallocate (match_indices_other)
        deallocate (other_matched)
    end function df_outer_join