df_inner_join Function

public function df_inner_join(df1, df2, key_col1, key_col2) result(joined_df)

Perform an inner join between two data frames

Returns a new data frame containing only rows where the key columns match in both data frames. Similar to SQL INNER JOIN.

@param[in] df1 The left data frame @param[in] df2 The right data frame @param[in] key_col1 Column index for join key in left data frame @param[in] key_col2 Column index for join key in right data frame @return A new data frame with matched rows from both tables

Note

Key columns must have the same data type (integer, real, or character)

Note

Right table columns are suffixed with “_right” to avoid name conflicts

Arguments

Type IntentOptional Attributes Name
type(data_frame), intent(in) :: df1
type(data_frame), intent(in) :: df2
integer, intent(in) :: key_col1
integer, intent(in) :: key_col2

Return Value type(data_frame)


Source Code

    function df_inner_join(df1, df2, key_col1, key_col2) result(joined_df)
        type(data_frame), intent(in) :: df1, df2
        integer, intent(in) :: key_col1, key_col2
        type(data_frame) :: joined_df

        integer :: i, j, dtype1, dtype2, num_matches
        integer, dimension(:), allocatable :: match_indices_this, match_indices_other
        logical :: match_found

        ! Check if key columns are compatible types
        dtype1 = df1 % dtype(key_col1)
        dtype2 = df2 % dtype(key_col2)

        if (dtype1 /= dtype2) then
            print *, "Error: Key columns must have the same data type"
            call joined_df % new()
            return
        end if

        ! Count matches first
        num_matches = 0
        do i = 1, df1 % nrows()
            do j = 1, df2 % nrows()
                match_found = .false.

                select case (dtype1)
                case (INTEGER_NUM)
                    if (df_get_val_integer(df1, i, key_col1) == df_get_val_integer(df2, j, key_col2)) then
                        match_found = .true.
                    end if
                case (REAL_NUM)
                    if (abs(df_get_val_real(df1, i, key_col1) - df_get_val_real(df2, j, key_col2)) < 1.0e-10_rk) then
                        match_found = .true.
                    end if
                case (CHARACTER_NUM)
                  if (trim(df_get_val_character(df1, i, key_col1)) == trim(df_get_val_character(df2, j, key_col2))) then
                        match_found = .true.
                    end if
                end select

                if (match_found) then
                    num_matches = num_matches + 1
                end if
            end do
        end do

        if (num_matches == 0) then
            call joined_df % new()
            return
        end if

        ! Allocate arrays to store matching indices
        allocate (match_indices_this(num_matches))
        allocate (match_indices_other(num_matches))

        ! Find all matching pairs
        num_matches = 0
        do i = 1, df1 % nrows()
            do j = 1, df2 % nrows()
                match_found = .false.

                select case (dtype1)
                case (INTEGER_NUM)
                    if (df_get_val_integer(df1, i, key_col1) == df_get_val_integer(df2, j, key_col2)) then
                        match_found = .true.
                    end if
                case (REAL_NUM)
                    if (abs(df_get_val_real(df1, i, key_col1) - df_get_val_real(df2, j, key_col2)) < 1.0e-10_rk) then
                        match_found = .true.
                    end if
                case (CHARACTER_NUM)
                  if (trim(df_get_val_character(df1, i, key_col1)) == trim(df_get_val_character(df2, j, key_col2))) then
                        match_found = .true.
                    end if
                end select

                if (match_found) then
                    num_matches = num_matches + 1
                    match_indices_this(num_matches) = i
                    match_indices_other(num_matches) = j
                end if
            end do
        end do

        ! Build the joined dataframe
        call build_joined_dataframe(df1, df2, match_indices_this, match_indices_other, &
                                    num_matches, joined_df)

        deallocate (match_indices_this)
        deallocate (match_indices_other)
    end function df_inner_join