Perform an inner join between two data frames
Returns a new data frame containing only rows where the key columns match in both data frames. Similar to SQL INNER JOIN.
@param[in] df1 The left data frame @param[in] df2 The right data frame @param[in] key_col1 Column index for join key in left data frame @param[in] key_col2 Column index for join key in right data frame @return A new data frame with matched rows from both tables
Note
Key columns must have the same data type (integer, real, or character)
Note
Right table columns are suffixed with “_right” to avoid name conflicts
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(data_frame), | intent(in) | :: | df1 | |||
type(data_frame), | intent(in) | :: | df2 | |||
integer, | intent(in) | :: | key_col1 | |||
integer, | intent(in) | :: | key_col2 |
function df_inner_join(df1, df2, key_col1, key_col2) result(joined_df) type(data_frame), intent(in) :: df1, df2 integer, intent(in) :: key_col1, key_col2 type(data_frame) :: joined_df integer :: i, j, dtype1, dtype2, num_matches integer, dimension(:), allocatable :: match_indices_this, match_indices_other logical :: match_found ! Check if key columns are compatible types dtype1 = df1 % dtype(key_col1) dtype2 = df2 % dtype(key_col2) if (dtype1 /= dtype2) then print *, "Error: Key columns must have the same data type" call joined_df % new() return end if ! Count matches first num_matches = 0 do i = 1, df1 % nrows() do j = 1, df2 % nrows() match_found = .false. select case (dtype1) case (INTEGER_NUM) if (df_get_val_integer(df1, i, key_col1) == df_get_val_integer(df2, j, key_col2)) then match_found = .true. end if case (REAL_NUM) if (abs(df_get_val_real(df1, i, key_col1) - df_get_val_real(df2, j, key_col2)) < 1.0e-10_rk) then match_found = .true. end if case (CHARACTER_NUM) if (trim(df_get_val_character(df1, i, key_col1)) == trim(df_get_val_character(df2, j, key_col2))) then match_found = .true. end if end select if (match_found) then num_matches = num_matches + 1 end if end do end do if (num_matches == 0) then call joined_df % new() return end if ! Allocate arrays to store matching indices allocate (match_indices_this(num_matches)) allocate (match_indices_other(num_matches)) ! Find all matching pairs num_matches = 0 do i = 1, df1 % nrows() do j = 1, df2 % nrows() match_found = .false. select case (dtype1) case (INTEGER_NUM) if (df_get_val_integer(df1, i, key_col1) == df_get_val_integer(df2, j, key_col2)) then match_found = .true. end if case (REAL_NUM) if (abs(df_get_val_real(df1, i, key_col1) - df_get_val_real(df2, j, key_col2)) < 1.0e-10_rk) then match_found = .true. end if case (CHARACTER_NUM) if (trim(df_get_val_character(df1, i, key_col1)) == trim(df_get_val_character(df2, j, key_col2))) then match_found = .true. end if end select if (match_found) then num_matches = num_matches + 1 match_indices_this(num_matches) = i match_indices_other(num_matches) = j end if end do end do ! Build the joined dataframe call build_joined_dataframe(df1, df2, match_indices_this, match_indices_other, & num_matches, joined_df) deallocate (match_indices_this) deallocate (match_indices_other) end function df_inner_join