Perform an outer join between two data frames
Returns all rows from both dataframes
@param[in] df1 The left data frame @param[in] df2 The right data frame @param[in] key_col1 Column index for join key in left data frame @param[in] key_col2 Column index for join key in right data frame @return A new data frame with all rows from both tables
Type | Intent | Optional | Attributes | Name | ||
---|---|---|---|---|---|---|
type(data_frame), | intent(in) | :: | df1 | |||
type(data_frame), | intent(in) | :: | df2 | |||
integer, | intent(in) | :: | key_col1 | |||
integer, | intent(in) | :: | key_col2 |
function df_outer_join(df1, df2, key_col1, key_col2) result(joined_df) type(data_frame), intent(in) :: df1, df2 integer, intent(in) :: key_col1, key_col2 type(data_frame) :: joined_df integer :: i, j, dtype1, dtype2 integer, dimension(:), allocatable :: match_indices_this, match_indices_other logical, dimension(:), allocatable :: other_matched integer :: num_rows logical :: found_match dtype1 = df1 % dtype(key_col1) dtype2 = df2 % dtype(key_col2) if (dtype1 /= dtype2) then print *, "Error: Key columns must have the same data type" call joined_df % new() return end if allocate (other_matched(df2 % nrows())) other_matched = .false. num_rows = 0 allocate (match_indices_this((df1 % nrows() + df2 % nrows()) * max(1, max(df1 % nrows(), df2 % nrows())))) allocate (match_indices_other((df1 % nrows() + df2 % nrows()) * max(1, max(df1 % nrows(), df2 % nrows())))) ! First pass: all rows from 'df1' with matches from 'df2' do i = 1, df1 % nrows() found_match = .false. do j = 1, df2 % nrows() select case (dtype1) case (INTEGER_NUM) if (df_get_val_integer(df1, i, key_col1) == df_get_val_integer(df2, j, key_col2)) then num_rows = num_rows + 1 match_indices_this(num_rows) = i match_indices_other(num_rows) = j other_matched(j) = .true. found_match = .true. end if case (REAL_NUM) if (abs(df_get_val_real(df1, i, key_col1) - df_get_val_real(df2, j, key_col2)) < 1.0e-10_rk) then num_rows = num_rows + 1 match_indices_this(num_rows) = i match_indices_other(num_rows) = j other_matched(j) = .true. found_match = .true. end if case (CHARACTER_NUM) if (trim(df_get_val_character(df1, i, key_col1)) == trim(df_get_val_character(df2, j, key_col2))) then num_rows = num_rows + 1 match_indices_this(num_rows) = i match_indices_other(num_rows) = j other_matched(j) = .true. found_match = .true. end if end select end do if (.not. found_match) then num_rows = num_rows + 1 match_indices_this(num_rows) = i match_indices_other(num_rows) = -1 end if end do ! Second pass: unmatched rows from 'df2' do j = 1, df2 % nrows() if (.not. other_matched(j)) then num_rows = num_rows + 1 match_indices_this(num_rows) = -1 match_indices_other(num_rows) = j end if end do call build_joined_dataframe(df1, df2, match_indices_this(1:num_rows), & match_indices_other(1:num_rows), num_rows, joined_df) deallocate (match_indices_this) deallocate (match_indices_other) deallocate (other_matched) end function df_outer_join