df_left_join Function

public function df_left_join(df1, df2, key_col1, key_col2) result(joined_df)

Perform a left join between two data frames

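Returns every row of the left data frame. A left row whose key matches several rows of the right data frame appears once per matching right row; a left row with no match appears once, with NULL in place of the right data frame's values.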

@param[in] df1 The left data frame
@param[in] df2 The right data frame
@param[in] key_col1 Column index for the join key in the left data frame
@param[in] key_col2 Column index for the join key in the right data frame
@return A new data frame with all rows from the left data frame and the matching rows from the right data frame

Arguments

Type | Intent | Optional | Attributes | Name
type(data_frame), intent(in) :: df1
type(data_frame), intent(in) :: df2
integer, intent(in) :: key_col1
integer, intent(in) :: key_col2

Return Value type(data_frame)


Source Code

    function df_left_join(df1, df2, key_col1, key_col2) result(joined_df)
        ! Left-join two data frames on one key column taken from each.
        !
        ! Every row of df1 appears in the result at least once. A row of df1
        ! whose key equals the key of several rows of df2 is emitted once per
        ! matching df2 row (in df2 row order); a row of df1 with no match is
        ! emitted once, paired with the sentinel index -1. The collected index
        ! pairs are handed to build_joined_dataframe, which assembles joined_df.
        !
        ! Key comparison depends on the dtype of the key columns:
        !   INTEGER_NUM   - exact equality
        !   REAL_NUM      - absolute difference below 1.0e-10
        !   CHARACTER_NUM - equality after trimming trailing blanks
        ! Any other dtype never compares equal, so every df1 row is emitted
        ! as unmatched.
        !
        ! If the two key columns have different dtypes, an error message is
        ! printed and the result of joined_df % new() is returned.
        !
        ! Arguments:
        !   df1      - left data frame
        !   df2      - right data frame
        !   key_col1 - column index of the join key in df1
        !   key_col2 - column index of the join key in df2
        type(data_frame), intent(in) :: df1, df2
        integer, intent(in) :: key_col1, key_col2
        type(data_frame) :: joined_df

        ! Index stored for the right-hand side when a left row has no match.
        integer, parameter :: no_match = -1
        ! Absolute tolerance used when comparing real-valued keys.
        real(rk), parameter :: real_key_tol = 1.0e-10_rk

        integer :: i, j, dtype1, dtype2
        integer :: n_left, n_right
        integer :: num_rows, capacity
        integer, dimension(:), allocatable :: match_indices_this, match_indices_other
        logical :: match_found, is_match

        dtype1 = df1 % dtype(key_col1)
        dtype2 = df2 % dtype(key_col2)

        if (dtype1 /= dtype2) then
            print *, "Error: Key columns must have the same data type"
            call joined_df % new()
            return
        end if

        n_left = df1 % nrows()
        n_right = df2 % nrows()

        ! A left join yields at least one row per left row, so start there and
        ! grow on demand. Pre-sizing for the worst case (n_left * n_right)
        ! overflows default integers and wastes memory for typical joins.
        num_rows = 0
        capacity = max(1, n_left)
        allocate (match_indices_this(capacity))
        allocate (match_indices_other(capacity))

        do i = 1, n_left
            match_found = .false.

            do j = 1, n_right
                select case (dtype1)
                case (INTEGER_NUM)
                    is_match = df_get_val_integer(df1, i, key_col1) == &
                               df_get_val_integer(df2, j, key_col2)
                case (REAL_NUM)
                    is_match = abs(df_get_val_real(df1, i, key_col1) - &
                                   df_get_val_real(df2, j, key_col2)) < real_key_tol
                case (CHARACTER_NUM)
                    is_match = trim(df_get_val_character(df1, i, key_col1)) == &
                               trim(df_get_val_character(df2, j, key_col2))
                case default
                    ! Key dtypes without a comparison rule never match.
                    is_match = .false.
                end select

                if (is_match) then
                    call append_pair(i, j)
                    match_found = .true.
                end if
            end do

            ! Keep unmatched left rows; the sentinel marks the missing right row.
            if (.not. match_found) call append_pair(i, no_match)
        end do

        call build_joined_dataframe(df1, df2, match_indices_this(1:num_rows), &
                                    match_indices_other(1:num_rows), num_rows, joined_df)

        ! The index buffers are local allocatables and are released automatically
        ! on return.

    contains

        ! Record one (left row, right row) pair, doubling both buffers when full.
        subroutine append_pair(left_row, right_row)
            integer, intent(in) :: left_row, right_row
            integer, dimension(:), allocatable :: grown

            if (num_rows == capacity) then
                capacity = 2 * capacity

                allocate (grown(capacity))
                grown(1:num_rows) = match_indices_this(1:num_rows)
                call move_alloc(grown, match_indices_this)

                ! move_alloc leaves 'grown' deallocated, so it can be reused.
                allocate (grown(capacity))
                grown(1:num_rows) = match_indices_other(1:num_rows)
                call move_alloc(grown, match_indices_other)
            end if

            num_rows = num_rows + 1
            match_indices_this(num_rows) = left_row
            match_indices_other(num_rows) = right_row
        end subroutine append_pair
    end function df_left_join