df_correlation_real Function

public function df_correlation_real(df, col_index1, col_index2) result(corr)

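Calculates the Pearson correlation coefficient between two real-valued columns of a data frame. The result is 0 when either column has zero standard deviation, since the coefficient is undefined in that case.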

Arguments

Type | Intent | Optional | Attributes | Name
type(data_frame), intent(in) :: df
integer, intent(in) :: col_index1
integer, intent(in) :: col_index2

Return Value real(kind=rk)


Source Code

    function df_correlation_real(df, col_index1, col_index2) result(corr)
        ! Pearson correlation coefficient between two real columns of a data frame.
        !
        ! Arguments:
        !   df         - data frame that holds both columns
        !   col_index1 - index of the first column, must lie in 1 .. df % ncols()
        !   col_index2 - index of the second column, must lie in 1 .. df % ncols()
        !
        ! Result:
        !   corr - covariance (n - 1 normalisation) divided by the product of the
        !          two standard deviations. Returns 0 when the coefficient is
        !          undefined: fewer than two observations, or either standard
        !          deviation is not strictly positive.
        !
        ! Terminates with error stop when an index is out of range, when either
        ! column is not of type REAL_NUM, or when the column lengths differ.
        type(data_frame), intent(in) :: df
        integer, intent(in) :: col_index1, col_index2
        real(rk) :: corr

        real(rk), dimension(:), allocatable :: col1, col2
        real(rk) :: mean1, mean2, std1, std2, covariance
        integer :: n
        type(column) :: data_col1, data_col2

        if (col_index1 < 1 .or. col_index1 > df % ncols()) error stop "column index 1 out of range"
        if (col_index2 < 1 .or. col_index2 > df % ncols()) error stop "column index 2 out of range"

        data_col1 = df % get_data_col(col_index1)
        data_col2 = df % get_data_col(col_index2)

        if (data_col1 % get_type() /= REAL_NUM) error stop "column 1 is not real type"
        if (data_col2 % get_type() /= REAL_NUM) error stop "column 2 is not real type"

        col1 = data_col1 % getr()
        col2 = data_col2 % getr()
        n = size(col1)

        if (n /= size(col2)) error stop "columns must have same length"

        ! Default result for every "undefined" case below.
        corr = 0.0_rk

        ! The covariance divides by (n - 1); with fewer than two observations
        ! that divisor is zero or negative, so bail out before touching it.
        if (n < 2) return

        mean1 = df_mean_real(df, col_index1)
        mean2 = df_mean_real(df, col_index2)
        std1 = df_std_real(df, col_index1)
        std2 = df_std_real(df, col_index2)

        ! Pearson correlation = covariance / (std1 * std2)
        ! NOTE(review): the result is only a true correlation if df_std_real
        ! uses the same (n - 1) normalisation as the covariance here - confirm.
        if (std1 > 0.0_rk .and. std2 > 0.0_rk) then
            covariance = sum((col1 - mean1) * (col2 - mean2)) / real(n - 1, rk)
            corr = covariance / (std1 * std2)
        end if
    end function df_correlation_real