Get n random rows from the dataframe
- @param[in] df The data frame to sample from
- @param[in] n Number of rows to sample
- @param[in] seed Optional random seed
- @return New data frame with sampled rows
| Type | Intent | Optional | Attributes | Name |
|---|---|---|---|---|
| type(data_frame) | intent(in) | | | df |
| integer | intent(in) | | | n |
| integer | intent(in) | optional | | seed |
!> Draw a random sample of rows from a data frame, without replacement.
!!
!! The number of rows returned is `n`, capped at `df % nrows()`. When that
!! count is zero or negative the result is a freshly initialised data frame
!! with no columns copied into it, and the random generator is left untouched.
!!
!! @param[in] df   The data frame to sample from
!! @param[in] n    Number of rows to sample
!! @param[in] seed Optional random seed; when present, every element of the
!!                 intrinsic generator's seed array is set to this value
!!                 before sampling
!! @return New data frame with the sampled rows
function df_sample(df, n, seed) result(sampled_df)
    type(data_frame), intent(in) :: df
    integer, intent(in) :: n
    integer, intent(in), optional :: seed
    type(data_frame) :: sampled_df

    integer, dimension(:), allocatable :: indices, selected_indices, seed_array
    integer :: i, j, temp, total_rows, num_samples, seed_size
    real :: rand_val

    ! Query the row count once; it is loop-invariant.
    total_rows = df % nrows()
    num_samples = min(n, total_rows)

    ! Nothing to sample: covers n == 0, an empty frame, and a negative n
    ! (which would otherwise fall through to the shuffle below).
    if (num_samples <= 0) then
        call sampled_df % new(df % get_max_char_len())
        return
    end if

    ! Initialize random seed if provided
    if (present(seed)) then
        call random_seed(size=seed_size)
        allocate (seed_array(seed_size))
        seed_array = seed
        call random_seed(put=seed_array)
        deallocate (seed_array)
    end if

    ! Create array of all row indices 1..total_rows
    allocate (indices(total_rows))
    do i = 1, total_rows
        indices(i) = i
    end do

    ! Fisher-Yates shuffle over the full index array
    do i = total_rows, 2, -1
        call random_number(rand_val)
        ! rand_val lies in [0, 1), so j is nominally in 1..i. The min() guards
        ! against the default-real product rounding up to i for very large i,
        ! which would otherwise index one past the current position.
        j = min(int(rand_val * i) + 1, i)
        temp = indices(i)
        indices(i) = indices(j)
        indices(j) = temp
    end do

    ! Take the first num_samples shuffled indices
    allocate (selected_indices(num_samples))
    selected_indices = indices(1:num_samples)

    ! Build the sampled data frame column by column
    call sampled_df % new(df % get_max_char_len())
    do i = 1, df % ncols()
        call copy_filtered_column(df, sampled_df, i, selected_indices)
    end do

    deallocate (indices, selected_indices)
end function df_sample