This is an R Markdown document which should make the analytical steps clear and repeatable. In lieu of providing the original spatial data (due to GDPR concerns), we present definitions, summary data and visual distributions of the research data.
There are 19008 Herd IDs and 17744 Business IDs across the three study years.
dat1 %>% group_by(year) %>%
summarise(
n.herds = n_distinct(HerdId)
, n.businesses = n_distinct(BusinessId))
## # A tibble: 3 x 3
## year n.herds n.businesses
## <fct> <int> <int>
## 1 2015 19008 17744
## 2 2016 19008 17744
## 3 2017 19008 17744
Whilst business IDs can be associated with more than one herd, in the vast majority of cases, there was a 1:1 mapping between Business IDs and Herd IDs; out of 17744 businesses, 1192 were associated with more than one Herd ID (i.e. 6.72%)
dat1 %>% group_by(BusinessId) %>%
summarise(
n.herds.per.bus.id = n_distinct(HerdId)) %>%
filter(n.herds.per.bus.id > 1) %>%
summarise(n.bus.multip.herds = n_distinct(BusinessId))
## # A tibble: 1 x 1
## n.bus.multip.herds
## <int>
## 1 1192
Whilst herds could change production type year on year, The number of different herd types remained broadly consistent across the three years of the study
ggplot(dat1, aes(x = year, fill = HerdType)) +
geom_bar(stat="count") +
scale_fill_viridis(discrete = TRUE) +
labs(x = "Year", y = "Number of herds", fill = "Herd Type") +
theme_bw()
There is no notable difference in herd types between years.
orig_table <- table(dat1$HerdType, dat1$year)
round(prop.table(orig_table, margin = 2),2)
##
## 2015 2016 2017
## Breeder 0.50 0.50 0.49
## Dairy 0.14 0.14 0.14
## Finisher 0.10 0.10 0.11
## Other 0.25 0.25 0.26
chisq.test(orig_table)
##
## Pearson's Chi-squared test
##
## data: orig_table
## X-squared = 5.3394, df = 6, p-value = 0.5011
Median herd size varied with production type and also remained broadly consistent across the three years of the study, within herd types.
dat1 %>% group_by(HerdType) %>%
summarise(
"Median herd size"
, q1 = quantile(median_herd_size)[2]
, median = median(median_herd_size)
, q2. = quantile(median_herd_size)[4]
, max = max(median_herd_size))
## # A tibble: 4 x 6
## HerdType `"Median herd size"` q1 median q2. max
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Breeder Median herd size 17 30 54 904.
## 2 Dairy Median herd size 96 171 277 1836
## 3 Finisher Median herd size 21 45.5 97 1492.
## 4 Other Median herd size 15 38 84 1208
ggplot(dat1, aes(x = HerdType, y = median_herd_size, fill = HerdType)) +
geom_boxplot() +
scale_fill_viridis(discrete = TRUE) +
labs(x = "Herd Type", y = "Median herd size", fill = "Herd Type") +
theme_bw() +
facet_wrap(~year)
The number of individual land parcels associated with each farm.
dat1 %>% group_by(year) %>%
summarise(
"Number of land parcels"
, q1= quantile(n_fields)[2]
, median = median(n_fields)
, q3 = quantile(n_fields)[4]
, max = max(n_fields))
## # A tibble: 3 x 6
## year `"Number of land parcels"` q1 median q3 max
## <fct> <chr> <dbl> <dbl> <dbl> <int>
## 1 2015 Number of land parcels 14 24 39 413
## 2 2016 Number of land parcels 14 24 39 423
## 3 2017 Number of land parcels 14 24 39 444
ggplot(dat1, aes(x = as.factor(year), y = n_fields)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Year", y = "Number of land parcels") +
theme_bw()
No variation the number of land parcels at the farm level between years.
tst2 <- dat1 %>% select(HerdType, year, n_fields)
anova(glm(n_fields ~ year, data = tst2), test = "F")
## Analysis of Deviance Table
##
## Model: gaussian, link: identity
##
## Response: n_fields
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 57023 36362407
## year 2 268.73 57021 36362138 0.2107 0.81
The number of land parcel "units" associated with each farm.
dat1 %>% group_by(year) %>%
summarise(
"Number of fragments"
, q1 = quantile(n_fragments)[2]
, median = median(n_fragments)
, q3 = quantile(n_fragments)[4]
, max = max(n_fragments))
## # A tibble: 3 x 6
## year `"Number of fragments"` q1 median q3 max
## <fct> <chr> <dbl> <dbl> <dbl> <int>
## 1 2015 Number of fragments 4 6 10 80
## 2 2016 Number of fragments 4 6 10 81
## 3 2017 Number of fragments 4 6 10 80
ggplot(dat1, aes(x = as.factor(year), y = n_fragments)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Year", y = "Number of fragments") +
theme_bw()
No variation the number of fragments at the farm level between years.
tst3 <- dat1 %>% select(HerdType, year, n_fragments)
anova(glm(n_fragments ~ year, data = tst3), test = "F")
## Analysis of Deviance Table
##
## Model: gaussian, link: identity
##
## Response: n_fragments
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 57023 2194821
## year 2 159.74 57021 2194661 2.0751 0.1256
The number of land parcel "units" associated with each farm, after the application of a 5m buffer.
dat1 %>% group_by(year) %>%
summarise(
"Number of fragments (5m buffer)"
, q1 = quantile(n_fragments_5m)[2]
, median = median(n_fragments_5m)
, q3 = quantile(n_fragments_5m)[4]
, max = max(n_fragments_5m))
## # A tibble: 3 x 6
## year `"Number of fragments (5m buffer)"` q1 median q3 max
## <fct> <chr> <dbl> <dbl> <dbl> <int>
## 1 2015 Number of fragments (5m buffer) 2 3 6 39
## 2 2016 Number of fragments (5m buffer) 2 3 6 47
## 3 2017 Number of fragments (5m buffer) 2 3 6 42
ggplot(dat1, aes(x = as.factor(year), y = n_fragments_5m)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Year", y = "Number of fragments (5m buffer)") +
theme_bw()
No variation the number of fragments at the farm level (5m buffer) between years.
tst4 <- dat1 %>% select(HerdType, year, n_fragments_5m)
anova(glm(n_fragments_5m ~ year, data = tst4), test = "F")
## Analysis of Deviance Table
##
## Model: gaussian, link: identity
##
## Response: n_fragments_5m
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 57023 689708
## year 2 50.729 57021 689657 2.0972 0.1228
The total farm area in hectares.
dat1 %>% group_by(year) %>%
summarise(
"Farm area (ha)"
, q1 = quantile(total_farm_area_ha, na.rm = T)[2]
, median = median(total_farm_area_ha, na.rm = T)
, q3 = quantile(total_farm_area_ha, na.rm = T)[4]
, max = max(total_farm_area_ha, na.rm = T))
## # A tibble: 3 x 6
## year `"Farm area (ha)"` q1 median q3 max
## <fct> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2015 Farm area (ha) 16.5 31.2 59 1403
## 2 2016 Farm area (ha) 16.4 31.1 59.2 3241
## 3 2017 Farm area (ha) 16.4 31.4 59.1 3252.
ggplot(dat1, aes(x = as.factor(year), y = total_farm_area_ha)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Year", y = "Farm area (ha)") +
theme_bw()
No variation in farm area at the farm level between years.
tst5 <- dat1 %>% select(HerdType, year, total_farm_area_ha)
anova(glm(total_farm_area_ha ~ year, data = tst5), test = "F")
## Analysis of Deviance Table
##
## Model: gaussian, link: identity
##
## Response: total_farm_area_ha
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 57023 224573593
## year 2 3521.5 57021 224570071 0.4471 0.6395
The median distance (in km) between the centroids of all land parcels associated with a cattle farm.
dat1 %>% group_by(year) %>%
summarise(
"Median distance between fragments (km)"
, q1 = quantile(median_distance_fragments_km, na.rm = T)[2]
, median = median(median_distance_fragments_km, na.rm = T)
, q3 = quantile(median_distance_fragments_km, na.rm = T)[4]
, max = max(median_distance_fragments_km, na.rm = T))
## # A tibble: 3 x 6
## year `"Median distance between fragments (km)"` q1 median q3 max
## <fct> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2015 Median distance between fragments (km) 0.528 1.39 3.04 120.
## 2 2016 Median distance between fragments (km) 0.518 1.36 3.03 120.
## 3 2017 Median distance between fragments (km) 0.518 1.38 3.07 120.
ggplot(dat1, aes(x = as.factor(year), y = median_distance_fragments_km)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Year", y = "Median distance between fragments (km)") +
theme_bw()
No variation in median distance between fragments at the farm level between years.
tst6 <- dat1 %>% select(HerdType, year, median_distance_fragments_km)
anova(glm(median_distance_fragments_km ~ year, data = tst6), test = "F")
## Analysis of Deviance Table
##
## Model: gaussian, link: identity
##
## Response: median_distance_fragments_km
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 57023 1243464
## year 2 22.497 57021 1243442 0.5158 0.597
The max distance (in km) between the centroids of the two most distal land parcels associated with a cattle farm.
dat1 %>% #group_by(year) %>%
summarise(
"Max distance between fragments (km)"
, q1 = quantile(max_distance_fragments_km, na.rm = T)[2]
, median = median(max_distance_fragments_km, na.rm = T)
, q3 = quantile(max_distance_fragments_km, na.rm = T)[4]
, max = max(max_distance_fragments_km, na.rm = T))
## "Max distance between fragments (km)" q1 median q3 max
## 1 Max distance between fragments (km) 0.639 2.321 6.088 152.015
ggplot(dat1, aes(x = as.factor(year), y = max_distance_fragments_km)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Year", y = "Max distance between fragments (km)") +
theme_bw()
No variation in max distance between fragments at the farm level between years.
tst7 <- dat1 %>% select(HerdType, year, max_distance_fragments_km)
anova(glm(max_distance_fragments_km ~ year, data = tst7), test = "F")
## Analysis of Deviance Table
##
## Model: gaussian, link: identity
##
## Response: max_distance_fragments_km
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 57023 4198596
## year 2 142.5 57021 4198453 0.9677 0.38
The distances (in km) between the farm homestead (registered address) and the centroid of all land parcels, the centroid of the biggest field and the centroid of the biggest fragment.
dat1$homestead_farmcentroid_dist_km <- round(as.numeric(dat1$homestead_farmcentroid_dist_km),3)
dat1$homestead_field_dist_km <- round(as.numeric(dat1$homestead_field_dist_km),3)
dat1$homestead_frag_dist_km <- round(as.numeric(dat1$homestead_frag_dist_km),3)
dat1 %>%
summarise(
"Distance: homestead & farm centroid (km)"
, q1 = quantile(homestead_farmcentroid_dist_km, na.rm = T)[2]
, median = median(homestead_farmcentroid_dist_km, na.rm = T)
, q3 = quantile(homestead_farmcentroid_dist_km, na.rm = T)[4]
, max = max(homestead_farmcentroid_dist_km, na.rm = T))
## "Distance: homestead & farm centroid (km)" q1 median q3 max
## 1 Distance: homestead & farm centroid (km) 0.273 0.667 1.705 112.756
dat1 %>%
summarise(
"Distance: homestead & largest parcel (km)"
, q1 = quantile(homestead_field_dist_km, na.rm = T)[2]
, median= median(homestead_field_dist_km, na.rm = T)
, q3 = quantile(homestead_field_dist_km, na.rm = T)[4]
, max = max(homestead_field_dist_km, na.rm = T))
## "Distance: homestead & largest parcel (km)" q1 median q3 max
## 1 Distance: homestead & largest parcel (km) 0.31 0.686 2.068 127.47
dat1 %>%
summarise(
"Distance: homestead & largest fragment (km)"
, q1 = quantile(homestead_frag_dist_km, na.rm = T)[2]
, median = median(homestead_frag_dist_km, na.rm = T)
, q3 = quantile(homestead_frag_dist_km, na.rm = T)[4]
, max = max(homestead_frag_dist_km, na.rm = T))
## "Distance: homestead & largest fragment (km)" q1 median q3 max
## 1 Distance: homestead & largest fragment (km) 0.223 0.4695 1.58325 127.54
distdf <- dat1 %>% select (HerdId, homestead_farmcentroid_dist_km, homestead_field_dist_km, homestead_frag_dist_km)
distdf$HerdId <- as.factor(distdf$HerdId)
distdf <- gather(distdf, condition, measurement, homestead_farmcentroid_dist_km:homestead_frag_dist_km, factor_key=TRUE)
names(distdf) <- c("HerdId", "DistanceType", "Distance")
ggplot(distdf, aes(x = DistanceType, y = Distance)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Distance measure", y = "Distance (km)") +
theme_bw()
The number of external land parcels connected to the index farm, where both internal and external land parcels are suitable for cattle grazing (LCM 4 or 5).
dat1 %>% group_by(year) %>%
summarise(
"Number of contiguous grazing fields"
, q1 = quantile(n_touching_cattle_fields_grazing , na.rm = T)[2]
, median = median(n_touching_cattle_fields_grazing, na.rm = T)
, q3 = quantile(n_touching_cattle_fields_grazing, na.rm = T)[4]
, max = max(n_touching_cattle_fields_grazing, na.rm = T))
## # A tibble: 3 x 6
## year `"Number of contiguous grazing fields"` q1 median q3 max
## <fct> <chr> <dbl> <dbl> <dbl> <int>
## 1 2015 Number of contiguous grazing fields 7 13 21 131
## 2 2016 Number of contiguous grazing fields 7 13 22 167
## 3 2017 Number of contiguous grazing fields 7 12 21 145
ggplot(dat1, aes(x = as.factor(year), y = n_touching_cattle_fields_grazing)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Year", y = "Number of contiguous grazing fields") +
theme_bw()
Some variation in the number of contiguous grazing fields at the farm level between years, but given the number of tests we have performed, this could be spurious.
tst8 <- dat1 %>% select(HerdType, year, n_touching_cattle_fields_grazing)
anova(glm(n_touching_cattle_fields_grazing ~ year, data = tst8), test = "F")
## Analysis of Deviance Table
##
## Model: gaussian, link: identity
##
## Response: n_touching_cattle_fields_grazing
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 57023 9757921
## year 2 8233 57021 9749688 24.075 3.537e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
The %age area of the farm classified as LCM4 or LCM5 (suitable for grazing)
dat1$LCM4_5 <- round(((dat1$totLCM4.x + dat1$totLCM5.x)/dat1$totSQ)*100,2)
dat1 %>% #group_by(year) %>%
summarise(
"LCM 4 and LCM 5 land on farm (%)"
, q1= quantile(LCM4_5 , na.rm = T)[2]
, median = median(LCM4_5, na.rm = T)
, q3 = quantile(LCM4_5, na.rm = T)[4]
, max. = max(LCM4_5, na.rm = T))
## "LCM 4 and LCM 5 land on farm (%)" q1 median q3 max.
## 1 LCM 4 and LCM 5 land on farm (%) 70.01 87.8 96.17 100
ggplot(dat1, aes(x = as.factor(year), y = LCM4_5)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Year", y = "LCM 4 and LCM 5 land on farm (%)") +
theme_bw()
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
No variation in LCM 4 and LCM 5 land on farm level between years.
tst10 <- dat1 %>% select(HerdType, year, LCM4_5)
anova(glm(LCM4_5 ~ year, data = tst10), test = "F")
## Analysis of Deviance Table
##
## Model: gaussian, link: identity
##
## Response: LCM4_5
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 57020 29975059
## year 2 903.36 57018 29974155 0.8592 0.4235
To undertake these analysis, relevent continuous variables were categorised as follows.
This categorical variable is based directly off n_fragments_5m.
1. 1 fragment: No fragmentation 2. 2-4 fragments: Little fragmentation 3. 5-7 fragments: Medium fragmentation 4. 8-10 fragments: High fragmentation 5. 11+ fragments: Very high fragmentation
dat1$fragment_category <- factor(dat1$fragment_category
, levels = c("Not_fragmented"
, "Little_fragmentation"
, "Medium_fragmentation"
, "High_fragmentation" , "Very_High_fragmentation"))
TABLE 1
dat1 %>% dplyr::select(year,fragment_category) %>%
tbl_summary(by = year, label = fragment_category ~ "Fragment Category")%>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Year**")
Characteristic | Year | ||
---|---|---|---|
2015, N = 19,0081 | 2016, N = 19,0081 | 2017, N = 19,0081 | |
Fragment Category | |||
Not_fragmented | 3,134 (16%) | 3,178 (17%) | 3,164 (17%) |
Little_fragmentation | 9,119 (48%) | 9,041 (48%) | 9,004 (47%) |
Medium_fragmentation | 4,137 (22%) | 4,114 (22%) | 4,102 (22%) |
High_fragmentation | 1,546 (8.1%) | 1,574 (8.3%) | 1,571 (8.3%) |
Very_High_fragmentation | 1,072 (5.6%) | 1,101 (5.8%) | 1,167 (6.1%) |
1
Statistics presented: n (%)
|
This categorical variable is based directly off the quantiles for total_farm_area_ha (across all years). Farms with total areas < 16.4ha (25th percentile) were classed as "small", farms >= 16.4ha <= 59.1ha (75th percentile) were classed as "medium" and farms larger than 59.1km2 were classed as "large".
dat1$farm_size_category <- factor(dat1$farm_size_category
, levels = c("Small"
, "Medium"
, "Large"))
TABLE 2
dat1 %>% dplyr::select(year, farm_size_category) %>%
tbl_summary(by = year, label = farm_size_category ~ "Farm area category") %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Year**")
Characteristic | Year | ||
---|---|---|---|
2015, N = 19,0081 | 2016, N = 19,0081 | 2017, N = 19,0081 | |
Farm area category | |||
Small | 4,690 (25%) | 4,749 (25%) | 4,725 (25%) |
Medium | 9,575 (50%) | 9,501 (50%) | 9,532 (50%) |
Large | 4,743 (25%) | 4,758 (25%) | 4,751 (25%) |
1
Statistics presented: n (%)
|
TABLE 3
dat1 %>% dplyr::select(fragment_category, farm_size_category) %>%
tbl_summary(by = farm_size_category, label = fragment_category ~ "Fragmentation level", percent = "row") %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Farm area category**")
Characteristic | Farm area category | ||
---|---|---|---|
Small, N = 14,1641 | Medium, N = 28,6081 | Large, N = 14,2521 | |
Fragmentation level | |||
Not_fragmented | 4,995 (53%) | 4,076 (43%) | 405 (4.3%) |
Little_fragmentation | 7,895 (29%) | 15,151 (56%) | 4,118 (15%) |
Medium_fragmentation | 1,115 (9.0%) | 6,826 (55%) | 4,412 (36%) |
High_fragmentation | 142 (3.0%) | 1,942 (41%) | 2,607 (56%) |
Very_High_fragmentation | 17 (0.5%) | 613 (18%) | 2,710 (81%) |
1
Statistics presented: n (%)
|
frag_farm_cat_tbl_1 <- table(dat1$fragment_category, dat1$farm_size_category)
lbl_test(frag_farm_cat_tbl_1)
##
## Asymptotic Linear-by-Linear Association Test
##
## data: Var2 (ordered) by
## Var1 (Not_fragmented < Little_fragmentation < Medium_fragmentation < High_fragmentation < Very_High_fragmentation)
## Z = 118.73, p-value < 2.2e-16
## alternative hypothesis: two.sided
This categorical variable is based directly off the quantiles for median_distance_fragments_km (across all years). Farms with fragments less than 0.52km apart on average (25th percentile) were classed as "low dispersal"; farms with fragments >= 0.52km and < 3.05km apart were classed as "medium dispersal", and farms with fragments > 3.05km apart (75th percentile) were classed as "high dispersal".
dat1$median_distance_fragments_category <- factor(dat1$median_distance_fragments_category
, levels = c("Low"
, "Medium"
, "High"))
TABLE 4
dat1 %>% dplyr::select(year, median_distance_fragments_category) %>%
tbl_summary(by = year, label = median_distance_fragments_category ~ "Fragment dispersal category") %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Year**")
Characteristic | Year | ||
---|---|---|---|
2015, N = 19,0081 | 2016, N = 19,0081 | 2017, N = 19,0081 | |
Fragment dispersal category | |||
Low | 4,707 (25%) | 4,774 (25%) | 4,770 (25%) |
Medium | 9,559 (50%) | 9,507 (50%) | 9,452 (50%) |
High | 4,742 (25%) | 4,727 (25%) | 4,786 (25%) |
1
Statistics presented: n (%)
|
use the linear-by-linear association test to assess whether there is a relationship between the two ordered variables fragmentation_level and fragment_dispersal_category.
TABLE 5
dat1 %>% dplyr::select(fragment_category, median_distance_fragments_category) %>%
tbl_summary(by = median_distance_fragments_category, label = fragment_category ~ "Fragmentation level", percent = "row") %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Fragment dispersal category**")
Characteristic | Fragment dispersal category | ||
---|---|---|---|
Low, N = 14,2511 | Medium, N = 28,5181 | High, N = 14,2551 | |
Fragmentation level | |||
Not_fragmented | 9,476 (100%) | 0 (0%) | 0 (0%) |
Little_fragmentation | 4,257 (16%) | 15,999 (59%) | 6,908 (25%) |
Medium_fragmentation | 469 (3.8%) | 8,031 (65%) | 3,853 (31%) |
High_fragmentation | 33 (0.7%) | 2,847 (61%) | 1,811 (39%) |
Very_High_fragmentation | 16 (0.5%) | 1,641 (49%) | 1,683 (50%) |
1
Statistics presented: n (%)
|
frag_farm_cat_tbl_2 <- table(dat1$fragment_category, dat1$median_distance_fragments_category)
lbl_test(frag_farm_cat_tbl_2)
##
## Asymptotic Linear-by-Linear Association Test
##
## data: Var2 (ordered) by
## Var1 (Not_fragmented < Little_fragmentation < Medium_fragmentation < High_fragmentation < Very_High_fragmentation)
## Z = 121.69, p-value < 2.2e-16
## alternative hypothesis: two.sided
There was a strong, positive correlation between the median distance between all land parcels, and maximum distance between the most distal land parcels.
cor.test(dat1$median_distance_fragments_km, dat1$max_distance_fragments_km, method = "spearman")
##
## Spearman's rank correlation rho
##
## data: dat1$median_distance_fragments_km and dat1$max_distance_fragments_km
## S = 2.0265e+12, p-value < 2.2e-16
## alternative hypothesis: true rho is not equal to 0
## sample estimates:
## rho
## 0.9344263
This categorical variable is based directly off the quantiles for total_shared_boundary_grazing_km. Farms with less than 1.48km of shared boundary (25th percentile) were classed as "low contact"; farms a shared boundary >= 1.48km and < 4.94km were classed as "medium contact", and farms with shared boundaries of > 4.94km apart (75th percentile) were classed as "high contact".
dat1$contact_category <- factor(dat1$contact_category
, levels = c("Low"
, "Medium"
, "High"))
TABLE 6
dat1 %>% dplyr::select(year, contact_category) %>%
tbl_summary(by = year, label = contact_category ~ "Contact category", percent = "row") %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Year**")
Characteristic | Year | ||
---|---|---|---|
2015, N = 19,0081 | 2016, N = 19,0081 | 2017, N = 19,0081 | |
Contact category | |||
Low | 4,836 (34%) | 4,552 (32%) | 4,899 (34%) |
Medium | 9,522 (33%) | 9,431 (33%) | 9,491 (33%) |
High | 4,650 (33%) | 5,025 (35%) | 4,618 (32%) |
1
Statistics presented: n (%)
|
use the linear-by-linear association test to assess whether there is a relationship between the two ordered variables fragmentation_level and contact_category.
TABLE 7
dat1 %>% dplyr::select(fragment_category, contact_category) %>%
tbl_summary(by = contact_category, label = fragment_category ~ "Fragmentation level", percent = "row") %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3") ~ "**Conact category**")
Characteristic | Conact category | ||
---|---|---|---|
Low, N = 14,2871 | Medium, N = 28,4441 | High, N = 14,2931 | |
Fragmentation level | |||
Not_fragmented | 5,194 (55%) | 4,112 (43%) | 170 (1.8%) |
Little_fragmentation | 7,610 (28%) | 15,910 (59%) | 3,644 (13%) |
Medium_fragmentation | 1,236 (10%) | 6,326 (51%) | 4,791 (39%) |
High_fragmentation | 185 (3.9%) | 1,559 (33%) | 2,947 (63%) |
Very_High_fragmentation | 62 (1.9%) | 537 (16%) | 2,741 (82%) |
1
Statistics presented: n (%)
|
frag_farm_cat_tbl_3 <- table(dat1$fragment_category, dat1$contact_category)
lbl_test(frag_farm_cat_tbl_3)
##
## Asymptotic Linear-by-Linear Association Test
##
## data: Var2 (ordered) by
## Var1 (Not_fragmented < Little_fragmentation < Medium_fragmentation < High_fragmentation < Very_High_fragmentation)
## Z = 125.63, p-value < 2.2e-16
## alternative hypothesis: two.sided
TABLE 8
dat1 %>% dplyr::select(HerdType, fragment_category) %>%
tbl_summary(by = fragment_category, percent = "row") %>%
add_n() %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**")
Characteristic | N | Herd type | Very_High_fragmentation, N = 3,3401 | |||
---|---|---|---|---|---|---|
Not_fragmented, N = 9,4761 | Little_fragmentation, N = 27,1641 | Medium_fragmentation, N = 12,3531 | High_fragmentation, N = 4,6911 | |||
HerdType | 57,024 | |||||
Breeder | 5,352 (19%) | 14,428 (51%) | 5,705 (20%) | 1,844 (6.5%) | 1,086 (3.8%) | |
Dairy | 642 (7.9%) | 2,966 (36%) | 2,359 (29%) | 1,196 (15%) | 965 (12%) | |
Finisher | 983 (16%) | 2,878 (48%) | 1,297 (22%) | 475 (7.9%) | 343 (5.7%) | |
Other | 2,499 (17%) | 6,892 (48%) | 2,992 (21%) | 1,176 (8.1%) | 946 (6.5%) | |
1
Statistics presented: n (%)
|
TABLE 9
dat1 %>% dplyr::select(HerdType, total_farm_area_ha) %>%
tbl_summary(by = HerdType
, type = all_continuous() ~ "continuous2"
, label = total_farm_area_ha ~ "Farm area (ha)"
, statistic = all_continuous() ~ c("{median} ({p25}, {p75})",
"{min}, {max}")) %>%
add_n() %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
add_p()
Characteristic | N | Herd type | p-value1 | |||
---|---|---|---|---|---|---|
Breeder, N = 28,415 | Dairy, N = 8,128 | Finisher, N = 5,976 | Other, N = 14,505 | |||
Farm area (ha) | 57,024 | <0.001 | ||||
Median (IQR) | 26 (15, 46) | 65 (42, 100) | 32 (17, 58) | 28 (14, 57) | ||
Range | 0, 1,418 | 2, 1,973 | 2, 1,007 | 0, 3,252 | ||
1
Statistical tests performed: Kruskal-Wallis test
|
kruskal.test(total_farm_area_ha ~ HerdType, data = dat1)
##
## Kruskal-Wallis rank sum test
##
## data: total_farm_area_ha by HerdType
## Kruskal-Wallis chi-squared = 6620.6, df = 3, p-value < 2.2e-16
ggplot(dat1, aes(x = HerdType, y = total_farm_area_ha)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Herd Type", y = "Total farm area (ha)") +
theme_bw()
#### 3.2.3 Production type and land parcels
TABLE 10
dat1 %>% dplyr::select(HerdType, n_fields) %>%
tbl_summary(by = HerdType
, type = all_continuous() ~ "continuous2"
, label = n_fields ~ "number of land parcels"
, statistic = all_continuous() ~ c("{median} ({p25}, {p75})",
"{min}, {max}")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
add_p()
Characteristic | Herd type | p-value1 | |||
---|---|---|---|---|---|
Breeder, N = 28,415 | Dairy, N = 8,128 | Finisher, N = 5,976 | Other, N = 14,505 | ||
number of land parcels | <0.001 | ||||
Median (IQR) | 22 (13, 35) | 39 (27, 56) | 21 (13, 35) | 22 (12, 37) | |
Range | 0, 423 | 2, 379 | 1, 219 | 0, 444 | |
1
Statistical tests performed: Kruskal-Wallis test
|
Only minor changes can be observed WITHIN herd types through time
dat1 %>% group_by(HerdType, year) %>%
summarise(
"Number of land parcels"
, total = n()
, q1 = quantile(n_fields)[2]
, median = median(n_fields)
, q3 = quantile(n_fields)[4]
, max = max(n_fields))
## # A tibble: 12 x 8
## # Groups: HerdType [4]
## HerdType year `"Number of land parcels"` total q1 median q3 max
## <chr> <fct> <chr> <int> <dbl> <dbl> <dbl> <int>
## 1 Breeder 2015 Number of land parcels 9464 13 22 35 413
## 2 Breeder 2016 Number of land parcels 9565 13 22 35 423
## 3 Breeder 2017 Number of land parcels 9386 13 22 35 377
## 4 Dairy 2015 Number of land parcels 2740 27 39 56 366
## 5 Dairy 2016 Number of land parcels 2697 27 39 56 379
## 6 Dairy 2017 Number of land parcels 2691 27 40 56 377
## 7 Finisher 2015 Number of land parcels 1970 13 21 34 194
## 8 Finisher 2016 Number of land parcels 1963 13 21 34.5 188
## 9 Finisher 2017 Number of land parcels 2043 13 21 35 219
## 10 Other 2015 Number of land parcels 4834 12 21 37 366
## 11 Other 2016 Number of land parcels 4783 12 21 37 379
## 12 Other 2017 Number of land parcels 4888 12 22 37 444
kruskal.test(n_fields ~ HerdType, data = dat1)
##
## Kruskal-Wallis rank sum test
##
## data: n_fields by HerdType
## Kruskal-Wallis chi-squared = 4537.1, df = 3, p-value < 2.2e-16
ggplot(dat1, aes(x = HerdType, y = n_fields)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Herd Type", y = "Number of land parcels") +
theme_bw()
TABLE 11
dat1 %>% dplyr::select(HerdType, n_fragments_5m) %>%
tbl_summary(by = HerdType
, type = all_continuous() ~ "continuous2"
, label = n_fragments_5m ~ "Number of fragments (5m buffer)"
, statistic = all_continuous() ~ c("{median} ({p25}, {p75})",
"{min}, {max}")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
add_p()
Characteristic | Herd type | p-value1 | |||
---|---|---|---|---|---|
Breeder, N = 28,415 | Dairy, N = 8,128 | Finisher, N = 5,976 | Other, N = 14,505 | ||
Number of fragments (5m buffer) | <0.001 | ||||
Median (IQR) | 3 (2, 5) | 5 (3, 8) | 3 (2, 6) | 3 (2, 6) | |
Range | 0, 35 | 1, 33 | 1, 32 | 0, 47 | |
1
Statistical tests performed: Kruskal-Wallis test
|
kruskal.test(n_fragments_5m ~ HerdType, data = dat1)
##
## Kruskal-Wallis rank sum test
##
## data: n_fragments_5m by HerdType
## Kruskal-Wallis chi-squared = 2117.2, df = 3, p-value < 2.2e-16
ggplot(dat1, aes(x = HerdType, y = n_fragments_5m)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Herd Type", y = "Number of fragments (5m buffer)") +
theme_bw()
TABLE 12
dat1 %>% dplyr::select(HerdType, median_distance_fragments_km) %>%
tbl_summary(by = HerdType
, type = all_continuous() ~ "continuous2"
, label = median_distance_fragments_km ~ "Median distance between fragments (km)"
, statistic = all_continuous() ~ c("{median} ({p25}, {p75})",
"{min}, {max}")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
add_p()
Characteristic | Herd type | p-value1 | |||
---|---|---|---|---|---|
Breeder, N = 28,415 | Dairy, N = 8,128 | Finisher, N = 5,976 | Other, N = 14,505 | ||
Median distance between fragments (km) | <0.001 | ||||
Median (IQR) | 1.21 (0.43, 2.95) | 1.88 (1.04, 3.21) | 1.34 (0.50, 3.04) | 1.36 (0.47, 3.05) | |
Range | 0.00, 119.53 | 0.00, 97.09 | 0.00, 82.97 | 0.00, 105.15 | |
1
Statistical tests performed: Kruskal-Wallis test
|
kruskal.test(median_distance_fragments_km ~ HerdType, data = dat1)
##
## Kruskal-Wallis rank sum test
##
## data: median_distance_fragments_km by HerdType
## Kruskal-Wallis chi-squared = 699.92, df = 3, p-value < 2.2e-16
As shown previously, there is no variation in the median distance between parcels through time when considering all herd types together
anova(glm(median_distance_fragments_km ~ year, data = tst6), test = "F")
## Analysis of Deviance Table
##
## Model: gaussian, link: identity
##
## Response: median_distance_fragments_km
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev F Pr(>F)
## NULL 57023 1243464
## year 2 22.497 57021 1243442 0.5158 0.597
Only minor changes can be observed WITHIN herd types
dat1 %>% group_by(HerdType, year) %>%
summarise(
"Median distance between fragments (km)"
, total = n()
, q1 = quantile(median_distance_fragments_km)[2]
, median = median(median_distance_fragments_km)
, q3 = quantile(median_distance_fragments_km)[4]
, max = max(median_distance_fragments_km))
## # A tibble: 12 x 8
## # Groups: HerdType [4]
## HerdType year `"Median distance between frag~ total q1 median q3 max
## <chr> <fct> <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 Breeder 2015 Median distance between fragme~ 9464 0.438 1.23 2.96 120.
## 2 Breeder 2016 Median distance between fragme~ 9565 0.427 1.21 2.93 120.
## 3 Breeder 2017 Median distance between fragme~ 9386 0.425 1.2 2.96 120.
## 4 Dairy 2015 Median distance between fragme~ 2740 1.03 1.84 3.18 73.2
## 5 Dairy 2016 Median distance between fragme~ 2697 1.03 1.88 3.19 52.3
## 6 Dairy 2017 Median distance between fragme~ 2691 1.05 1.91 3.22 97.1
## 7 Finisher 2015 Median distance between fragme~ 1970 0.512 1.34 3.04 82.6
## 8 Finisher 2016 Median distance between fragme~ 1963 0.509 1.34 3.01 83.0
## 9 Finisher 2017 Median distance between fragme~ 2043 0.489 1.33 3.07 69.7
## 10 Other 2015 Median distance between fragme~ 4834 0.471 1.35 3.04 87.5
## 11 Other 2016 Median distance between fragme~ 4783 0.467 1.32 3.04 99.8
## 12 Other 2017 Median distance between fragme~ 4888 0.473 1.40 3.07 105.
ggplot(dat1, aes(x = HerdType, y = median_distance_fragments_km)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Herd Type", y = "Median distance between fragments (km)") +
theme_bw()
TABLE 13
dat1 %>% dplyr::select(HerdType, max_distance_fragments_km) %>%
tbl_summary(by = HerdType
, type = all_continuous() ~ "continuous2"
, label = max_distance_fragments_km ~ "Maximum distance between fragments (km)"
, statistic = all_continuous() ~ c("{median} ({p25}, {p75})",
"{min}, {max}")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
add_p()
Characteristic | Herd type | p-value1 | |||
---|---|---|---|---|---|
Breeder, N = 28,415 | Dairy, N = 8,128 | Finisher, N = 5,976 | Other, N = 14,505 | ||
Maximum distance between fragments (km) | <0.001 | ||||
Median (IQR) | 2.0 (0.5, 5.5) | 3.8 (1.7, 7.4) | 2.2 (0.6, 6.0) | 2.2 (0.6, 6.1) | |
Range | 0.0, 129.1 | 0.0, 117.2 | 0.0, 152.0 | 0.0, 117.5 | |
1
Statistical tests performed: Kruskal-Wallis test
|
kruskal.test(max_distance_fragments_km ~ HerdType, data = dat1)
##
## Kruskal-Wallis rank sum test
##
## data: max_distance_fragments_km by HerdType
## Kruskal-Wallis chi-squared = 1128.5, df = 3, p-value < 2.2e-16
ggplot(dat1, aes(x = HerdType, y = max_distance_fragments_km)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Herd Type", y = "Maximum distance between fragments (km)") +
theme_bw()
TABLE 14
dat1 %>% dplyr::select(HerdType, homestead_frag_dist_km) %>%
tbl_summary(by = HerdType
, type = all_continuous() ~ "continuous2"
, label = homestead_frag_dist_km ~ "Distance to biggest fragment (km)"
, statistic = all_continuous() ~ c("{median} ({p25}, {p75})",
"{min}, {max}")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
add_p()
Characteristic | Herd type | p-value1 | |||
---|---|---|---|---|---|
Breeder, N = 28,415 | Dairy, N = 8,128 | Finisher, N = 5,976 | Other, N = 14,505 | ||
Distance to biggest fragment (km) | 0.002 | ||||
Median (IQR) | 0.49 (0.22, 1.71) | 0.44 (0.25, 1.11) | 0.44 (0.22, 1.70) | 0.47 (0.22, 1.64) | |
Range | 0.00, 120.83 | 0.01, 114.34 | 0.00, 127.54 | 0.00, 109.33 | |
1
Statistical tests performed: Kruskal-Wallis test
|
kruskal.test(homestead_frag_dist_km ~ HerdType, data = dat1)
##
## Kruskal-Wallis rank sum test
##
## data: homestead_frag_dist_km by HerdType
## Kruskal-Wallis chi-squared = 14.921, df = 3, p-value = 0.001885
ggplot(dat1, aes(x = HerdType, y = homestead_frag_dist_km)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Herd Type", y = "Distance to biggest fragment (km)") +
theme_bw()
TABLE 16
dat1 %>% dplyr::select(HerdType, LCM4_5) %>%
tbl_summary(by = HerdType
, type = all_continuous() ~ "continuous2"
, label = LCM4_5 ~ "LCM 4 and LCM 5 land on farm (%)"
, statistic = all_continuous() ~ c("{median} ({p25}, {p75})",
"{min}, {max}")) %>%
modify_spanning_header(c("stat_1", "stat_2", "stat_3", "stat_4") ~ "**Herd type**") %>%
add_p()
Characteristic | Herd type | p-value1 | |||
---|---|---|---|---|---|
Breeder, N = 28,415 | Dairy, N = 8,128 | Finisher, N = 5,976 | Other, N = 14,505 | ||
LCM 4 and LCM 5 land on farm (%) | <0.001 | ||||
Median (IQR) | 86 (63, 96) | 90 (79, 96) | 89 (76, 96) | 89 (73, 97) | |
Range | 0, 100 | 1, 100 | 1, 100 | 0, 100 | |
Unknown | 3 | 0 | 0 | 0 | |
1
Statistical tests performed: Kruskal-Wallis test
|
kruskal.test(LCM4_5 ~ HerdType, data = dat1)
##
## Kruskal-Wallis rank sum test
##
## data: LCM4_5 by HerdType
## Kruskal-Wallis chi-squared = 492.57, df = 3, p-value < 2.2e-16
ggplot(dat1, aes(x = HerdType, y = LCM4_5)) +
geom_violin() +
geom_boxplot(width=0.1) +
labs(x = "Herd Type", y = "LCM 4 and LCM 5 land on farm (%)") +
theme_bw()
## Warning: Removed 3 rows containing non-finite values (stat_ydensity).
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).