Ejemplo 1: Mapeo del contenido de Arcilla

Motivación

En la agricultura de precisión el mapeo de variables de suelo y rendimiento para el estudio de la variabilidad espacial es crucial. A partir de la caracterización de esta es posible definir prácticas agronómicas que permitan realizar un manejo diferenciado del lote según las características de cada sitio.

En este caso de estudio se ilustra la implementación de técnicas de interpolación clásicas (basada en kriging) y del algoritmo Random Forest, con versiones que incorporan la información espacial en el análisis, para el mapeo del contenido de arcilla del suelo en un lote agrícola.

Datos

La base de datos proviene de un lote agrícola de 90 hectáreas, ubicados al sudeste pampeano de la provincia de Buenos Aires, Argentina. En este se realizaron mediciones intensivas de conductividad eléctrica aparente (CEa) a 30 y 90 cm de profundidad (CE30 y CE90), elevación (Elev) y profundidad de tosca (Pe). La medición de la CEa se realizó con un sensor Veris 3100, que utiliza el principio de la inducción electromagnética. Los datos de CEa fueron simultáneamente georreferenciados con un DGPS con una exactitud de medición submétrica. Los datos de elevación del terreno también se midieron con un DGPS y se procesaron para obtener una precisión vertical de entre 3 y 5 cm aproximadamente. Las mediciones de Pe se realizaron utilizando un penetrómetro hidráulico (Gidding) acoplado a un DGPS. Los datos de las variables medidas intensivamente fueron organizar los datos en una grilla común a todas las capas, de manera que cada celda de la grilla cuente con la información de su ubicación espacial y cada una de las variables medidas. Para el mapeo del contenido de Arcilla se tomaron 126 muestras de suelo a 20 cm de profundidad utilizando una grilla regular de 50 x100 m. La determinación de la distribución del tamaño de partículas se realizó utilizando el método de Robinson.

Lectura base de datos

Carga paquetes y funciones

library(gstat)
library(spdep)
library(mapview)
library(tmap)
library(leaflet)
library(stars)
library(ggplot2)
library(caret)
library(randomForest)
library(nabor)
library(parallel)
library(openair)

source("src/near.obs.R")
source("src/fvalidacion_arcilla.R")

tmap_options(basemaps = c(
  'Satelital' = leaflet::providers$Esri.WorldImagery,
  'OSM' = leaflet::providers$OpenStreetMap))

datos <- st_read("data/Arcilla_Muestra.gpkg")
head(datos)

grilla <- st_read("data/Arcilla_Grilla.gpkg")
head(grilla)

## Reading layer `Arcilla_Muestra' from data source 
##   `C:\Users\Pablo\OneDrive\Documentos\Estadistica\Cursos Dictados\GAB2022_AnalisisInSitu\data\Arcilla_Muestra.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 126 features and 5 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 311980.9 ymin: 5800258 xmax: 313032.7 ymax: 5801350
## Projected CRS: WGS 84 / UTM zone 21S

Arcilla	CE30	CE90	Elev	Pe	geom
32.1220	19.78715	23.19669	161.3124	-75.28120	POINT (312593.6 5801350)
32.6840	21.37011	26.41949	161.5158	-80.60147	POINT (312666.7 5801282)
33.8110	24.46915	24.38947	161.8116	-81.33350	POINT (312739.6 5801213)
34.3246	24.17774	26.22092	161.7234	-57.99937	POINT (312812.8 5801145)
33.2780	24.63139	24.54500	161.2083	-48.60929	POINT (312886.5 5801076)
32.5577	23.48269	27.69627	160.7578	-69.24461	POINT (312959.8 5801007)

## Reading layer `Arcilla_Grilla' from data source 
##   `C:\Users\Pablo\OneDrive\Documentos\Estadistica\Cursos Dictados\GAB2022_AnalisisInSitu\data\Arcilla_Grilla.gpkg' 
##   using driver `GPKG'
## Simple feature collection with 5982 features and 4 fields
## Geometry type: POINT
## Dimension:     XY
## Bounding box:  xmin: 311962.8 ymin: 5800234 xmax: 313052.8 ymax: 5801364
## Projected CRS: WGS 84 / UTM zone 21S

CE30	CE90	Elev	Pe	geom
25.80810	28.50741	160.4142	-78.07533	POINT (312432.8 5800234)
26.19117	28.14623	160.4164	-77.21150	POINT (312422.8 5800244)
25.26266	27.95664	160.4286	-78.65952	POINT (312432.8 5800244)
26.28254	27.56873	160.4184	-75.93873	POINT (312412.8 5800254)
25.68631	27.48603	160.4275	-76.89577	POINT (312422.8 5800254)
24.82347	27.26781	160.4379	-78.04122	POINT (312432.8 5800254)

Visualización espacial de los datos

tmap_mode('view')

tm_shape(datos) +
  tm_dots(col = 'Arcilla')

tm_shape(grilla) +
  tm_dots(col = 'Pe')

Kriging Ordinario

Ajuste de semivariogramas

semiva_exp <- variogram(Arcilla ~ 1 , datos)

semiva_teorico <-
  fit.variogram(semiva_exp , vgm(c("Exp", "Sph", "Gau")))

vgLine <-
  cbind(variogramLine(semiva_teorico, maxdist = max(semiva_exp$dist)), id =
          "Semivariograma  Teórico")

ggplot(semiva_exp, aes(x = dist, y = gamma, color = id)) +
  geom_line(data = vgLine) +
  geom_point() +
  labs(title = "Semivariograma experimental y teorico ajustado")  +
  xlab("Distancia") +
  ylab("Semivarianza")  +
  scale_color_discrete(name = "Semivariograma",
                       labels = c("Teórico", "Experimental"))

Interpolación Kriging

kriging <-
  krige(Arcilla ~ 1,
        datos,
        grilla,
        model = semiva_teorico,
        nmax = 25)

grilla$Kriging <- kriging$var1.pred

## [using ordinary kriging]

Mapa predicción KO

grilla_rast <-
  st_rasterize(grilla, dx = 10, dy = 10)

tmap_mode('view')
mapa_prediccion_Kriging <-
  tm_shape(grilla_rast,
           name = "Kriging") +
  tm_raster(
    col = "Kriging",
    title = "Arcilla (%) Kriging",
    style = "fixed",
    palette = "YlOrBr",
    contrast = c(0.1, 1),
    breaks = c(30, 31, 32, 33, 34, 35, 36, 37)
  ) +
  tm_layout(legend.format = list(
    scientific = TRUE,
    format = "f",
    digits = 0
  ))
mapa_prediccion_Kriging

Random Forest Kriging

Ajuste del RF

fitControl <- trainControl(method = "CV", number = 10)

set.seed(7)
train_rf <- train(
  Arcilla ~ .,
  data = st_drop_geometry(datos) ,
  method = "rf",
  trControl = fitControl
)
train_rf

## Random Forest 
## 
## 126 samples
##   4 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 114, 114, 114, 113, 113, 113, ... 
## Resampling results across tuning parameters:
## 
##   mtry  RMSE      Rsquared   MAE     
##   2     1.421077  0.2506114  1.082240
##   3     1.419343  0.2524498  1.079988
##   4     1.417601  0.2562208  1.088827
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 4.

Incorporación de los residuos del RF a la base de datos

datos$residuosRF <-
  datos$Arcilla - predict(train_rf, newdata = datos)

Ajuste de semivariograma experimetal y teorico a los residuos del RF

semiva_exp_residuos <- variogram(residuosRF ~ 1 , datos)

semiva_teorico_residuos <-
  fit.variogram(semiva_exp_residuos , vgm(c("Exp", "Sph", "Gau")))
plot(semiva_exp_residuos , semiva_teorico_residuos)

Kriging sobre residuos del RF

kgresRF <-
  krige(residuosRF ~ 1,
        datos,
        grilla,
        model = semiva_teorico_residuos,
        nmax = 25)

## [using ordinary kriging]

Prediccion RF y RF-KO

grilla$RF <-
  predict(train_rf, newdata = grilla)

grilla$RF_KO <-
  predict(train_rf, newdata = grilla) + kgresRF$var1.pred

Mapa predicción RF

grilla_rast <-
  st_rasterize(grilla, dx = 10, dy = 10)

mapa_prediccionRF <-
  tm_shape(grilla_rast,
           name = "RF") +
  tm_raster(
    col = "RF",
    title = "Arcilla (%) RF",
    style = "fixed",
    palette = "YlOrBr",
    contrast = c(0.1, 1),
    breaks = c(30, 31, 32, 33, 34, 35, 36, 37)
  ) +
  tm_layout(legend.format = list(
    scientific = TRUE,
    format = "f",
    digits = 0
  ))

mapa_prediccionRF

Mapa predicción RF-KO

grilla_rast <-
  st_rasterize(grilla, dx = 10, dy = 10)

mapa_prediccionRF_KO <-
  tm_shape(grilla_rast,
           name = "RF-KO") +
  tm_raster(
    col = "RF_KO",
    title = "Arcilla (%) RF-KO",
    style = "fixed",
    palette = "YlOrBr",
    contrast = c(0.1, 1),
    breaks = c(30, 31, 32, 33, 34, 35, 36, 37)
  ) +
  tm_layout(legend.format = list(
    scientific = TRUE,
    format = "f",
    digits = 0
  ))

mapa_prediccionRF_KO

Interpolación Espacial Random Forest (RFSI)

Generación de covariables basadas en distancia y valores vecinos

nn_datos <- near.obs(
  locations = as_Spatial(datos),
  observations = as_Spatial(datos),
  zcol = "Arcilla",
  n.obs = 4,
  idw = T
)
datos <- cbind(datos[, -7], nn_datos)

Ajuste del RF

set.seed(7)
train_rfsi <- train(
  Arcilla ~ .,
  data = st_drop_geometry(datos) ,
  method = "rf",
  trControl = fitControl,
  importance = T
  
)
train_rfsi

## Random Forest 
## 
## 126 samples
##  13 predictor
## 
## No pre-processing
## Resampling: Cross-Validated (10 fold) 
## Summary of sample sizes: 114, 114, 114, 113, 113, 113, ... 
## Resampling results across tuning parameters:
## 
##   mtry  RMSE       Rsquared   MAE      
##    2    0.3111815  0.9815889  0.2514032
##    7    0.2158811  0.9853193  0.1725439
##   13    0.2286946  0.9831749  0.1818921
## 
## RMSE was used to select the optimal model using the smallest value.
## The final value used for the model was mtry = 7.

Importancia de variables regresoras

importancia <- as.data.frame(importance(train_rfsi$finalModel))
importancia$Variable <- rownames(importancia)

ggplot(data = importancia, aes(
  x = reorder(Variable, `%IncMSE`),
  y = `%IncMSE`,
  fill = `%IncMSE`
)) +
  labs(x = "Variable", title = "Incremento de MSE (%)") +
  geom_col() +
  coord_flip() +
  theme_bw() +
  theme(legend.position = "bottom")

Calculo de los residuos del RFSI y ajuste de semivariograma experimetal

datos$residuosRFSI <-
  datos$Arcilla - predict(train_rfsi, newdata = datos)

semiva_exp_residuos_rfsi <- variogram(residuosRFSI ~ 1 , datos)
plot(semiva_exp_residuos_rfsi)

Predicción RFSI

grilla_nn <-
  near.obs(
    locations = as_Spatial(grilla),
    observations = as_Spatial(datos),
    zcol = "Arcilla",
    n.obs = 4,
    idw = T
  )
grilla <- cbind(grilla, grilla_nn)

grilla$RFSI <-
  predict(train_rfsi, newdata = grilla)

Mapa predicción RFSI

grilla_rast <-
  st_rasterize(grilla, dx = 10, dy = 10)

mapa_prediccionRFSI <-
  tm_shape(grilla_rast,
           name = "RFSI") +
  tm_raster(
    col = "RFSI",
    title = "Arcilla (%) RFSI",
    style = "fixed",
    palette = "YlOrBr",
    contrast = c(0.1, 1),
    breaks = c(30, 31, 32, 33, 34, 35, 36, 37)
  ) +
  tm_layout(legend.format = list(
    scientific = TRUE,
    format = "f",
    digits = 0
  ))

mapa_prediccionRFSI

Muestra + Mapas de predicción

mapa_muestra <- tm_shape(datos,
                         name = "Muestra") +
  tm_dots(
    "Arcilla",
    title = "Arcilla (%) Muestra",
    style = "fixed",
    palette = "YlOrBr",
    contrast = c(0.1, 1),
    breaks = c(30, 31, 32, 33, 34, 35, 36, 37),
    size = 0.1,
    popup.vars = T,
    popup.format = list(
      digits = 1,
      decimal.mark = ",",
      big.mark = "."
    )
  ) +
  tm_layout(legend.format = list(scientific = TRUE, format = "f"))

mapas <-
  mapa_prediccion_Kriging + mapa_prediccionRF_KO + mapa_prediccionRF + mapa_prediccionRFSI + mapa_muestra
mapas

Validacion cruzada. Compración de métodos: diagrama de Taylor

num_cores <- max(detectCores() - 1, 1)
cl <- makeCluster(num_cores)

tablavalidacion <- do.call(rbind, parLapply(cl, 1:10, validacion_arcilla))
stopCluster(cl)

TaylorDiagram(
  tablavalidacion,
  obs = "Observado",
  mod = "Predicho",
  group = c("Metodo"),
  cols = c("orange", "red", "#53ad5d", "#251cad"),
  annotate = "RMSE",
  xlab = "Desvio Estandar",
  ylab = "Desvio Estandar"
)