Mapping water type with validation

How accurate is our random forest model?

Households in Cambodia have three main sources of (drinking) water: piped water, water from a well and water from a tank. In this exercise we use survey information to estimate the likelihood that a household relies on one of these three sources.

Step 1: copy the code below into the code editor. This snippet imports a subset of the survey data and prints the categories.

// country boundaries; keep only the Cambodia feature(s) for clipping the map later
var countries = ee.FeatureCollection("USDOS/LSIB_SIMPLE/2017");
var kh = countries.filter(ee.Filter.eq("country_na", "Cambodia"));

// survey points used as reference data
var referenceData = ee.FeatureCollection("projects/servir-mekong/undp/Training/WaterBinary");

// count how many survey points fall in each category of "hh_d_water"
// NOTE(review): the training step filters on the "water" property instead --
// confirm that both properties exist on this collection.
var categoryCounts = referenceData.aggregate_histogram("hh_d_water");
print(categoryCounts);

Step 2: import the relevant datasets and combine them into a single image

// import relevant raster datasets
// Planet mosaic: keep four bands and give them readable names
var planet = ee.Image("projects/cemis-camp/assets/Planet/202012")
  .select(["b1","b2","b3","b4"], ["red","green","blue","nir"]);

// road layers, one band per road class
var roadDistPrimary = ee.Image("projects/servir-mekong/staticMaps/primaryRoads").rename("primaryRoads");
var roadDistSecondary = ee.Image("projects/servir-mekong/staticMaps/secondaryRoads").rename("secondaryRoads");
var roadDistTertiary = ee.Image("projects/servir-mekong/undp/distanceLayers/tertiaryRoads").rename("tertiaryRoads");

// HydroSHEDS 15ACC layer; masked pixels are filled with 0
var streamDist = ee.Image("WWF/HydroSHEDS/15ACC").rename("stream").unmask(0);

// mean of the monthly VIIRS night-time radiance over 2020
var nightlight = ee.ImageCollection("NOAA/VIIRS/DNB/MONTHLY_V1/VCMSLCFG")
  .filterDate("2020-01-01","2020-12-31")
  .select("avg_rad")
  .mean();

// water related layers
// `waterLines` used to be declared twice; the later declaration silently
// replaced the earlier one, so only that effective definition is kept here.
var well = ee.Image("projects/servir-mekong/undp/distanceLayers/wellDistance").rename("well");
var waterDist = ee.Image("projects/servir-mekong/undp/distance/waterDist").rename("waterDist");
var waterLines = ee.Image("projects/servir-mekong/undp/distance/waterlineDistance").unmask(0).rename("waterlines");

// population
var wp2020 = ee.Image("WorldPop/GP/100m/pop/KHM_2020").rename("wp");

// land cover is imported but is not part of the predictor stack built below
var landcover = ee.Image("projects/cemis-camp/assets/landcover/lcv4/2020");

// combine raster data into a single image
// every layer is added exactly once so no predictor band is duplicated
var image = planet
  .addBands(roadDistPrimary)
  .addBands(roadDistSecondary)
  .addBands(roadDistTertiary)
  .addBands(streamDist)
  .addBands(nightlight)
  .addBands(waterLines)
  .addBands(well)
  .addBands(waterDist)
  .addBands(wp2020);

Step 3: store the band names in a variable; we need them later for the random forest algorithm.

// names of all bands in the combined image; passed to the classifier as its input properties
var bandNames = image.bandNames();

Step 4: create one dataset of households that use the selected water source (class 1) and one of households that do not (class 0), then merge the two.

// water source to model as the positive class
// choose from pipedWater, well, tank
var item = "pipedWater";

// upper bound on the number of negative (class 0) features that are kept
var maxOtherFeatures = 2576;

// returns a mapper that writes the given label into the "class" property
var labelAs = function(label) {
  return function(feat) {
    return feat.set("class", label);
  };
};

// households using the selected source become class 1 ...
var myClass = referenceData
  .filter(ee.Filter.eq("water", item))
  .map(labelAs(1));

// ... all other households become class 0, capped at maxOtherFeatures
var otherClass = referenceData
  .filter(ee.Filter.neq("water", item))
  .map(labelAs(0))
  .limit(maxOtherFeatures);

// merge the two classes
var trainingData = myClass.merge(otherClass);

Step 6: add a random column and divide the dataset into a training dataset (80%) and a validation dataset (20%)

// add a random number to each feature, stored in the "random" property
var trainingData = trainingData.randomColumn("random");

// features below the threshold are used for training, the rest for validation
var splitThreshold = 0.8;
var isTraining = ee.Filter.lt("random", splitThreshold);
var isValidation = ee.Filter.gte("random", splitThreshold);

var training = trainingData.filter(isTraining);
var validation = trainingData.filter(isValidation);

Step 7: sample the image; this stores the pixel values of every band at each training point

// read the band values of the combined image at every training feature
var trainingSample = image.sampleRegions({
  collection: training,
  scale: 100
});

Step 8: train the random forest classifier

// random forest with 100 trees that outputs a probability instead of a class label,
// trained on the "class" property using all image bands as predictors
var classifier = ee.Classifier.smileRandomForest(100)
  .setOutputMode('PROBABILITY')
  .train({
    features: trainingSample,
    classProperty: "class",
    inputProperties: bandNames
  });

Step 9: get the variable importance and display it in a chart

// print the information of the classifier
var dict = classifier.explain();
print('Explain:', dict);

// pull the 'importance' entry out of the explain() dictionary
var variables = ee.Dictionary(ee.Dictionary(dict).get('importance'));
var keys = variables.keys();

// wrap the importance values in a geometry-less feature so they can be charted by property
var variable_importance = ee.Feature(null, variables);

// create a column chart with one column per property of that feature
var chartOptions = {
  title: 'Random Forest Variable Importance',
  legend: {position: 'none'},
  hAxis: {title: 'Bands'},
  vAxis: {title: 'Importance'}
};
var chart = ui.Chart.feature.byProperty(variable_importance)
  .setChartType('ColumnChart')
  .setOptions(chartOptions);

// print the chart
print(chart);

Step 10: classify the image and display the map

// apply the trained classifier to the combined image
var classification = image.classify(classifier);

// display the result clipped to Cambodia, stretched between 0 and 1
var probabilityVis = {
  min: 0,
  max: 1,
  palette: "darkred,red,orange,yellow,green,darkgreen"
};
Map.addLayer(classification.clip(kh), probabilityVis, "edu Machine Learning");

Step 11: use the validation set to sample the classified image and plot the distribution

// Builds a histogram of the classified image at the validation features
// whose "class" property equals classValue.
function probabilityHistogram(classValue) {
  var points = validation.filter(ee.Filter.eq("class", classValue));
  return ui.Chart.image.histogram(classification, points, 100)
    .setOptions({hAxis: {title: 'probability', maxValue: 1, minValue: 0}});
}

print("histogram for class");
print(probabilityHistogram(1));
print("histogram for other");
print(probabilityHistogram(0));

Find all the code combined in a single script here.