Get IMERG data

Mean precipitation data from netcdf to textfile for your area of interest

Download the shapefile here: VN_shp

copy the script below and save it as

You can run the script with python. It will write the daily mean precipitation of Vietnam to a txt file. You can change the area of interest by using another shp file.

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
""", SERVIR-Mekong (2017-07-26)

Downloads data from SERVIR-Mekong's Virtual Rain and Stream Gauge (VRSG) (
and calculates a time series of the daily data over an area


$ python {options}

{options} include:

--idate (-i)      : required
                  : start date of the time series
                  : in format YYYY-MM-DD

--edate (-e)      : required
                  : end date of the time series
                  : in format YYYY-MM-DD

--product (-p)    : required
                  : precipitation product to create the time series for
                  : options are IMERG 

--directory (-d)  : aerosol profile to use (default = Continental), for options see..

--shapefile       : defines parameter space of input variables (default = test)
                  : options are: test, test2, validation and full
                  : ! MUST use full to build functioning LUT but this can take hours !

Example Usage

1) create a time series from IMERG

  $ python -p IMERG -i 2017-01-01 -e 2017-01-31

2) create a time series from IMERG over specific region

  $ python -p IMERG -i 2017-01-01 -e 2017-01-31 -s ./test.shp

3) create a time series from IMERG in specific directory

  $ python -p IMERG -i 2017-01-01 -e 2017-01-31 -d ./Users/user/Desktop/

4) create a time series from IMERG over specific region an into specific directory

  $ python -p IMERG -i 2017-01-01 -e 2017-01-31 -s ./test.shp -d ./Users/user/Desktop/


from __future__ import division,print_function
import os
import sys
import glob
import argparse
import ftplib
import datetime
import netCDF4
import numpy as np
import pandas as pd
from time import sleep
from osgeo import gdal,osr,ogr
import matplotlib.pyplot as plt

# Print iterations progress
def printProgressBar (iteration, total, prefix = '', suffix = '', decimals = 1, length = 100, fill = '█'):
    Call in a loop to create terminal progress bar
        iteration   - Required  : current iteration (Int)
        total       - Required  : total iterations (Int)
        prefix      - Optional  : prefix string (Str)
        suffix      - Optional  : suffix string (Str)
        decimals    - Optional  : positive number of decimals in percent complete (Int)
        length      - Optional  : character length of bar (Int)
        fill        - Optional  : bar fill character (Str)
    percent = ("{0:." + str(decimals) + "f}").format(100 * (iteration / float(total)))
    filledLength = int(length * iteration // total)
    bar = fill * filledLength + '-' * (length - filledLength)
    sys.stdout.write('\r%s |%s| %s%% %s\r' % (prefix, bar, percent, suffix))
    # Print New Line on Complete
    if iteration == total:


class vrg2ts(object):

    def __init__(self,directory,product,shapefile,iniTime,endTime): = directory
        self.product = product
        self.shapefile = shapefile
        self.iniTime = iniTime
        self.endTime = endTime

        stime = iniTime.split('-')
        etime = endTime.split('-')

        self.syr = stime[0]
        self.smon = stime[1]
        self.sday = stime[2]
        self.eyr = etime[0]
        self.emon = etime[1]
        self.eday = etime[2]

    def getPrecip(self):
        ftpUrl = ''

        t1 =,int(self.smon),int(self.sday))
        t2 =,int(self.emon),int(self.eday))

        offSet = t2 - t1

        n_iter = offSet.days+1

        datadir ='data/'+self.product+'/'

        if os.path.exists('data/') == False:

        if os.path.exists(datadir) == False:

        ftp = ftplib.FTP(ftpUrl)

        for i in range(n_iter):

            now = t1 + datetime.timedelta(i)

            if self.product == 'IMERG':
                ftpDir = '/VRG/IMERG/DAILY/{0}/{1:02d}/VN/'.format(now.year,now.month)
                ftpFile = 'VN_3B-DAY-E.MS.MRG.3IMERG.{0}{1:02d}{2:02d}'.format(now.year,now.month,
                ftpPath = ftpDir + ftpFile

            if os.path.exists(datadir+ftpFile) == False:
                    with open(datadir+ftpFile, 'wb') as outfile:
                        ftp.retrbinary("RETR " + ftpPath, outfile.write)

            printProgressBar(i + 1, n_iter, prefix='Download Progress:', suffix='Complete', length=40)



    def makeTimeSeries(self):

        datadir ='data/'+self.product+'/'

        t1 =,int(self.smon),int(self.sday))
        t2 =,int(self.emon),int(self.eday))

        offSet = t2 - t1

        n_iter = offSet.days+1

        ts = np.zeros([n_iter])

        for i in range(n_iter):
            now = t1 + datetime.timedelta(i)
            ncs = datadir + 'VN_3B-DAY-E.MS.MRG.3IMERG.{0}{1:02d}{2:02d}'.format(now.year,now.month,

                nc = netCDF4.Dataset(ncs,'r')

                pr_var = nc.variables['precipitationCal']
                precip  = pr_var[0,:,:]
                noData = pr_var.missing_value

                precip = np.swapaxes(precip,0,1)
                precip = np.flipud(precip)
                precip =,precip)

                if self.shapefile != None:

                    if i == 0:
                        lons = nc.variables['lon']
                        lats = nc.variables['lat']
                        mask = self.makeMask(lons[:],lats[:],0.1)

                    precip =,precip)

                ts[i] = np.mean(precip)


                ts[i] = -9999

            printProgressBar(i + 1, n_iter, prefix='Time Series Progress:', suffix='Complete', length=40)

        outfile = + 'vrg_timeseries_{0}_{1}.csv'.format(t1.strftime("%Y-%m-%d").replace('-',''),t2.strftime("%Y-%m-%d").replace('-',''))

        dates = pd.date_range(t1, t2, freq='D')
        p_series = pd.Series(ts, index=dates)

        df = pd.DataFrame(p_series) = 'Date'
        df.columns = ['Precipitation']


    def makeMask(self,lon,lat,res):

        source_ds = ogr.Open(self.shapefile)
        source_layer = source_ds.GetLayer()

         # Create high res raster in memory
        mem_ds = gdal.GetDriverByName('MEM').Create('', lon.size, lat.size, gdal.GDT_Byte)
        mem_ds.SetGeoTransform((lon.min(), res, 0, lat.max(), 0, -res))
        band = mem_ds.GetRasterBand(1)

        # Rasterize shapefile to grid
        gdal.RasterizeLayer(mem_ds, [1], source_layer, burn_values=[1])

        # Get rasterized shapefile as numpy array
        array = band.ReadAsArray()


        # Flush memory file
        mem_ds = None
        band = None

        return array

def main():
    parser = argparse.ArgumentParser(description="Download netCDF data from SERVIR Mekong's Virtual Rain and \
                                     and Stream Gauge tool and convert it to a time series")

    parser.add_argument('--idate','-i', type=str,required=True,
                        help="Start date to download data and process time series in format 'YYYY-MM-DD'")
    parser.add_argument('--edate','-e', type=str,required=True,
                        help="End date to download data and process time series in format 'YYYY-MM-DD'")
                        help='VRSG product to create time series from')
                        help='Folder directory to store the downloaded netCDF data')
                        help='Shapefile to extract the time series for')

    args = parser.parse_args()

    if == None:
        print('No directory was given for output data...\nUsing current working directory:{}\n'.format(os.getcwd())) = os.getcwd()+'/'

    if args.shapefile == None:
        print('No shapefile was given for spatial averaging...\nUsing entire raster area\n')

    if args.product not in ['IMERG']:
        print('Product type not recognized: {0}\nPlease select IMERG',format(args.product))

    vrgts = vrg2ts(,args.product,args.shapefile,args.idate,args.edate)



if __name__ == "__main__":

Leave a Reply