#!/usr/bin/env python
# -*- coding: utf-8 -*-
### Copyright (C) 2011-2012 Antonio Valentino <a_valentino@users.sf.net>
### This file is part of GSDView.
### GSDView is free software; you can redistribute it and/or modify
### it under the terms of the GNU General Public License as published by
### the Free Software Foundation; either version 2 of the License, or
### (at your option) any later version.
### GSDView is distributed in the hope that it will be useful,
### but WITHOUT ANY WARRANTY; without even the implied warranty of
### MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
### GNU General Public License for more details.
### You should have received a copy of the GNU General Public License
### along with GSDView; if not, write to the Free Software
### Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
'''Compute statistics and histograms of geo-spatial data.'''
import sys
import logging
try:
from collections import namedtuple
except ImportError:
Statistics = None
else:
Statistics = namedtuple('Statistics', 'min max mean stddev')
from osgeo import gdal
__author__ = 'Antonio Valentino <a_valentino@users.sf.net>'
__date__ = '$Date$'
__revision__ = '$Revision$'
__version__ = '1.0'
GDAL_STATS_KEYS = ('STATISTICS_MINIMUM', 'STATISTICS_MAXIMUM',
'STATISTICS_MEAN', 'STATISTICS_STDDEV')
# @NOTE: the band.GetStatistics method called with the second argument
# set to False (no image rescanning) has been fixed in
# r19666_ (1.6 branch) and r19665_ (1.7 branch)
# see `ticket #3572` on `GDAL Trac`_.
#
# .. _r19666: http://trac.osgeo.org/gdal/changeset/19666
# .. _r19665: http://trac.osgeo.org/gdal/changeset/19665
# .. _`ticket #3572`: http://trac.osgeo.org/gdal/ticket/3572
# .. _`GDAL Trac`: http://trac.osgeo.org/gdal
HAS_GETSTATS_FORCE_BUG = (('1640' <= gdal.VersionInfo() < '1700') or
(gdal.VersionInfo() > '1720'))
[docs]def SafeGetStatistics(band, approxok, force):
'''Retriewe statistics form a GDAL raster band in a safe way.
If it is not possible to get statistics (e.g. because the force
flag is set to false and statistics are not available, or because
the approxok is set and there are too many nodata elements in the
raster band) then a tuple of four None is returned.
:param approxok:
if True statistics may be computed based on overviews or a
subset of all tiles
:param force:
if False statistics will only be returned if it can be done
without rescanning the image
.. note:: in order to check errors due to nodata values the GDAL
internal error status is reset.
.. important:: this function only works for versions of GDAL in
which gdal.Band.GetStatistics correctly reset the
return values: 1.6.4 <= GDAL < 1.7.0 and
GDAL > 1.7.2 (see `ticket #3572`_ on `GDAL Trac`_.
.. _`ticket #3572`: http://trac.osgeo.org/gdal/ticket/3572
.. _`GDAL Trac`: http://trac.osgeo.org/gdal
'''
if force is False and not HAS_GETSTATS_FORCE_BUG:
raise RuntimeError('it is not safe to use gada.Band.GetStatistics '
'with the "force" parameter set to false with this '
'version of GDAL (%s)' % gdal.VersionInfo())
gdal.ErrorReset()
stats = band.GetStatistics(approxok, force)
if stats == [0, 0, 0, -1] or gdal.GetLastErrorNo() != 0:
stats = (None, None, None, None)
gdal.ErrorReset()
if Statistics:
stats = Statistics(*stats)
return stats
SOURCE_TEMPLATE = '''\
<SimpleSource>
<SourceFilename>%(filename)s</SourceFilename>
<SourceBand>%(bandno)d</SourceBand>
<SrcRect xOff="%(xoffset)d" yOff="%(yoffset)d" xSize="%(xsize)d" ySize="%(ysize)d"/>
<DstRect xOff="0" yOff="0" xSize="%(xsize)d" ySize="%(ysize)d"/>
</SimpleSource>'''
[docs]def copy_dataset_subwin(dataset, srcwin, bands=None, vrtfile=''):
xoffset, yoffset, xsize, ysize = srcwin
driver = gdal.GetDriverByName('VRT')
vrtds = driver.Create(vrtfile, xsize, ysize, 0)
if not vrtds:
if vrtfile:
msg = 'unable to create a vrtual dataset for "%s" in "%s".' % (
dataset.GetDescription(), vrtfile)
else:
msg = ('unable to create and anonymous vrtual dataset for "%s".' %
dataset.GetDescription())
raise RuntimeError(msg)
if bands is None:
bands = range(1, dataset.RasterCount + 1)
for bandno in bands:
srcband = dataset.GetRasterBand(bandno)
if not srcband:
raise RuntimeError('unable to get raster band n. %s.' % bandno)
vrtds.AddBand(srcband.DataType)
dstband = vrtds.GetRasterBand(bandno)
if not dstband:
raise RuntimeError('unable to create raster bands in temporary '
'virtual file.')
nodata = srcband.GetNoDataValue()
if nodata:
dstband.SetNoDataValue()
sourcexml = SOURCE_TEMPLATE % locals()
dstband.SetMetadataItem('source_0', sourcexml, 'new_vrt_sources')
del srcband, dstband
return vrtds
[docs]class HistogramRequest(object):
def __init__(self, hmin=None, hmax=None, nbuckets=None,
include_out_of_range=False, computehistogram=True):
#: Enable histogram computation.
self.computehistogram = computehistogram
#: Histogram minimum.
self.hmin = hmin
#: Histogram maximum.
self.hmax = hmax
#: Number of buckets for histogram comutation.
self.nbuckets = nbuckets
#: Flag for out of range values handling.
#:
#: If set then values below the histogram range will be mapped into
#: the first bucket, and values above will be mapped into last one.
#: Otherwise out of range values are discarded.
self.include_out_of_range = include_out_of_range
def __str__(self):
return ('HistogramRequest(hmin=%(hmin)s, hmax=%(hmax)s, '
'nbuckets=%(nbuckets)s, '
'include_out_of_range=%(include_out_of_range)s, '
'computehistogram=%(computehistogram)s)' % self.__dict__)
def __nonzero__(self):
return bool(self.computehistogram)
[docs] def values(self):
return self.hmin, self.hmax, self.nbuckets
[docs] def iscustom(self):
return all(val is not None
for val in (self.hmin, self.hmax, self.nbuckets))
[docs]def computestats(dataset, bands=None, computestats=True, histreq=None,
approxok=False, minmax_only=False, callback=None):
statistics = {}
histograms = {}
if bands is None:
bands = range(1, dataset.RasterCount + 1)
for bandno in bands:
band = dataset.GetRasterBand(bandno)
if not band:
raise RuntimeError('unable to open band n. %d' % bandno)
if computestats:
stats = (None, None, None, None)
if approxok:
if HAS_GETSTATS_FORCE_BUG:
stats = SafeGetStatistics(band, True, False)
if None in stats:
stats = GetStatisticsFromMetadata(band)
if None in stats:
stats = band.ComputeStatistics(approxok, callback)
statistics[bandno] = stats
vmin, vmax, mean, stddev = stats
if minmax_only:
logging.info('%f %f' % (vmin, vmax))
else:
logging.info('Statistics for band n. %d' % bandno)
logging.info('Min: %f' % vmin)
logging.info('Max: %f' % vmax)
logging.info('Mean: %f' % mean)
logging.info('Stddev: %f' % stddev)
if computestats and histreq:
logging.info('')
if histreq:
if not histreq.iscustom():
hmin, hmax, nbuckets, hist = band.GetDefaultHistogram(
callback=callback)
else:
hmin, hmax, nbuckets = histreq.values()
nbuckets = int(nbuckets)
hist = band.GetHistogram(hmin, hmax, nbuckets,
histreq.include_out_of_range,
approxok, callback)
histograms[bandno] = (hmin, hmax, nbuckets, hist)
logging.info('Histogram for band n. %d' % bandno)
logging.info('Hist. min: %f' % hmin)
logging.info('Hist. max: %f' % hmax)
logging.info('Nuckets: %d' % nbuckets)
logging.info('Histogram: %s' % hist)
if len(bands) > 1:
logging.info('')
return statistics, histograms
### Command line tool #########################################################
[docs]def handlecmd(argv=None):
import optparse
if argv is None:
argv = sys.argv
argv = gdal.GeneralCmdLineProcessor(argv)
parser = optparse.OptionParser(
usage='%prog [options] FILENAME',
version='%%prog %s' % __version__,
description=__doc__)
parser.add_option('--no-stats', dest='stats', action='store_false',
default=True,
help='disable statistics computation (default: False)')
parser.add_option('--hist', action='store_true', default=False,
help='enable histogram computation (default: %default)')
parser.add_option('-b', '--band', type='int',
help='compute statistics for a specific raster band '
'(default: all bands are precessed)')
parser.add_option('-a', '--approxok', action='store_true', default=False,
help='if set then statistics may be computed based on '
'overviews or a subset of all tiles '
'(default: %default)')
parser.add_option('--minmax-only', action='store_true', default=False,
help='only print minimum and maximum on the same line.')
parser.add_option('--histreq', nargs=3, type='float',
metavar='MIN MAX NBUCKETS',
help='specify lower bound, upper bound and the number '
'of buckets for histogram computation '
'(automatically computed otherwise)')
parser.add_option('-i', '--include_out_of_range', action='store_true',
default=False,
help='if set then values below the histogram range will '
'be mapped into the first bucket, and values above '
'will be mapped into last one. '
'Otherwise out of range values are discarded.')
parser.add_option('--srcwin', nargs=4, type='int',
metavar='XOFFSET YOFFSET XSIZE YSIZE',
help='specify source window in image coordinates: '
'(default: the entire image is processed)')
parser.add_option('-o', '--outfile', metavar='FILE',
help='write results to FILE (default: stdout)', )
parser.add_option('-q', '--quiet', action='store_true',
help='suppress progress messages')
options, args = parser.parse_args()
if options.histreq and not options.hist:
options.hist = True
#parser.error('"histreq" option requires "hist"')
if options.include_out_of_range and not options.hist:
parser.error('"include_out_of_range" option requires "hist"')
if options.band is not None and options.band < 1:
parser.error('the "band" parameter shoulb be a not null positive '
'integer.')
histonly = bool(options.hist and not options.stats)
if histonly and options.approxok and not options.histreq:
logging.warning('the "approxok" option is ignored if "histreq" '
'is not set.')
if not options.stats and not options.hist:
parser.error('nothing to compute: '
'please check "--hist" and "--no-stats" optoions.')
if len(args) != 1:
parser.error('at least one argument is required.')
return options, args
[docs]def main(*argv):
logging.basicConfig(format='%(levelname)s: %(message)s',
level=logging.INFO)
try:
if not argv:
argv = sys.argv
options, args = handlecmd(argv)
if options.outfile:
logger = logging.getLogger()
streamhandler = logger.handlers[0]
streamhandler.setLevel(logging.WARNING)
formatter = streamhandler.formatter
filehandler = logging.FileHandler(options.outfile, 'w')
filehandler.setLevel(logging.INFO)
filehandler.setFormatter(formatter)
logger.addHandler(filehandler)
filename = args[0]
if options.quiet:
progressfunc = None
else:
progressfunc = gdal.TermProgress
if options.hist:
histreq = HistogramRequest()
if options.histreq:
histreq.hmin, histreq.hmax, histreq.nbuckets = options.histreq
if options.include_out_of_range:
histreq.include_out_of_range = options.include_out_of_range
else:
histreq = None
ds = gdal.Open(filename)
if not ds:
raise RuntimeError('unable to open "%s"' % filename)
if options.band is not None:
if options.band > ds.RasterCount:
raise ValueError('band %d requested, but only bands 1 to %d '
'are available.' % (options.band,
ds.RasterCount))
bands = [options.band]
else:
bands = range(1, ds.RasterCount + 1)
if options.srcwin:
ds = copy_dataset_subwin(ds, options.srcwin)
# core
computestats(ds, bands, options.stats, histreq, options.approxok,
options.minmax_only, progressfunc)
ds = None
except Exception as e:
logging.error(str(e)) # , exc_info=True)
sys.exit(1)
if __name__ == '__main__':
main()