Source code for crackdect.imagestack

"""
This module provides the core functionality for handling a stack of images at once.

Image stacks are objects that hold multiple images and act in many cases like python lists. They can
be indexed and images in the stack can be changed. All images in one image stack must have the same dtype. If
an image with another dtype is added or an image in the stack is replaced with another image of a different dtype,
the incoming image is automatically converted to match the dtype of the image stack.

It is strongly recommended that *np.float32* is used when performing a crack detection. The crack detection is
tested and developed for images of dtypes *float*, *np.float64*, *np.float32* or *np.float16*.

Currently, there are two image stack objects that can be used. All image stacks have the same structure.
Accessing images, replacing images in the stack and adding new images works the same for all image stacks.

.. currentmodule:: crackdect.imagestack

* :class:`ImageStack`: A simple wrapper around a list. This container holds all images in the system memory (RAM).

* :class:`ImageStackSQL`: Manages RAM usage of the image stack. Images are held in memory as long as the
  memory usage of the system does not exceed a certain percentage of the available memory and the image stack
  does not exceed a set number of MB. If any more images are added, all currently loaded images get stored in a database and only
  references to the images are kept in memory. The images are only loaded when directly accessed. This allows working and
  changing images of a stack even if the stack is too big to fit into the memory. The loaded images will be kept in
  memory until the stack exceeds the RAM limits again. This reduces the number of loading and storing operations and
  therefore saves time since this can be quite time consuming for a lot of images.

The image stack is quite easy to use.
"""
import io
from sqlalchemy import Column, Integer, create_engine, TypeDecorator, LargeBinary
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import deferred, sessionmaker
import numpy as np
import psutil
from skimage.util.dtype import _convert
from skimage.io import imread


class NumpyType(TypeDecorator):
    """
    Numpy Type for sql databases when using sqlalchemy.

    This handles the IO with a sql database and sqlalchemy. Inside the database, a numpy array is stored
    as LargeBinary in the format written by ``np.save``. sqlalchemy handles loading and storing of entries
    for columns marked with this custom type. Arrays are serialized to bytes when they are stored and
    turned back into numpy arrays with ``np.load`` when they are read.
    """

    impl = LargeBinary
    # The type takes no constructor arguments, therefore it is safe to take part in the statement cache
    # of sqlalchemy. Without this flag sqlalchemy >= 1.4 warns and skips caching for this type.
    cache_ok = True

    def __init__(self):
        super(NumpyType, self).__init__()

    @staticmethod
    def _to_bytes(array):
        """Serialize *array* into the bytes produced by ``np.save``."""
        buffer = io.BytesIO()
        np.save(buffer, array)
        return buffer.getvalue()

    @staticmethod
    def _from_bytes(data):
        """Rebuild the numpy array from bytes written by :meth:`_to_bytes`."""
        return np.load(io.BytesIO(data))

    def bind_processor(self, dialect):
        """
        Build the function that converts an array to its database representation.

        Parameters
        ----------
        dialect
            The sqlalchemy dialect in use. It is forwarded to the bind processor of LargeBinary.

        Returns
        -------
        process: function
            Takes one value. *None* is passed through as *None*, everything else is serialized with
            ``np.save``. If LargeBinary provides its own bind processor for the dialect, the
            serialized bytes are passed through it as well.
        """
        impl_processor = self.impl.bind_processor(dialect)

        def process(value):
            if value is None:
                return None
            data = self._to_bytes(value)
            return impl_processor(data) if impl_processor else data

        return process

    def result_processor(self, dialect, coltype):
        """
        Build the function that converts a database value back to an array.

        Parameters
        ----------
        dialect
            The sqlalchemy dialect in use. It is forwarded to the result processor of LargeBinary.
        coltype
            Forwarded to the result processor of LargeBinary.

        Returns
        -------
        process: function
            Takes one value. If LargeBinary provides its own result processor for the dialect, the
            value is passed through it first. *None* is returned as *None*, everything else is
            loaded with ``np.load``.
        """
        impl_processor = self.impl.result_processor(dialect, coltype)

        def process(value):
            if impl_processor:
                value = impl_processor(value)
            if value is None:
                return None
            return self._from_bytes(value)

        return process
def _add_docstring(func):
    """
    Decorator factory that prepends the docstring of *func* to the decorated function.

    Parameters
    ----------
    func: function
        The function whose docstring is used as prefix.

    Returns
    -------
    inner: function
        A decorator. It changes ``__doc__`` of the decorated function in place and returns the same
        function object. A missing docstring on either side is treated as empty, so no error is
        raised when only one of the two functions is documented.
    """
    def inner(function):
        inherited = func.__doc__
        own = function.__doc__
        if own is None:
            function.__doc__ = inherited
        elif inherited is not None:
            function.__doc__ = inherited + own
        # If only the decorated function is documented, its docstring is left untouched.
        return function
    return inner


def _add_to_docstring(docstring):
    """
    Decorator factory that appends *docstring* to the docstring of the decorated function.

    Parameters
    ----------
    docstring: str
        The text to append.

    Returns
    -------
    docstring_decorator: function
        A decorator. It changes ``__doc__`` of the decorated function in place and returns the same
        function object. If the decorated function has no docstring, *docstring* becomes its docstring.
    """
    def docstring_decorator(func):
        if func.__doc__ is None:
            func.__doc__ = docstring
        else:
            func.__doc__ = func.__doc__ + docstring
        return func
    return docstring_decorator


def _fast_convert(img, dtype):
    """
    Check if the image is already the right dtype.

    This will ignore value limits if the image is already the right dtype. The shortcut is only taken
    when *dtype* is the very scalar type of the array (e.g. ``np.float32``). In every other case the
    image is handed to ``_convert`` from skimage.

    Parameters
    ----------
    img: np.ndarray
        The image. It must provide a ``dtype`` attribute.
    dtype:
        dtype the image should be converted to

    Returns
    -------
    image: np.ndarray
        *img* itself if it already has the requested dtype, otherwise the result of ``_convert``.
    """
    if img.dtype.type is dtype:
        return img
    return _convert(img, dtype)
class ImageStack:
    """
    This object holds multiple images. All images are converted to the same datatype. This ensures that all images
    have the same characteristics for further processing.

    All images are represented as numpy arrays. The same convention for representing images is used as in skimage.

    If an image with mismatching dtype is added it is automatically converted to match the dtype.
    Read more about conversion details at skimage.util.dtype.

    This object behaves a lot like a list. Individual images or groups of images can be retrieved with slicing.
    Setitem and delitem behaviour is like with normal python lists but images can only be added with add_image.
    Indexing with an integer returns the image, indexing with a slice returns a new ImageStack.

    Parameters
    ----------
    dtype: optional, default=np.float32
        The dtype all images will be converted to. E.g. np.float32, bool, etc.

    Examples
    --------
    >>> # make an ImageStack object where all images are represented as unsigned integer arrays [0-255]
    >>> stack = ImageStack(dtype=np.uint8)
    >>> # Add an image to it.
    >>> img = (np.random.rand(200,200) * np.arange(200))/200  # floating point images must be in range [-1,1]
    >>> stack.add_image(img)

    This ImageStack can be indexed.

    >>> stack[0]  # getting the image with index 0 from the stack

    Changing an image in the stack. The input will also be converted to the dtype of the stack.

    >>> stack[0] = ((np.random.rand(200,200) * np.arange(200))/200)[::-1]  # setting an image in the stack

    Or deleting an image from the stack

    >>> del stack[0]
    """

    def __init__(self, dtype=np.float32):
        self._dtype = dtype
        self._images = []

    def add_image(self, img):
        """
        Add an image to the stack. The image must be a numpy array

        The input array will be converted to the dtype of the ImageStack

        Parameters
        ----------
        img: np.ndarray
        """
        self._images.append(_fast_convert(img, dtype=self._dtype))

    def remove_image(self, i=-1):
        """
        Remove an image from the stack.

        Parameters
        ----------
        i: int
            Index of the image to be removed. The last image is removed by default.
        """
        self._images.pop(i)

    def __len__(self):
        return len(self._images)

    def __repr__(self):
        return 'ImageStack: {}images, {}'.format(len(self), np.dtype(self._dtype).name)

    @staticmethod
    def _check_index(i):
        """
        Return *i* as a plain int or as the slice it is.

        Everything that provides ``__index__`` (e.g. numpy integers) is turned into an int.

        Raises
        ------
        TypeError
            If *i* is neither a slice nor usable as an integer index.
        """
        if isinstance(i, slice):
            return i
        if hasattr(i, '__index__'):
            return i.__index__()
        raise TypeError('slicing must be with an int or slice object')

    def __getitem__(self, i):
        i = self._check_index(i)
        if isinstance(i, slice):
            # A slice yields a new stack. The images themselves are not copied.
            temp_stack = ImageStack(self._dtype)
            temp_stack._images = self._images[i]
            return temp_stack
        return self._images[i]

    def __delitem__(self, i):
        del self._images[i]

    def __setitem__(self, i, item):
        # Same index handling as in __getitem__. Unsupported index types raise a TypeError
        # instead of being ignored silently.
        i = self._check_index(i)
        if isinstance(i, slice):
            n_targets = len(self._images[i])
            if len(item) != n_targets:
                raise ValueError('{} images provided to override {} images!'.format(len(item), n_targets))
            self._images[i] = [_fast_convert(j, self._dtype) for j in item]
        else:
            self._images[i] = _fast_convert(item, dtype=self._dtype)

    @classmethod
    def from_paths(cls, paths, dtype=None, **kwargs):
        """
        Make an ImageStack object directly from paths of images. The images will be loaded, converted to the
        dtype of the ImageStack and added.

        Parameters
        ----------
        paths: list
            paths of the images to be added. At least one path is required.
        dtype: optional
            The dtype all images will be converted to. E.g. np.float32, bool, etc.
            If this is not set, the dtype of the first image loaded will determine the dtype of the stack.
        kwargs:
            kwargs are forwarded to
            `skimage.io.imread <https://scikit-image.org/docs/dev/api/skimage.io.html#skimage.io.imread>`_
            For grayscale images simply add **as_gray = True**.
            For the kwargs for colored images use
            `parameters for reading <https://imageio.readthedocs.io/en/stable/_autosummary/imageio.plugins.pillow_legacy.html#module-imageio.plugins.pillow_legacy>`_.
            Keep in mind that some images might have alpha channels and some not even if they have the same format.

        Returns
        -------
        out: ImageStack
            An ImageStack with all images from paths as arrays.

        Examples
        --------
        >>> paths = ['list of image paths']
        >>> stack = ImageStack.from_paths(paths, as_gray=True)
        """
        first = imread(paths[0], **kwargs)
        stack = cls(first.dtype.type if dtype is None else dtype)
        stack.add_image(first)
        for p in paths[1:]:
            stack.add_image(imread(p, **kwargs))
        return stack

    def change_dtype(self, dtype):
        """
        Change the dtype of all images in the stack. All images will be converted to the new dtype.

        Nothing happens if the stack already has this dtype.

        Parameters
        ----------
        dtype
            The new dtype of the stack.
        """
        if self._dtype == dtype:
            return
        # Convert everything first, so the stack stays unchanged if one conversion fails.
        self._images[:] = [_convert(img, dtype) for img in self._images]
        self._dtype = dtype

    def copy(self):
        """
        Copy the current image stack.

        The copy is shallow until images are changed in the new stack.

        Returns
        -------
        out: ImageStack
        """
        temp = ImageStack(self._dtype)
        for img in self._images:
            temp.add_image(img)
        return temp

    def execute_function(self, func, *args, **kwargs):
        """
        Perform an operation on all the images in the stack.

        The operation can be any function which takes one image and other arguments as input and returns only
        one image. The returned image is converted to the dtype of the stack.

        This operation changes the images in the stack. If the current state should be kept copy the stack first.

        Parameters
        ----------
        func: function
            A function which takes ONE image as first input and returns ONE image.
        args:
            args are forwarded to the func.
        kwargs:
            kwargs are forwarded to the func.

        Examples
        --------
        >>> def fun(img, to_add):
        >>>     return img + to_add
        >>> stack.execute_function(fun, to_add=4)

        This will apply the function *fun* to all images in the stack.
        """
        for ind, img in enumerate(self._images):
            self._images[ind] = _fast_convert(func(img, *args, **kwargs), self._dtype)

    def execute_rolling_function(self, func, keep_first=False, *args, **kwargs):
        """
        Perform a rolling operation on all the images in the stack.

        The operation can be any function which takes two images and other arguments as input and returns only
        one image.

        :math:`I_{new} = func(I_{n-1}, I_n)`

        *func* always receives the images as they were before this call, not the results computed for the
        previous index.

        This operation changes the images in the stack. If the current state should be kept copy the stack first.
        Since the 0-th image in the stack will remain unchanged because the rolling operation starts at the
        1-st image, the 0-th image is removed if *keep_first* is set to *False* (default).

        An empty stack is left untouched.

        Parameters
        ----------
        func: function
            A function which takes TWO images and other arguments as input and returns ONE image. The function
            must have the following input structure: `fun(img1, img2, args, kwargs)`. *img1* will be the n-1st
            image in the calls.
        keep_first: bool
            If True, keeps the first image in the stack. Delete it otherwise.
        args:
            args are forwarded to the func.
        kwargs:
            kwargs are forwarded to the func.

        Examples
        --------
        >>> def fun(img1, img2):
        >>>     mask = img1 > img1.max()/2
        >>>     return img2[mask]
        >>> stack.execute_rolling_function(fun, keep_first=False)

        This will apply the function *fun* to all images in the stack. *img1* is always the n-1st image in the
        rolling operation.
        """
        if not self._images:
            return

        previous = self._images[0]
        for ind in range(1, len(self._images)):
            current = self._images[ind]
            self._images[ind] = _fast_convert(func(previous, current, *args, **kwargs), self._dtype)
            # Keep the original image, not the result, as predecessor for the next step.
            previous = current

        if not keep_first:
            del self._images[0]
class ImageStackSQL:
    """
    This class works the same as ImageStack.

    ImageStackSQL objects will track the amount of memory the images occupy. When the memory limit is surpassed,
    all data will be stored in an sqlite database and the RAM will be cleared. Only a lazy loaded object is left
    in the image stack. Only when directly accessing the images in the stack they will be loaded into RAM again.

    sqlalchemy is used to connect to the database in which all data is stored. This makes this container suitable
    for long term storage and transfer of a lot of images. The images can be loaded into an ImageStackSQL object
    in a new python session.

    Indexing with an integer returns the image, indexing with a slice returns an ImageStack (not an
    ImageStackSQL) holding the selected images in RAM.

    Parameters
    ----------
    database: str, optional
        Path of the database. If it does not exist, it will be created. The path must end with *.db*.
        If none is entered or the path does not end with *.db*, the name is generated from id(object).
    stack_name: str, optional
        The name of the table the images will be saved. If none is entered it will be generated from id(object)
    dtype: optional, default=np.float32
        The dtype all images will be converted to. E.g. np.float32, bool, etc.
    max_size_mb: float, optional
        The maximal size in mb the image stack is allowed to be. If a new image is added after surpassing this
        size all images will be saved in the database and the occupied RAM is cleared. All images are still
        accessible but will be loaded only when directly accessed. There is no limit by default.
    cache_limit: float, optional, default=80
        The limit of the RAM usage in percent of the available system RAM. When the RAM usage of the system
        surpasses this limit, all images will be saved in the database and RAM is freed again even if
        max_size_mb is not reached. The RAM usage of the system is not polled on every change but only after
        more than 50 updates of the tracked size. Values over 100 will effectively deactivate this behaviour.
        If the total size of the image stack is too small to free enough RAM to reach the cache limit, images
        will be written to the database and read from it again and again. Therefore it is recommended to set
        this well over the current RAM usage of the system when instantiating an object.
    """

    def __init__(self, database='', stack_name='', dtype=np.float32, max_size_mb=None, cache_limit=80):
        self._dtype = dtype

        # stack name is the name of the table in the sql database. The table must have a name.
        self.stack_name = stack_name if stack_name != '' else 'table' + str(id(self))
        # database name must end with .db. Any other name is replaced by a generated one.
        self.database = database if database != '' and database.endswith('.db') else 'db' + str(id(self)) + '.db'

        # sqlalchemy connection
        self.engine = create_engine('sqlite:///{}'.format(self.database), echo=False)
        self.session = sessionmaker(bind=self.engine)()
        self.base = declarative_base()
        # The mapped class is named like the table, so it never ends up with an empty name.
        self.table = type(self.stack_name, (self.base,), {'__tablename__': self.stack_name,
                                                          'id': Column('id', Integer, primary_key=True),
                                                          'image': deferred(Column('image', NumpyType))})
        self.base.metadata.create_all(self.engine)

        # list for easy access to the images.
        self._images = []

        # ram limits
        self._max_nbytes = max_size_mb * 1e6 if max_size_mb is not None else np.inf
        self._cache_limit = cache_limit

        # nbytes and counter for caching logic
        self._nbytes = 0
        self.__counter = 0

    @classmethod
    def load_from_database(cls, database='', stack_name=''):
        """
        Load an image stack from a database.

        A table of a database which was made with an ImageStackSQL object can be loaded and an ImageStackSQL
        object with all the images is made. The dtype of the images in the new object is the same as the dtype
        of the first image in the table. All images, which will be added to the object will be converted to
        match the dtype.

        Parameters
        ----------
        database: str
            Path of the database.
        stack_name: str
            Name of the table

        Returns
        -------
        out: ImageStackSQL
            The image stack object with connection to the database.

        Raises
        ------
        ValueError
            If the table holds no images. The dtype of the stack can not be determined in this case.
        """
        c = cls(database, stack_name, dtype=bool)
        c._images = c.session.query(c.table).all()
        if not c._images:
            raise ValueError('No images found in table {} of database {}!'.format(c.stack_name, c.database))

        first = c._images[0].image
        # Store the scalar type (like np.float32), which is what the rest of the class works with.
        c._dtype = first.dtype.type
        # The first image stays loaded, so it must be part of the tracked size.
        c._nbytes = first.nbytes
        return c

    @classmethod
    def from_paths(cls, paths, database='', stack_name='', dtype=None, max_size_mb=None, cache_limit=80, **kwargs):
        """
        Make an ImageStackSQL object directly from paths of images. The images will be loaded, converted to the
        dtype of the ImageStack and added.

        Parameters
        ----------
        paths: list
            paths of the images to be added. At least one path is required.
        database: str, optional
            Path of the database. If it does not exist, it will be created. If none is entered, the name is
            generated from id(object)
        stack_name: str, optional
            The name of the table the images will be saved. If none is entered it will be generated from
            id(object)
        dtype: optional
            The dtype all images will be converted to. E.g. np.float32, bool, etc.
            If this is not set, the dtype of the first image loaded will determine the dtype of the stack.
        max_size_mb: float, optional
            :class:`ImageStackSQL` for more details.
        cache_limit: float, optional, default=80
            :class:`ImageStackSQL` for more details.
        kwargs:
            kwargs are forwarded to
            `skimage.io.imread <https://scikit-image.org/docs/dev/api/skimage.io.html#skimage.io.imread>`_
            For grayscale images simply add **as_gray = True**.
            For the kwargs for colored images use
            `parameters for reading <https://imageio.readthedocs.io/en/stable/_autosummary/imageio.plugins.pillow_legacy.html#module-imageio.plugins.pillow_legacy>`_.
            Keep in mind that some images might have alpha channels and some not even if they have the same format.

        Returns
        -------
        out: ImageStackSQL
            A new ImageStackSQL object with connection to the database.
        """
        first = imread(paths[0], **kwargs)
        # database and stack_name are forwarded untouched. Missing names are generated in __init__ from the id
        # of the new object, so two stacks never share a database or a table by accident.
        c = cls(database, stack_name, first.dtype.type if dtype is None else dtype, max_size_mb, cache_limit)
        c.add_image(first)
        for p in paths[1:]:
            c.add_image(imread(p, **kwargs))
        return c

    @property
    def nbytes(self):
        """
        Sum of bytes for all currently fully loaded images.

        This tracks the used RAM from the images. The overhead of the used RAM from sqlalchemy is not included
        and will not be tracked.
        """
        return self._nbytes

    @nbytes.setter
    def nbytes(self, x):
        self._nbytes = x
        if x > self._max_nbytes:
            self.save_state()
        elif self.__counter > 50:
            # Asking the system for its RAM usage is only done every now and then.
            if psutil.virtual_memory().percent > self._cache_limit:
                self.save_state()
            self.__counter = 0
        self.__counter += 1

    @staticmethod
    def _check_index(i):
        """
        Return *i* as a plain int or as the slice it is.

        Everything that provides ``__index__`` (e.g. numpy integers) is turned into an int.

        Raises
        ------
        TypeError
            If *i* is neither a slice nor usable as an integer index.
        """
        if isinstance(i, slice):
            return i
        if hasattr(i, '__index__'):
            return i.__index__()
        raise TypeError('slicing must be with an int or slice object')

    @staticmethod
    def __is_loaded(sql_obj):
        # A deferred or expired image is not part of the instance dict.
        return 'image' in sql_obj.__dict__

    def __tracked_bytes(self, sql_obj):
        # Bytes of the image which are part of self._nbytes. Does not load the image.
        return sql_obj.image.nbytes if self.__is_loaded(sql_obj) else 0

    def __store_image(self, sql_obj, image, tracked):
        # The image is assigned before the size is updated. Updating the size can trigger save_state, which
        # resets the tracked size. The new image must already be part of the session at this point.
        sql_obj.image = image
        self.nbytes += image.nbytes - tracked

    def __get_image(self, sql_obj):
        if self.__is_loaded(sql_obj):
            return sql_obj.image
        out = sql_obj.image  # loads the image from the database
        self.nbytes += out.nbytes
        return out

    def __set_image(self, img, sql_obj):
        temp = _fast_convert(img, self._dtype)
        self.__store_image(sql_obj, temp, self.__tracked_bytes(sql_obj))

    def __clean_remove(self, sql_obj):
        # Objects which were never written to the database are just taken out of the session.
        if sql_obj._sa_instance_state.pending:
            self.session.expunge(sql_obj)
        else:
            self.session.delete(sql_obj)

    def __getitem__(self, i):
        i = self._check_index(i)
        if isinstance(i, slice):
            temp_stack = ImageStack(self._dtype)
            temp_stack._images = [self.__get_image(j) for j in self._images[i]]
            return temp_stack
        return self.__get_image(self._images[i])

    def __setitem__(self, i, value):
        i = self._check_index(i)
        if isinstance(i, slice):
            targets = self._images[i]
            if len(value) != len(targets):
                raise ValueError('{} images provided to override {} images!'.format(len(value), len(targets)))
            for image, sql_obj in zip(value, targets):
                self.__set_image(image, sql_obj)
        else:
            self.__set_image(value, self._images[i])

    def __delitem__(self, i):
        i = self._check_index(i)
        if isinstance(i, slice):
            for sql_obj in self._images[i]:
                self._nbytes -= self.__tracked_bytes(sql_obj)
                self.__clean_remove(sql_obj)
            del self._images[i]
        else:
            self.remove_image(i)

    def __repr__(self):
        s = 'ImageStack: {}images, {}, {:.2f}/{:.2f} MB RAM used, caching at {} percent of system memory'
        return s.format(len(self), np.dtype(self._dtype).name, self._nbytes / 1e6, self._max_nbytes / 1e6,
                        self._cache_limit)

    def __len__(self):
        return len(self._images)

    def reload(self):
        """
        Reload the images from the table. All not saved changes will be lost.
        """
        # rollback discards changed images and also added and removed images which are not committed yet.
        # Only expiring the session would keep added and removed images pending and the query below would
        # write them to the database.
        self.session.rollback()
        self._images = self.session.query(self.table).all()
        self._nbytes = 0

    # The method was published under this misspelled name. It is kept for backwards compatibility.
    relaod = reload

    def save_state(self):
        """
        Saves the current state of the image stack to the database.

        This commits all changes and adds all new images to the table. All currently loaded images are expired.
        This means, that all RAM used by the images is freed.

        Call this method before closing the python session if the changes made to the image stack should be
        saved permanently.
        """
        self.session.commit()
        self.session.expire_all()
        self._nbytes = 0

    @_add_docstring(ImageStack.add_image)
    def add_image(self, img):
        temp = self.table(image=_fast_convert(img, dtype=self._dtype))
        self._images.append(temp)
        self.session.add(temp)
        self.nbytes += temp.image.nbytes

    @_add_docstring(ImageStack.remove_image)
    def remove_image(self, i=-1):
        temp = self._images[i]
        self._nbytes -= self.__tracked_bytes(temp)
        self.__clean_remove(temp)
        self._images.pop(i)

    @_add_docstring(ImageStack.change_dtype)
    def change_dtype(self, dtype):
        if dtype == self._dtype:
            return

        for sql_obj in self._images:
            # Must be read before the image is accessed, since the access loads images which are not in RAM.
            tracked = self.__tracked_bytes(sql_obj)
            self.__store_image(sql_obj, _convert(sql_obj.image, dtype), tracked)
        self._dtype = dtype

    def copy(self, stack_name=''):
        """
        Copy the current image stack.

        A new table in the database is created where all images are stored.

        Parameters
        ----------
        stack_name: str
            The name of the stack. This is also the name of the new table in the database. If none is entered,
            the name is generated from id(object) of the new stack.

        Returns
        -------
        out: ImageStackSQL
        """
        temp = ImageStackSQL(self.database, stack_name, self._dtype, self._max_nbytes / 1e6, self._cache_limit)
        for sql_obj in self._images:
            temp.add_image(self.__get_image(sql_obj))
        return temp

    @_add_docstring(ImageStack.execute_function)
    def execute_function(self, func, *args, **kwargs):
        for sql_obj in self._images:
            # Must be read before the image is accessed, since the access loads images which are not in RAM.
            tracked = self.__tracked_bytes(sql_obj)
            result = _fast_convert(func(sql_obj.image, *args, **kwargs), self._dtype)
            self.__store_image(sql_obj, result, tracked)

    @_add_docstring(ImageStack.execute_rolling_function)
    def execute_rolling_function(self, func, keep_first=False, *args, **kwargs):
        # An empty stack is left untouched.
        if not self._images:
            return

        previous = self.__get_image(self._images[0])
        for sql_obj in self._images[1:]:
            # Must be read before the image is accessed, since the access loads images which are not in RAM.
            tracked = self.__tracked_bytes(sql_obj)
            current = sql_obj.image
            # Copy of the unchanged image. It is the predecessor in the next step even if func changes its
            # input in place.
            snapshot = current.copy()
            result = _fast_convert(func(previous, current, *args, **kwargs), self._dtype)
            self.__store_image(sql_obj, result, tracked)
            previous = snapshot

        if not keep_first:
            self.remove_image(0)