"""
This module provides the core functionality for handling a stack of images at once.
Image stacks are objects that hold multiple images and act in many cases like python lists. They can
be indexed and images in the stack can be changed. All images in one image stack must have the same dtype. If
an image with another dtype is added or an image in the stack is replaced with another image of a different dtype,
the incoming image is automatically converted to match the dtype of the image stack.
It is strongly recommended that *np.float32* is used when performing a crack detection. The crack detection is
tested and developed for images of dtypes *float*, *np.float64*, *np.float32* or *np.float16*.
Currently, there are two image stack objects that can be used. All image stacks have the same structure.
Accessing images, replacing images in the stack and adding new images works the same for all image stacks.
.. currentmodule:: crackdect.imagestack
* :class:`ImageStack`: A simple wrapper around a list. This container holds all images in the system memory (RAM).
* :class:`ImageStackSQL`: Manages RAM usage of the image stack. Images are held in memory as long as the
  RAM usage of the system stays below a certain percentage of the available memory and the image stack
  does not exceed a set number of MB. If more images are added after a limit is reached, all currently loaded
  images get stored in a database and only references to the images are kept in memory. The images are only
  loaded when directly accessed. This allows working with and changing images of a stack even if the stack is too
  big to fit into the memory. The loaded images will be kept in memory until the stack exceeds the RAM limits
  again. This reduces the number of loading and storing operations and therefore saves time, since these can be
  quite time consuming for a lot of images.
The image stack is quite easy to use.
"""
import io
from sqlalchemy import Column, Integer, create_engine, TypeDecorator, LargeBinary
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy.orm import deferred, sessionmaker
import numpy as np
import psutil
from skimage.util.dtype import _convert
from skimage.io import imread
class NumpyType(TypeDecorator):
    """
    Column type for storing numpy arrays in an SQL database through sqlalchemy.

    Inside the database an array is stored as ``LargeBinary``. Values are serialised with
    ``np.save`` when they are bound to a statement and restored with ``np.load`` when a row
    is fetched, so code using a column of this type only ever handles numpy arrays.
    ``None`` is never serialised and is handed back as ``None``.
    """
    impl = LargeBinary

    def __init__(self):
        super(NumpyType, self).__init__()

    def bind_processor(self, dialect):
        """
        Build the callable that turns an array into the binary value sent to the database.

        Parameters
        ----------
        dialect:
            The sqlalchemy dialect in use. It is forwarded to the underlying ``LargeBinary`` type.

        Returns
        -------
        process: callable
            Takes one value and returns its ``.npy`` byte representation, additionally passed
            through the processor of the underlying type if that type provides one.
            Returns ``None`` for ``None``.
        """
        wrapped = self.impl.bind_processor(dialect)

        def process(value):
            if value is None:
                return None
            buffer = io.BytesIO()
            np.save(buffer, value)
            raw = buffer.getvalue()
            # The underlying type's processor only ever sees real data, never None.
            return wrapped(raw) if wrapped else raw

        return process

    def result_processor(self, dialect, coltype):
        """
        Build the callable that turns a fetched binary value back into an array.

        Parameters
        ----------
        dialect:
            The sqlalchemy dialect in use. It is forwarded to the underlying ``LargeBinary`` type.
        coltype:
            Column type as reported by the database driver. Forwarded as well.

        Returns
        -------
        process: callable
            Takes the raw column value and returns the array restored with ``np.load``
            or ``None`` if the (processed) column value is ``None``.
        """
        wrapped = self.impl.result_processor(dialect, coltype)

        def process(value):
            # Here the underlying processor runs first, even for None.
            raw = wrapped(value) if wrapped else value
            if raw is None:
                return None
            return np.load(io.BytesIO(raw))

        return process
def _add_docstring(func):
def inner(function):
if function.__doc__ is None:
function.__doc__ = func.__doc__
else:
function.__doc__ = func.__doc__ + function.__doc__
return function
return inner
def _add_to_docstring(docstring):
def docstring_decorator(func):
if func.__doc__ is None:
func.__doc__ = docstring
else:
func.__doc__ = func.__doc__ + docstring
return func
return docstring_decorator
def _fast_convert(img, dtype):
"""
Check if the image is already the right dtype.
This will ignore value limits if the image is already the right dtype
Parameters
----------
img: array-like
dtype:
dtype the image should be converted to
Returns
-------
image: np.ndarray
"""
if img.dtype.type is dtype:
return img
else:
return _convert(img, dtype)
class ImageStack:
    """
    This object holds multiple images. All images are converted to the same datatype. This ensures that all
    images have the same characteristics for further processing.

    All images are represented as numpy arrays. The same convention for representing images is used as in
    skimage.

    If an image with mismatching dtype is added it is automatically converted to match the dtype.
    Read more about conversion details at skimage.util.dtype.

    This object behaves a lot like a list. Individual images or groups of images can be retrieved with slicing.
    Setitem and delitem behaviour is like with normal python lists but images can only be added with add_image.

    Parameters
    ----------
    dtype: optional, default=np.float32
        The dtype all images will be converted to. E.g. np.float32, bool, etc.

    Examples
    --------
    >>> # make an ImageStack object where all images are represented as unsigned integer arrays [0-255]
    >>> stack = ImageStack(dtype=np.uint8)
    >>> # Add an image to it.
    >>> img = (np.random.rand(200,200) * np.arange(200))/200  # floating point images must be in range [-1,1]
    >>> stack.add_image(img)

    This ImageStack can be indexed.

    >>> stack[0]  # getting the image with index 0 from the stack

    Changing an image in the stack. The input will also be converted to the dtype of the stack.

    >>> stack[0] = ((np.random.rand(200,200) * np.arange(200))/200)[::-1]  # setting an image in the stack

    Or deleting an image from the stack

    >>> del stack[0]
    """

    def __init__(self, dtype=np.float32):
        self._dtype = dtype
        self._images = []

    def add_image(self, img):
        """
        Add an image to the stack. The image must be a numpy array

        The input array will be converted to the dtype of the ImageStack

        Parameters
        ----------
        img: np.ndarray
        """
        self._images.append(_fast_convert(img, dtype=self._dtype))

    def remove_image(self, i=-1):
        """
        Remove an image from the stack.

        Parameters
        ----------
        i: int
            Index of the image to be removed
        """
        self._images.pop(i)

    def __len__(self):
        return len(self._images)

    def __repr__(self):
        return 'ImageStack: {}images, {}'.format(len(self), np.dtype(self._dtype).name)

    def __getitem__(self, i):
        # Accept everything that can act as an index, e.g. numpy integers.
        if hasattr(i, '__index__'):
            i = i.__index__()

        if isinstance(i, int):
            return self._images[i]
        elif isinstance(i, slice):
            # A slice gives a new stack which shares the image arrays with this one.
            temp_stack = ImageStack(self._dtype)
            temp_stack._images = self._images[i]
            return temp_stack
        else:
            raise TypeError('slicing must be with an int or slice object')

    def __delitem__(self, i):
        del self._images[i]

    def __setitem__(self, i, item):
        # Same index normalisation as in __getitem__ so both accept the same index types.
        if hasattr(i, '__index__'):
            i = i.__index__()

        if isinstance(i, int):
            self._images[i] = _fast_convert(item, dtype=self._dtype)
        elif isinstance(i, slice):
            # Unlike a list, a slice assignment must not change the number of images.
            n_targets = len(self._images[i])
            if len(item) != n_targets:
                raise ValueError('{} images provided to override {} images!'.format(len(item), n_targets))
            self._images[i] = [_fast_convert(j, self._dtype) for j in item]
        else:
            # Do not ignore an unusable index silently.
            raise TypeError('slicing must be with an int or slice object')

    # def __add__(self, other):
    #     if isinstance(other, self.__class__) and self._dtype is other._dtype:
    #         self._images = self._images.__add__(other._images)
    #         return self
    #     else:
    #         raise TypeError('Only two image stacks with the same image format can be combined!')

    @classmethod
    def from_paths(cls, paths, dtype=None, **kwargs):
        """
        Make an ImageStack object directly from paths of images. The images will be loaded, converted to the
        dtype of the ImageStack and added.

        Parameters
        ----------
        paths: list
            paths of the images to be added. At least one path is needed.
        dtype: optional
            The dtype all images will be converted to. E.g. np.float32, bool, etc.
            If this is not set, the dtype of the first image loaded will determine the dtype of the stack.
        kwargs:
            kwargs are forwarded to
            `skimage.io.imread <https://scikit-image.org/docs/dev/api/skimage.io.html#skimage.io.imread>`_
            For grayscale images simply add **as_gray = True**. For the kwargs for colored images use
            `parameters for reading <https://imageio.readthedocs.io/en/stable/_autosummary/imageio.plugins.pillow_legacy.html#module-imageio.plugins.pillow_legacy>`_.
            Keep in mind that some images might have alpha channels and some not even if they have the same format.

        Returns
        -------
        out: ImageStack
            An ImageStack with all images from paths as arrays.

        Examples
        --------
        >>> paths = ['list of image paths']
        >>> stack = ImageStack.from_paths(paths, as_gray=True)
        """
        # The first image is read up front since it may define the dtype of the stack.
        temp = imread(paths[0], **kwargs)
        c = cls(temp.dtype.type if dtype is None else dtype)
        c.add_image(temp)
        for p in paths[1:]:
            c.add_image(imread(p, **kwargs))
        return c

    def change_dtype(self, dtype):
        """
        Change the dtype of all images in the stack. All images will be converted to the new dtype.

        Parameters
        ----------
        dtype
            The new dtype of the stack. Nothing happens if it equals the current dtype.
        """
        if self._dtype == dtype:
            return
        self._images = [_convert(img, dtype) for img in self._images]
        self._dtype = dtype

    def copy(self):
        """
        Copy the current image stack.

        The copy is shallow until images are changed in the new stack.

        Returns
        -------
        out: ImageStack
        """
        temp = ImageStack(self._dtype)
        for i in self._images:
            temp.add_image(i)
        return temp

    def execute_function(self, func, *args, **kwargs):
        """
        Perform an operation on all the images in the stack.

        The operation can be any function which takes one images and other arguments as input and returns
        only one image.

        This operation changes the images in the stack. If the current state should be kept copy the stack first.

        Parameters
        ----------
        func: function
            A function which takes ONE image as first input and returns ONE image.
        args:
            args are forwarded to the func.
        kwargs:
            kwargs are forwarded to the func.

        Examples
        --------
        >>> def fun(img, to_add):
        >>>     return img + to_add
        >>> stack.execute_function(fun, to_add=4)

        This will apply the function *fun* to all images in the stack.
        """
        for ind, img in enumerate(self._images):
            self._images[ind] = _fast_convert(func(img, *args, **kwargs), self._dtype)

    def execute_rolling_function(self, func, keep_first=False, *args, **kwargs):
        """
        Perform an rolling operation on all the images in the stack.

        The operation can be any function which takes two images and other arguments as input and returns
        only one image.

        :math:`I_{new} = func(I_{n-1}, I_n)`

        This operation changes the images in the stack. If the current state should be kept copy the stack first.
        Since the 0-th image in the stack will remain unchanged because the rolling operation starts at the 1-st image,
        the 0-th image is removed if *keep_first* is set to *False* (default).
        An empty stack is left untouched.

        Parameters
        ----------
        func: function
            A function which takes TWO images and other arguments as input and returns ONE image. The function must
            have the following input structure: `fun(img1, img2, args, kwargs)`. *img1* will be the n-1st image in
            the calls.
        keep_first: bool
            If True, keeps the first image in the stack. Delete it otherwise.
        args:
            args are forwarded to the func.
        kwargs:
            kwargs are forwarded to the func.

        Examples
        --------
        >>> def fun(img1, img2):
        >>>     mask = img1 > img1.max()/2
        >>>     return img2 * mask
        >>> stack.execute_rolling_function(fun, keep_first=False)

        This will apply the function *fun* to all images in the stack.
        *img1* is always the n-1st image in the rolling operation.
        """
        if not self._images:
            return

        img_minus1 = self._images[0]
        # Iterate over a copy of the list: func always gets the images as they were before this call.
        for ind, img in enumerate(self._images[1:], start=1):
            self._images[ind] = _fast_convert(func(img_minus1, img, *args, **kwargs), self._dtype)
            img_minus1 = img

        if not keep_first:
            del self._images[0]
class ImageStackSQL:
    """
    This class works the same as ImageStack.

    ImageStackSQL objects will track the amount of memory the images occupy. When the memory limit is
    surpassed, all data will be stored in an sqlite database and the RAM will be cleared. Only a lazy loaded object
    is left in the image stack. Only when directly accessing the images in the stack they will be loaded into RAM
    again. sqlalchemy is used to connect to the database in which all data is stored.
    This makes this container suitable for long term storage and transfer of a lot of images.
    The images can be loaded into an ImageStackSQL object in a new python session.

    Parameters
    ----------
    database: str, optional
        Path of the database. If it does not exist, it will be created. The path must end with *.db*.
        If none is entered or the path does not end with *.db*, a name built from id(object) is used.
    stack_name: str, optional
        The name of the table the images will be saved. If none is entered it will be built from id(object)
    dtype: optional, default=np.float32
        The dtype all images will be converted to. E.g. np.float32, bool, etc.
    max_size_mb: float, optional
        The maximal size in mb the image stack is allowed to be. If a new image is added after surpassing this
        size all images will be saved in the database and the occupied RAM is cleared. All images are still accessible
        but will be loaded only when directly accessed.
    cache_limit: float, optional, default=80
        The limit of the RAM usage in percent of the available system RAM. When the RAM usage of the system
        surpasses this limit, all images will be saved in the database and RAM is freed again even if
        max_size_mb is not reached. This makes sure that the system never runs out of RAM.
        Values over 100 will effectively deactivate this behaviour. If the total size of the image stack is
        too small to free enough RAM to reach the cache limit newly added images will be saved immediately in
        the database. This also leads to constant reads from the database as no images will be kept in RAM. Therefore
        it is recommended to set this well over the current RAM usage of the system when instantiating an object.
    """

    def __init__(self, database='', stack_name='', dtype=np.float32, max_size_mb=None, cache_limit=80):
        self._dtype = dtype

        # stack name is the name of the table in the sql database. The table must have a name.
        self.stack_name = stack_name if stack_name != '' else 'table' + str(id(self))
        # database name must end with .db. Any other name is replaced by a generated one.
        self.database = database if database != '' and database.endswith('.db') else 'db' + str(id(self)) + '.db'

        # sqlalchemy connection
        self.engine = create_engine('sqlite:///{}'.format(self.database), echo=False)
        self.session = sessionmaker(bind=self.engine)()
        self.base = declarative_base()
        # The mapped class is built at runtime because the table name is only known now.
        # It is named after the resolved stack name, so the class never gets an empty name.
        self.table = type(self.stack_name, (self.base,), {'__tablename__': self.stack_name,
                                                          'id': Column('id', Integer, primary_key=True),
                                                          'image': deferred(Column('image', NumpyType))})
        self.base.metadata.create_all(self.engine)

        # list for easy access to the images. It holds the mapped row objects, not the arrays.
        self._images = []

        # ram limits
        self._max_nbytes = max_size_mb * 1e6 if max_size_mb is not None else np.inf
        self._cache_limit = cache_limit

        # nbytes and counter for caching logic
        self._nbytes = 0
        self.__counter = 0

    @classmethod
    def load_from_database(cls, database='', stack_name=''):
        """
        Load an image stack from a database.

        A table of a database which was made with an ImageStackSQL object can be loaded and an ImageStackSQL
        object with all the images is made. The dtype of the images in the new object is the same as the images in the
        table. All images, which will be added to the object will be converted to match the dtype.
        If the table holds no images, an empty stack with the default dtype is returned.

        Parameters
        ----------
        database: str
            Path of the database.
        stack_name: str
            Name of the table

        Returns
        -------
        out: ImageStackSQL
            The image stack object with connection to the database.
        """
        c = cls(database, stack_name)
        first = c.session.query(c.table).order_by(c.table.id).first()
        if first is not None:
            first_image = first.image
            # Store the scalar type (e.g. np.float32) like everywhere else, not the np.dtype instance,
            # otherwise the identity check in _fast_convert can never succeed.
            c._dtype = first_image.dtype.type
            # Reading the dtype loaded the first image, so its memory has to be accounted for.
            c._nbytes = first_image.nbytes
        c._images = c.session.query(c.table).order_by(c.table.id).all()
        return c

    @classmethod
    def from_paths(cls, paths, database='', stack_name='', dtype=None, max_size_mb=None, cache_limit=80, **kwargs):
        """
        Make an ImageStackSQL object directly from paths of images. The images will be loaded, converted to the
        dtype of the ImageStack and added.

        Parameters
        ----------
        paths: list
            paths of the images to be added. At least one path is needed.
        database: str, optional
            Path of the database. If it does not exist, it will be created. If none is entered, the name is
            built from id(object)
        stack_name: str, optional
            The name of the table the images will be saved. If none is entered it will be built from id(object)
        dtype: optional
            The dtype all images will be converted to. E.g. np.float32, bool, etc.
            If this is not set, the dtype of the first image loaded will determine the dtype of the stack.
        max_size_mb: float, optional
            :class:`ImageStackSQL` for more details.
        cache_limit: float, optional, default=80
            :class:`ImageStackSQL` for more details.
        kwargs:
            kwargs are forwarded to
            `skimage.io.imread <https://scikit-image.org/docs/dev/api/skimage.io.html#skimage.io.imread>`_
            For grayscale images simply add **as_gray = True**. For the kwargs for colored images use
            `parameters for reading <https://imageio.readthedocs.io/en/stable/_autosummary/imageio.plugins.pillow_legacy.html#module-imageio.plugins.pillow_legacy>`_.
            Keep in mind that some images might have alpha channels and some not even if they have the same format.

        Returns
        -------
        out: ImageStackSQL
            A new ImageStackSQL object with connection to the database.
        """
        # Empty names are resolved by __init__ from the id of the new object. Deriving them from
        # id(cls) here would give every unnamed stack of a python session the same table.
        temp = imread(paths[0], **kwargs)
        c = cls(database, stack_name, temp.dtype.type if dtype is None else dtype, max_size_mb, cache_limit)
        c.add_image(temp)
        for p in paths[1:]:
            c.add_image(imread(p, **kwargs))
        return c

    @property
    def nbytes(self):
        """
        Sum of bytes for all currently fully loaded images.

        This tracks the used RAM from the images. The overhead of the used RAM from sqlalchemy is not included and
        will not be tracked.
        """
        return self._nbytes

    @nbytes.setter
    def nbytes(self, x):
        self._nbytes = x
        if x > self._max_nbytes:
            self.save_state()
        elif self.__counter > 50:
            # The system memory is only queried after the counter passed 50 updates,
            # not on every single update.
            if psutil.virtual_memory().percent > self._cache_limit:
                self.save_state()
            self.__counter = 0
        self.__counter += 1

    @staticmethod
    def __is_loaded(sql_obj):
        # A deferred column only shows up in the instance dict once it holds a value.
        return 'image' in sql_obj.__dict__

    def __get_image(self, sql_obj):
        if self.__is_loaded(sql_obj):
            return sql_obj.image
        # This attribute access loads the image from the database, so the counter must grow.
        out = sql_obj.image
        self.nbytes += out.nbytes
        return out

    def __set_image(self, img, sql_obj):
        temp = _fast_convert(img, self._dtype)
        delta = temp.nbytes
        if self.__is_loaded(sql_obj):
            delta -= sql_obj.image.nbytes
        # Assign before updating the counter: if the update triggers save_state, the new image
        # is committed and expired with all others instead of staying loaded but uncounted.
        sql_obj.image = temp
        self.nbytes += delta

    def __clean_remove(self, sql_obj):
        # Objects which were never written to the database are just taken out of the session.
        if sql_obj._sa_instance_state.pending:
            self.session.expunge(sql_obj)
        else:
            self.session.delete(sql_obj)

    def __getitem__(self, i):
        if hasattr(i, '__index__'):
            i = i.__index__()

        if isinstance(i, int):
            return self.__get_image(self._images[i])
        elif isinstance(i, slice):
            # A slice is returned as in-memory ImageStack with all selected images loaded.
            temp_stack = ImageStack(self._dtype)
            temp_stack._images = [self.__get_image(j) for j in self._images[i]]
            return temp_stack
        else:
            raise TypeError('slicing must be with an int or slice object')

    def __setitem__(self, i, value):
        if hasattr(i, '__index__'):
            i = i.__index__()

        if isinstance(i, int):
            self.__set_image(value, self._images[i])
        elif isinstance(i, slice):
            temp = self._images[i]
            if len(value) != len(temp):
                raise ValueError('{} images provided to override {} images!'.format(len(value), len(temp)))
            for image, sql_obj in zip(value, temp):
                self.__set_image(image, sql_obj)
        else:
            # Do not ignore an unusable index silently.
            raise TypeError('slicing must be with an int or slice object')

    def __delitem__(self, i):
        if hasattr(i, '__index__'):
            i = i.__index__()

        if isinstance(i, int):
            self.remove_image(i)
        elif isinstance(i, slice):
            for j in self._images[i]:
                if self.__is_loaded(j):
                    self._nbytes -= j.image.nbytes
                self.__clean_remove(j)
            del self._images[i]
        else:
            raise TypeError('slicing must be with an int or slice object')

    def __repr__(self):
        s = 'ImageStack: {}images, {}, {:.2f}/{:.2f} MB RAM used, caching at {} percent of system memory'
        return s.format(len(self), np.dtype(self._dtype).name, self._nbytes / 1e6, self._max_nbytes / 1e6, self._cache_limit)

    def __len__(self):
        return len(self._images)

    def reload(self):
        """
        Reload the images from the table. All not saved changes will be lost.
        """
        self.session.expire_all()
        self._images = self.session.query(self.table).order_by(self.table.id).all()
        self._nbytes = 0

    # Backward-compatible alias: the method was originally published under this misspelled name.
    relaod = reload

    def save_state(self):
        """
        Saves the current state of the image stack to the database.

        This commits all changes and adds all new images to the table. All currently loaded images are expired. This
        means, that all RAM used by the images is freed.
        Call this method before closing the python session if the changes made to the image stack should be
        saved permanently.
        """
        self.session.commit()
        self.session.expire_all()
        self._nbytes = 0

    # The decorated methods below carry no docstring of their own, they get the
    # docstring of the ImageStack method with the same name.
    @_add_docstring(ImageStack.add_image)
    def add_image(self, img):
        temp = self.table(image=_fast_convert(img, dtype=self._dtype))
        self._images.append(temp)
        self.session.add(temp)
        self.nbytes += temp.image.nbytes

    @_add_docstring(ImageStack.remove_image)
    def remove_image(self, i=-1):
        temp = self._images[i]
        if self.__is_loaded(temp):
            self._nbytes -= temp.image.nbytes
        self.__clean_remove(temp)
        self._images.pop(i)

    @_add_docstring(ImageStack.change_dtype)
    def change_dtype(self, dtype):
        if dtype == self._dtype:
            return
        for i in self._images:
            bytes_old = i.image.nbytes
            i.image = _convert(i.image, dtype)
            self.nbytes += i.image.nbytes - bytes_old
        self._dtype = dtype

    def copy(self, stack_name=''):
        """
        Copy the current image stack.

        A new table in the database is created where all images are stored.

        Parameters
        ----------
        stack_name: str
            The name of the stack. This is also the name of the new table in the database

        Returns
        -------
        out: ImageStackSQL
        """
        temp = ImageStackSQL(self.database, stack_name, self._dtype, self._max_nbytes / 1e6, self._cache_limit)
        for i in self._images:
            temp.add_image(i.image)
        return temp

    @_add_docstring(ImageStack.execute_function)
    def execute_function(self, func, *args, **kwargs):
        for i in self._images:
            bytes_old = i.image.nbytes
            i.image = _fast_convert(func(i.image, *args, **kwargs), self._dtype)
            self.nbytes += i.image.nbytes - bytes_old

    @_add_docstring(ImageStack.execute_rolling_function)
    def execute_rolling_function(self, func, keep_first=False, *args, **kwargs):
        # An empty stack is left untouched.
        if not self._images:
            return

        img_minus1 = self._images[0].image
        for i in self._images[1:]:
            # Keep a copy of the current image. It is the "previous" image of the next step
            # and must not be affected by func or by being replaced in the stack.
            temp = i.image.copy()
            bytes_old = i.image.nbytes
            i.image = _fast_convert(func(img_minus1, i.image, *args, **kwargs), self._dtype)
            img_minus1 = temp
            self.nbytes += i.image.nbytes - bytes_old

        if not keep_first:
            self.remove_image(0)