# Source code for sparknlp.common.storage
# Copyright 2017-2022 John Snow Labs
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains utility classes for handling storage."""
from pyspark.ml.param import Param, Params, TypeConverters
from sparknlp.common.utils import ExternalResource
from sparknlp.common.properties import HasCaseSensitiveProperties
import sparknlp.internal as _internal
class HasStorageRef:
    """Mixin for annotators that carry a unique storage reference name.

    The ``storageRef`` param identifies which indexed storage an annotator
    reads from or writes to, so that producers and consumers can be matched.
    """

    # Unique name identifying the indexed storage this annotator refers to.
    storageRef = Param(Params._dummy(), "storageRef",
                       "unique reference name for identification",
                       TypeConverters.toString)

    def setStorageRef(self, value):
        """Sets unique reference name for identification.

        Parameters
        ----------
        value : str
            Unique reference name for identification

        Returns
        -------
        This instance, for method chaining.
        """
        return self._set(storageRef=value)

    def getStorageRef(self):
        """Gets unique reference name for identification.

        Returns
        -------
        str
            Unique reference name for identification
        """
        return self.getOrDefault("storageRef")
class HasStorageOptions:
    """Mixin exposing options controlling how indexed storage is handled.

    Provides two boolean params: whether the indexed storage is embedded in
    the saved model, and whether it is fully loaded into memory for lookups.
    """

    # If True, the indexed storage is bundled into the trained model on save.
    includeStorage = Param(Params._dummy(),
                           "includeStorage",
                           "whether to include indexed storage in trained model",
                           typeConverter=TypeConverters.toBoolean)

    # If True, the whole indexed storage is loaded in memory for lookups.
    enableInMemoryStorage = Param(Params._dummy(),
                                  "enableInMemoryStorage",
                                  "whether to load whole indexed storage in memory (in-memory lookup)",
                                  typeConverter=TypeConverters.toBoolean)

    def setIncludeStorage(self, value):
        """Sets whether to include indexed storage in trained model.

        Parameters
        ----------
        value : bool
            Whether to include indexed storage in trained model
        """
        return self._set(includeStorage=value)

    def getIncludeStorage(self):
        """Gets whether to include indexed storage in trained model.

        Returns
        -------
        bool
            Whether to include indexed storage in trained model
        """
        return self.getOrDefault("includeStorage")

    def setEnableInMemoryStorage(self, value):
        """Sets whether to load whole indexed storage in memory (in-memory lookup)

        Parameters
        ----------
        value : bool
            Whether to load whole indexed storage in memory (in-memory lookup)
        """
        return self._set(enableInMemoryStorage=value)

    def getEnableInMemoryStorage(self):
        """Gets whether to load whole indexed storage in memory (in-memory lookup).

        Returns
        -------
        bool
            Whether to load whole indexed storage in memory (in-memory lookup)
        """
        return self.getOrDefault("enableInMemoryStorage")
class HasStorageModel(HasStorageRef, HasCaseSensitiveProperties, HasStorageOptions):
    """Mixin for trained annotator models that are backed by indexed storage.

    Combines the storage reference, case-sensitivity, and storage-option
    params, and adds save/load entry points for the underlying storage.
    """

    def saveStorage(self, path, spark):
        """Saves the current model to storage.

        Parameters
        ----------
        path : str
            Path for saving the model.
        spark : :class:`pyspark.sql.SparkSession`
            The current SparkSession
        """
        # Push Python-side param values to the JVM object before delegating
        # the actual save to the Scala implementation.
        self._transfer_params_to_java()
        self._java_obj.saveStorage(path, spark._jsparkSession, False)

    @staticmethod
    def loadStorage(path, spark, storage_ref):
        """Loads the storage for a concrete annotator model.

        Raises
        ------
        NotImplementedError
            Always; concrete annotators using this mixin must override it.
        """
        raise NotImplementedError("AnnotatorModel with HasStorageModel did not implement 'loadStorage'")

    @staticmethod
    def loadStorages(path, spark, storage_ref, databases):
        """Loads each of the given storage databases for ``storage_ref``.

        Parameters
        ----------
        path : str
            Path the storage is loaded from.
        spark : :class:`pyspark.sql.SparkSession`
            The current SparkSession.
        storage_ref : str
            Unique reference name identifying the storage.
        databases : iterable
            Storage database identifiers to load, one helper call per entry.
        """
        for database in databases:
            _internal._StorageHelper(path, spark, database, storage_ref, within_storage=False)
class HasStorage(HasStorageRef, HasCaseSensitiveProperties, HasStorageOptions):
    """Mixin for annotator approaches that read their storage from a file.

    Adds a ``storagePath`` param (wrapped as an :class:`ExternalResource`)
    on top of the storage reference, case-sensitivity, and storage options.
    """

    # Location of the external file the storage is built from; kept as an
    # ExternalResource, hence the identity converter.
    storagePath = Param(Params._dummy(),
                        "storagePath",
                        "path to file",
                        typeConverter=TypeConverters.identity)

    def setStoragePath(self, path, read_as):
        """Sets path to file.

        Parameters
        ----------
        path : str
            Path to file
        read_as : str
            How to interpret the file

        Notes
        -----
        See :class:`ReadAs <sparknlp.common.ReadAs>` for reading options.
        """
        return self._set(storagePath=ExternalResource(path, read_as, {}))

    def getStoragePath(self):
        """Gets path to file.

        Returns
        -------
        str
            path to file
        """
        return self.getOrDefault("storagePath")