# Source code for sparknlp.common.storage
# Copyright 2017-2022 John Snow Labs
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Contains utility classes for handling storage."""
from pyspark.ml.param import Param, Params, TypeConverters
from sparknlp.common.utils import ExternalResource
from sparknlp.common.properties import HasCaseSensitiveProperties
import sparknlp.internal as _internal
class HasStorageRef:
    """Mixin for annotators that carry a unique storage reference name.

    The ``storageRef`` param identifies which indexed storage an annotator
    reads from or writes to, so that producers and consumers can be matched.
    """

    # Unique name identifying the indexed storage this annotator refers to.
    storageRef = Param(Params._dummy(), "storageRef",
                       "unique reference name for identification",
                       TypeConverters.toString)

    def setStorageRef(self, value):
        """Sets unique reference name for identification.

        Parameters
        ----------
        value : str
            Unique reference name for identification

        Returns
        -------
        This instance, for method chaining.
        """
        return self._set(storageRef=value)

    def getStorageRef(self):
        """Gets unique reference name for identification.

        Returns
        -------
        str
            Unique reference name for identification
        """
        return self.getOrDefault("storageRef")
class HasStorageOptions:
    """Mixin exposing options controlling how indexed storage is handled.

    Provides two boolean params: whether the indexed storage is embedded in
    the saved model, and whether it is fully loaded into memory for lookups.
    """

    # If True, the indexed storage is bundled into the trained model on save.
    includeStorage = Param(Params._dummy(),
                           "includeStorage",
                           "whether to include indexed storage in trained model",
                           typeConverter=TypeConverters.toBoolean)

    # If True, the whole indexed storage is loaded in memory for lookups.
    enableInMemoryStorage = Param(Params._dummy(),
                                  "enableInMemoryStorage",
                                  "whether to load whole indexed storage in memory (in-memory lookup)",
                                  typeConverter=TypeConverters.toBoolean)

    def setIncludeStorage(self, value):
        """Sets whether to include indexed storage in trained model.

        Parameters
        ----------
        value : bool
            Whether to include indexed storage in trained model
        """
        return self._set(includeStorage=value)

    def getIncludeStorage(self):
        """Gets whether to include indexed storage in trained model.

        Returns
        -------
        bool
            Whether to include indexed storage in trained model
        """
        return self.getOrDefault("includeStorage")

    def setEnableInMemoryStorage(self, value):
        """Sets whether to load whole indexed storage in memory (in-memory lookup)

        Parameters
        ----------
        value : bool
            Whether to load whole indexed storage in memory (in-memory lookup)
        """
        return self._set(enableInMemoryStorage=value)

    def getEnableInMemoryStorage(self):
        """Gets whether to load whole indexed storage in memory (in-memory lookup).

        Returns
        -------
        bool
            Whether to load whole indexed storage in memory (in-memory lookup)
        """
        return self.getOrDefault("enableInMemoryStorage")
class HasStorageModel(HasStorageRef, HasCaseSensitiveProperties, HasStorageOptions):
    """Mixin for trained annotator models that are backed by indexed storage.

    Combines the storage reference, case-sensitivity, and storage-option
    params, and adds save/load entry points for the underlying storage.
    """

    def saveStorage(self, path, spark):
        """Saves the current model to storage.

        Parameters
        ----------
        path : str
            Path for saving the model.
        spark : :class:`pyspark.sql.SparkSession`
            The current SparkSession
        """
        # Push Python-side param values to the JVM object before delegating
        # the actual save to the Scala implementation.
        self._transfer_params_to_java()
        self._java_obj.saveStorage(path, spark._jsparkSession, False)

    @staticmethod
    def loadStorage(path, spark, storage_ref):
        """Loads the storage for a concrete annotator model.

        Raises
        ------
        NotImplementedError
            Always; concrete annotators using this mixin must override it.
        """
        raise NotImplementedError("AnnotatorModel with HasStorageModel did not implement 'loadStorage'")

    @staticmethod
    def loadStorages(path, spark, storage_ref, databases):
        """Loads each of the given storage databases for ``storage_ref``.

        Parameters
        ----------
        path : str
            Path the storage is loaded from.
        spark : :class:`pyspark.sql.SparkSession`
            The current SparkSession.
        storage_ref : str
            Unique reference name identifying the storage.
        databases : iterable
            Storage database identifiers to load, one helper call per entry.
        """
        for database in databases:
            _internal._StorageHelper(path, spark, database, storage_ref, within_storage=False)
class HasStorage(HasStorageRef, HasCaseSensitiveProperties, HasStorageOptions):
    """Mixin for annotator approaches that read their storage from a file.

    Adds a ``storagePath`` param (wrapped as an :class:`ExternalResource`)
    on top of the storage reference, case-sensitivity, and storage options.
    """

    # Location of the external file the storage is built from; kept as an
    # ExternalResource, hence the identity converter.
    storagePath = Param(Params._dummy(),
                        "storagePath",
                        "path to file",
                        typeConverter=TypeConverters.identity)

    def setStoragePath(self, path, read_as):
        """Sets path to file.

        Parameters
        ----------
        path : str
            Path to file
        read_as : str
            How to interpret the file

        Notes
        -----
        See :class:`ReadAs <sparknlp.common.ReadAs>` for reading options.
        """
        return self._set(storagePath=ExternalResource(path, read_as, {}))

    def getStoragePath(self):
        """Gets path to file.

        Returns
        -------
        str
            path to file
        """
        return self.getOrDefault("storagePath")