From bbaa2e79666a14dbc25adea8dc0bb55f66635c83 Mon Sep 17 00:00:00 2001 From: Douglas Holman Date: Fri, 9 Jul 2021 09:31:59 -0700 Subject: [PATCH 01/19] dev: adding in a progress commit for an initial implementation of the model-converter in the SDK with the demo notebook --- model_converter_demo.ipynb | 219 +++++++++++++++++++++++ modzy/converter/__init__.py | 0 modzy/converter/converter_helpers.ipynb | 227 ++++++++++++++++++++++++ modzy/converter/mlflow.py | 50 ++++++ modzy/converter/model_converter.py | 99 +++++++++++ modzy/converter/utils.py | 69 +++++++ tests/test_model_converter.py | 44 +++++ 7 files changed, 708 insertions(+) create mode 100644 model_converter_demo.ipynb create mode 100644 modzy/converter/__init__.py create mode 100644 modzy/converter/converter_helpers.ipynb create mode 100644 modzy/converter/mlflow.py create mode 100644 modzy/converter/model_converter.py create mode 100644 modzy/converter/utils.py create mode 100644 tests/test_model_converter.py diff --git a/model_converter_demo.ipynb b/model_converter_demo.ipynb new file mode 100644 index 0000000..551e04a --- /dev/null +++ b/model_converter_demo.ipynb @@ -0,0 +1,219 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# First we will convert an Explainable SageMaker Image Classification Model" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "blob_storage_provider = \"S3\"\n", + "blob_storage_container = \"modzy-engineering-tests\"\n", + "resources_key = \"ds/model-converter/sagemaker/image-classification/resources.tar.gz\"\n", + "weights_key = \"ds/model-converter/sagemaker/image-classification/weights.tar.gz\"" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from modzy.converter.model_converter import ModelConverter\n", + "from modzy.client import ApiClient\n", + "import os\n", + "\n", + "# To get started, store your Modzy API key as an environment variable `MODZY_API_KEY`.\n", + "# Then, create a Modzy API client to interact with the integration envrionment\n", + "modzy_api_key = os.getenv(\"MODZY_QA_API_KEY\")\n", + "modzy_instance_base_url = \"https://integration.modzy.engineering/api\"\n", + "modzy_api_client = ApiClient(api_key=modzy_api_key, base_url=modzy_instance_base_url)\n", + "\n", + "# Instantiate a Model Converter client with access to the Modzy integration environment\n", + "model_converter = ModelConverter(modzy_api_client)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The model details page for your new model can be found here: https://integration.modzy.engineering/models/106d50ca0e-modzy-image-classification/0.0.1\n" + ] + } + ], + "source": [ + "# Now, provide the Model converter with information about your stored model assets and the credentials required\n", + "# to access them. The Model converter will do the rest of the work.\n", + "\n", + "source_platform = \"sagemaker\"\n", + "model_type = \"image-classification\"\n", + "\n", + "_, converter_output = model_converter.run(\n", + " sp_access_key_id=os.getenv(\"SP_ACCESS_KEY_ID\"),\n", + " sp_secret_access_key=os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", + " blobstore_provider=blob_storage_provider,\n", + " blobstore_container=blob_storage_container,\n", + " weights_path=weights_key,\n", + " resources_path=resources_key,\n", + " platform=source_platform,\n", + " model_type=model_type,\n", + ")\n", + "\n", + "print(f\"The model details page for your new model can be found here: {converter_output['modelURL']}\")\n", + "new_model_id = converter_output[\"modelId\"]\n", + "new_model_version = converter_output[\"modelVersion\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Delegate a single processing to serve your new model\n", + "modzy_api_client.models.update_processing_engines(new_model_id, new_model_version, min_engines=1, max_engines=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Send an inference job to run against your new model with explainability!\n", + "input_source = {\n", + " \"0001\": {\n", + " f\"image\": {\n", + " \"bucket\": blob_storage_container,\n", + " \"key\": f\"/ds/model-converter/{source_platform}/{model_type}/test_input\"\n", + " }\n", + " }\n", + "}\n", + "\n", + "print(f\"Sending job to model {new_model_id} {new_model_version}\")\n", + "job = modzy_api_client.jobs.submit_aws_s3(\n", + " new_model_id, new_model_version, input_source,\n", + " os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", + " region=\"us-east-1\", explain=True\n", + ")\n", + "\n", + "modzy_api_client.jobs.block_until_complete(job, timeout=None)\n", + "print(\"Job Completed!\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Next, we will convert an MLflow model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "from modzy.converter.mlflow import upload_mlflow_model\n", + "\n", + "# Raw output MLFlow Model Gets uploaded to the user's S3 Bucket\n", + "# upload_mlflow_model()\n", + "\n", + "# Now we repeat the process with an MLFlow model\n", + "source_platform = \"mlflow\"\n", + "model_type = \"tabular\"\n", + "resources_key = \"ds/model-converter/mlflow/tabular/resources.tar.gz\"\n", + "weights_key = \"ds/model-converter/mlflow/tabular/model.tar.gz\"\n", + "\n", + "\n", + "_, converter_output = model_converter.run(\n", + " sp_access_key_id=os.getenv(\"SP_ACCESS_KEY_ID\"),\n", + " sp_secret_access_key=os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", + " blobstore_provider=blob_storage_provider,\n", + " blobstore_container=blob_storage_container,\n", + " weights_path=weights_key,\n", + " resources_path=resources_key,\n", + " platform=source_platform,\n", + " model_type=model_type,\n", + ")\n", + "\n", + "print(f\"The model details page for your new model can be found here: {converter_output['modelURL']}\")\n", + "new_model_id = converter_output[\"modelId\"]\n", + "new_model_version = converter_output[\"modelVersion\"]" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "# Delegate a single processing to serve your new model\n", + "modzy_api_client.models.update_processing_engines(\"106d50ca0e\", \"0.0.1\", min_engines=0, max_engines=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.1" + } + }, + "nbformat": 4, + "nbformat_minor": 1 +} \ No newline at end of file diff --git a/modzy/converter/__init__.py b/modzy/converter/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/modzy/converter/converter_helpers.ipynb b/modzy/converter/converter_helpers.ipynb new file mode 100644 index 0000000..bf024b0 --- /dev/null +++ b/modzy/converter/converter_helpers.ipynb @@ -0,0 +1,227 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 48, + "metadata": {}, + "outputs": [], + "source": [ + "import time\n", + "import shutil\n", + "import json\n", + "import os\n", + "import ntpath\n", + "import tarfile\n", + "from libcloud.storage.types import Provider\n", + "from libcloud.storage.providers import get_driver" + ] + }, + { + "cell_type": "code", + "execution_count": 68, + "metadata": {}, + "outputs": [], + "source": [ + "def upload_resources(model_yaml_path,container,resources_key,\n", + " storage_key,storage_secret,storage_provider,additional_filepaths=None):\n", + " \"\"\" Creates resources archive expected by model converter, uploads to storage provider.\n", + " Args:\n", + " model_yaml_path (str): Path to model.yaml file to be included.\n", + " container (str): Storage provider container name (e.g. Bucket name in S3).\n", + " resources_key (str): Desired key for resource archive once uploaded to storage provider.\n", + " storage_key (str): Storage provider access key.\n", + " storage_secret (str): Storage provider secret key.\n", + " storage_provider (str): Storage provider name (must be one of \"S3\", \"AZURE_BLOBS\", or \"GOOGLE_STORAGE\").\n", + " additional_filepaths (list): List of filepaths of additional files to be included.\n", + " \"\"\"\n", + " # Init libcloud driver\n", + " if storage_provider == \"S3\":\n", + " cls = get_driver(Provider.S3)\n", + " elif storage_provider == \"AZURE_BLOBS\":\n", + " cls = get_driver(Provider.AZURE_BLOBS)\n", + " elif storage_provider == \"GOOGLE_STORAGE\":\n", + " cls = get_driver(Provider.GOOGLE_STORAGE)\n", + " else:\n", + " raise ValueError('Only \"S3\", \"AZURE_BLOBS\", and \"GOOGLE_STORAGE\" are supported storage providers.')\n", + " \n", + " driver = cls(storage_key, storage_secret)\n", + " container = driver.get_container(container_name=container)\n", + " \n", + " # TODO: Probably set these outside of this helper function\n", + " RESOURCES_TAR_NAME = \"resources.tar.gz\"\n", + " MODEL_YAML_NAME = \"model.yaml\"\n", + " \n", + " # Create temp dir\n", + " tmp_dir_path = os.path.join(os.getcwd(),\".tmp_\"+str(time.time()))\n", + " os.mkdir(tmp_dir_path)\n", + " \n", + " # Move the local resources that you have prepared for your model into an archive\n", + " resources_tar_path = os.path.join(tmp_dir_path,RESOURCES_TAR_NAME)\n", + " tar = tarfile.open(resources_tar_path, \"w:gz\")\n", + " tar.add(model_yaml_path,arcname=MODEL_YAML_NAME)\n", + " for filepath in additional_filepaths:\n", + " tar.add(filepath,arcname=ntpath.split(filepath)[1])\n", + " tar.close()\n", + "\n", + " # This method blocks until all the parts have been uploaded.\n", + " extra = {'content_type': 'application/octet-stream'}\n", + " \n", + " # Upload archive to storage provider\n", + " with open(resources_tar_path, 'rb') as iterator:\n", + " obj = driver.upload_object_via_stream(iterator=iterator,\n", + " container=container,\n", + " object_name=resources_key,\n", + " extra=extra)\n", + " \n", + " # Remove temp dir\n", + " shutil.rmtree(tmp_dir_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 93, + "metadata": {}, + "outputs": [], + "source": [ + "def upload_mlflow_model(mlflow_model_dir,container,model_key,\n", + " storage_key,storage_secret,storage_provider):\n", + " \"\"\" Creates resources archive expected by model converter, uploads to storage provider.\n", + " Args:\n", + " mlflow_model_dir (str): Path to saved MLFlow model directory (e.g. using mlflow.sklearn.save_model())\n", + " container (str): Storage provider container name (e.g. Bucket name in S3).\n", + " resources_key (str): Desired key for model archive once uploaded to storage provider.\n", + " storage_key (str): Storage provider access key.\n", + " storage_secret (str): Storage provider secret key.\n", + " storage_provider (str): Storage provider name (must be one of \"S3\", \"AZURE_BLOBS\", or \"GOOGLE_STORAGE\").\n", + " \"\"\"\n", + " # Init libcloud driver\n", + " if storage_provider == \"S3\":\n", + " cls = get_driver(Provider.S3)\n", + " elif storage_provider == \"AZURE_BLOBS\":\n", + " cls = get_driver(Provider.AZURE_BLOBS)\n", + " elif storage_provider == \"GOOGLE_STORAGE\":\n", + " cls = get_driver(Provider.GOOGLE_STORAGE)\n", + " else:\n", + " raise ValueError('Only \"S3\", \"AZURE_BLOBS\", and \"GOOGLE_STORAGE\" are supported storage providers.')\n", + " \n", + " driver = cls(storage_key, storage_secret)\n", + " container = driver.get_container(container_name=container)\n", + " \n", + " # TODO: Probably set this outside of this helper function\n", + " MODEL_TAR_NAME = \"weights.tar.gz\"\n", + " \n", + " # Create temp dir\n", + " tmp_dir_path = os.path.join(os.getcwd(),\".tmp_\"+str(time.time()))\n", + " os.mkdir(tmp_dir_path)\n", + " \n", + " # Move the local mlflow model artifacts that were saved out by MLFlow into an archive\n", + " model_tar_path = os.path.join(tmp_dir_path,MODEL_TAR_NAME)\n", + " tar = tarfile.open(model_tar_path, \"w:gz\")\n", + " mlflow_model_filenames = os.listdir(mlflow_model_dir)\n", + " for filename in mlflow_model_filenames:\n", + " full_path = os.path.join(mlflow_model_dir,filename)\n", + " tar.add(full_path,arcname=filename)\n", + " tar.close()\n", + "\n", + " # This method blocks until all the parts have been uploaded.\n", + " extra = {'content_type': 'application/octet-stream'}\n", + " \n", + " # Upload archive to storage provider\n", + " with open(model_tar_path, 'rb') as iterator:\n", + " obj = driver.upload_object_via_stream(iterator=iterator,\n", + " container=container,\n", + " object_name=model_key,\n", + " extra=extra)\n", + " \n", + " # Remove temp dir\n", + " shutil.rmtree(tmp_dir_path)" + ] + }, + { + "cell_type": "code", + "execution_count": 94, + "metadata": {}, + "outputs": [], + "source": [ + "# set general params\n", + "BUCKET_NAME = \"sagemaker-testing-ds\"\n", + "STORAGE_PROVIDER = \"S3\"\n", + "storage_key = \"ACCESS_KEY_HERE\"\n", + "storage_secret = \"SECRET_KEY_HERE\"" + ] + }, + { + "cell_type": "code", + "execution_count": 95, + "metadata": {}, + "outputs": [], + "source": [ + "# set resources-specific params\n", + "RESOURCES_KEY = \"helper-testing/resources.tar.gz\"\n", + "model_yaml_path = \"/path/to/model.yaml\"\n", + "additional_filepaths = [\"/path/to/labels.json\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 96, + "metadata": {}, + "outputs": [], + "source": [ + "# upload resources archive to S3\n", + "upload_resources(model_yaml_path,BUCKET_NAME,RESOURCES_KEY,\n", + " storage_key,storage_secret,STORAGE_PROVIDER,additional_filepaths)" + ] + }, + { + "cell_type": "code", + "execution_count": 97, + "metadata": {}, + "outputs": [], + "source": [ + "# set mlflow-specific params\n", + "MODEL_KEY = \"helper-testing/weights.tar.gz\"\n", + "mlflow_dir = \"/path/to/mlflow_saved_model_dir/\"" + ] + }, + { + "cell_type": "code", + "execution_count": 98, + "metadata": {}, + "outputs": [], + "source": [ + "# upload mlflow model archive to S3\n", + "upload_mlflow_model(mlflow_dir,BUCKET_NAME,MODEL_KEY,\n", + " storage_key,storage_secret,STORAGE_PROVIDER)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "dl", + "language": "python", + "name": "dl" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/modzy/converter/mlflow.py b/modzy/converter/mlflow.py new file mode 100644 index 0000000..1bedee8 --- /dev/null +++ b/modzy/converter/mlflow.py @@ -0,0 +1,50 @@ +from modzy.converter.utils import get_authenticated_storage_provider_client +import time +import shutil +import os +import ntpath +import tarfile + + +def upload_mlflow_model(mlflow_model_dir, container, model_key, + storage_key, storage_secret, storage_provider): + """ Creates resources archive expected by model converter, uploads to storage provider. + Args: + mlflow_model_dir (str): Path to saved MLFlow model directory (e.g. using mlflow.sklearn.save_model()) + container (str): Storage provider container name (e.g. Bucket name in S3). + resources_key (str): Desired key for model archive once uploaded to storage provider. + storage_key (str): Storage provider access key. + storage_secret (str): Storage provider secret key. + storage_provider (str): Storage provider name (must be one of "S3", "AZURE_BLOBS", or "GOOGLE_STORAGE"). + """ + driver = get_authenticated_storage_provider_client(storage_provider, storage_key, storage_secret) + container = driver.get_container(container_name=container) + + # TODO: Probably set this outside of this helper function + MODEL_TAR_NAME = "weights.tar.gz" + + # Create temp dir + tmp_dir_path = os.path.join(os.getcwd(), ".tmp_" + str(time.time())) + os.mkdir(tmp_dir_path) + + # Move the local mlflow model artifacts that were saved out by MLFlow into an archive + model_tar_path = os.path.join(tmp_dir_path, MODEL_TAR_NAME) + tar = tarfile.open(model_tar_path, "w:gz") + mlflow_model_filenames = os.listdir(mlflow_model_dir) + for filename in mlflow_model_filenames: + full_path = os.path.join(mlflow_model_dir, filename) + tar.add(full_path, arcname=filename) + tar.close() + + # This method blocks until all the parts have been uploaded. + extra = {'content_type': 'application/octet-stream'} + + # Upload archive to storage provider + with open(model_tar_path, 'rb') as iterator: + obj = driver.upload_object_via_stream(iterator=iterator, + container=container, + object_name=model_key, + extra=extra) + + # Remove temp dir + shutil.rmtree(tmp_dir_path) diff --git a/modzy/converter/model_converter.py b/modzy/converter/model_converter.py new file mode 100644 index 0000000..e45593f --- /dev/null +++ b/modzy/converter/model_converter.py @@ -0,0 +1,99 @@ +import logging +import requests +import json + +CONVERTER_HOST = "127.0.0.1" +CONVERTER_PORT = "8080" + + +class ModelConverter: + """The `Models` object. + + This object is used to retrieve information about models from the API. + + Note: + This class should not be instantiated directly but rather accessed through the `converter` + attribute of an `ApiClient` instance. + """ + + _base_route = f'http://{CONVERTER_HOST}:{CONVERTER_PORT}' + + def __init__(self, api_client): + """Creates a `ModelConverter` instance. + + Args: + api_client (ApiClient): An `ApiClient` instance. + """ + self._api_client = api_client + self.logger = logging.getLogger(__name__) + + def env_status(self): + raw_response = self._api_client.http.get(f"{self._base_route}/env-status") + response = raw_response["responseEntries"][0] + response_code = int(response["httpCode"]) + response_message = response["message"] + self.logger.info(f"Environment status returned with code {response_code}: {response_message}") + return True if response_code == 200 else False + + def check_endpoints( + self, + sp_access_key_id, + sp_secret_access_key, + blobstore_container, + weights_path, + resources_path, + model_type, + platform, + blobstore_provider + ): + converter_request = { + "aws_key_id": sp_access_key_id, + "aws_access_key": sp_secret_access_key, + "s3Bucket": blobstore_container, + "weightsPath": weights_path, + "resourcesPath": resources_path, + "model_type": model_type, + "platform": platform, + "blobStoreProvider": blobstore_provider + } + raw_response = requests.get( + f"{self._base_route}/check-endpoints", + params=converter_request + ).json() + response = raw_response["responseEntries"][0] + status_code = response["httpCode"] + message = response["message"] + self.logger.info(f"Response received with status code {status_code}: {message}") + return status_code == 200 + + def run( + self, + sp_access_key_id, + sp_secret_access_key, + blobstore_container, + weights_path, + resources_path, + model_type, + platform, + blobstore_provider + ): + converter_request = { + "aws_key_id": sp_access_key_id, + "aws_access_key": sp_secret_access_key, + "s3Bucket": blobstore_container, + "weightsPath": weights_path, + "resourcesPath": resources_path, + "model_type": model_type, + "platform": platform, + "blobStoreProvider": blobstore_provider + } + raw_response = requests.get(f"{self._base_route}/run", params=converter_request).json() + response = raw_response["responseEntries"][0] + status_code = response["httpCode"] + message = response["message"] + + # TODO: This may not be possible if status code is not 200 + success = raw_response["successEntry"] + self.logger.info(f"Response received with status code {status_code}: {message}") + self.logger.info(f"Details: {json.dumps(success, indent=4)}") + return status_code, success diff --git a/modzy/converter/utils.py b/modzy/converter/utils.py new file mode 100644 index 0000000..adf5142 --- /dev/null +++ b/modzy/converter/utils.py @@ -0,0 +1,69 @@ +import time +import shutil +import os +import ntpath +import tarfile +from libcloud.storage.types import Provider +from libcloud.storage.providers import get_driver + + +SUPPORTED_STORAGE_PROVIDERS = { + "S3": Provider.S3, + "AZURE_BLOBS": Provider.AZURE_BLOBS, + "GOOGLE_STORAGE": Provider.GOOGLE_STORAGE +} + + +def get_authenticated_storage_provider_client(storage_provider, access_key, secret_key): + """Initialize the libcloud driver""" + if storage_provider not in SUPPORTED_STORAGE_PROVIDERS: + raise ValueError(f"Storage provider must be one of: {', '.join(SUPPORTED_STORAGE_PROVIDERS)}") + + storage_driver = get_driver(SUPPORTED_STORAGE_PROVIDERS[storage_provider]) + authenticated_storage_client = storage_driver(access_key, secret_key) + return authenticated_storage_client + + +def upload_resources(model_yaml_path, container, resources_key, + storage_key, storage_secret, storage_provider, additional_filepaths=None): + """ Creates resources archive expected by model converter, uploads to storage provider. + Args: + model_yaml_path (str): Path to model.yaml file to be included. + container (str): Storage provider container name (e.g. Bucket name in S3). + resources_key (str): Desired key for resource archive once uploaded to storage provider. + storage_key (str): Storage provider access key. + storage_secret (str): Storage provider secret key. + storage_provider (str): Storage provider name (must be one of "S3", "AZURE_BLOBS", or "GOOGLE_STORAGE"). + additional_filepaths (list): List of filepaths of additional files to be included. + """ + driver = get_authenticated_storage_provider_client(storage_provider, storage_key, storage_secret) + container = driver.get_container(container_name=container) + + # TODO: Probably set these outside of this helper function + RESOURCES_TAR_NAME = "resources.tar.gz" + MODEL_YAML_NAME = "model.yaml" + + # Create temp dir + tmp_dir_path = os.path.join(os.getcwd(), f".tmp_{time.time()}") + os.mkdir(tmp_dir_path) + + # Move the local resources that you have prepared for your model into an archive + resources_tar_path = os.path.join(tmp_dir_path, RESOURCES_TAR_NAME) + tar = tarfile.open(resources_tar_path, "w:gz") + tar.add(model_yaml_path, arcname=MODEL_YAML_NAME) + for filepath in additional_filepaths: + tar.add(filepath, arcname=ntpath.split(filepath)[1]) + tar.close() + + # This method blocks until all the parts have been uploaded. + extra = {'content_type': 'application/octet-stream'} + + # Upload archive to storage provider + with open(resources_tar_path, 'rb') as iterator: + obj = driver.upload_object_via_stream(iterator=iterator, + container=container, + object_name=resources_key, + extra=extra) + + # Remove temp dir + shutil.rmtree(tmp_dir_path) diff --git a/tests/test_model_converter.py b/tests/test_model_converter.py new file mode 100644 index 0000000..e679d92 --- /dev/null +++ b/tests/test_model_converter.py @@ -0,0 +1,44 @@ +from modzy.converter.model_converter import ModelConverter +import os +import pytest +from modzy.client import ApiClient + +BASE_URL = os.getenv('MODZY_BASE_URL') +API_KEY = os.getenv('MODZY_API_KEY') + + +@pytest.fixture() +def converter(): + return ModelConverter(ApiClient(base_url=BASE_URL, api_key=API_KEY)) + + +def test_model_converter_env_status(converter): + response = converter.env_status() + assert response + + +def test_model_converter_check_endpoints(converter): + response = converter.check_endpoints( + "AKIAUX272I2XFHORFQ7D", + "35exOPRUPxNn5FPdY8h43uF5cWSVEfzw9JfheVnF", + "modzy-engineering-tests", + "ds/model-converter/sagemaker/image-classification/weights.tar.gz", + "ds/model-converter/sagemaker/image-classification/resources.tar.gz", + "image-classification", + "sagemaker" + ) + assert response + + +def test_model_converter_run(converter): + status_code, succes_entry = converter.run( + "AKIAUX272I2XFHORFQ7D", + "35exOPRUPxNn5FPdY8h43uF5cWSVEfzw9JfheVnF", + "modzy-engineering-tests", + "ds/model-converter/sagemaker/image-classification/weights.tar.gz", + "ds/model-converter/sagemaker/image-classification/resources.tar.gz", + "image-classification", + "sagemaker", + "S3" + ) + assert status_code == 200 From 506cf84e63c165b57cecea5da168eef36d8a3f8d Mon Sep 17 00:00:00 2001 From: Douglas Holman Date: Fri, 9 Jul 2021 09:48:03 -0700 Subject: [PATCH 02/19] fix: adding in credentials from environment to test files --- tests/test_model_converter.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_model_converter.py b/tests/test_model_converter.py index e679d92..cc96329 100644 --- a/tests/test_model_converter.py +++ b/tests/test_model_converter.py @@ -19,8 +19,8 @@ def test_model_converter_env_status(converter): def test_model_converter_check_endpoints(converter): response = converter.check_endpoints( - "AKIAUX272I2XFHORFQ7D", - "35exOPRUPxNn5FPdY8h43uF5cWSVEfzw9JfheVnF", + os.getenv("SP_ACCESS_KEY_ID"), + os.getenv("SP_SECRET_ACCESS_KEY"), "modzy-engineering-tests", "ds/model-converter/sagemaker/image-classification/weights.tar.gz", "ds/model-converter/sagemaker/image-classification/resources.tar.gz", @@ -32,8 +32,8 @@ def test_model_converter_check_endpoints(converter): def test_model_converter_run(converter): status_code, succes_entry = converter.run( - "AKIAUX272I2XFHORFQ7D", - "35exOPRUPxNn5FPdY8h43uF5cWSVEfzw9JfheVnF", + os.getenv("SP_ACCESS_KEY_ID"), + os.getenv("SP_SECRET_ACCESS_KEY"), "modzy-engineering-tests", "ds/model-converter/sagemaker/image-classification/weights.tar.gz", "ds/model-converter/sagemaker/image-classification/resources.tar.gz", From bf94b0568e3c7b4dc89fff11aafd29cceb534393 Mon Sep 17 00:00:00 2001 From: Douglas Holman Date: Fri, 9 Jul 2021 09:48:32 -0700 Subject: [PATCH 03/19] fix: adding in blob storage service provider --- tests/test_model_converter.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/test_model_converter.py b/tests/test_model_converter.py index cc96329..f40fd16 100644 --- a/tests/test_model_converter.py +++ b/tests/test_model_converter.py @@ -25,7 +25,8 @@ def test_model_converter_check_endpoints(converter): "ds/model-converter/sagemaker/image-classification/weights.tar.gz", "ds/model-converter/sagemaker/image-classification/resources.tar.gz", "image-classification", - "sagemaker" + "sagemaker", + "S3" ) assert response From 3739a3883f0d2d7d5f8e633998d498d1c185a8db Mon Sep 17 00:00:00 2001 From: Douglas Holman Date: Fri, 9 Jul 2021 16:50:56 -0700 Subject: [PATCH 04/19] dev: updating the demo notebook to include more documentation and run the sagemaker demo image-classification demo end to end --- examples/mlflow/tabular/model.yaml | 110 +++++++++ .../image-classification/labels.json | 1 + .../sagemaker/image-classification/model.yaml | 99 +++++++++ model_converter_demo.ipynb | 208 ++++++++++++++---- modzy/converter/model_converter.py | 10 +- modzy/converter/utils.py | 2 +- 6 files changed, 387 insertions(+), 43 deletions(-) create mode 100644 examples/mlflow/tabular/model.yaml create mode 100644 examples/sagemaker/image-classification/labels.json create mode 100644 examples/sagemaker/image-classification/model.yaml diff --git a/examples/mlflow/tabular/model.yaml b/examples/mlflow/tabular/model.yaml new file mode 100644 index 0000000..9aacafe --- /dev/null +++ b/examples/mlflow/tabular/model.yaml @@ -0,0 +1,110 @@ +specification: '0.3' +type: "file" +source: "Custom Model" +version: "0.0.1" +name: "MLFlow" +author: "MLFlow" + +description: + summary: "The Factorization Machines algorithm is a general-purpose supervised learning algorithm that you can use for both + classification and regression tasks. " + details: "It is an extension of a linear model that is designed to capture interactions between features within high dimensional + sparse datasets economically. For example, in a click prediction system, the Factorization Machines model can capture click + rate patterns observed when ads from a certain ad-category are placed on pages from a certain page-category. Factorization + machines are a good choice for tasks dealing with high dimensional sparse datasets, such as click prediction and item recommendation." + + technical: |- + #OVERVIEW: + The prediction task for a Factorization Machines model is to estimate a function ŷ from a feature set xi to a target domain. + This domain is real-valued for regression and binary for classification. The Factorization Machines model is supervised and so + has a training dataset (xi,yj) available. + + #TRAINING: + For training, the Factorization Machines algorithm currently supports only the recordIO-protobuf format with Float32 tensors. + Because their use case is predominantly on sparse data, CSV is not a good candidate. Both File and Pipe mode training are supported + for recordIO-wrapped protobuf. + + #VALIDATION: + + For inference, the Factorization Machines algorithm supports the application/json and x-recordio-protobuf formats. + + - For the binary classification problem, the algorithm predicts a score and a label. The label is a number and can be either 0 or 1. + The score is a number that indicates how strongly the algorithm believes that the label should be 1. The algorithm computes score first + and then derives the label from the score value. If the score is greater than or equal to 0.5, the label is 1. + - For the regression problem, just a score is returned and it is the predicted value. For example, if Factorization Machines is used to + predict a movie rating, score is the predicted rating value. + + + #INPUT SPECIFICATION: + The input(s) to this model must adhere to the following specifications: + | Filename | Maximum Size | Accepted Format(s) | + | -------- | ------------ | ------------------ | + | image | 1K | jpg, png, tif | + + Additional information describing input file(s) can go in a short paragraph here if necessary. Feel free to add an additional markdown table if many values need to be listed. + + #OUTPUT DETAILS: + This model will output the following: + | Filename | Maximum Size | Format | + | -------- | ------------ | ------ | + | results.json | 128 B | .json | + + performance: "The Factorization Machines algorithm reports three binary classification metrics, which are computed during training. When tuning the model for binary classification tasks, + choose one of these as the objective." + +releaseNotes: "The Amazon SageMaker implementation of the Factorization Machines algorithm considers only pair-wise (2nd order) interactions between features." + +tags: +- "regression" +- "factorization" +- "latent factors" +- "features" +filters: +- type: "Task" + label: "Factorize dataset" +- type: "Input Type" + label: "Image" +- type: "Subject" + label: "Latent features" + +metrics: +- label: "F1 Score" + value: 0.59 + type: "percentage" + description: "For a classification task, the model is trained by minimizing the cross entropy loss, also known as the log loss." + +inputs: + input.csv: + acceptedMediaTypes: "application/csv" + maxSize: 100000000 + description: "Tabular data in the same format as the training data. The structure of the input data will be enforced by the required signature that accompanies each model." + +outputs: + results.json: + mediaType: "application/json" + maxSize: 100000000 + description: "JSON file that contains the target column that mimics the target column used in training." + +resources: + memory: + size: "2Gi" + cpu: + count: 1 + gpu: + count: 0 +timeout: + status: 60 + run: 600 + +timeout: + status: 20s + run: 20s + +internal: + recommended: false + experimental: false + available: true + active: true + features: + explainable: false + adversarialDefense: false diff --git a/examples/sagemaker/image-classification/labels.json b/examples/sagemaker/image-classification/labels.json new file mode 100644 index 0000000..c373924 --- /dev/null +++ b/examples/sagemaker/image-classification/labels.json @@ -0,0 +1 @@ +["ak47", "american-flag", "backpack", "baseball-bat", "baseball-glove", "basketball-hoop", "bat", "bathtub", "bear", "beer-mug", "billiards", "binoculars", "birdbath", "blimp", "bonsai-101", "boom-box", "bowling-ball", "bowling-pin", "boxing-glove", "brain-101", "breadmaker", "buddha-101", "bulldozer", "butterfly", "cactus", "cake", "calculator", "camel", "cannon", "canoe", "car-tire", "cartman", "cd", "centipede", "cereal-box", "chandelier-101", "chess-board", "chimp", "chopsticks", "cockroach", "coffee-mug", "coffin", "coin", "comet", "computer-keyboard", "computer-monitor", "computer-mouse", "conch", "cormorant", "covered-wagon", "cowboy-hat", "crab-101", "desk-globe", "diamond-ring", "dice", "dog", "dolphin-101", "doorknob", "drinking-straw", "duck", "dumb-bell", "eiffel-tower", "electric-guitar-101", "elephant-101", "elk", "ewer-101", "eyeglasses", "fern", "fighter-jet", "fire-extinguisher", "fire-hydrant", "fire-truck", "fireworks", "flashlight", "floppy-disk", "football-helmet", "french-horn", "fried-egg", "frisbee", "frog", "frying-pan", "galaxy", "gas-pump", "giraffe", "goat", "golden-gate-bridge", "goldfish", "golf-ball", "goose", "gorilla", "grand-piano-101", "grapes", "grasshopper", "guitar-pick", "hamburger", "hammock", "harmonica", "harp", "harpsichord", "hawksbill-101", "head-phones", "helicopter-101", "hibiscus", "homer-simpson", "horse", "horseshoe-crab", "hot-air-balloon", "hot-dog", "hot-tub", "hourglass", "house-fly", "human-skeleton", "hummingbird", "ibis-101", "ice-cream-cone", "iguana", "ipod", "iris", "jesus-christ", "joy-stick", "kangaroo-101", "kayak", "ketch-101", "killer-whale", "knife", "ladder", "laptop-101", "lathe", "leopards-101", "license-plate", "lightbulb", "light-house", "lightning", "llama-101", "mailbox", "mandolin", "mars", "mattress", "megaphone", "menorah-101", "microscope", "microwave", "minaret", "minotaur", "motorbikes-101", "mountain-bike", "mushroom", "mussels", "necktie", "octopus", "ostrich", "owl", "palm-pilot", "palm-tree", "paperclip", "paper-shredder", "pci-card", "penguin", "people", "pez-dispenser", "photocopier", "picnic-table", "playing-card", "porcupine", "pram", "praying-mantis", "pyramid", "raccoon", "radio-telescope", "rainbow", "refrigerator", "revolver-101", "rifle", "rotary-phone", "roulette-wheel", "saddle", "saturn", "school-bus", "scorpion-101", "screwdriver", "segway", "self-propelled-lawn-mower", "sextant", "sheet-music", "skateboard", "skunk", "skyscraper", "smokestack", "snail", "snake", "sneaker", "snowmobile", "soccer-ball", "socks", "soda-can", "spaghetti", "speed-boat", "spider", "spoon", "stained-glass", "starfish-101", "steering-wheel", "stirrups", "sunflower-101", "superman", "sushi", "swan", "swiss-army-knife", "sword", "syringe", "tambourine", "teapot", "teddy-bear", "teepee", "telephone-box", "tennis-ball", "tennis-court", "tennis-racket", "theodolite", "toaster", "tomato", "tombstone", "top-hat", "touring-bike", "tower-pisa", "traffic-light", "treadmill", "triceratops", "tricycle", "trilobite-101", "tripod", "t-shirt", "tuning-fork", "tweezer", "umbrella-101", "unicorn", "vcr", "video-projector", "washing-machine", "watch-101", "waterfall", "watermelon", "welding-mask", "wheelbarrow", "windmill", "wine-bottle", "xylophone", "yarmulke", "yo-yo", "zebra", "airplanes-101", "car-side-101", "faces-easy-101", "greyhound", "tennis-shoes", "toad", "clutter"] \ No newline at end of file diff --git a/examples/sagemaker/image-classification/model.yaml b/examples/sagemaker/image-classification/model.yaml new file mode 100644 index 0000000..bdd4edd --- /dev/null +++ b/examples/sagemaker/image-classification/model.yaml @@ -0,0 +1,99 @@ +specification: "0.3" +type: "file" +source: "Custom Model" +version: "0.0.1" +name: "Caltech image classification" +author: "AWS Sagemaker" +description: + summary: "The Amazon SageMaker image classification algorithm is a supervised learning algorithm that supports multi-label classification." + details: "It takes an image as input and outputs one or more labels assigned to that image. It uses a convolutional neural network (ResNet) + that can be trained from scratch or trained using transfer learning when a large number of training images are not available." + technical: |- + #OVERVIEW: + The recommended input format for the Amazon SageMaker image classification algorithms is Apache MXNet RecordIO. + + #TRAINING: + If you use the RecordIO format for training, specify both train and validation channels as values for the InputDataConfig parameter of the + CreateTrainingJob request. Specify one RecordIO (.rec) file in the train channel and one RecordIO file in the validation channel. Set the + content type for both channels to application/x-recordio. + + #VALIDATION: + The image classification model processes a single image per request and so outputs only one line in the JSON or JSON Lines format. The + following is an example of a response in JSON Lines format: + + accept: application/jsonlines + + {"prediction": [prob_0, prob_1, prob_2, prob_3, ...]} + + + #INPUT SPECIFICATION: + The generated models can be hosted for inference and support encoded .jpg and .png image formats as image/png, image/jpeg, and + application/x-image content-type. The input image is resized automatically. + + #OUTPUT DETAILS: + The output is the probability values for all classes encoded in JSON format, or in JSON Lines text format for batch transform. + + For image classification, we support the following GPU instances for training: ml.p2.xlarge, ml.p2.8xlarge, ml.p2.16xlarge, ml.p3.2xlarge, + ml.p3.8xlargeand ml.p3.16xlarge. We recommend using GPU instances with more memory for training with large batch sizes. However, both CPU (such + as C4) and GPU (such as P2 and P3) instances can be used for the inference. You can also run the algorithm on multi-GPU and multi-machine settings + for distributed training. + + performance: "Accuracy, Precision, Recall, F1-score" +releaseNotes: "Image classification in Amazon SageMaker can be run in two modes: full training and transfer learning." + +tags: +- "Computer Vision" +- "Image classification" +- "ResNet" +filters: +- type: "Task" + label: "Image Classification" +- type: "Input Type" + label: "Image" +- type: "Subject" + label: "Still images" + +metrics: +- label: "Probability" + type: "percentage" + value: 1.0 + description: |- + Use case dependent + + +inputs: + image: + acceptedMediaTypes: + - "image/jpeg" + - "image/png" + - "image/tiff" + maxSize: 5M + description: "Any jpg or png image for object detection" + +outputs: + results.json: + mediaType: "application/json" + maxSize: 128K + description: "Classify objects in an image and give probabilities for each" + + +resources: + memory: + size: 6G + cpu: + count: 1 + gpu: + count: 1 + +timeout: + status: 20s + run: 20s + +internal: + recommended: false + experimental: false + available: true + active: true + features: + explainable: false + adversarialDefense: false diff --git a/model_converter_demo.ipynb b/model_converter_demo.ipynb index 551e04a..023288c 100644 --- a/model_converter_demo.ipynb +++ b/model_converter_demo.ipynb @@ -9,19 +9,87 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ + "# Import some standard dependencies\n", + "import os\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 1. Train a built-in model on the SageMaker Platform\n", + "\n", + "The raw output of the SageMaker Platform will be a set of weights for your image classification model that is stored within a bucket on AWS S3." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# TODO: Inlude concise training code to showcase how the assets below are produced\n", + "\n", "blob_storage_provider = \"S3\"\n", "blob_storage_container = \"modzy-engineering-tests\"\n", - "resources_key = \"ds/model-converter/sagemaker/image-classification/resources.tar.gz\"\n", "weights_key = \"ds/model-converter/sagemaker/image-classification/weights.tar.gz\"" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 2. Provide metadata specific to your model\n", + "\n", + "You are encouraged to fill out a `model.yaml` file describing your model and provide any additional metadata files for the specific model type that you chose. In the case of image classification, you will need to provide a `labels.json` file that contains a mapping between numerical classes and human readable labels. For example, 12 could be the label for a \"tabby cat\"." + ] + }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from modzy.converter.utils import upload_resources\n", + "\n", + "current_working_directory = os.getcwd()\n", + "\n", + "# Indicate where your local files\n", + "model_yaml_path = os.path.join(current_working_directory, \"examples/sagemaker/image-classification/model.yaml\")\n", + "labels_json_path = os.path.join(current_working_directory, \"examples/sagemaker/image-classification/labels.json\")\n", + "auxiliary_files = [labels_json_path]\n", + "\n", + "# Your local files will be archieved and stored in the following location:\n", + "resources_key = \"ds/demo/model-converter/sagemaker/image-classification/resources.tar.gz\"\n", + "\n", + "\n", + "upload_resources(\n", + " model_yaml_path, blob_storage_container, resources_key,\n", + " os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"), blob_storage_provider, auxiliary_files\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 3. Use the Modzy Python SDK to set up an API Client to the Model Converter Service\n", + "\n", + "The Modzy Python SDK has a number of convenience functions that we will use to:\n", + "* connect to our instance of modzy\n", + "* interact with the model converter service\n", + "* manage processing engines for our new model\n", + "* run jobs against our new model" + ] + }, + { + "cell_type": "code", + "execution_count": null, "metadata": { "pycharm": { "name": "#%%\n" @@ -31,7 +99,6 @@ "source": [ "from modzy.converter.model_converter import ModelConverter\n", "from modzy.client import ApiClient\n", - "import os\n", "\n", "# To get started, store your Modzy API key as an environment variable `MODZY_API_KEY`.\n", "# Then, create a Modzy API client to interact with the integration envrionment\n", @@ -45,21 +112,13 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": { "pycharm": { "name": "#%%\n" } }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The model details page for your new model can be found here: https://integration.modzy.engineering/models/106d50ca0e-modzy-image-classification/0.0.1\n" - ] - } - ], + "outputs": [], "source": [ "# Now, provide the Model converter with information about your stored model assets and the credentials required\n", "# to access them. The Model converter will do the rest of the work.\n", @@ -67,7 +126,7 @@ "source_platform = \"sagemaker\"\n", "model_type = \"image-classification\"\n", "\n", - "_, converter_output = model_converter.run(\n", + "_, sagemaker_converter_output = model_converter.run(\n", " sp_access_key_id=os.getenv(\"SP_ACCESS_KEY_ID\"),\n", " sp_secret_access_key=os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", " blobstore_provider=blob_storage_provider,\n", @@ -78,14 +137,14 @@ " model_type=model_type,\n", ")\n", "\n", - "print(f\"The model details page for your new model can be found here: {converter_output['modelURL']}\")\n", - "new_model_id = converter_output[\"modelId\"]\n", - "new_model_version = converter_output[\"modelVersion\"]" + "print(f\"The model details page for your new model can be found here: {sagemaker_converter_output['modelURL']}\")\n", + "sagemaker_model_id = sagemaker_converter_output[\"modelId\"]\n", + "sagemaker_model_version = sagemaker_converter_output[\"modelVersion\"]" ] }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "pycharm": { "name": "#%%\n" @@ -94,7 +153,12 @@ "outputs": [], "source": [ "# Delegate a single processing to serve your new model\n", - "modzy_api_client.models.update_processing_engines(new_model_id, new_model_version, min_engines=1, max_engines=1)" + "modzy_api_client.models.update_processing_engines(\n", + " sagemaker_model_id, sagemaker_model_version, min_engines=1, max_engines=1\n", + ")\n", + "print(\n", + " f\"Warming up a container to perform inference from model {sagemaker_model_id} version {sagemaker_model_version}\"\n", + ")" ] }, { @@ -108,8 +172,8 @@ "outputs": [], "source": [ "# Send an inference job to run against your new model with explainability!\n", - "input_source = {\n", - " \"0001\": {\n", + "sagemaker_input_source = {\n", + " \"00001\": {\n", " f\"image\": {\n", " \"bucket\": blob_storage_container,\n", " \"key\": f\"/ds/model-converter/{source_platform}/{model_type}/test_input\"\n", @@ -117,9 +181,9 @@ " }\n", "}\n", "\n", - "print(f\"Sending job to model {new_model_id} {new_model_version}\")\n", + "print(f\"Sending job to model {sagemaker_model_id} version {sagemaker_model_version}\")\n", "job = modzy_api_client.jobs.submit_aws_s3(\n", - " new_model_id, new_model_version, input_source,\n", + " sagemaker_model_id, sagemaker_model_version, sagemaker_input_source,\n", " os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", " region=\"us-east-1\", explain=True\n", ")\n", @@ -128,6 +192,18 @@ "print(\"Job Completed!\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Release the processing engine\n", + "modzy_api_client.models.update_processing_engines(\n", + " sagemaker_model_id, sagemaker_model_version, min_engines=0, max_engines=1\n", + ")" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -138,26 +214,43 @@ { "cell_type": "code", "execution_count": null, - "metadata": { - "pycharm": { - "name": "#%%\n" - } - }, + "metadata": {}, "outputs": [], "source": [ "from modzy.converter.mlflow import upload_mlflow_model\n", "\n", - "# Raw output MLFlow Model Gets uploaded to the user's S3 Bucket\n", - "# upload_mlflow_model()\n", - "\n", "# Now we repeat the process with an MLFlow model\n", "source_platform = \"mlflow\"\n", "model_type = \"tabular\"\n", - "resources_key = \"ds/model-converter/mlflow/tabular/resources.tar.gz\"\n", - "weights_key = \"ds/model-converter/mlflow/tabular/model.tar.gz\"\n", "\n", + "mlflow_model_dir = \"\"\n", + "mlflow_weights_key = \"ds/model-converter/mlflow/tabular/model.tar.gz\"\n", + "\n", + "# upload_mlflow_model(\n", + "# mlflow_model_dir, blob_storage_container, mlflow_weights_key,\n", + "# os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"), blob_storage_provider\n", + "# )\n", + "\n", + "mlflow_model_yaml_path = \"\"\n", + "mlflow_resources_key = \"ds/model-converter/mlflow/tabular/resources.tar.gz\"\n", "\n", - "_, converter_output = model_converter.run(\n", + "# upload_resources(\n", + "# mlflow_model_yaml_path, blob_storage_container, mlflow_resources_key,\n", + "# os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"), blob_storage_provider, auxiliary_files\n", + "# )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "pycharm": { + "name": "#%%\n" + } + }, + "outputs": [], + "source": [ + "_, mlflow_converter_output = model_converter.run(\n", " sp_access_key_id=os.getenv(\"SP_ACCESS_KEY_ID\"),\n", " sp_secret_access_key=os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", " blobstore_provider=blob_storage_provider,\n", @@ -168,9 +261,9 @@ " model_type=model_type,\n", ")\n", "\n", - "print(f\"The model details page for your new model can be found here: {converter_output['modelURL']}\")\n", - "new_model_id = converter_output[\"modelId\"]\n", - "new_model_version = converter_output[\"modelVersion\"]" + "print(f\"The model details page for your new model can be found here: {mlflow_converter_output['modelURL']}\")\n", + "mlflow_model_id = mlflow_converter_output[\"modelId\"]\n", + "mlflow_model_version = mlflow_converter_output[\"modelVersion\"]" ] }, { @@ -184,7 +277,44 @@ "outputs": [], "source": [ "# Delegate a single processing to serve your new model\n", - "modzy_api_client.models.update_processing_engines(\"106d50ca0e\", \"0.0.1\", min_engines=0, max_engines=1)" + "modzy_api_client.models.update_processing_engines(mlflow_model_id, mlflow_model_version, min_engines=1, max_engines=1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Send an inference job to run against your new model!\n", + "input_source = {\n", + " \"0001\": {\n", + " f\"input.csv\": {\n", + " \"bucket\": blob_storage_container,\n", + " \"key\": f\"/ds/model-converter/{source_platform}/{model_type}/test_input\"\n", + " }\n", + " }\n", + "}\n", + "\n", + "print(f\"Sending job to model {mlflow_model_id} {mlflow_model_version}\")\n", + "job = modzy_api_client.jobs.submit_aws_s3(\n", + " mlflow_model_id, mlflow_model_version, input_source,\n", + " os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", + " region=\"us-east-1\"\n", + ")\n", + "\n", + "modzy_api_client.jobs.block_until_complete(job, timeout=None)\n", + "print(\"Job Completed!\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Spin down the resources when down performing inference\n", + "modzy_api_client.models.update_processing_engines(mlflow_model_id, mlflow_model_version, min_engines=0, max_engines=1)" ] }, { diff --git a/modzy/converter/model_converter.py b/modzy/converter/model_converter.py index e45593f..719ed3a 100644 --- a/modzy/converter/model_converter.py +++ b/modzy/converter/model_converter.py @@ -91,9 +91,13 @@ def run( response = raw_response["responseEntries"][0] status_code = response["httpCode"] message = response["message"] + self.logger.info(f"Response received with status code {status_code}: {message}") # TODO: This may not be possible if status code is not 200 - success = raw_response["successEntry"] - self.logger.info(f"Response received with status code {status_code}: {message}") - self.logger.info(f"Details: {json.dumps(success, indent=4)}") + if status_code == "200": + success = raw_response["successEntry"] + self.logger.info(f"Details: {json.dumps(success, indent=4)}") + else: + success = None + return status_code, success diff --git a/modzy/converter/utils.py b/modzy/converter/utils.py index adf5142..bf9b328 100644 --- a/modzy/converter/utils.py +++ b/modzy/converter/utils.py @@ -25,7 +25,7 @@ def get_authenticated_storage_provider_client(storage_provider, access_key, secr def upload_resources(model_yaml_path, container, resources_key, - storage_key, storage_secret, storage_provider, additional_filepaths=None): + storage_key, storage_secret, storage_provider, additional_filepaths=[]): """ Creates resources archive expected by model converter, uploads to storage provider. Args: model_yaml_path (str): Path to model.yaml file to be included. From 25f3ab950406eacf05c3cbd1f6660b3e5a593e41 Mon Sep 17 00:00:00 2001 From: Douglas Holman Date: Fri, 9 Jul 2021 17:17:40 -0700 Subject: [PATCH 05/19] feat: tested image-classification portion of the notebook --- model_converter_demo.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/model_converter_demo.ipynb b/model_converter_demo.ipynb index 023288c..88c66fd 100644 --- a/model_converter_demo.ipynb +++ b/model_converter_demo.ipynb @@ -182,7 +182,7 @@ "}\n", "\n", "print(f\"Sending job to model {sagemaker_model_id} version {sagemaker_model_version}\")\n", - "job = modzy_api_client.jobs.submit_aws_s3(\n", + "job = modzy_api_client.jobs.submit_aws_s3_bulk(\n", " sagemaker_model_id, sagemaker_model_version, sagemaker_input_source,\n", " os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", " region=\"us-east-1\", explain=True\n", @@ -297,7 +297,7 @@ "}\n", "\n", "print(f\"Sending job to model {mlflow_model_id} {mlflow_model_version}\")\n", - "job = modzy_api_client.jobs.submit_aws_s3(\n", + "job = modzy_api_client.jobs.submit_aws_s3_bulk(\n", " mlflow_model_id, mlflow_model_version, input_source,\n", " os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", " region=\"us-east-1\"\n", From 16b1ff7ce18c75bcc222e97f448902a2864a583d Mon Sep 17 00:00:00 2001 From: Douglas Holman Date: Mon, 12 Jul 2021 11:44:02 -0700 Subject: [PATCH 06/19] dev: making some modifications to the notebook in order to ensure that the MLflow portion is running smooothly --- model_converter_demo.ipynb | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/model_converter_demo.ipynb b/model_converter_demo.ipynb index 88c66fd..cd8e685 100644 --- a/model_converter_demo.ipynb +++ b/model_converter_demo.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -29,11 +29,11 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "# TODO: Inlude concise training code to showcase how the assets below are produced\n", + "# TODO: Include concise training code to showcase how the assets below are produced\n", "\n", "blob_storage_provider = \"S3\"\n", "blob_storage_container = \"modzy-engineering-tests\"\n", @@ -51,7 +51,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -89,7 +89,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "pycharm": { "name": "#%%\n" @@ -152,7 +152,7 @@ }, "outputs": [], "source": [ - "# Delegate a single processing to serve your new model\n", + "# Delegate a single processing engine to serve your new model\n", "modzy_api_client.models.update_processing_engines(\n", " sagemaker_model_id, sagemaker_model_version, min_engines=1, max_engines=1\n", ")\n", @@ -224,7 +224,7 @@ "model_type = \"tabular\"\n", "\n", "mlflow_model_dir = \"\"\n", - "mlflow_weights_key = \"ds/model-converter/mlflow/tabular/model.tar.gz\"\n", + "mlflow_weights_key = \"ds/demo/model-converter/sagemaker/mlflow/tabular/weights.tar.gz\"\n", "\n", "# upload_mlflow_model(\n", "# mlflow_model_dir, blob_storage_container, mlflow_weights_key,\n", @@ -232,7 +232,7 @@ "# )\n", "\n", "mlflow_model_yaml_path = \"\"\n", - "mlflow_resources_key = \"ds/model-converter/mlflow/tabular/resources.tar.gz\"\n", + "mlflow_resources_key = \"ds/demo/model-converter/sagemaker/mlflow/tabular/resources.tar.gz\"\n", "\n", "# upload_resources(\n", "# mlflow_model_yaml_path, blob_storage_container, mlflow_resources_key,\n", From 10cb8669eca7c6876650741de19fc802179aeccf Mon Sep 17 00:00:00 2001 From: Douglas Holman Date: Mon, 12 Jul 2021 20:08:29 -0700 Subject: [PATCH 07/19] dev: committing the notebook with the hardcoded, tested resources for saumil to add training code --- model_converter_demo.ipynb | 70 ++++++++++++++++++++++---------------- 1 file changed, 41 insertions(+), 29 deletions(-) diff --git a/model_converter_demo.ipynb b/model_converter_demo.ipynb index cd8e685..2e82e80 100644 --- a/model_converter_demo.ipynb +++ b/model_converter_demo.ipynb @@ -1,5 +1,15 @@ { "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Notes on executing the demo:\n", + "\n", + "1. Make sure to port forward the service from the integration cluster: `kubectl port-forward svc/model-converter -n modzy 8080:8080`\n", + "2. Set envionrment variables: `MODZY_QA_API_KEY`, `SP_ACCESS_KEY_ID` and `SP_SECRET_ACCESS_KEY`" + ] + }, { "cell_type": "markdown", "metadata": {}, @@ -9,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -29,7 +39,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -37,7 +47,7 @@ "\n", "blob_storage_provider = \"S3\"\n", "blob_storage_container = \"modzy-engineering-tests\"\n", - "weights_key = \"ds/model-converter/sagemaker/image-classification/weights.tar.gz\"" + "sagemaker_weights_key = \"ds/model-converter/sagemaker/image-classification/weights.tar.gz\"" ] }, { @@ -51,7 +61,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -65,13 +75,13 @@ "auxiliary_files = [labels_json_path]\n", "\n", "# Your local files will be archieved and stored in the following location:\n", - "resources_key = \"ds/demo/model-converter/sagemaker/image-classification/resources.tar.gz\"\n", + "sagemaker_resources_key = \"ds/model-converter/sagemaker/image-classification/resources.tar.gz\"\n", "\n", "\n", - "upload_resources(\n", - " model_yaml_path, blob_storage_container, resources_key,\n", - " os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"), blob_storage_provider, auxiliary_files\n", - ")" + "# upload_resources(\n", + "# model_yaml_path, blob_storage_container, sagemaker_resources_key,\n", + "# os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"), blob_storage_provider, auxiliary_files\n", + "# )" ] }, { @@ -89,7 +99,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": { "pycharm": { "name": "#%%\n" @@ -131,8 +141,8 @@ " sp_secret_access_key=os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", " blobstore_provider=blob_storage_provider,\n", " blobstore_container=blob_storage_container,\n", - " weights_path=weights_key,\n", - " resources_path=resources_key,\n", + " weights_path=sagemaker_weights_key,\n", + " resources_path=sagemaker_resources_key,\n", " platform=source_platform,\n", " model_type=model_type,\n", ")\n", @@ -189,7 +199,7 @@ ")\n", "\n", "modzy_api_client.jobs.block_until_complete(job, timeout=None)\n", - "print(\"Job Completed!\")" + "print(f\"Job Completed: https://integration.modzy.engineering/operations/jobs/{job.job_identifier}\")" ] }, { @@ -219,20 +229,22 @@ "source": [ "from modzy.converter.mlflow import upload_mlflow_model\n", "\n", + "# TODO: add training code\n", + "\n", "# Now we repeat the process with an MLFlow model\n", "source_platform = \"mlflow\"\n", "model_type = \"tabular\"\n", "\n", - "mlflow_model_dir = \"\"\n", - "mlflow_weights_key = \"ds/demo/model-converter/sagemaker/mlflow/tabular/weights.tar.gz\"\n", + "mlflow_model_dir = os.path.join(current_working_directory, \"examples/mlflow/tabular/weights\")\n", + "mlflow_weights_key = \"mlflow-wine/weights.tar.gz\"\n", "\n", "# upload_mlflow_model(\n", - "# mlflow_model_dir, blob_storage_container, mlflow_weights_key,\n", - "# os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"), blob_storage_provider\n", + "# mlflow_model_dir, blob_storage_container, mlflow_weights_key,\n", + "# os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"), blob_storage_provider\n", "# )\n", "\n", - "mlflow_model_yaml_path = \"\"\n", - "mlflow_resources_key = \"ds/demo/model-converter/sagemaker/mlflow/tabular/resources.tar.gz\"\n", + "mlflow_model_yaml_path = os.path.join(current_working_directory, \"examples/mlflow/tabular/model.yaml\")\n", + "mlflow_resources_key = \"ds/model-converter/mlflow/tabular/resources.tar.gz\"\n", "\n", "# upload_resources(\n", "# mlflow_model_yaml_path, blob_storage_container, mlflow_resources_key,\n", @@ -251,14 +263,14 @@ "outputs": [], "source": [ "_, mlflow_converter_output = model_converter.run(\n", - " sp_access_key_id=os.getenv(\"SP_ACCESS_KEY_ID\"),\n", - " sp_secret_access_key=os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", - " blobstore_provider=blob_storage_provider,\n", - " blobstore_container=blob_storage_container,\n", - " weights_path=weights_key,\n", - " resources_path=resources_key,\n", - " platform=source_platform,\n", - " model_type=model_type,\n", + " sp_access_key_id=os.getenv(\"SP_ACCESS_KEY_ID\"),\n", + " sp_secret_access_key=os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", + " blobstore_provider=blob_storage_provider,\n", + " blobstore_container=blob_storage_container,\n", + " weights_path=mlflow_weights_key,\n", + " resources_path=mlflow_resources_key,\n", + " platform=source_platform,\n", + " model_type=model_type,\n", ")\n", "\n", "print(f\"The model details page for your new model can be found here: {mlflow_converter_output['modelURL']}\")\n", @@ -303,8 +315,8 @@ " region=\"us-east-1\"\n", ")\n", "\n", - "modzy_api_client.jobs.block_until_complete(job, timeout=None)\n", - "print(\"Job Completed!\")" + "# modzy_api_client.jobs.block_until_complete(job, timeout=None)\n", + "print(f\"Job Completed: https://integration.modzy.engineering/operations/jobs/{job.job_identifier}\")" ] }, { From f8e9e5ffcb7103c7b2321e9e2b19f0b75243f1a7 Mon Sep 17 00:00:00 2001 From: Douglas Holman Date: Tue, 13 Jul 2021 10:30:14 -0700 Subject: [PATCH 08/19] fix: cleaning up the model.yaml file to 0.1.0 --- examples/mlflow/tabular/model.yaml | 43 ++++++++++++++---------------- 1 file changed, 20 insertions(+), 23 deletions(-) diff --git a/examples/mlflow/tabular/model.yaml b/examples/mlflow/tabular/model.yaml index 9aacafe..9828cb8 100644 --- a/examples/mlflow/tabular/model.yaml +++ b/examples/mlflow/tabular/model.yaml @@ -1,37 +1,37 @@ specification: '0.3' type: "file" source: "Custom Model" -version: "0.0.1" +version: "0.1.0" name: "MLFlow" author: "MLFlow" description: - summary: "The Factorization Machines algorithm is a general-purpose supervised learning algorithm that you can use for both + summary: "The Factorization Machines algorithm is a general-purpose supervised learning algorithm that you can use for both classification and regression tasks. " - details: "It is an extension of a linear model that is designed to capture interactions between features within high dimensional - sparse datasets economically. For example, in a click prediction system, the Factorization Machines model can capture click - rate patterns observed when ads from a certain ad-category are placed on pages from a certain page-category. Factorization + details: "It is an extension of a linear model that is designed to capture interactions between features within high dimensional + sparse datasets economically. For example, in a click prediction system, the Factorization Machines model can capture click + rate patterns observed when ads from a certain ad-category are placed on pages from a certain page-category. Factorization machines are a good choice for tasks dealing with high dimensional sparse datasets, such as click prediction and item recommendation." technical: |- #OVERVIEW: - The prediction task for a Factorization Machines model is to estimate a function ŷ from a feature set xi to a target domain. - This domain is real-valued for regression and binary for classification. The Factorization Machines model is supervised and so - has a training dataset (xi,yj) available. + The prediction task for a Factorization Machines model is to estimate a function ŷ from a feature set xi to a target domain. + This domain is real-valued for regression and binary for classification. The Factorization Machines model is supervised and so + has a training dataset (xi,yj) available. #TRAINING: - For training, the Factorization Machines algorithm currently supports only the recordIO-protobuf format with Float32 tensors. - Because their use case is predominantly on sparse data, CSV is not a good candidate. Both File and Pipe mode training are supported + For training, the Factorization Machines algorithm currently supports only the recordIO-protobuf format with Float32 tensors. + Because their use case is predominantly on sparse data, CSV is not a good candidate. Both File and Pipe mode training are supported for recordIO-wrapped protobuf. #VALIDATION: For inference, the Factorization Machines algorithm supports the application/json and x-recordio-protobuf formats. - - For the binary classification problem, the algorithm predicts a score and a label. The label is a number and can be either 0 or 1. - The score is a number that indicates how strongly the algorithm believes that the label should be 1. The algorithm computes score first + - For the binary classification problem, the algorithm predicts a score and a label. The label is a number and can be either 0 or 1. + The score is a number that indicates how strongly the algorithm believes that the label should be 1. The algorithm computes score first and then derives the label from the score value. If the score is greater than or equal to 0.5, the label is 1. - - For the regression problem, just a score is returned and it is the predicted value. For example, if Factorization Machines is used to + - For the regression problem, just a score is returned and it is the predicted value. For example, if Factorization Machines is used to predict a movie rating, score is the predicted rating value. @@ -47,9 +47,9 @@ description: This model will output the following: | Filename | Maximum Size | Format | | -------- | ------------ | ------ | - | results.json | 128 B | .json | + | results.json | 128 B | .json | - performance: "The Factorization Machines algorithm reports three binary classification metrics, which are computed during training. When tuning the model for binary classification tasks, + performance: "The Factorization Machines algorithm reports three binary classification metrics, which are computed during training. When tuning the model for binary classification tasks, choose one of these as the objective." releaseNotes: "The Amazon SageMaker implementation of the Factorization Machines algorithm considers only pair-wise (2nd order) interactions between features." @@ -75,16 +75,17 @@ metrics: inputs: input.csv: - acceptedMediaTypes: "application/csv" - maxSize: 100000000 + acceptedMediaTypes: + - "application/csv" + maxSize: 10M description: "Tabular data in the same format as the training data. The structure of the input data will be enforced by the required signature that accompanies each model." outputs: results.json: mediaType: "application/json" - maxSize: 100000000 + maxSize: 10M description: "JSON file that contains the target column that mimics the target column used in training." - + resources: memory: size: "2Gi" @@ -96,10 +97,6 @@ timeout: status: 60 run: 600 -timeout: - status: 20s - run: 20s - internal: recommended: false experimental: false From 7b02bac0dbf45b542ca7b0d6b390a2ecb0980401 Mon Sep 17 00:00:00 2001 From: Douglas Holman Date: Tue, 13 Jul 2021 10:47:51 -0700 Subject: [PATCH 09/19] chore: adding in mlflow assets --- examples/mlflow/tabular/weights/data/model.pth | Bin 0 -> 6995 bytes .../tabular/weights/data/pickle_module_info.txt | 1 + 2 files changed, 1 insertion(+) create mode 100644 examples/mlflow/tabular/weights/data/model.pth create mode 100644 examples/mlflow/tabular/weights/data/pickle_module_info.txt diff --git a/examples/mlflow/tabular/weights/data/model.pth b/examples/mlflow/tabular/weights/data/model.pth new file mode 100644 index 0000000000000000000000000000000000000000..62e7cab8804d2af27a6f22eb752bd127cd02c782 GIT binary patch literal 6995 zcmbtZ3w#vSx!=uZ^VkFk&rp)`2p}6IA&<=qvL~8A3JfkVKwYpf?kkxk%kF-&vj~>0 z6xakb%HyKFw+bbKT(L?EK9G_b5iO!3TCDg&ty;vk_lmr3>x+B8b7mKI1DpEmo&09@ zJKs6yf4=iS-}lX#li8dhU8l>+(p_`q=?3T=l1ub1^GtL*WJhVR&DY%6SGO?fT9xK< z`2t~g(CcdRc}f#xld#m$<`F_|9-l`J_yw2G5ek_=XLbpKGwk)rUcVrivYtWm$_>!l{HnB)s8wBWY}cLL`b8m+ zhyG@xZ5PRNLB(%c43lNr|--07MoE3Qd*8#PUKTw`=+Xd07S zH&+Kk>=`(!&w=Y%4*Bgcl!qUfZCnnQ&1G^%&cNwq7-qH`u~8@-^hi)3rip!6nJq9B z+S9Pk?}r~UB+Uz8xIL4?GM6ZW5ecPo7@1J2fKdshN+?PwSzxpbW9;bQx{$}$0%N0a zgMB!b>b=1gvKa7}c>Te!TvAn1=C)MTI2@Ih>e@;ILM`7ELMnS zHZe3K?C`}5nrt6}CMQK>sUXCByWZZ9&Bx*Q2QWksIfR|95g_)ytfK7>x2MGs_Q_P= zV2i;g=x(x!IbtqbCP8oqFde|nwirv8KSE)NveKApUa`EQQPCu3MPUjLQ=`yq&!^)j zWFR3FcE(Z%{AdwOONof>yi0=tNjAZBwl?w112dw)@i5a7Gq2Sba5{W+Vz)%VW=2QO z5(~xYVqp|!@i03aZihMTd>NZtOF+8AA-RDUF`O}S(^%vhFd&-Hn{Gl|F-o`VR_YIT z=_52^x9bz+v;^6ZAg60`+R8Kxy}8r_=kYM#7KI<1#S!92agW7r%C5xaf906ZDEX6j(KL>vV0%mQ6B+R^Ce=7vnX=@X7NZ3pENFe~1E8z2anA?X; zvn00%=RU>j!sV4PbV6GAIm0b2w6!c!W!PSTa}fL(D43%{-jM9^%b2ZM8yY*XEa|<4 z8WY^H2$n}-1rK*>LE*$0r)w^Vfk0abe#($nvS<>#A)zJg^9f09AY#*^CPr7Rmc)YEm44p9uAuM}+A(J3^=v&_{ zTv;d++?%2aavusv!mfDFIj;osM>!hdwYFL`*7 zy)&F%M+hDg?-kJqaf8@%kjm<^vNG5zZsxLO*v7!4@GDLqh3y(}mkbZ{utNjh*#VF6 zuq!NT{`Q~ZG7;V9(J1WZ;W1Hjm0ybyo2V`Ry~a*XuF9-iYz_hZtWcVtl`!by3;RDV5 z4?Ex^9{!Yy4vJY_|rZWg|!#D9KVLK6xn%pKK^88>J4(#I!m1j8P`#qRd#6`k)k*`mz@N zsFp>alt;yQ080I7iN&BafQl>(rGZprkthwKA`3=oFctUWE^!}cKte}TLZu<7j7a(T zyh^JyloeCN9Xf@jtjtm?{ea@J`42;h&R@?N+NA<4(EW6~REUzs!4FZArQ!JGr4jhl zI2egihct?5i}2YSCVR(ioK3h*4=Qij?si@K+mc97?ie#wRZo<5L@L zJW3tXjZ8ZMpK-=ZP}UePMJXyxWG%|5R%5)JidPt~pe2p*N-AoMTd1fpUPVQX@oFmW zA&k?vTo`cWr6=LHE3UG&Hi*xee=dV z<<>=3@_xvvUM5Y-jSoJjt}C`#E9$or@u%J7&f3k^!eh@_^B)~d&it{O)Gq#|vgO57 z-Jc&Dpl*A$zYZ|p-J-!+&l_(mnlxFR+G^f>GAZ{O%H)qBZ~ z4<``+>M6?m@7%4Pn>kZGmv@G&?%Ja2tU7hDEkbGzjwgkuwkkWP98sS?zfq<9uI*g- zn{Sa@diTcfr0en<-zqdWR;S=Md9|-CJ^A(mWz5;-WP8g^qWz7-N(7r^~XWpKLrIOS1u`+1WEaK{1t4@3ALDe$*rS9VSuUK>Lw066^S!!;YixdnP zMfRWkKzU`2Ma`QqN|wRdGpAtpA981_^jPC z1}|5>da|Ark8QG6a2e|A+qS6tE;cKJE^j7Nqf^wqx@WD+>${a(U9;6SyYj6a)BFmV zzUM%o=vAv>)9LQ_UVDM;x4%r%j{1m2Uaw|-b6zoi`X^=Oifz`<-&>(<{IXX0gC`hoTM{swEG;X!5V>*uZW zmXxTD^^27uZ{-pFxv#B9gU3kSo}VcDk8V=COAn}nx!;jo+gh@D?E>}JC({oOd;VE< zrE94*aG_Q$eEN1WNbDdBW)D&5{DsbU(mOF0=>4zFpYCeE|NOtaT&*rRvyhygr7E5O zxL@7Xswe9oo4^{CxY<%SP3sP4w=~u9W$MZR7s+{GmG_lJ2*=NWXsfk>^_nsiP*9GrRVCKiMz{ zZK8K?b`_=aW44oMZXEtk{5WH>zG2SjtcL9!U2L5m+k2#do^;W>w{cU(58J_gfB!Lm z^dfnA&vRDe@Gsc-54`oY>;E@C-Oqgg@o)Wo5#xK@nc{YOPDt<6^X{i>xWe}`dQ1@T zH#biHClp`%(n!!$nrPJiVO#5bdI(DI*_@vKTsf0Z+|=aJilnRO34PDriTjVjq@M5M zCMmIZJl5jdpq@yHZ`x!eAD#$a4=KKT>4}t>qmz-2;RLUT6n{_mL`uwd$w;U0>?8#e z%kVvs67y3Jq=BiCXs+yul$cSHkz@?!R5sBss-8%RIUpG+7Z2Q0BH?FcPo%`CPDa|9 z8i{3)o=A!DnT%A3hsG&vVmYBFQewmSjNQu!y52vqQgYZZ=Zc{8?XLA`D z6Jjlf8%nOUrAdcg@n3S%kd?f(HA CbVpbK literal 0 HcmV?d00001 diff --git a/examples/mlflow/tabular/weights/data/pickle_module_info.txt b/examples/mlflow/tabular/weights/data/pickle_module_info.txt new file mode 100644 index 0000000..31b9e7e --- /dev/null +++ b/examples/mlflow/tabular/weights/data/pickle_module_info.txt @@ -0,0 +1 @@ +mlflow.pytorch.pickle_module \ No newline at end of file From 4ad8bd261219747faf0b631e80c9d91fb80489f9 Mon Sep 17 00:00:00 2001 From: Saumil Dave Date: Tue, 13 Jul 2021 10:59:28 -0700 Subject: [PATCH 10/19] chore: update weights --- examples/mlflow/tabular/weights/MLmodel | 19 ++++++++++++++++++ examples/mlflow/tabular/weights/conda.yaml | 13 ++++++++++++ .../mlflow/tabular/weights/data/model.pth | Bin 6995 -> 6931 bytes 3 files changed, 32 insertions(+) create mode 100644 examples/mlflow/tabular/weights/MLmodel create mode 100644 examples/mlflow/tabular/weights/conda.yaml diff --git a/examples/mlflow/tabular/weights/MLmodel b/examples/mlflow/tabular/weights/MLmodel new file mode 100644 index 0000000..1184480 --- /dev/null +++ b/examples/mlflow/tabular/weights/MLmodel @@ -0,0 +1,19 @@ +flavors: + python_function: + data: data + env: conda.yaml + loader_module: mlflow.pytorch + pickle_module_name: mlflow.pytorch.pickle_module + python_version: 3.8.5 + pytorch: + model_data: data + pytorch_version: 1.8.1 +signature: + inputs: '[{"name": "fixed acidity", "type": "double"}, {"name": "volatile acidity", + "type": "double"}, {"name": "citric acid", "type": "double"}, {"name": "residual + sugar", "type": "double"}, {"name": "chlorides", "type": "double"}, {"name": "free + sulfur dioxide", "type": "double"}, {"name": "total sulfur dioxide", "type": "double"}, + {"name": "density", "type": "double"}, {"name": "pH", "type": "double"}, {"name": + "sulphates", "type": "double"}, {"name": "alcohol", "type": "double"}]' + outputs: '[{"name": "quality", "type": "long"}]' +utc_time_created: '2021-07-13 17:01:09.348648' diff --git a/examples/mlflow/tabular/weights/conda.yaml b/examples/mlflow/tabular/weights/conda.yaml new file mode 100644 index 0000000..d87234f --- /dev/null +++ b/examples/mlflow/tabular/weights/conda.yaml @@ -0,0 +1,13 @@ +channels: +- defaults +- conda-forge +- pytorch +dependencies: +- python=3.8.5 +- pytorch=1.8.1 +- torchvision=0.9.1 +- pip +- pip: + - mlflow + - cloudpickle==1.6.0 +name: mlflow-env diff --git a/examples/mlflow/tabular/weights/data/model.pth b/examples/mlflow/tabular/weights/data/model.pth index 62e7cab8804d2af27a6f22eb752bd127cd02c782..797aa057614802d2c6b652c76f4e73a4442dd32a 100644 GIT binary patch literal 6931 zcmbVR3w#q*)=!$YY12Y^7AWOaDh9NrNnaG33lyy2Kqv#aQN%INPH39sOePdoB2`F> z3cIk@2MUOy`zb5}`>_H-E>Kuh+)rQ^*Ih(mU+%KIy36XSDD3y0JCl^Ol79Z2{AP0Q zWB&J?|2^m2o0|qsm#)!dWof=~_0tT{*d?dvUFx3bvdi|;V3V)DuCL~vZdYrX)9DL@ zT|uw2$>%Ojk_|$$y~!ZfX3()1V`BGlf8aHFv!b;ZUbm-eQ^BQ zUccKeS*>#dF1HWToN@q%FZKINn**+}&mD??8?YUV2GB(zJpviF(fDq;(^2V~ROYO9 zx?B}iHKs|9vhp%#rN`{HS5{B*cp%d%gPw=1RO$*Idq_5@S_&bz9Fpxaj*%^9MZjRR z8E_0iP!$mb$gwfTu)@x)fZTY|>-Wl7?2~E`Lzm^M-F+3@+4g3K%f0}afV?R5VYHK1x}cU?{Yu zVVmC%BNQan3t*&8&tMgoD1%W+B@>KJDw$zSQmGt@l1ddYR)%65I=Cj}_Ico@D2%g> z#8Ry{xLg(k{t~Z07?w+_N~&udHI9lhlX;TaN#JI&50@?uw&h}1jkd40U;G0`5e*#A z!vtG4wg_==5#W|6+&W%{5*|t;FwtnsQCfp`XOq3rjSf*}jXQ|n-ptA-F*j_rruGsg za@@^9dAZU!)8ZdjuCw~ODq&; ziiJ_I@-R0XZi9w4zD%*r6OdZ$k_+aE7{M57 zZq+8qX-Tp!NlsVgv=wO>Z9Mb9`8+JJMq!~*93_qx$B0ED`uSbrO*SnKu?)Va#L;pg zD7jr=YlpkVo5gutZkIzFw22Sg!{8S2aPN3@-^D!KCr-3lF;faW+%Hy(_ea1U0S9xM zdE#W-V1)1PBMFP0BD#Z1Bvw(KYH_MnoMshq3fv;{=0TPkktM8FGQA*#d;yvH9wSy< z4Bl`Amb5_=_8I>W_#)73RA)uP1nc!Tf`5%VA3?i>Z^>>c1OcZr;Pbf^R}UHHN-j6r zJHzY5DGg${fEsd+u*buCOCqZb*@mMHf**qeQ&GqplHGn8ld{r>#S6&YmM5$+KzK1M zjY10#%hW({U|h3#7Q{fHDFn+Ep}ZKe4PYuI znsCzM%?~QgD{cK*v$7lsF0?M*+N!iZWGi5;zJT48+~qpR!%DYRDi?{!Sglk>(S5$T z#;O_DpiZ*^+HgABBe2FO`qzj7<-JFYihs4Zy^W#_kH#T{ug)j^;Wx_i36+=B*CeK$Z4e$ene^W(S`J}RPlc}PztfH(8 zerVGn2hLy^)^ko7Hq67Q%jR<0;77O!Y1`oOM-;U?WO#yyC)X&2r`q9Z9yTgVgTreN z!877xA}S%S6TdM?MOjq^JS(o}vSrw$fJb37r;WlE75F(Bw({`23j9JlY~$g@u&Dam zOQI~I`}{Zx|HZ@0qUtKIh!0prb@I21>MZXN)%pC1s7}~UQJte#6|cuVhR1D}F-wME z7nh^t*FadlUb&mg4m&Uvu-V!ZfuAPL)_)ttM{j7GxvH`XcS;o%uorEcBE!#^ZF(-d z4PHb3khp!SefwoNz{BgReFxj&5Dzi&#(W`WwW$)P1t?!^y?DZ?=y zj;nY-Z-)~+ypiDRO_i^=qHvOjx8r>M;yPdd6X)xlIA5pYe4UQ-bw=F9WsAExJ<>5o zq2rek_*D`eXM3lkylN7>i>hNj{WViPgVVRcd&mmX@f%hB-^y@~hxb+W&$q(|Jbaj% zj;cyib&VM=AZCUP7ZuDX{EpG_dll~wGW?N;KdE>hwZorz_)CJ0ODY|Ijl$n}_Lq@zfo#9dqlQmI2K$1*CVL0B1)2IKQOp;Eq5Oc8N4 z6{hm)DpQp-gyAXn4@HUDujO=Y(l9Kr4Rf1RfRf6=aFk@J5TCp>0-q`eBT;IXMk%$U z@tKNoHsYijlUW+WI<3dOxKxCaGGbI3iy~vZ7{ArgZbC_x#^IBfZpNoNnh~XTX}nT9 z0iOxRZ$VjQ{8p5rQi;-|l+~(?Ph?^j<7KR*GHzm`%D9<{D&yr$R2i>e;x0~yjBCS= zaM)4W!0EFVEIzb$3GO-B-H`H^YoR8$+b`FN>WZNxJm!A&by;1%V#klwl9@+cyRAM(Fl)`5T3YR}Lhv3@)n8xwik%`N)3q!JO@r zr@U859>4E(I+dSABEt*FrsW$*dP@gs_}D?WUE5|6&ZWf`5A=`=`G;w-emq%SB3mvD zOrr-%SJC&rH;C>y+mBur#4d<~|azY$2;B&X2X^@>Ku*&9Pa|leI^N zhG}82l|K68iKL;AhFs+r(TztW%S*m9bYbi5WOUVDlA+5bXO6YRHhjF0zFGCs~oj zGx|!bF4|^!lV@v#W!toOp#`j%24J)o9*X-_%@5!nBO-a)AbyeTe-+DSA*V#Z` zxOji;-k-lje^WG+d|kAg7XPJ{ew1~Z*x!1SK4V!#&2MKNer-=4>38i)t$E>X^zF_? zMEBf3m3Ub+Y}u?@)ScbmQGZ+?zq|eiLhA^u9Ybtit*4Ej=99u>or)hHedE;+2cb^v z?oF?v6uhe2nEJYr{|~S8b^zT&Z=tO_9;`L5ew3I`Ehj(8KV~6ks}+7ec>umFL><`O zn;r!>)FZKA)Ys*w(4#}xO}{>^KYT%p`2qcD&!?ZB|L*xKv0Qxn_`PS_I%Qm|v8_Kl zXJL15cA9TwXV+f7zOFQdojv_FQMPu}SoZCFxAxfH_iG1#C6GIIKVMtwovUs`NHWL)gp zQSa&GDhn!jN^ zUHRb%y40{gHvi8?`hsm39pU#;_wKRe>{%C!PH%ci(H4<3Bq~H)jr^S^7*eXT&DU z1~KQ*#nuxs)5f#0ho8NT1{>?o9SoE^rYqF9{SRSUF5~|byU0ghr}@Mr3D z`R`&!3hpGIpC3kNOfXT;*DL6LV>P+fu{AcJ^$yDii({5!`OC=FQ=>?2Lk^3y5YUKWVwM^eP&DJLUxGB z?gmcFzPirTllLtBSVOm~=Lvhyx0ClHEQegbUdKI9^4mRkIX#h*SN85mg?PM|3W@Dz zdLkvSSv`=@z?4Y1FX)Msyn1v;T8pP1DUg&EuP0J+zVD7S8;@5~BH`N86Dc`kcSrhT zN+fl)=!uk^i@PKF2HsewM85Bdl$>3=Bc1OBiEaFQA|=OXcccZlRNjE6uH4)cDLJCL zBi)N<$*GW7boWF`jvjW>eEs??2`L_r4P0jCt??csbtPSW*`mj;#J9i>h4A9p5iwr) d9{WQoVAX7x+%DhV@tb{vW`)BYFS; literal 6995 zcmbtZ3w#vSx!=uZ^VkFk&rp)`2p}6IA&<=qvL~8A3JfkVKwYpf?kkxk%kF-&vj~>0 z6xakb%HyKFw+bbKT(L?EK9G_b5iO!3TCDg&ty;vk_lmr3>x+B8b7mKI1DpEmo&09@ zJKs6yf4=iS-}lX#li8dhU8l>+(p_`q=?3T=l1ub1^GtL*WJhVR&DY%6SGO?fT9xK< z`2t~g(CcdRc}f#xld#m$<`F_|9-l`J_yw2G5ek_=XLbpKGwk)rUcVrivYtWm$_>!l{HnB)s8wBWY}cLL`b8m+ zhyG@xZ5PRNLB(%c43lNr|--07MoE3Qd*8#PUKTw`=+Xd07S zH&+Kk>=`(!&w=Y%4*Bgcl!qUfZCnnQ&1G^%&cNwq7-qH`u~8@-^hi)3rip!6nJq9B z+S9Pk?}r~UB+Uz8xIL4?GM6ZW5ecPo7@1J2fKdshN+?PwSzxpbW9;bQx{$}$0%N0a zgMB!b>b=1gvKa7}c>Te!TvAn1=C)MTI2@Ih>e@;ILM`7ELMnS zHZe3K?C`}5nrt6}CMQK>sUXCByWZZ9&Bx*Q2QWksIfR|95g_)ytfK7>x2MGs_Q_P= zV2i;g=x(x!IbtqbCP8oqFde|nwirv8KSE)NveKApUa`EQQPCu3MPUjLQ=`yq&!^)j zWFR3FcE(Z%{AdwOONof>yi0=tNjAZBwl?w112dw)@i5a7Gq2Sba5{W+Vz)%VW=2QO z5(~xYVqp|!@i03aZihMTd>NZtOF+8AA-RDUF`O}S(^%vhFd&-Hn{Gl|F-o`VR_YIT z=_52^x9bz+v;^6ZAg60`+R8Kxy}8r_=kYM#7KI<1#S!92agW7r%C5xaf906ZDEX6j(KL>vV0%mQ6B+R^Ce=7vnX=@X7NZ3pENFe~1E8z2anA?X; zvn00%=RU>j!sV4PbV6GAIm0b2w6!c!W!PSTa}fL(D43%{-jM9^%b2ZM8yY*XEa|<4 z8WY^H2$n}-1rK*>LE*$0r)w^Vfk0abe#($nvS<>#A)zJg^9f09AY#*^CPr7Rmc)YEm44p9uAuM}+A(J3^=v&_{ zTv;d++?%2aavusv!mfDFIj;osM>!hdwYFL`*7 zy)&F%M+hDg?-kJqaf8@%kjm<^vNG5zZsxLO*v7!4@GDLqh3y(}mkbZ{utNjh*#VF6 zuq!NT{`Q~ZG7;V9(J1WZ;W1Hjm0ybyo2V`Ry~a*XuF9-iYz_hZtWcVtl`!by3;RDV5 z4?Ex^9{!Yy4vJY_|rZWg|!#D9KVLK6xn%pKK^88>J4(#I!m1j8P`#qRd#6`k)k*`mz@N zsFp>alt;yQ080I7iN&BafQl>(rGZprkthwKA`3=oFctUWE^!}cKte}TLZu<7j7a(T zyh^JyloeCN9Xf@jtjtm?{ea@J`42;h&R@?N+NA<4(EW6~REUzs!4FZArQ!JGr4jhl zI2egihct?5i}2YSCVR(ioK3h*4=Qij?si@K+mc97?ie#wRZo<5L@L zJW3tXjZ8ZMpK-=ZP}UePMJXyxWG%|5R%5)JidPt~pe2p*N-AoMTd1fpUPVQX@oFmW zA&k?vTo`cWr6=LHE3UG&Hi*xee=dV z<<>=3@_xvvUM5Y-jSoJjt}C`#E9$or@u%J7&f3k^!eh@_^B)~d&it{O)Gq#|vgO57 z-Jc&Dpl*A$zYZ|p-J-!+&l_(mnlxFR+G^f>GAZ{O%H)qBZ~ z4<``+>M6?m@7%4Pn>kZGmv@G&?%Ja2tU7hDEkbGzjwgkuwkkWP98sS?zfq<9uI*g- zn{Sa@diTcfr0en<-zqdWR;S=Md9|-CJ^A(mWz5;-WP8g^qWz7-N(7r^~XWpKLrIOS1u`+1WEaK{1t4@3ALDe$*rS9VSuUK>Lw066^S!!;YixdnP zMfRWkKzU`2Ma`QqN|wRdGpAtpA981_^jPC z1}|5>da|Ark8QG6a2e|A+qS6tE;cKJE^j7Nqf^wqx@WD+>${a(U9;6SyYj6a)BFmV zzUM%o=vAv>)9LQ_UVDM;x4%r%j{1m2Uaw|-b6zoi`X^=Oifz`<-&>(<{IXX0gC`hoTM{swEG;X!5V>*uZW zmXxTD^^27uZ{-pFxv#B9gU3kSo}VcDk8V=COAn}nx!;jo+gh@D?E>}JC({oOd;VE< zrE94*aG_Q$eEN1WNbDdBW)D&5{DsbU(mOF0=>4zFpYCeE|NOtaT&*rRvyhygr7E5O zxL@7Xswe9oo4^{CxY<%SP3sP4w=~u9W$MZR7s+{GmG_lJ2*=NWXsfk>^_nsiP*9GrRVCKiMz{ zZK8K?b`_=aW44oMZXEtk{5WH>zG2SjtcL9!U2L5m+k2#do^;W>w{cU(58J_gfB!Lm z^dfnA&vRDe@Gsc-54`oY>;E@C-Oqgg@o)Wo5#xK@nc{YOPDt<6^X{i>xWe}`dQ1@T zH#biHClp`%(n!!$nrPJiVO#5bdI(DI*_@vKTsf0Z+|=aJilnRO34PDriTjVjq@M5M zCMmIZJl5jdpq@yHZ`x!eAD#$a4=KKT>4}t>qmz-2;RLUT6n{_mL`uwd$w;U0>?8#e z%kVvs67y3Jq=BiCXs+yul$cSHkz@?!R5sBss-8%RIUpG+7Z2Q0BH?FcPo%`CPDa|9 z8i{3)o=A!DnT%A3hsG&vVmYBFQewmSjNQu!y52vqQgYZZ=Zc{8?XLA`D z6Jjlf8%nOUrAdcg@n3S%kd?f(HA CbVpbK From 05ce36cda6e174d99e0e9723b6183c7557bbf85f Mon Sep 17 00:00:00 2001 From: Douglas Holman Date: Tue, 13 Jul 2021 12:35:44 -0700 Subject: [PATCH 11/19] feat: releasing notebook after testing it completely --- model_converter_demo.ipynb | 128 ++++++++++++++++++++++++------------- 1 file changed, 84 insertions(+), 44 deletions(-) diff --git a/model_converter_demo.ipynb b/model_converter_demo.ipynb index 2e82e80..a163c33 100644 --- a/model_converter_demo.ipynb +++ b/model_converter_demo.ipynb @@ -19,7 +19,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -39,12 +39,10 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ - "# TODO: Include concise training code to showcase how the assets below are produced\n", - "\n", "blob_storage_provider = \"S3\"\n", "blob_storage_container = \"modzy-engineering-tests\"\n", "sagemaker_weights_key = \"ds/model-converter/sagemaker/image-classification/weights.tar.gz\"" @@ -61,7 +59,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -75,13 +73,13 @@ "auxiliary_files = [labels_json_path]\n", "\n", "# Your local files will be archieved and stored in the following location:\n", - "sagemaker_resources_key = \"ds/model-converter/sagemaker/image-classification/resources.tar.gz\"\n", + "sagemaker_resources_key = \"ds/demo/model-converter/sagemaker/image-classification/resources.tar.gz\"\n", "\n", "\n", - "# upload_resources(\n", - "# model_yaml_path, blob_storage_container, sagemaker_resources_key,\n", - "# os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"), blob_storage_provider, auxiliary_files\n", - "# )" + "upload_resources(\n", + " model_yaml_path, blob_storage_container, sagemaker_resources_key,\n", + " os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"), blob_storage_provider, auxiliary_files\n", + ")" ] }, { @@ -99,7 +97,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": { "pycharm": { "name": "#%%\n" @@ -122,13 +120,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": { "pycharm": { "name": "#%%\n" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The model details page for your new model can be found here: https://integration.modzy.engineering/models/c59e25fe2a/0.0.1\n" + ] + } + ], "source": [ "# Now, provide the Model converter with information about your stored model assets and the credentials required\n", "# to access them. The Model converter will do the rest of the work.\n", @@ -137,14 +143,14 @@ "model_type = \"image-classification\"\n", "\n", "_, sagemaker_converter_output = model_converter.run(\n", - " sp_access_key_id=os.getenv(\"SP_ACCESS_KEY_ID\"),\n", - " sp_secret_access_key=os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", - " blobstore_provider=blob_storage_provider,\n", - " blobstore_container=blob_storage_container,\n", - " weights_path=sagemaker_weights_key,\n", - " resources_path=sagemaker_resources_key,\n", - " platform=source_platform,\n", - " model_type=model_type,\n", + " sp_access_key_id=os.getenv(\"SP_ACCESS_KEY_ID\"),\n", + " sp_secret_access_key=os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", + " blobstore_provider=blob_storage_provider,\n", + " blobstore_container=blob_storage_container,\n", + " weights_path=sagemaker_weights_key,\n", + " resources_path=sagemaker_resources_key,\n", + " platform=source_platform,\n", + " model_type=model_type,\n", ")\n", "\n", "print(f\"The model details page for your new model can be found here: {sagemaker_converter_output['modelURL']}\")\n", @@ -154,13 +160,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": { "pycharm": { "name": "#%%\n" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Warming up a container to perform inference from model c59e25fe2a version 0.0.1\n" + ] + } + ], "source": [ "# Delegate a single processing engine to serve your new model\n", "modzy_api_client.models.update_processing_engines(\n", @@ -173,13 +187,22 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 7, "metadata": { "pycharm": { "name": "#%%\n" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sending job to model c59e25fe2a version 0.0.1\n", + "Job Completed: https://integration.modzy.engineering/operations/jobs/1b4f7fe0-8a42-4cd7-8557-5e57bc3a3913\n" + ] + } + ], "source": [ "# Send an inference job to run against your new model with explainability!\n", "sagemaker_input_source = {\n", @@ -204,7 +227,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -223,7 +246,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -236,31 +259,39 @@ "model_type = \"tabular\"\n", "\n", "mlflow_model_dir = os.path.join(current_working_directory, \"examples/mlflow/tabular/weights\")\n", - "mlflow_weights_key = \"mlflow-wine/weights.tar.gz\"\n", + "mlflow_weights_key = \"demo/mlflow-wine/weights.tar.gz\"\n", "\n", - "# upload_mlflow_model(\n", - "# mlflow_model_dir, blob_storage_container, mlflow_weights_key,\n", - "# os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"), blob_storage_provider\n", - "# )\n", + "upload_mlflow_model(\n", + " mlflow_model_dir, blob_storage_container, mlflow_weights_key,\n", + " os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"), blob_storage_provider\n", + ")\n", "\n", "mlflow_model_yaml_path = os.path.join(current_working_directory, \"examples/mlflow/tabular/model.yaml\")\n", - "mlflow_resources_key = \"ds/model-converter/mlflow/tabular/resources.tar.gz\"\n", + "mlflow_resources_key = \"ds/demo/model-converter/mlflow/tabular/resources.tar.gz\"\n", "\n", - "# upload_resources(\n", - "# mlflow_model_yaml_path, blob_storage_container, mlflow_resources_key,\n", - "# os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"), blob_storage_provider, auxiliary_files\n", - "# )" + "upload_resources(\n", + " mlflow_model_yaml_path, blob_storage_container, mlflow_resources_key,\n", + " os.getenv(\"SP_ACCESS_KEY_ID\"), os.getenv(\"SP_SECRET_ACCESS_KEY\"), blob_storage_provider, auxiliary_files\n", + ")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": { "pycharm": { "name": "#%%\n" } }, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "The model details page for your new model can be found here: https://integration.modzy.engineering/models/eab49768ff/0.1.0\n" + ] + } + ], "source": [ "_, mlflow_converter_output = model_converter.run(\n", " sp_access_key_id=os.getenv(\"SP_ACCESS_KEY_ID\"),\n", @@ -280,7 +311,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": { "pycharm": { "name": "#%%\n" @@ -294,9 +325,18 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 12, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Sending job to model eab49768ff 0.1.0\n", + "Job Completed: https://integration.modzy.engineering/operations/jobs/90490dfa-d80b-494e-8b47-26d0bbaeb806\n" + ] + } + ], "source": [ "# Send an inference job to run against your new model!\n", "input_source = {\n", @@ -315,13 +355,13 @@ " region=\"us-east-1\"\n", ")\n", "\n", - "# modzy_api_client.jobs.block_until_complete(job, timeout=None)\n", + "modzy_api_client.jobs.block_until_complete(job, timeout=None)\n", "print(f\"Job Completed: https://integration.modzy.engineering/operations/jobs/{job.job_identifier}\")" ] }, { "cell_type": "code", - "execution_count": null, + "execution_count": 13, "metadata": {}, "outputs": [], "source": [ From 8a2e257b72c24edb523d0e894e404ccecc565ab8 Mon Sep 17 00:00:00 2001 From: bmunday3 Date: Tue, 24 Aug 2021 15:49:04 -0400 Subject: [PATCH 12/19] dev: modified model_converter.py to reflect new API routes available in Model Converter API service --- model_converter_demo.ipynb | 54 ++++++---- modzy/converter/model_converter.py | 159 ++++++++++++++++------------- 2 files changed, 124 insertions(+), 89 deletions(-) diff --git a/model_converter_demo.ipynb b/model_converter_demo.ipynb index a163c33..7ec7fbc 100644 --- a/model_converter_demo.ipynb +++ b/model_converter_demo.ipynb @@ -4,28 +4,28 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Notes on executing the demo:\n", + "### Notes on executing the demo:\n", "\n", - "1. Make sure to port forward the service from the integration cluster: `kubectl port-forward svc/model-converter -n modzy 8080:8080`\n", - "2. Set envionrment variables: `MODZY_QA_API_KEY`, `SP_ACCESS_KEY_ID` and `SP_SECRET_ACCESS_KEY`" + "Set envionrment variables: `MODZY_API_KEY`, `SP_ACCESS_KEY_ID` and `SP_SECRET_ACCESS_KEY`" ] }, { - "cell_type": "markdown", + "cell_type": "code", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "# First we will convert an Explainable SageMaker Image Classification Model" + "# Import some standard dependencies\n", + "import os\n", + "import json\n", + "import time" ] }, { - "cell_type": "code", - "execution_count": 1, + "cell_type": "markdown", "metadata": {}, - "outputs": [], "source": [ - "# Import some standard dependencies\n", - "import os\n", - "import json" + "# First we will convert an Explainable SageMaker Image Classification Model" ] }, { @@ -73,7 +73,7 @@ "auxiliary_files = [labels_json_path]\n", "\n", "# Your local files will be archieved and stored in the following location:\n", - "sagemaker_resources_key = \"ds/demo/model-converter/sagemaker/image-classification/resources.tar.gz\"\n", + "sagemaker_resources_key = \"ds/model-converter/sagemaker/image-classification/resources.tar.gz\"\n", "\n", "\n", "upload_resources(\n", @@ -110,7 +110,7 @@ "\n", "# To get started, store your Modzy API key as an environment variable `MODZY_API_KEY`.\n", "# Then, create a Modzy API client to interact with the integration envrionment\n", - "modzy_api_key = os.getenv(\"MODZY_QA_API_KEY\")\n", + "modzy_api_key = os.getenv(\"MODZY_API_KEY\")\n", "modzy_instance_base_url = \"https://integration.modzy.engineering/api\"\n", "modzy_api_client = ApiClient(api_key=modzy_api_key, base_url=modzy_instance_base_url)\n", "\n", @@ -142,7 +142,7 @@ "source_platform = \"sagemaker\"\n", "model_type = \"image-classification\"\n", "\n", - "_, sagemaker_converter_output = model_converter.run(\n", + "sagemaker_converter_output = model_converter.start(\n", " sp_access_key_id=os.getenv(\"SP_ACCESS_KEY_ID\"),\n", " sp_secret_access_key=os.getenv(\"SP_SECRET_ACCESS_KEY\"),\n", " blobstore_provider=blob_storage_provider,\n", @@ -153,9 +153,23 @@ " model_type=model_type,\n", ")\n", "\n", - "print(f\"The model details page for your new model can be found here: {sagemaker_converter_output['modelURL']}\")\n", - "sagemaker_model_id = sagemaker_converter_output[\"modelId\"]\n", - "sagemaker_model_version = sagemaker_converter_output[\"modelVersion\"]" + "job_id = sagemaker_converter_output[\"jobId\"]\n", + "print(\"Model Converter Status: {} - {}\".format(sagemaker_converter_output[\"status\"], sagemaker_converter_output[\"message\"]) + '\\n' + \"Model Converter Job ID: {}\".format(job_id))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Block Model Converter Job until complete\n", + "start_time = time.time()\n", + "sagemaker_converter_result = model_converter.block_until_complete(job_id, timeout=600)\n", + "end_time = time.time()\n", + "sagemaker_model_id = sagemaker_converter_result[\"modelId\"]\n", + "sagemaker_model_version = sagemaker_converter_result[\"modelVersion\"]\n", + "print(\"Model Converter Job Status: {}\".format(sagemaker_converter_result[\"message\"]) + '\\n' + \"Completed in {} minutes\".format((end_time - start_time)/60) + '\\n' + \"Find your newly deployed model here: {}/models/{}/{}\".format(modzy_instance_base_url[:-4], sagemaker_model_id, sagemaker_model_version))" ] }, { @@ -379,7 +393,7 @@ ], "metadata": { "kernelspec": { - "display_name": "Python 3", + "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, @@ -393,9 +407,9 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.1" + "version": "3.9.5" } }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/modzy/converter/model_converter.py b/modzy/converter/model_converter.py index 719ed3a..5c5a37f 100644 --- a/modzy/converter/model_converter.py +++ b/modzy/converter/model_converter.py @@ -1,10 +1,8 @@ import logging +from types import BuiltinMethodType +from ..error import Timeout +import time import requests -import json - -CONVERTER_HOST = "127.0.0.1" -CONVERTER_PORT = "8080" - class ModelConverter: """The `Models` object. @@ -16,7 +14,7 @@ class ModelConverter: attribute of an `ApiClient` instance. """ - _base_route = f'http://{CONVERTER_HOST}:{CONVERTER_PORT}' + _base_route = '/converter' def __init__(self, api_client): """Creates a `ModelConverter` instance. @@ -26,16 +24,8 @@ def __init__(self, api_client): """ self._api_client = api_client self.logger = logging.getLogger(__name__) - - def env_status(self): - raw_response = self._api_client.http.get(f"{self._base_route}/env-status") - response = raw_response["responseEntries"][0] - response_code = int(response["httpCode"]) - response_message = response["message"] - self.logger.info(f"Environment status returned with code {response_code}: {response_message}") - return True if response_code == 200 else False - - def check_endpoints( + + def start( self, sp_access_key_id, sp_secret_access_key, @@ -44,60 +34,91 @@ def check_endpoints( resources_path, model_type, platform, - blobstore_provider - ): - converter_request = { - "aws_key_id": sp_access_key_id, - "aws_access_key": sp_secret_access_key, - "s3Bucket": blobstore_container, - "weightsPath": weights_path, - "resourcesPath": resources_path, - "model_type": model_type, - "platform": platform, - "blobStoreProvider": blobstore_provider - } - raw_response = requests.get( - f"{self._base_route}/check-endpoints", - params=converter_request - ).json() - response = raw_response["responseEntries"][0] - status_code = response["httpCode"] - message = response["message"] - self.logger.info(f"Response received with status code {status_code}: {message}") - return status_code == 200 - - def run( - self, - sp_access_key_id, - sp_secret_access_key, - blobstore_container, - weights_path, - resources_path, - model_type, - platform, - blobstore_provider - ): + blobstore_provider + ): + """Kicks off a `ModelConverter` job run + + Args: + sp_access_key_id (str): Access key for accessing cloud blob storage + sp_secret_access_key (str): Secret access key for accessing cloud blob storage + blobstore_container (str): Blob container storage name + weights_path (str): Path to weights archive in blob container storage + resource_path (str): Path to resources archive in blob container storage + model_type (str): Type of model to be converted + platform (str): The model provider where the input artifacts are generated. Options: `["sagemaker", "mlflow"]` + blobstore_provider (str): Cloud provider where model artifacts are saved. Options: `["gcp", "azure", "S3"]` + + Returns: + converter_response (ApiObject): Raw response from `ModelConverter` with the status of the converter job, message, and http code + + Raises: + ApiError: A subclass of ApiError will be raised if the API returns an error status, + or the client is unable to connect. + """ + converter_request = { - "aws_key_id": sp_access_key_id, - "aws_access_key": sp_secret_access_key, - "s3Bucket": blobstore_container, - "weightsPath": weights_path, - "resourcesPath": resources_path, + "sp_access_key_id": sp_access_key_id, + "sp_secret_access_key": sp_secret_access_key, + "blobstore_container": blobstore_container, + "weights_path": weights_path, + "resource_path": resources_path, "model_type": model_type, "platform": platform, - "blobStoreProvider": blobstore_provider + "blobstore_provider": blobstore_provider } - raw_response = requests.get(f"{self._base_route}/run", params=converter_request).json() - response = raw_response["responseEntries"][0] - status_code = response["httpCode"] - message = response["message"] - self.logger.info(f"Response received with status code {status_code}: {message}") - - # TODO: This may not be possible if status code is not 200 - if status_code == "200": - success = raw_response["successEntry"] - self.logger.info(f"Details: {json.dumps(success, indent=4)}") - else: - success = None - - return status_code, success + + converter_response = self._api_client.http.post('{}/{}'.format(self._base_route, "start"), json_data=converter_request) + return converter_response + + + def get_status(self, job_id): + """Retrieves status of a `ModelConverter` `Job` instance. + + Args: + job_id (str): The job identifier of a `ModelConverter` job. + + Returns: + Status: The Status of the `ModelConverter` job. + + Raises: + ApiError: A subclass of ApiError will be raised if the API returns an error status, + or the client is unable to connect. + """ + + status_raw_response = self._api_client.http.get("{}/{}?job_id={}".format(self._base_route, "get-status", job_id)) + + return status_raw_response + + def block_until_complete(self, job_id, timeout=600, poll_interval=5): + """Blocks until the `Job` completes or a timeout is reached. + + This is accomplished by polling the API until the `Job` status is set to `COMPLETED` + or `FAILED`. + + Args: + job_id (Union[str, Job, Result]): The job identifier of a `ModelConverter` `Job` instance. + timeout (Optional[float]): Seconds to wait until timeout. `None` indicates wait forever. + Defaults to 60. + poll_interval (Optional[float]): Seconds between polls. Defaults to 1. + + Returns: + Status: The completed status of the `ModelConverter` `Job` instance and the completed job's processing time. + + Raises: + Timeout: The `Job` did not complete before the timeout was reached. + ApiError: A subclass of ApiError will be raised if the API returns an error status, + or the client is unable to connect. + """ + endby = time.time() + timeout if (timeout is not None) else None + while True: # wait one poll at least once + self.logger.debug("waiting... %g", poll_interval) + time.sleep(poll_interval) + status = self.get_status(job_id) + self.logger.debug("job %s", job_id) + if status["jobStatus"] not in ["BUSY", "IMAGE_CREATION"]: + return status + if (endby is not None) and (time.time() > endby - poll_interval): + raise Timeout('timed out before completion') + + + From 3a8e25376f12cb1c7ef633d2169fb827ff1a158a Mon Sep 17 00:00:00 2001 From: Saumil Dave Date: Sat, 25 Sep 2021 14:51:25 -0700 Subject: [PATCH 13/19] refactor: generalize model dir upload helper --- modzy/converter/mlflow.py | 50 --------------------------------------- modzy/converter/utils.py | 45 +++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 50 deletions(-) delete mode 100644 modzy/converter/mlflow.py diff --git a/modzy/converter/mlflow.py b/modzy/converter/mlflow.py deleted file mode 100644 index 1bedee8..0000000 --- a/modzy/converter/mlflow.py +++ /dev/null @@ -1,50 +0,0 @@ -from modzy.converter.utils import get_authenticated_storage_provider_client -import time -import shutil -import os -import ntpath -import tarfile - - -def upload_mlflow_model(mlflow_model_dir, container, model_key, - storage_key, storage_secret, storage_provider): - """ Creates resources archive expected by model converter, uploads to storage provider. - Args: - mlflow_model_dir (str): Path to saved MLFlow model directory (e.g. using mlflow.sklearn.save_model()) - container (str): Storage provider container name (e.g. Bucket name in S3). - resources_key (str): Desired key for model archive once uploaded to storage provider. - storage_key (str): Storage provider access key. - storage_secret (str): Storage provider secret key. - storage_provider (str): Storage provider name (must be one of "S3", "AZURE_BLOBS", or "GOOGLE_STORAGE"). - """ - driver = get_authenticated_storage_provider_client(storage_provider, storage_key, storage_secret) - container = driver.get_container(container_name=container) - - # TODO: Probably set this outside of this helper function - MODEL_TAR_NAME = "weights.tar.gz" - - # Create temp dir - tmp_dir_path = os.path.join(os.getcwd(), ".tmp_" + str(time.time())) - os.mkdir(tmp_dir_path) - - # Move the local mlflow model artifacts that were saved out by MLFlow into an archive - model_tar_path = os.path.join(tmp_dir_path, MODEL_TAR_NAME) - tar = tarfile.open(model_tar_path, "w:gz") - mlflow_model_filenames = os.listdir(mlflow_model_dir) - for filename in mlflow_model_filenames: - full_path = os.path.join(mlflow_model_dir, filename) - tar.add(full_path, arcname=filename) - tar.close() - - # This method blocks until all the parts have been uploaded. - extra = {'content_type': 'application/octet-stream'} - - # Upload archive to storage provider - with open(model_tar_path, 'rb') as iterator: - obj = driver.upload_object_via_stream(iterator=iterator, - container=container, - object_name=model_key, - extra=extra) - - # Remove temp dir - shutil.rmtree(tmp_dir_path) diff --git a/modzy/converter/utils.py b/modzy/converter/utils.py index bf9b328..e407fab 100644 --- a/modzy/converter/utils.py +++ b/modzy/converter/utils.py @@ -67,3 +67,48 @@ def upload_resources(model_yaml_path, container, resources_key, # Remove temp dir shutil.rmtree(tmp_dir_path) + + +def upload_model_dir(model_dir, container, model_key, + storage_key, storage_secret, storage_provider): + """ Creates resources archive expected by model converter, uploads to storage provider. + Args: + model_dir (str): Path to saved model directory + container (str): Storage provider container name (e.g. Bucket name in S3). + resources_key (str): Desired key for model archive once uploaded to storage provider. + storage_key (str): Storage provider access key. + storage_secret (str): Storage provider secret key. + storage_provider (str): Storage provider name (must be one of "S3", "AZURE_BLOBS", or "GOOGLE_STORAGE"). + """ + driver = get_authenticated_storage_provider_client(storage_provider, storage_key, storage_secret) + container = driver.get_container(container_name=container) + + # TODO: Probably set this outside of this helper function + MODEL_TAR_NAME = "weights.tar.gz" + + # Create temp dir + tmp_dir_path = os.path.join(os.getcwd(), ".tmp_" + str(time.time())) + os.mkdir(tmp_dir_path) + + # Move the local model artifacts into an archive + model_tar_path = os.path.join(tmp_dir_path, MODEL_TAR_NAME) + tar = tarfile.open(model_tar_path, "w:gz") + model_filenames = os.listdir(model_dir) + for filename in model_filenames: + if not filename.startswith('.'): + full_path = os.path.join(model_dir, filename) + tar.add(full_path, arcname=os.path.join('imagefiles/',filename)) + tar.close() + + # This method blocks until all the parts have been uploaded. + extra = {'content_type': 'application/octet-stream'} + + # Upload archive to storage provider + with open(model_tar_path, 'rb') as iterator: + obj = driver.upload_object_via_stream(iterator=iterator, + container=container, + object_name=model_key, + extra=extra) + + # Remove temp dir + shutil.rmtree(tmp_dir_path) \ No newline at end of file From aa7ed4c8e8731e60eb3d131dcc232b19a2f049d7 Mon Sep 17 00:00:00 2001 From: Saumil Dave Date: Sat, 25 Sep 2021 14:52:33 -0700 Subject: [PATCH 14/19] dev: add azure functionality --- modzy/converter/azure.py | 40 ++++++++++++++++++++++++++++++ modzy/converter/model_converter.py | 15 +++++++++-- 2 files changed, 53 insertions(+), 2 deletions(-) create mode 100644 modzy/converter/azure.py diff --git a/modzy/converter/azure.py b/modzy/converter/azure.py new file mode 100644 index 0000000..3911b6c --- /dev/null +++ b/modzy/converter/azure.py @@ -0,0 +1,40 @@ +import os +import shutil +from azureml.core.model import InferenceConfig +from azureml.core.environment import Environment +from azureml.core import Workspace +from azureml.core.model import Model + +def prepare_azure_model(registered_model_name,subscription_id,resource_group, + workspace_name,env_name,entry_script_path,output_path, + overwrite=False): + + if os.path.isdir(output_path): + if overwrite: + shutil.rmtree(output_path) + else: + raise OSError("Output directory already exists and overwrite==False.") + os.makedirs(output_path) + + ws = Workspace.get(name=workspace_name, + subscription_id=subscription_id, + resource_group=resource_group) + + model = Model(ws, registered_model_name) + + myenv = Environment.get(workspace=ws, name=env_name, version="1") + myenv.inferencing_stack_version = "latest" + inference_config = InferenceConfig(entry_script=entry_script_path, environment=myenv) + + package = Model.package(ws, [model], inference_config, generate_dockerfile=True) + package.wait_for_creation(show_output=True) + package.save(output_path) + + acr=package.get_container_registry() + registry_info = { + "base_image_registry": acr.address, + "base_image_user": acr.username, + "base_image_pass": acr.password + } + + return registry_info \ No newline at end of file diff --git a/modzy/converter/model_converter.py b/modzy/converter/model_converter.py index 5c5a37f..71a8c06 100644 --- a/modzy/converter/model_converter.py +++ b/modzy/converter/model_converter.py @@ -34,7 +34,10 @@ def start( resources_path, model_type, platform, - blobstore_provider + blobstore_provider, + base_image_registry=None, + base_image_user=None, + base_image_pass=None ): """Kicks off a `ModelConverter` job run @@ -45,8 +48,11 @@ def start( weights_path (str): Path to weights archive in blob container storage resource_path (str): Path to resources archive in blob container storage model_type (str): Type of model to be converted - platform (str): The model provider where the input artifacts are generated. Options: `["sagemaker", "mlflow"]` + platform (str): The model provider where the input artifacts are generated. Options: `["sagemaker", "mlflow", "azure"]` blobstore_provider (str): Cloud provider where model artifacts are saved. Options: `["gcp", "azure", "S3"]` + base_image_registry (str): Only required for Azure, registry location output by prepare_azure_model() + base_image_registry_user (str): Only required for Azure, registry username output by prepare_azure_model() + base_image_registry_pass (str): Only required for Azure, registry password output by prepare_azure_model() Returns: converter_response (ApiObject): Raw response from `ModelConverter` with the status of the converter job, message, and http code @@ -67,6 +73,11 @@ def start( "blobstore_provider": blobstore_provider } + if platform == "azure": + converter_request['base_image_registry'] = base_image_registry + converter_request['base_image_registry_user'] = base_image_user + converter_request['base_image_registry_pass'] = base_image_pass + converter_response = self._api_client.http.post('{}/{}'.format(self._base_route, "start"), json_data=converter_request) return converter_response From c3dd823ee65cf66dc0439d8e18ef603650b31b00 Mon Sep 17 00:00:00 2001 From: Saumil Dave Date: Sat, 25 Sep 2021 14:58:52 -0700 Subject: [PATCH 15/19] docs: add docstring --- modzy/converter/azure.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/modzy/converter/azure.py b/modzy/converter/azure.py index 3911b6c..af43019 100644 --- a/modzy/converter/azure.py +++ b/modzy/converter/azure.py @@ -8,6 +8,24 @@ def prepare_azure_model(registered_model_name,subscription_id,resource_group, workspace_name,env_name,entry_script_path,output_path, overwrite=False): + """Prepares Azure model archive for submission to model converter. + + Args: + registered_model_name (str): Registered Azure ML model name. + subscription_id (str): Azure ML subscription ID. + resource_group (str): Azure ML resource group. + workspace_name (str): Azure ML workspace name. + env_name (str): Azure ML environment name. + entry_script_path (str): Path to local entry script to be submitted. + output_path (str): Where to create prepared model archive. + overwrite (bool): Whether or not to overwrite output_path if already exists. + + Returns: + registry_info (dict): Registry auth info required to be submitted to model converter service. + + Raises: + OSError: If output directory already exists and overwrite==False. + """ if os.path.isdir(output_path): if overwrite: From b9bad469735a6297a48920122a77a8a57a2c0063 Mon Sep 17 00:00:00 2001 From: Saumil Dave Date: Sat, 25 Sep 2021 15:00:04 -0700 Subject: [PATCH 16/19] chore: add empty line in docstring --- modzy/converter/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/modzy/converter/utils.py b/modzy/converter/utils.py index e407fab..b55d9b2 100644 --- a/modzy/converter/utils.py +++ b/modzy/converter/utils.py @@ -27,6 +27,7 @@ def get_authenticated_storage_provider_client(storage_provider, access_key, secr def upload_resources(model_yaml_path, container, resources_key, storage_key, storage_secret, storage_provider, additional_filepaths=[]): """ Creates resources archive expected by model converter, uploads to storage provider. + Args: model_yaml_path (str): Path to model.yaml file to be included. container (str): Storage provider container name (e.g. Bucket name in S3). @@ -72,6 +73,7 @@ def upload_resources(model_yaml_path, container, resources_key, def upload_model_dir(model_dir, container, model_key, storage_key, storage_secret, storage_provider): """ Creates resources archive expected by model converter, uploads to storage provider. + Args: model_dir (str): Path to saved model directory container (str): Storage provider container name (e.g. Bucket name in S3). From e055bae89febd7808ebb744af6ef6051b88ab43c Mon Sep 17 00:00:00 2001 From: Saumil Dave Date: Sun, 26 Sep 2021 17:18:36 -0700 Subject: [PATCH 17/19] fix: required dir structure different for azure and mlflow --- modzy/converter/utils.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/modzy/converter/utils.py b/modzy/converter/utils.py index b55d9b2..183bc79 100644 --- a/modzy/converter/utils.py +++ b/modzy/converter/utils.py @@ -71,7 +71,7 @@ def upload_resources(model_yaml_path, container, resources_key, def upload_model_dir(model_dir, container, model_key, - storage_key, storage_secret, storage_provider): + storage_key, storage_secret, storage_provider, platform): """ Creates resources archive expected by model converter, uploads to storage provider. Args: @@ -81,6 +81,7 @@ def upload_model_dir(model_dir, container, model_key, storage_key (str): Storage provider access key. storage_secret (str): Storage provider secret key. storage_provider (str): Storage provider name (must be one of "S3", "AZURE_BLOBS", or "GOOGLE_STORAGE"). + platform (str): Either "azure" or "mlflow". """ driver = get_authenticated_storage_provider_client(storage_provider, storage_key, storage_secret) container = driver.get_container(container_name=container) @@ -99,7 +100,12 @@ def upload_model_dir(model_dir, container, model_key, for filename in model_filenames: if not filename.startswith('.'): full_path = os.path.join(model_dir, filename) - tar.add(full_path, arcname=os.path.join('imagefiles/',filename)) + if platform=="azure": + tar.add(full_path, arcname=os.path.join('imagefiles/',filename)) + elif platform=="mlflow": + tar.add(full_path, arcname=filename) + else: + raise ValueError("Only 'azure' or 'mlflow' are acceptable for 'platform'.") tar.close() # This method blocks until all the parts have been uploaded. From 7cdf13200233f032187ba7e600a5d83ba81cd08d Mon Sep 17 00:00:00 2001 From: Saumil Dave Date: Sun, 26 Sep 2021 17:20:44 -0700 Subject: [PATCH 18/19] fix: importer_proc status shouldn't terminate block_until_complete --- modzy/converter/model_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modzy/converter/model_converter.py b/modzy/converter/model_converter.py index 71a8c06..ddffe82 100644 --- a/modzy/converter/model_converter.py +++ b/modzy/converter/model_converter.py @@ -126,7 +126,7 @@ def block_until_complete(self, job_id, timeout=600, poll_interval=5): time.sleep(poll_interval) status = self.get_status(job_id) self.logger.debug("job %s", job_id) - if status["jobStatus"] not in ["BUSY", "IMAGE_CREATION"]: + if status["jobStatus"] not in ["BUSY", "IMAGE_CREATION", "IMPORTER_PROC"]: return status if (endby is not None) and (time.time() > endby - poll_interval): raise Timeout('timed out before completion') From 857b891474c61e6ad6d7a7c9f53066d489505e87 Mon Sep 17 00:00:00 2001 From: Saumil Dave Date: Fri, 8 Oct 2021 10:58:30 -0700 Subject: [PATCH 19/19] fix: assertion phase != complete --- modzy/converter/model_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modzy/converter/model_converter.py b/modzy/converter/model_converter.py index ddffe82..350daaa 100644 --- a/modzy/converter/model_converter.py +++ b/modzy/converter/model_converter.py @@ -126,7 +126,7 @@ def block_until_complete(self, job_id, timeout=600, poll_interval=5): time.sleep(poll_interval) status = self.get_status(job_id) self.logger.debug("job %s", job_id) - if status["jobStatus"] not in ["BUSY", "IMAGE_CREATION", "IMPORTER_PROC"]: + if status["jobStatus"] not in ["BUSY", "IMAGE_CREATION", "IMPORTER_PROC","ASSERTION"]: return status if (endby is not None) and (time.time() > endby - poll_interval): raise Timeout('timed out before completion') pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy