# Provenance: recovered from git patch da8ad1bf170b50f46c2f2b6c74896e7c121c1780
#   From:    Lionel Weicker <lionel.weicker@arhs-spikeseed.com>
#   Date:    Tue, 31 Oct 2023 09:35:49 +0100
#   Subject: [PATCH] Performance test on models deployed
#   File:    20231031-performance_test_models_deployed.py (new file)
"""Performance test on models deployed.

Sends the ``title`` / ``short_description`` of the first ``NUMBER_ROWS`` rows
of a CSV export to three SageMaker endpoints and prints descriptive
statistics of the per-request latency observed for each endpoint.
"""
import json
import time

import pandas as pd
import boto3

sagemaker_runtime_client = boto3.client('sagemaker-runtime', region_name="eu-west-1")
# Not referenced anywhere in this script; kept so the module-level names are unchanged.
sagemaker_client = boto3.client('sagemaker', region_name="eu-west-1")
ssm_client = boto3.client('ssm', region_name="eu-west-1")

# One entry per classifier. "ssm_path" is the SSM parameter whose value is the
# SageMaker endpoint name; "endpoint_name" is filled in by the loop below.
config = {
    "multi-label-division-classifier": {
        "ssm_path": "/tedai/sagemaker/endpoint/multi_label_division_classifier/name"
    },
    "opentender-multi-label-division-classifier": {
        "ssm_path": "/tedai/sagemaker/endpoint/opentender_multi_label_division_classifier/name"
    },
    "roberta-multi-label-division-classifier": {
        "ssm_path": "/tedai/sagemaker/endpoint/roberta_multi_label_division_classifier/name"
    }
}

# Resolve every endpoint name up front so that no SSM lookup can end up inside
# a timed request later on.
for classifier_config in config.values():
    response = ssm_client.get_parameter(Name=classifier_config["ssm_path"])
    classifier_config["endpoint_name"] = response['Parameter']['Value']

LINEARSVC_ALL_ENG_NOTICES_ENDPOINT = config["multi-label-division-classifier"]['endpoint_name']
LINEARSVC_OPENTENDER_ENDPOINT = config["opentender-multi-label-division-classifier"]['endpoint_name']
ROBERTA_ENDPOINT = config["roberta-multi-label-division-classifier"]['endpoint_name']

INPUT_CSV = "20231020-all_EN_notices_tedAI.csv"
NUMBER_ROWS = 5000
# Pause inserted before every request; it is NOT part of the measured time.
PAUSE_BETWEEN_CALLS_SECONDS = 0.01


def _invoke_classifier(endpoint_name: str, title: str, description: str):
    """Send one title/description pair to ``endpoint_name`` and decode the reply.

    The payload is posted as JSON (``{"title": ..., "description": ...}``) and
    the response body is read in full and parsed as JSON.

    Returns:
        The JSON-decoded response body, exactly as produced by ``json.loads``.

    Raises:
        Whatever ``invoke_endpoint`` or ``json.loads`` raise; nothing is
        caught here.
    """
    payload = {
        "title": title,
        "description": description
    }
    response = sagemaker_runtime_client.invoke_endpoint(
        EndpointName=endpoint_name,
        Body=json.dumps(payload),
        ContentType='application/json'
    )
    return json.loads(response["Body"].read().decode())


def model_multi_label_division_classifier(title: str, description: str):
    """Query the endpoint stored in ``LINEARSVC_ALL_ENG_NOTICES_ENDPOINT``.

    Returns the JSON-decoded response body.
    """
    return _invoke_classifier(LINEARSVC_ALL_ENG_NOTICES_ENDPOINT, title, description)


def model_opentender_multi_label_division_classifier(title: str, description: str):
    """Query the endpoint stored in ``LINEARSVC_OPENTENDER_ENDPOINT``.

    Returns the JSON-decoded response body.
    """
    return _invoke_classifier(LINEARSVC_OPENTENDER_ENDPOINT, title, description)


def model_roberta_multi_label_division_classifier(title: str, description: str):
    """Query the endpoint stored in ``ROBERTA_ENDPOINT``.

    Returns the JSON-decoded response body.
    """
    return _invoke_classifier(ROBERTA_ENDPOINT, title, description)


# (report label, predictor) pairs. The order here fixes both the call order
# within a row and the order of the printed report.
BENCHMARKS = (
    ("LinearSVC on ALL ENG notices", model_multi_label_division_classifier),
    ("LinearSVC on OpenTender EU notices", model_opentender_multi_label_division_classifier),
    ("Roberta on ALL ENG notices", model_roberta_multi_label_division_classifier),
)


def _time_predictions(df, predictors, pause_seconds=PAUSE_BETWEEN_CALLS_SECONDS):
    """Time each predictor on every row of ``df``.

    Args:
        df: DataFrame with ``title`` and ``short_description`` columns.
        predictors: callables taking ``(title, description)``.
        pause_seconds: sleep inserted before each call, outside the timed
            window.

    Returns:
        A list with one entry per fully successful row; each entry is a list
        of elapsed seconds, one per predictor, in ``predictors`` order.
    """
    prediction_times = []
    for _, row in df.iterrows():
        # NOTE(review): an empty CSV cell is read by pandas as NaN, which
        # json.dumps emits as the non-standard token `NaN` -- confirm the
        # endpoints accept that, or clean the input beforehand.
        row_times = []
        try:
            for predict in predictors:
                time.sleep(pause_seconds)
                # perf_counter is the clock intended for measuring short
                # intervals; time.time() can jump when the system clock is
                # adjusted and is coarse on some platforms.
                start = time.perf_counter()
                predict(row['title'], row['short_description'])
                row_times.append(time.perf_counter() - start)
        except Exception as e:
            # Best effort: report the failure and drop the WHOLE row, so that
            # every kept row has exactly one timing per predictor.
            print(e)
            continue
        prediction_times.append(row_times)
    return prediction_times


def main():
    """Run the benchmark and print latency statistics for each endpoint."""
    df = pd.read_csv(INPUT_CSV, index_col=0)
    df = df[["title", 'short_description']].head(NUMBER_ROWS)

    prediction_times = _time_predictions(df, [predict for _, predict in BENCHMARKS])

    for position, (label, _) in enumerate(BENCHMARKS):
        # dtype=float keeps describe() numeric even when every row failed and
        # the list is empty (an empty Series would otherwise be object dtype).
        durations = pd.Series(
            [row_times[position] for row_times in prediction_times],
            dtype=float,
        )
        if position:
            print()
        print(f"============== {label}")
        print(durations.describe())


if __name__ == "__main__":
    main()