From da8ad1bf170b50f46c2f2b6c74896e7c121c1780 Mon Sep 17 00:00:00 2001
From: Lionel Weicker <lionel.weicker@arhs-spikeseed.com>
Date: Tue, 31 Oct 2023 09:35:49 +0100
Subject: [PATCH] Performance test on models deployed

---
 20231031-performance_test_models_deployed.py | 118 +++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 20231031-performance_test_models_deployed.py

diff --git a/20231031-performance_test_models_deployed.py b/20231031-performance_test_models_deployed.py
new file mode 100644
index 0000000..0affcfe
--- /dev/null
+++ b/20231031-performance_test_models_deployed.py
@@ -0,0 +1,118 @@
+import json
+import time
+
+import pandas as pd
+import boto3
+
+# boto3 clients used by this script; every one targets the eu-west-1 region.
+sagemaker_runtime_client = boto3.client('sagemaker-runtime', region_name="eu-west-1")
+sagemaker_client = boto3.client('sagemaker', region_name="eu-west-1")
+ssm_client = boto3.client('ssm', region_name="eu-west-1")
+
+# Maps each classifier name to its settings. "ssm_path" is the name of the SSM
+# parameter whose value is used as the SageMaker endpoint name.
+config = {
+    "multi-label-division-classifier":
+        {"ssm_path": "/tedai/sagemaker/endpoint/multi_label_division_classifier/name"},
+    "opentender-multi-label-division-classifier":
+        {"ssm_path": "/tedai/sagemaker/endpoint/opentender_multi_label_division_classifier/name"},
+    "roberta-multi-label-division-classifier":
+        {"ssm_path": "/tedai/sagemaker/endpoint/roberta_multi_label_division_classifier/name"},
+}
+
+# Fetch every endpoint name from SSM and store it next to its "ssm_path".
+# Only values of existing entries change, so iterating config.items() is safe.
+for classifier_name, classifier_config in config.items():
+    parameter = ssm_client.get_parameter(Name=classifier_config["ssm_path"])
+    classifier_config["endpoint_name"] = parameter['Parameter']['Value']
+
+# Endpoint names read by the model_* functions defined in this script.
+LINEARSVC_ALL_ENG_NOTICES_ENDPOINT = config["multi-label-division-classifier"]['endpoint_name']
+LINEARSVC_OPENTENDER_ENDPOINT = config["opentender-multi-label-division-classifier"]['endpoint_name']
+ROBERTA_ENDPOINT = config["roberta-multi-label-division-classifier"]['endpoint_name']
+
+
+def _invoke_classifier(endpoint_name: str, title: str, description: str):
+    """Send one title/description pair to a SageMaker endpoint.
+
+    Args:
+        endpoint_name: name of the endpoint passed to ``invoke_endpoint``.
+        title: value sent under the "title" key of the JSON body.
+        description: value sent under the "description" key of the JSON body.
+
+    Returns:
+        The response body, read in full, decoded and parsed with ``json.loads``.
+    """
+    payload = {
+        "title": title,
+        "description": description
+    }
+    response = sagemaker_runtime_client.invoke_endpoint(
+        EndpointName=endpoint_name,
+        Body=json.dumps(payload),
+        ContentType='application/json'
+    )
+    return json.loads(response["Body"].read().decode())
+
+
+def model_multi_label_division_classifier(title: str, description: str):
+    """Return the parsed reply of the LINEARSVC_ALL_ENG_NOTICES_ENDPOINT endpoint."""
+    return _invoke_classifier(
+        LINEARSVC_ALL_ENG_NOTICES_ENDPOINT, title, description
+    )
+
+
+def model_opentender_multi_label_division_classifier(title: str, description: str):
+    """Return the parsed reply of the LINEARSVC_OPENTENDER_ENDPOINT endpoint."""
+    return _invoke_classifier(
+        LINEARSVC_OPENTENDER_ENDPOINT, title, description
+    )
+
+
+def model_roberta_multi_label_division_classifier(title: str, description: str):
+    """Return the parsed reply of the ROBERTA_ENDPOINT endpoint."""
+    return _invoke_classifier(
+        ROBERTA_ENDPOINT, title, description
+    )
+
+
+# Benchmark input: the first NUMBER_ROWS rows of the CSV, reduced to the two
+# text columns that are passed to the model functions.
+NUMBER_ROWS = 5000
+df = pd.read_csv("20231020-all_EN_notices_tedAI.csv", index_col=0)
+df = df[["title", 'short_description']].head(NUMBER_ROWS)
+
+# One list of durations per fully processed row, ordered like list_functions.
+prediction_times = []
+list_functions = [model_multi_label_division_classifier, model_opentender_multi_label_division_classifier,
+                  model_roberta_multi_label_division_classifier]
+
+for index, row in df.iterrows():  # one timed call per model function for each row
+    local_prediction_time = []
+    try:
+        for fn in list_functions:
+            time.sleep(0.01)  # pause between requests; not included in the measured duration
+            start_time = time.perf_counter()  # monotonic clock intended for measuring intervals
+            fn(row['title'], row['short_description'])
+            local_prediction_time.append(time.perf_counter() - start_time)
+    except Exception as e:
+        print(e)  # any failure drops the whole row, so every kept row has one timing per model
+        continue
+    prediction_times.append(local_prediction_time)
+
+# Split the per-row timings into one pandas Series per model; position i of each
+# row holds the duration measured for the i-th function of list_functions.
+(predictions_time_linear_svc_on_all_eng_notice_df,
+ predictions_time_linear_svc_on_opentender_df,
+ predictions_time_roberta_on_all_eng_notice_df) = (
+    pd.Series([timings[position] for timings in prediction_times]) for position in range(3)
+)
+
+# Print the describe() summary of each model's timings under its heading.
+for heading, series in (
+        ("============== LinearSVC on ALL ENG notices", predictions_time_linear_svc_on_all_eng_notice_df),
+        ("============== LinearSVC on OpenTender EU notices", predictions_time_linear_svc_on_opentender_df),
+        ("============== Roberta on ALL ENG notices", predictions_time_roberta_on_all_eng_notice_df),
+):
+    print(heading)
+    print(series.describe())
-- 
GitLab