From f5a0c44d2d2744d7ad6c7dfc3724b50613e0e069 Mon Sep 17 00:00:00 2001
From: Gilles Habran <gilles.habran@arhs-spikeseed.com>
Date: Wed, 12 Apr 2023 13:45:53 +0200
Subject: [PATCH] AITED-118: ci pipeline

---
 dev.tfvars                      |  9 +++-
 main.tf                         |  9 ++++
 modules/classifiers/common.tf   |  7 +++
 modules/classifiers/ecr.tf      | 44 +++++++++++++++++
 modules/storage/s3.tf           | 88 +++++++++++++++++++++++++++++++++
 modules/storage/s3_variables.tf | 12 +++++
 variables.tf                    | 16 ++++++
 7 files changed, 183 insertions(+), 2 deletions(-)
 create mode 100644 modules/classifiers/common.tf
 create mode 100644 modules/classifiers/ecr.tf

diff --git a/dev.tfvars b/dev.tfvars
index 35cb04d..cab0987 100644
--- a/dev.tfvars
+++ b/dev.tfvars
@@ -17,8 +17,10 @@ terraform_s3_bucket_name      = "d-ew1-ted-ai-terraform"
 terraform_dynamodb_table_name = "d-ew1-ted-ai-terraform-locks"
 
 # TED AI project
-s3_input_bucket_name = "d-ew1-ted-ai-input"
-s3_data_bucket_name  = "d-ew1-ted-ai-experiments-data"
+s3_input_bucket_name     = "d-ew1-ted-ai-input"
+s3_data_bucket_name      = "d-ew1-ted-ai-experiments-data"
+s3_ml_data_bucket_name   = "d-ew1-ted-ai-ml-data"
+s3_ml_models_bucket_name = "d-ew1-ted-ai-ml-models"
 
 ingestion_checkpoint_table = "d-ew1-ted-ai-ingestion-checkpoint"
 ingestion_tasks_table      = "d-ew1-ted-ai-ingestion-tasks"
@@ -26,9 +28,12 @@ ingestion_references_table = "d-ew1-ted-ai-ingestion-references"
 
 ingestion_tasks_queue_name = "d-ew1-ted-ai-ingestion-tasks-queue"
 
+sagemaker_classifiers_repository_name = "sagemaker-classifiers"
+
 # SSM path
 ssm_ingestion_checkpoint_id = "/dynamodb/ingestion_checkpoint/id"
 ssm_ingestion_tasks_id      = "/dynamodb/ingestion_tasks/id"
 ssm_ingestion_references_id = "/dynamodb/ingestion_references/id"
 ssm_buckets_input_id        = "/s3/input_bucket/id"
+ssm_buckets_ml_data_id      = "/s3/ml_data_bucket/id"
 ssm_ingestion_tasks_queue   = "/sqs/ingestion_tasks_queue/url"
diff --git a/main.tf b/main.tf
index 25dea11..6867325 100644
--- a/main.tf
+++ b/main.tf
@@ -24,6 +24,8 @@ module "storage" {
   source                      = "./modules/storage"
   s3_input_bucket_name        = var.s3_input_bucket_name
   s3_data_bucket_name         = var.s3_data_bucket_name
+  s3_ml_data_bucket_name      = var.s3_ml_data_bucket_name
+  s3_ml_models_bucket_name    = var.s3_ml_models_bucket_name
   tags                        = var.tags
   ingestion_checkpoint_table  = var.ingestion_checkpoint_table
   ingestion_tasks_table       = var.ingestion_tasks_table
@@ -32,6 +34,7 @@ module "storage" {
   ssm_ingestion_tasks_id      = var.ssm_ingestion_tasks_id
   ssm_ingestion_references_id = var.ssm_ingestion_references_id
   ssm_buckets_input_id        = var.ssm_buckets_input_id
+  ssm_buckets_ml_data_id      = var.ssm_buckets_ml_data_id
 }
 
 module "queue" {
@@ -41,3 +44,9 @@ module "queue" {
   ssm_ingestion_tasks_queue  = var.ssm_ingestion_tasks_queue
 }
 
+# ECR repositories for the SageMaker classifier images (see ./modules/classifiers).
+module "classifiers" {
+  source                                = "./modules/classifiers"
+  sagemaker_classifiers_repository_name = var.sagemaker_classifiers_repository_name
+  tags                                  = var.tags
+}
diff --git a/modules/classifiers/common.tf b/modules/classifiers/common.tf
new file mode 100644
index 0000000..b5ad44e
--- /dev/null
+++ b/modules/classifiers/common.tf
@@ -0,0 +1,7 @@
+variable "tags" {
+  type = map(string)
+}
+
+variable "sagemaker_classifiers_repository_name" {
+  type = string # base ECR repository name; ecr.tf also creates "<name>-ci"
+}
diff --git a/modules/classifiers/ecr.tf b/modules/classifiers/ecr.tf
new file mode 100644
index 0000000..3c7f5e3
--- /dev/null
+++ b/modules/classifiers/ecr.tf
@@ -0,0 +1,44 @@
+resource "aws_ecr_repository" "sagemaker_classifiers" {
+  name                 = var.sagemaker_classifiers_repository_name
+  image_tag_mutability = "IMMUTABLE" # a pushed tag can never be overwritten
+
+  image_scanning_configuration {
+    scan_on_push = true # scan each image as soon as it is pushed
+  }
+
+  tags = var.tags
+}
+
+resource "aws_ecr_repository" "sagemaker_classifiers_ci" {
+  name                 = "${var.sagemaker_classifiers_repository_name}-ci"
+  image_tag_mutability = "IMMUTABLE" # a pushed tag can never be overwritten
+
+  image_scanning_configuration {
+    scan_on_push = true # scan each image as soon as it is pushed
+  }
+  tags = var.tags
+}
+
+resource "aws_ecr_lifecycle_policy" "sagemaker_classifiers_ci" {
+  repository = aws_ecr_repository.sagemaker_classifiers_ci.name
+
+  # Single rule: expire every image (any tag status) one day after it was pushed.
+  # Built with jsonencode so the policy is valid JSON by construction.
+  policy = jsonencode({
+    rules = [
+      {
+        rulePriority = 1
+        description  = "Expire images older than 1 day"
+        selection = {
+          tagStatus   = "any"
+          countType   = "sinceImagePushed"
+          countUnit   = "days"
+          countNumber = 1
+        }
+        action = {
+          type = "expire"
+        }
+      }
+    ]
+  })
+}
diff --git a/modules/storage/s3.tf b/modules/storage/s3.tf
index 0a8e190..dddb3c3 100644
--- a/modules/storage/s3.tf
+++ b/modules/storage/s3.tf
@@ -85,3 +85,91 @@ resource "aws_s3_bucket_server_side_encryption_configuration" "data_bucket" {
     }
   }
 }
+
+resource "aws_s3_bucket" "ml_data_bucket" {
+  bucket = var.s3_ml_data_bucket_name
+
+  # prevent_destroy makes Terraform reject any plan that would destroy (or replace) this bucket
+  lifecycle {
+    prevent_destroy = true
+  }
+
+  tags = var.tags
+}
+
+resource "aws_s3_bucket_versioning" "ml_data_bucket" {
+  bucket = aws_s3_bucket.ml_data_bucket.id
+  versioning_configuration {
+    status = "Enabled"
+  }
+}
+
+resource "aws_s3_bucket_acl" "ml_data_bucket" {
+  bucket = aws_s3_bucket.ml_data_bucket.id
+  acl    = "private" # NOTE(review): buckets created since April 2023 have ACLs disabled by default - confirm this ACL call is still accepted
+}
+
+resource "aws_s3_bucket_public_access_block" "ml_data_bucket" {
+  bucket = aws_s3_bucket.ml_data_bucket.id
+
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+
+resource "aws_s3_bucket_server_side_encryption_configuration" "ml_data_bucket" {
+  bucket = aws_s3_bucket.ml_data_bucket.id
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm = "AES256" # SSE-S3: default encryption with S3-managed keys
+    }
+  }
+}
+
+resource "aws_ssm_parameter" "ml_data_bucket" {
+  name  = var.ssm_buckets_ml_data_id
+  type  = "String" # plain (non-SecureString) parameter; the value is only the bucket id
+  value = aws_s3_bucket.ml_data_bucket.id
+}
+
+resource "aws_s3_bucket" "ml_models_bucket" {
+  bucket = var.s3_ml_models_bucket_name # NOTE(review): no aws_ssm_parameter is published for this bucket, unlike ml_data_bucket - confirm intended
+
+  # prevent_destroy makes Terraform reject any plan that would destroy (or replace) this bucket
+  lifecycle {
+    prevent_destroy = true
+  }
+
+  tags = var.tags
+}
+
+resource "aws_s3_bucket_versioning" "ml_models_bucket" {
+  bucket = aws_s3_bucket.ml_models_bucket.id
+  versioning_configuration {
+    status = "Enabled"
+  }
+}
+
+resource "aws_s3_bucket_acl" "ml_models_bucket" {
+  bucket = aws_s3_bucket.ml_models_bucket.id
+  acl    = "private" # NOTE(review): buckets created since April 2023 have ACLs disabled by default - confirm this ACL call is still accepted
+}
+
+resource "aws_s3_bucket_public_access_block" "ml_models_bucket" {
+  bucket = aws_s3_bucket.ml_models_bucket.id
+
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+
+resource "aws_s3_bucket_server_side_encryption_configuration" "ml_models_bucket" {
+  bucket = aws_s3_bucket.ml_models_bucket.id
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm = "AES256" # SSE-S3: default encryption with S3-managed keys
+    }
+  }
+}
diff --git a/modules/storage/s3_variables.tf b/modules/storage/s3_variables.tf
index eca3337..a2a615d 100644
--- a/modules/storage/s3_variables.tf
+++ b/modules/storage/s3_variables.tf
@@ -6,7 +6,19 @@ variable "s3_data_bucket_name" {
   type = string
 }
 
+variable "s3_ml_data_bucket_name" {
+  type = string
+}
+
+variable "s3_ml_models_bucket_name" {
+  type = string
+}
+
 variable "ssm_buckets_input_id" {
   type = string
 }
 
+variable "ssm_buckets_ml_data_id" {
+  type = string
+}
+
diff --git a/variables.tf b/variables.tf
index 3e6ca91..7647882 100644
--- a/variables.tf
+++ b/variables.tf
@@ -38,6 +38,14 @@ variable "s3_data_bucket_name" {
   type = string
 }
 
+variable "s3_ml_data_bucket_name" {
+  type = string
+}
+
+variable "s3_ml_models_bucket_name" {
+  type = string
+}
+
 variable "ingestion_checkpoint_table" {
   type = string
 }
@@ -54,6 +62,10 @@ variable "ingestion_tasks_queue_name" {
   type = string
 }
 
+variable "sagemaker_classifiers_repository_name" {
+  type = string
+}
+
 # SSM
 variable "ssm_ingestion_checkpoint_id" {
   type = string
@@ -71,6 +83,10 @@ variable "ssm_buckets_input_id" {
   type = string
 }
 
+variable "ssm_buckets_ml_data_id" {
+  type = string
+}
+
 variable "ssm_ingestion_tasks_queue" {
   type = string
 }
\ No newline at end of file
-- 
GitLab