From f5a0c44d2d2744d7ad6c7dfc3724b50613e0e069 Mon Sep 17 00:00:00 2001
From: Gilles Habran <gilles.habran@arhs-spikeseed.com>
Date: Wed, 12 Apr 2023 13:45:53 +0200
Subject: [PATCH] AITED-118: ci pipeline

---
Review notes (below the three-dash line, so not part of the commit message):
- modules/storage/s3.tf: the two new buckets use
  aws_s3_bucket_ownership_controls (BucketOwnerEnforced) instead of
  aws_s3_bucket_acl "private"; buckets created since April 2023 have ACLs
  disabled by default and reject canned ACLs.
- modules/classifiers/common.tf now ends with a newline.
- Diffstat and hunk sizes were updated to match; the blob ids on the
  "index" lines are carried over unchanged from the original patch.

 dev.tfvars                      |   9 ++-
 main.tf                         |   9 ++
 modules/classifiers/common.tf   |   7 ++
 modules/classifiers/ecr.tf      |  47 +++++++++++
 modules/storage/s3.tf           | 101 +++++++++++++++++++++++++
 modules/storage/s3_variables.tf |  12 +++
 variables.tf                    |  16 ++++
 7 files changed, 199 insertions(+), 2 deletions(-)
 create mode 100644 modules/classifiers/common.tf
 create mode 100644 modules/classifiers/ecr.tf

diff --git a/dev.tfvars b/dev.tfvars
index 35cb04d..cab0987 100644
--- a/dev.tfvars
+++ b/dev.tfvars
@@ -17,8 +17,10 @@ terraform_s3_bucket_name      = "d-ew1-ted-ai-terraform"
 terraform_dynamodb_table_name = "d-ew1-ted-ai-terraform-locks"
 
 # TED AI project
-s3_input_bucket_name = "d-ew1-ted-ai-input"
-s3_data_bucket_name  = "d-ew1-ted-ai-experiments-data"
+s3_input_bucket_name     = "d-ew1-ted-ai-input"
+s3_data_bucket_name      = "d-ew1-ted-ai-experiments-data"
+s3_ml_data_bucket_name   = "d-ew1-ted-ai-ml-data"
+s3_ml_models_bucket_name = "d-ew1-ted-ai-ml-models"
 
 ingestion_checkpoint_table = "d-ew1-ted-ai-ingestion-checkpoint"
 ingestion_tasks_table      = "d-ew1-ted-ai-ingestion-tasks"
@@ -26,9 +28,12 @@ ingestion_references_table = "d-ew1-ted-ai-ingestion-references"
 
 ingestion_tasks_queue_name = "d-ew1-ted-ai-ingestion-tasks-queue"
 
+sagemaker_classifiers_repository_name = "sagemaker-classifiers"
+
 # SSM path
 ssm_ingestion_checkpoint_id = "/dynamodb/ingestion_checkpoint/id"
 ssm_ingestion_tasks_id      = "/dynamodb/ingestion_tasks/id"
 ssm_ingestion_references_id = "/dynamodb/ingestion_references/id"
 ssm_buckets_input_id        = "/s3/input_bucket/id"
+ssm_buckets_ml_data_id      = "/s3/ml_data_bucket/id"
 ssm_ingestion_tasks_queue   = "/sqs/ingestion_tasks_queue/url"
diff --git a/main.tf b/main.tf
index 25dea11..6867325 100644
--- a/main.tf
+++ b/main.tf
@@ -24,6 +24,8 @@ module "storage" {
   source                      = "./modules/storage"
   s3_input_bucket_name        = var.s3_input_bucket_name
   s3_data_bucket_name         = var.s3_data_bucket_name
+  s3_ml_data_bucket_name      = var.s3_ml_data_bucket_name
+  s3_ml_models_bucket_name    = var.s3_ml_models_bucket_name
   tags                        = var.tags
   ingestion_checkpoint_table  = var.ingestion_checkpoint_table
   ingestion_tasks_table       = var.ingestion_tasks_table
@@ -32,6 +34,7 @@ module "storage" {
   ssm_ingestion_tasks_id      = var.ssm_ingestion_tasks_id
   ssm_ingestion_references_id = var.ssm_ingestion_references_id
   ssm_buckets_input_id        = var.ssm_buckets_input_id
+  ssm_buckets_ml_data_id      = var.ssm_buckets_ml_data_id
 }
 
 module "queue" {
@@ -41,3 +44,9 @@ module "queue" {
   ssm_ingestion_tasks_queue  = var.ssm_ingestion_tasks_queue
 }
 
+module "classifiers" {
+  source                                = "./modules/classifiers"
+  sagemaker_classifiers_repository_name = var.sagemaker_classifiers_repository_name
+  tags                                  = var.tags
+}
+
diff --git a/modules/classifiers/common.tf b/modules/classifiers/common.tf
new file mode 100644
index 0000000..b5ad44e
--- /dev/null
+++ b/modules/classifiers/common.tf
@@ -0,0 +1,7 @@
+variable "tags" {
+  type = map(string)
+}
+
+variable "sagemaker_classifiers_repository_name" {
+  type = string
+}
diff --git a/modules/classifiers/ecr.tf b/modules/classifiers/ecr.tf
new file mode 100644
index 0000000..3c7f5e3
--- /dev/null
+++ b/modules/classifiers/ecr.tf
@@ -0,0 +1,47 @@
+# ECR repository named by var.sagemaker_classifiers_repository_name; tags are immutable and images are scanned on push.
+resource "aws_ecr_repository" "sagemaker_classifiers" {
+  name                 = var.sagemaker_classifiers_repository_name
+  image_tag_mutability = "IMMUTABLE"
+
+  image_scanning_configuration {
+    scan_on_push = true
+  }
+
+  tags = var.tags
+}
+
+# Companion "-ci" repository with the same settings; its images are expired by the lifecycle policy below.
+resource "aws_ecr_repository" "sagemaker_classifiers_ci" {
+  name                 = "${var.sagemaker_classifiers_repository_name}-ci"
+  image_tag_mutability = "IMMUTABLE"
+
+  image_scanning_configuration {
+    scan_on_push = true
+  }
+  tags = var.tags
+}
+
+# Expires every image (any tag status) in the "-ci" repository one day after it was pushed.
+resource "aws_ecr_lifecycle_policy" "sagemaker_classifiers_ci" {
+  repository = aws_ecr_repository.sagemaker_classifiers_ci.name
+
+  policy = <<EOF
+{
+  "rules": [
+    {
+      "rulePriority": 1,
+      "description": "Expire images older than 1 days",
+      "selection": {
+        "tagStatus": "any",
+        "countType": "sinceImagePushed",
+        "countUnit": "days",
+        "countNumber": 1
+      },
+      "action": {
+        "type": "expire"
+      }
+    }
+  ]
+}
+EOF
+}
diff --git a/modules/storage/s3.tf b/modules/storage/s3.tf
index 0a8e190..dddb3c3 100644
--- a/modules/storage/s3.tf
+++ b/modules/storage/s3.tf
@@ -85,3 +85,104 @@ resource "aws_s3_bucket_server_side_encryption_configuration" "data_bucket" {
     }
   }
 }
+
+resource "aws_s3_bucket" "ml_data_bucket" {
+  bucket = var.s3_ml_data_bucket_name
+
+  # Prevents Terraform from destroying or replacing this object - a great safety mechanism
+  lifecycle {
+    prevent_destroy = true
+  }
+
+  tags = var.tags
+}
+
+resource "aws_s3_bucket_versioning" "ml_data_bucket" {
+  bucket = aws_s3_bucket.ml_data_bucket.id
+  versioning_configuration {
+    status = "Enabled"
+  }
+}
+
+# S3 buckets created since April 2023 have ACLs disabled by default and reject
+# a canned "private" ACL, so ownership is enforced explicitly instead of using
+# aws_s3_bucket_acl. With BucketOwnerEnforced the bucket owner owns every object.
+resource "aws_s3_bucket_ownership_controls" "ml_data_bucket" {
+  bucket = aws_s3_bucket.ml_data_bucket.id
+
+  rule {
+    object_ownership = "BucketOwnerEnforced"
+  }
+}
+
+resource "aws_s3_bucket_public_access_block" "ml_data_bucket" {
+  bucket = aws_s3_bucket.ml_data_bucket.id
+
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+
+resource "aws_s3_bucket_server_side_encryption_configuration" "ml_data_bucket" {
+  bucket = aws_s3_bucket.ml_data_bucket.id
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm = "AES256"
+    }
+  }
+}
+
+# Stores the ML data bucket id in an SSM parameter named by var.ssm_buckets_ml_data_id.
+resource "aws_ssm_parameter" "ml_data_bucket" {
+  name  = var.ssm_buckets_ml_data_id
+  type  = "String"
+  value = aws_s3_bucket.ml_data_bucket.id
+}
+
+resource "aws_s3_bucket" "ml_models_bucket" {
+  bucket = var.s3_ml_models_bucket_name
+
+  # Prevents Terraform from destroying or replacing this object - a great safety mechanism
+  lifecycle {
+    prevent_destroy = true
+  }
+
+  tags = var.tags
+}
+
+resource "aws_s3_bucket_versioning" "ml_models_bucket" {
+  bucket = aws_s3_bucket.ml_models_bucket.id
+  versioning_configuration {
+    status = "Enabled"
+  }
+}
+
+# S3 buckets created since April 2023 have ACLs disabled by default and reject
+# a canned "private" ACL, so ownership is enforced explicitly instead of using
+# aws_s3_bucket_acl. With BucketOwnerEnforced the bucket owner owns every object.
+resource "aws_s3_bucket_ownership_controls" "ml_models_bucket" {
+  bucket = aws_s3_bucket.ml_models_bucket.id
+
+  rule {
+    object_ownership = "BucketOwnerEnforced"
+  }
+}
+
+resource "aws_s3_bucket_public_access_block" "ml_models_bucket" {
+  bucket = aws_s3_bucket.ml_models_bucket.id
+
+  block_public_acls       = true
+  block_public_policy     = true
+  ignore_public_acls      = true
+  restrict_public_buckets = true
+}
+
+resource "aws_s3_bucket_server_side_encryption_configuration" "ml_models_bucket" {
+  bucket = aws_s3_bucket.ml_models_bucket.id
+  rule {
+    apply_server_side_encryption_by_default {
+      sse_algorithm = "AES256"
+    }
+  }
+}
diff --git a/modules/storage/s3_variables.tf b/modules/storage/s3_variables.tf
index eca3337..a2a615d 100644
--- a/modules/storage/s3_variables.tf
+++ b/modules/storage/s3_variables.tf
@@ -6,7 +6,19 @@ variable "s3_data_bucket_name" {
   type = string
 }
 
+variable "s3_ml_data_bucket_name" {
+  type = string
+}
+
+variable "s3_ml_models_bucket_name" {
+  type = string
+}
+
 variable "ssm_buckets_input_id" {
   type = string
 }
 
+variable "ssm_buckets_ml_data_id" {
+  type = string
+}
+
diff --git a/variables.tf b/variables.tf
index 3e6ca91..7647882 100644
--- a/variables.tf
+++ b/variables.tf
@@ -38,6 +38,14 @@ variable "s3_data_bucket_name" {
   type = string
 }
 
+variable "s3_ml_data_bucket_name" {
+  type = string
+}
+
+variable "s3_ml_models_bucket_name" {
+  type = string
+}
+
 variable "ingestion_checkpoint_table" {
   type = string
 }
@@ -54,6 +62,10 @@ variable "ingestion_tasks_queue_name" {
   type = string
 }
 
+variable "sagemaker_classifiers_repository_name" {
+  type = string
+}
+
 # SSM
 variable "ssm_ingestion_checkpoint_id" {
   type = string
@@ -71,6 +83,10 @@ variable "ssm_buckets_input_id" {
   type = string
 }
 
+variable "ssm_buckets_ml_data_id" {
+  type = string
+}
+
 variable "ssm_ingestion_tasks_queue" {
   type = string
 }
\ No newline at end of file
-- 
GitLab