@@ -27,15 +27,14 @@ resource "aws_cloudwatch_dashboard" "main" {
2727 }
2828 }
2929 },
30- # EC2 Memory Utilization
30+ # EC2 Memory Utilization (Custom Metric)
3131 {
3232 type = " metric"
3333 properties = {
3434 metrics = [
35- [" CWAgent " , " mem_used_percent " , " AutoScalingGroupName " , var.eb_autoscaling_group_name, { stat = " Average " }]
35+ [{ expression = " SELECT AVG(MemoryUtilization) FROM \" CWAgent \" " , id = " m1 " }]
3636 ]
3737 period = 300
38- stat = " Average"
3938 region = var.aws_region
4039 title = " EC2 Memory Utilization (%)"
4140 yAxis = {
@@ -46,25 +45,42 @@ resource "aws_cloudwatch_dashboard" "main" {
4645 }
4746 }
4847 },
49- # EB Request Count
48+ # EC2 Disk Utilization (Custom Metric) - Root filesystem
5049 {
5150 type = " metric"
5251 properties = {
5352 metrics = [
54- [" AWS/ElasticBeanstalk" , " RequestCount" , " EnvironmentName" , var.eb_environment_name, { stat = " Sum" }]
53+ [{ expression = " SELECT AVG(DiskUtilization) FROM \" CWAgent\" WHERE path = '/'" , id = " m1" }]
54+ ]
55+ period = 300
56+ region = var.aws_region
57+ title = " EC2 Disk Utilization (%) - Root"
58+ yAxis = {
59+ left = {
60+ min = 0
61+ max = 100
62+ }
63+ }
64+ }
65+ },
66+ {
67+ type = " metric"
68+ properties = {
69+ metrics = [
70+ [" AWS/ApplicationELB" , " RequestCount" , " LoadBalancer" , var.alb_arn_suffix, { stat = " Sum" }]
5571 ]
5672 period = 300
5773 stat = " Sum"
5874 region = var.aws_region
5975 title = " Request Count"
6076 }
6177 },
62- # HTTP 5xx Errors
78+ # HTTP 5xx Errors (Target Responses)
6379 {
6480 type = " metric"
6581 properties = {
6682 metrics = [
67- [" AWS/ElasticBeanstalk " , " ApplicationRequests5xx " , " EnvironmentName " , var.eb_environment_name , { stat = " Sum" }]
83+ [" AWS/ApplicationELB " , " HTTPCode_Target_5XX_Count " , " LoadBalancer " , var.alb_arn_suffix , { stat = " Sum" }]
6884 ]
6985 period = 300
7086 stat = " Sum"
@@ -144,6 +160,47 @@ resource "aws_cloudwatch_dashboard" "main" {
144160 region = var.aws_region
145161 title = " RDS Freeable Memory (Bytes)"
146162 }
163+ },
164+ # RDS Read/Write Latency - average per-I/O latency for the DB instance (CloudWatch reports these metrics in seconds)
165+ {
166+ type = " metric"
167+ properties = {
168+ metrics = [
169+ [" AWS/RDS" , " ReadLatency" , " DBInstanceIdentifier" , var.rds_instance_id, { stat = " Average" , label = " Read Latency" }],
170+ [" ." , " WriteLatency" , " ." , " ." , { stat = " Average" , label = " Write Latency" }]
171+ ]
172+ period = 300
173+ stat = " Average"
174+ region = var.aws_region
175+ title = " RDS Read/Write Latency (seconds)" # values are plotted unconverted, so label the axis in seconds, matching the 0.01 (10ms) alarm threshold
176+ }
177+ },
178+ # RDS Disk Queue Depth
179+ {
180+ type = " metric"
181+ properties = {
182+ metrics = [
183+ [" AWS/RDS" , " DiskQueueDepth" , " DBInstanceIdentifier" , var.rds_instance_id, { stat = " Average" }]
184+ ]
185+ period = 300
186+ stat = " Average"
187+ region = var.aws_region
188+ title = " RDS Disk Queue Depth"
189+ }
190+ },
192+ # RDS Disk Throughput (Bytes/sec)
192+ {
193+ type = " metric"
194+ properties = {
195+ metrics = [
196+ [" AWS/RDS" , " ReadThroughput" , " DBInstanceIdentifier" , var.rds_instance_id, { stat = " Average" , label = " Read Throughput" }],
197+ [" ." , " WriteThroughput" , " ." , " ." , { stat = " Average" , label = " Write Throughput" }]
198+ ]
199+ period = 300
200+ stat = " Average"
201+ region = var.aws_region
202+ title = " RDS Disk Throughput (Bytes/sec)"
203+ }
147204 }
148205 ]
149206 })
@@ -176,21 +233,22 @@ resource "aws_cloudwatch_metric_alarm" "eb_cpu_high" {
176233 }
177234}
178235
179- # High Memory Alarm
180- resource "aws_cloudwatch_metric_alarm" "eb_memory_high" {
181- alarm_name = " ${ var . project_name } -${ var . environment } -eb-memory-high"
236+ # HTTP 5xx Error Rate Alarm
237+ # This monitors server errors which indicate application health issues
238+ resource "aws_cloudwatch_metric_alarm" "alb_http_5xx_errors" {
239+ alarm_name = " ${ var . project_name } -${ var . environment } -alb-http-5xx-high"
182240 comparison_operator = " GreaterThanThreshold"
183241 evaluation_periods = 2
184- metric_name = " mem_used_percent "
185- namespace = " CWAgent "
242+ metric_name = " HTTPCode_Target_5XX_Count "
243+ namespace = " AWS/ApplicationELB "
186244 period = 300
187- statistic = " Average "
188- threshold = 80
189- alarm_description = " This metric monitors EC2 memory utilization "
245+ statistic = " Sum "
246+ threshold = 10 # Alert if more than 10 5xx errors in 5 minutes
247+ alarm_description = " High rate of HTTP 5xx errors indicates application issues "
190248 alarm_actions = [var . sns_topic_arn ]
191249
192250 dimensions = {
193- AutoScalingGroupName = var.eb_autoscaling_group_name
251+ LoadBalancer = var.alb_arn_suffix
194252 }
195253
196254 tags = {
@@ -199,21 +257,25 @@ resource "aws_cloudwatch_metric_alarm" "eb_memory_high" {
199257 }
200258}
201259
202- # Critical Memory Alarm
203- resource "aws_cloudwatch_metric_alarm" "eb_memory_critical" {
204- alarm_name = " ${ var . project_name } -${ var . environment } -eb-memory-critical"
260+ # ############
261+ # RDS CloudWatch Alarms
262+ # ############
263+
264+ # High RDS CPU Alarm
265+ resource "aws_cloudwatch_metric_alarm" "rds_cpu_high" {
266+ alarm_name = " ${ var . project_name } -${ var . environment } -rds-cpu-high"
205267 comparison_operator = " GreaterThanThreshold"
206268 evaluation_periods = 2
207- metric_name = " mem_used_percent "
208- namespace = " CWAgent "
269+ metric_name = " CPUUtilization "
270+ namespace = " AWS/RDS "
209271 period = 300
210272 statistic = " Average"
211- threshold = 90
212- alarm_description = " This metric monitors EC2 memory utilization - CRITICAL "
273+ threshold = 75
274+ alarm_description = " RDS CPU utilization is high - may need optimization or larger instance "
213275 alarm_actions = [var . sns_topic_arn ]
214276
215277 dimensions = {
216- AutoScalingGroupName = var.eb_autoscaling_group_name
278+ DBInstanceIdentifier = var.rds_instance_id
217279 }
218280
219281 tags = {
@@ -222,22 +284,73 @@ resource "aws_cloudwatch_metric_alarm" "eb_memory_critical" {
222284 }
223285}
224286
225- # HTTP 5xx Error Rate Alarm
226- # This monitors server errors which indicate application health issues
227- resource "aws_cloudwatch_metric_alarm" "eb_http_5xx_errors" {
228- alarm_name = " ${ var . project_name } -${ var . environment } -eb-http-5xx-high"
287+ # High RDS Read Latency Alarm - fires when average ReadLatency stays above 0.01 s (10 ms) for 2 consecutive 5-minute periods
288+ resource "aws_cloudwatch_metric_alarm" "rds_read_latency_high" {
289+ alarm_name = " ${ var . project_name } -${ var . environment } -rds-read-latency-high"
229290 comparison_operator = " GreaterThanThreshold"
230291 evaluation_periods = 2
231- metric_name = " ApplicationRequests5xx "
232- namespace = " AWS/ElasticBeanstalk "
292+ metric_name = " ReadLatency "
293+ namespace = " AWS/RDS "
233294 period = 300
234- statistic = " Sum"
235- threshold = 10 # Alert if more than 10 5xx errors in 5 minutes
236- alarm_description = " High rate of HTTP 5xx errors indicates application issues"
295+ statistic = " Average"
296+ threshold = 0.01 # 10ms in seconds
297+ alarm_description = " RDS read latency is high - may indicate I/O bottleneck or need for indexing"
298+ alarm_actions = [var . sns_topic_arn ]
299+
300+ dimensions = {
301+ DBInstanceIdentifier = var.rds_instance_id
302+ }
303+
304+ tags = {
305+ Environment = var.environment
306+ Project = var.project_name
307+ }
308+ }
309+
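310+ # Low RDS Freeable Memory Alarm - fires when average FreeableMemory stays below 524288000 bytes (500 MiB) for 2 consecutive 5-minute periods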
311+ resource "aws_cloudwatch_metric_alarm" "rds_memory_low" {
312+ alarm_name = " ${ var . project_name } -${ var . environment } -rds-memory-low"
313+ comparison_operator = " LessThanThreshold"
314+ evaluation_periods = 2
315+ metric_name = " FreeableMemory"
316+ namespace = " AWS/RDS"
317+ period = 300
318+ statistic = " Average"
319+ threshold = 524288000 # 500MB in bytes
320+ alarm_description = " RDS freeable memory is low - may need larger instance or query optimization"
237321 alarm_actions = [var . sns_topic_arn ]
238322
239323 dimensions = {
240- EnvironmentName = var.eb_environment_name
324+ DBInstanceIdentifier = var.rds_instance_id
325+ }
326+
327+ tags = {
328+ Environment = var.environment
329+ Project = var.project_name
330+ }
331+ }
332+
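333+ # High EC2 Memory Alarm (Elastic Beanstalk instances, CWAgent MemoryUtilization per Auto Scaling group - not an RDS alarm)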
334+ resource "aws_cloudwatch_metric_alarm" "eb_memory_high" {
335+ alarm_name = " ${ var . project_name } -${ var . environment } -eb-memory-high"
336+ comparison_operator = " GreaterThanThreshold"
337+ evaluation_periods = 2
338+ threshold = 75
339+ alarm_description = " This metric monitors EC2 memory utilization"
340+ alarm_actions = [var . sns_topic_arn ]
341+
342+ metric_query {
343+ id = " m1"
344+ return_data = true
345+ metric {
346+ namespace = " CWAgent"
347+ metric_name = " MemoryUtilization"
348+ period = 300
349+ stat = " Average"
350+ dimensions = {
351+ AutoScalingGroupName = var.eb_autoscaling_group_name
352+ }
353+ }
241354 }
242355
243356 tags = {
0 commit comments