Skip to content

Commit 720d7fb

Browse files
committed
fixed cloudwatch logs
1 parent 8d7e5f8 commit 720d7fb

6 files changed

Lines changed: 175 additions & 70 deletions

File tree

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
files:
2+
"/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json":
3+
mode: "000644"
4+
owner: root
5+
group: root
6+
content: |
7+
{
8+
"agent": {
9+
"metrics_collection_interval": 60,
10+
"run_as_user": "root"
11+
},
12+
"metrics": {
13+
"namespace": "CWAgent",
14+
"metrics_collected": {
15+
"mem": {
16+
"measurement": [
17+
{
18+
"name": "mem_used_percent",
19+
"unit": "Percent"
21+
}
22+
],
23+
"metrics_collection_interval": 60
24+
},
25+
"disk": {
26+
"measurement": [
27+
{
28+
"name": "used_percent",
29+
"rename": "DiskUtilization",
30+
"unit": "Percent"
31+
}
32+
],
33+
"metrics_collection_interval": 60,
34+
"resources": [
35+
"*"
36+
]
37+
}
38+
},
39+
"append_dimensions": {
40+
"AutoScalingGroupName": "${aws:AutoScalingGroupName}",
41+
"InstanceId": "${aws:InstanceId}"
42+
},
"aggregation_dimensions": [["AutoScalingGroupName"]]
43+
}
44+
}
45+
46+
commands:
47+
# Downloads the CloudWatch agent RPM with wget, installs it with `rpm -U`, then removes the RPM file.
# The `test` entry checks that the ctl binary is absent under /opt/aws/amazon-cloudwatch-agent/bin.
# NOTE(review): the inner `command -v amazon-cloudwatch-agent-ctl` guard looks redundant with `test`;
# presumably that bin directory is not on PATH, so the guard would always pass -- confirm.
01_install_cloudwatch_agent:
48+
command: |
49+
if ! command -v amazon-cloudwatch-agent-ctl &> /dev/null; then
50+
wget -q https://s3.amazonaws.com/amazoncloudwatch-agent/amazon_linux/amd64/latest/amazon-cloudwatch-agent.rpm
51+
rpm -U ./amazon-cloudwatch-agent.rpm
52+
rm -f ./amazon-cloudwatch-agent.rpm
53+
fi
54+
test: "[ ! -f /opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl ]"
55+
56+
02_start_cloudwatch_agent:
57+
command: |
58+
/opt/aws/amazon-cloudwatch-agent/bin/amazon-cloudwatch-agent-ctl \
59+
-a fetch-config \
60+
-m ec2 \
61+
-c file:/opt/aws/amazon-cloudwatch-agent/etc/amazon-cloudwatch-agent.json \
62+
-s
63+
ignoreErrors: true

infrastructure/environments/production/main.tf

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -137,8 +137,8 @@ module "rds" {
137137

138138
# Monitoring
139139
performance_insights_enabled = true
140-
# Note: Alarms will be created by monitoring module
141-
# alarm_actions = [] # Empty for now to avoid circular dependency
140+
# Pass SNS topic for alarm notifications
141+
alarm_actions = [aws_sns_topic.alerts.arn]
142142
}
143143

144144
# Note: ACM Certificates are now created in the DNS module
@@ -240,6 +240,19 @@ module "frontend" {
240240
}
241241
}
242242

243+
#############
244+
# SNS Topic for Alerts (defined at the root so the rds and monitoring modules can both reference it without a circular dependency)
245+
#############
246+
resource "aws_sns_topic" "alerts" {
247+
name = "${local.project_name}-${local.environment}-alerts"
248+
249+
tags = {
250+
Name = "${local.project_name}-${local.environment}-alerts"
251+
Environment = local.environment
252+
Project = local.project_name
253+
}
254+
}
255+
243256
#############
244257
# Monitoring Module
245258
#############
@@ -253,4 +266,5 @@ module "monitoring" {
253266
eb_autoscaling_group_name = module.elasticbeanstalk.autoscaling_groups[0]
254267
rds_instance_id = module.rds.db_instance_id
255268
log_retention_days = 30
269+
sns_topic_arn = aws_sns_topic.alerts.arn
256270
}

infrastructure/environments/production/outputs.tf

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,7 +117,7 @@ output "cloudwatch_dashboard_name" {
117117

118118
output "sns_topic_arn" {
119119
description = "SNS topic ARN for alerts"
120-
value = module.monitoring.sns_topic_arn
120+
value = aws_sns_topic.alerts.arn
121121
}
122122

123123
#####################

infrastructure/modules/monitoring/main.tf

Lines changed: 90 additions & 62 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,5 @@
11
# Monitoring Module - CloudWatch Dashboards and Alarms
22

3-
#############
4-
# SNS Topic for Alerts
5-
#############
6-
resource "aws_sns_topic" "alerts" {
7-
name = "${var.project_name}-${var.environment}-alerts"
8-
9-
tags = {
10-
Name = "${var.project_name}-${var.environment}-alerts"
11-
Environment = var.environment
12-
Project = var.project_name
13-
}
14-
}
15-
16-
# TODO: Add email subscriptions
17-
# resource "aws_sns_topic_subscription" "alerts_email" {
18-
# topic_arn = aws_sns_topic.alerts.arn
19-
# protocol = "email"
20-
# endpoint = "your-email@example.com"
21-
# }
22-
233
#############
244
# CloudWatch Dashboard
255
#############
@@ -28,64 +8,123 @@ resource "aws_cloudwatch_dashboard" "main" {
288

299
dashboard_body = jsonencode({
3010
widgets = [
31-
# EB CPU Utilization
11+
# EC2 CPU Utilization
3212
{
3313
type = "metric"
3414
properties = {
3515
metrics = [
36-
["AWS/ElasticBeanstalk", "EnvironmentHealth", { stat = "Average" }]
16+
["AWS/EC2", "CPUUtilization", "AutoScalingGroupName", var.eb_autoscaling_group_name, { stat = "Average" }]
3717
]
3818
period = 300
3919
stat = "Average"
4020
region = var.aws_region
41-
title = "Environment Health"
21+
title = "EC2 CPU Utilization (%)"
22+
yAxis = {
23+
left = {
24+
min = 0
25+
max = 100
26+
}
27+
}
4228
}
4329
},
44-
# EB Memory Utilization
30+
# EC2 Memory Utilization
4531
{
4632
type = "metric"
4733
properties = {
4834
metrics = [
49-
["CWAgent", "mem_used_percent", "AutoScalingGroupName", var.eb_autoscaling_group_name]
35+
["CWAgent", "mem_used_percent", "AutoScalingGroupName", var.eb_autoscaling_group_name, { stat = "Average" }]
5036
]
5137
period = 300
5238
stat = "Average"
5339
region = var.aws_region
54-
title = "Memory Utilization (%)"
40+
title = "EC2 Memory Utilization (%)"
41+
yAxis = {
42+
left = {
43+
min = 0
44+
max = 100
45+
}
46+
}
5547
}
5648
},
5749
# EB Request Count
5850
{
5951
type = "metric"
6052
properties = {
6153
metrics = [
62-
["AWS/ElasticBeanstalk", "RequestCount", { stat = "Sum" }]
54+
["AWS/ElasticBeanstalk", "RequestCount", "EnvironmentName", var.eb_environment_name, { stat = "Sum" }]
6355
]
6456
period = 300
6557
stat = "Sum"
6658
region = var.aws_region
6759
title = "Request Count"
6860
}
6961
},
70-
# RDS CPU
62+
# HTTP 5xx Errors
7163
{
7264
type = "metric"
7365
properties = {
7466
metrics = [
75-
["AWS/RDS", "CPUUtilization", "DBInstanceIdentifier", var.rds_instance_id]
67+
["AWS/ElasticBeanstalk", "ApplicationRequests5xx", "EnvironmentName", var.eb_environment_name, { stat = "Sum" }]
68+
]
69+
period = 300
70+
stat = "Sum"
71+
region = var.aws_region
72+
title = "HTTP 5xx Errors"
73+
}
74+
},
75+
# RDS CPU Utilization
76+
{
77+
type = "metric"
78+
properties = {
79+
metrics = [
80+
["AWS/RDS", "CPUUtilization", "DBInstanceIdentifier", var.rds_instance_id, { stat = "Average" }]
7681
]
7782
period = 300
7883
stat = "Average"
7984
region = var.aws_region
80-
title = "RDS CPU Utilization"
85+
title = "RDS CPU Utilization (%)"
86+
yAxis = {
87+
left = {
88+
min = 0
89+
max = 100
90+
}
91+
}
8192
}
8293
},
83-
# RDS Connections
94+
# RDS Read/Write IOPS
8495
{
8596
type = "metric"
8697
properties = {
8798
metrics = [
88-
["AWS/RDS", "DatabaseConnections", "DBInstanceIdentifier", var.rds_instance_id]
99+
["AWS/RDS", "ReadIOPS", "DBInstanceIdentifier", var.rds_instance_id, { stat = "Average", label = "Read IOPS" }],
100+
[".", "WriteIOPS", ".", ".", { stat = "Average", label = "Write IOPS" }]
101+
]
102+
period = 300
103+
stat = "Average"
104+
region = var.aws_region
105+
title = "RDS Read/Write IOPS"
106+
}
107+
},
108+
# RDS Network Throughput
109+
{
110+
type = "metric"
111+
properties = {
112+
metrics = [
113+
["AWS/RDS", "NetworkReceiveThroughput", "DBInstanceIdentifier", var.rds_instance_id, { stat = "Average", label = "Network In" }],
114+
[".", "NetworkTransmitThroughput", ".", ".", { stat = "Average", label = "Network Out" }]
115+
]
116+
period = 300
117+
stat = "Average"
118+
region = var.aws_region
119+
title = "RDS Network Throughput (Bytes/sec)"
120+
}
121+
},
122+
# RDS Database Connections
123+
{
124+
type = "metric"
125+
properties = {
126+
metrics = [
127+
["AWS/RDS", "DatabaseConnections", "DBInstanceIdentifier", var.rds_instance_id, { stat = "Average" }]
89128
]
90129
period = 300
91130
stat = "Average"
@@ -98,12 +137,12 @@ resource "aws_cloudwatch_dashboard" "main" {
98137
type = "metric"
99138
properties = {
100139
metrics = [
101-
["AWS/RDS", "FreeableMemory", "DBInstanceIdentifier", var.rds_instance_id]
140+
["AWS/RDS", "FreeableMemory", "DBInstanceIdentifier", var.rds_instance_id, { stat = "Average" }]
102141
]
103142
period = 300
104143
stat = "Average"
105144
region = var.aws_region
106-
title = "RDS Freeable Memory"
145+
title = "RDS Freeable Memory (Bytes)"
107146
}
108147
}
109148
]
@@ -125,7 +164,7 @@ resource "aws_cloudwatch_metric_alarm" "eb_cpu_high" {
125164
statistic = "Average"
126165
threshold = 80
127166
alarm_description = "This metric monitors EC2 CPU utilization"
128-
alarm_actions = [aws_sns_topic.alerts.arn]
167+
alarm_actions = [var.sns_topic_arn]
129168

130169
dimensions = {
131170
AutoScalingGroupName = var.eb_autoscaling_group_name
@@ -137,15 +176,7 @@ resource "aws_cloudwatch_metric_alarm" "eb_cpu_high" {
137176
}
138177
}
139178

140-
# NOTE: Memory alarms below require CloudWatch Agent to be installed on EB instances.
141-
# To enable memory monitoring:
142-
# 1. Create .ebextensions/cloudwatch-agent.config in your app
143-
# 2. Follow AWS docs: https://docs.aws.amazon.com/AmazonCloudWatch/latest/monitoring/Install-CloudWatch-Agent.html
144-
# 3. Uncomment the alarms below
145-
146-
# High Memory Alarm (requires CloudWatch Agent) - COMMENTED OUT
147-
# Uncomment after installing CloudWatch Agent
148-
/*
179+
# High Memory Alarm
149180
resource "aws_cloudwatch_metric_alarm" "eb_memory_high" {
150181
alarm_name = "${var.project_name}-${var.environment}-eb-memory-high"
151182
comparison_operator = "GreaterThanThreshold"
@@ -156,7 +187,7 @@ resource "aws_cloudwatch_metric_alarm" "eb_memory_high" {
156187
statistic = "Average"
157188
threshold = 80
158189
alarm_description = "This metric monitors EC2 memory utilization"
159-
alarm_actions = [aws_sns_topic.alerts.arn]
190+
alarm_actions = [var.sns_topic_arn]
160191

161192
dimensions = {
162193
AutoScalingGroupName = var.eb_autoscaling_group_name
@@ -167,11 +198,8 @@ resource "aws_cloudwatch_metric_alarm" "eb_memory_high" {
167198
Project = var.project_name
168199
}
169200
}
170-
*/
171201

172-
# Critical Memory Alarm (requires CloudWatch Agent) - COMMENTED OUT
173-
# Uncomment after installing CloudWatch Agent
174-
/*
202+
# Critical Memory Alarm
175203
resource "aws_cloudwatch_metric_alarm" "eb_memory_critical" {
176204
alarm_name = "${var.project_name}-${var.environment}-eb-memory-critical"
177205
comparison_operator = "GreaterThanThreshold"
@@ -182,7 +210,7 @@ resource "aws_cloudwatch_metric_alarm" "eb_memory_critical" {
182210
statistic = "Average"
183211
threshold = 90
184212
alarm_description = "This metric monitors EC2 memory utilization - CRITICAL"
185-
alarm_actions = [aws_sns_topic.alerts.arn]
213+
alarm_actions = [var.sns_topic_arn]
186214

187215
dimensions = {
188216
AutoScalingGroupName = var.eb_autoscaling_group_name
@@ -193,20 +221,20 @@ resource "aws_cloudwatch_metric_alarm" "eb_memory_critical" {
193221
Project = var.project_name
194222
}
195223
}
196-
*/
197-
198-
# Environment Health Alarm
199-
resource "aws_cloudwatch_metric_alarm" "eb_environment_health" {
200-
alarm_name = "${var.project_name}-${var.environment}-eb-health-degraded"
201-
comparison_operator = "LessThanThreshold"
202-
evaluation_periods = 1
203-
metric_name = "EnvironmentHealth"
224+
225+
# HTTP 5xx Error Rate Alarm
226+
# This monitors server errors which indicate application health issues
227+
resource "aws_cloudwatch_metric_alarm" "eb_http_5xx_errors" {
228+
alarm_name = "${var.project_name}-${var.environment}-eb-http-5xx-high"
229+
comparison_operator = "GreaterThanThreshold"
230+
evaluation_periods = 2
231+
metric_name = "ApplicationRequests5xx"
204232
namespace = "AWS/ElasticBeanstalk"
205233
period = 300
206-
statistic = "Average"
207-
threshold = 15 # Healthy = 25, Warning = 15, Degraded = 10
208-
alarm_description = "Environment health is degraded"
209-
alarm_actions = [aws_sns_topic.alerts.arn]
234+
statistic = "Sum"
235+
threshold = 10 # Alert when the 5xx count exceeds 10 per 5-minute period across 2 evaluation periods
236+
alarm_description = "High rate of HTTP 5xx errors indicates application issues"
237+
alarm_actions = [var.sns_topic_arn]
210238

211239
dimensions = {
212240
EnvironmentName = var.eb_environment_name

infrastructure/modules/monitoring/outputs.tf

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,5 @@
11
# Monitoring Module Outputs
22

3-
output "sns_topic_arn" {
4-
description = "ARN of the SNS topic for alerts"
5-
value = aws_sns_topic.alerts.arn
6-
}
7-
83
output "dashboard_name" {
94
description = "Name of the CloudWatch dashboard"
105
value = aws_cloudwatch_dashboard.main.dashboard_name

infrastructure/modules/monitoring/variables.tf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,3 +35,8 @@ variable "log_retention_days" {
3535
type = number
3636
default = 7
3737
}
38+
39+
variable "sns_topic_arn" {
40+
description = "ARN of the SNS topic for alarm notifications"
41+
type = string
42+
}

0 commit comments

Comments
 (0)