-
Notifications
You must be signed in to change notification settings - Fork 9
Expand file tree
/
Copy pathseed_spam_rules.py
More file actions
116 lines (102 loc) · 3.71 KB
/
seed_spam_rules.py
File metadata and controls
116 lines (102 loc) · 3.71 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# Script to seed the database with predefined spam rules
import os
import django
# Use the project's settings module unless DJANGO_SETTINGS_MODULE is already
# set in the environment (setdefault never overrides an existing value).
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "forums.settings")
# django.setup() is called before the model import below; keep this order,
# since Django models generally cannot be imported until setup has run.
django.setup()
from django.db.models import Q
from website.models import SpamRule
def seed_spam_rules():
    """Create one SpamRule row per predefined pattern, skipping known ones.

    The catalogue below groups regex patterns into categories. Every category
    supplies the score and rule type shared by its patterns, and the category
    name is stored in the ``notes`` field of each created row.

    A pattern is treated as already present when a SpamRule with the same
    ``pattern`` and ``type`` exists; such patterns are counted as skipped and
    left untouched. A one-line summary of both counts is printed at the end.
    """
    catalogue = {
        # Exam-dump / certification-prep vocabulary
        "Certification/Exam Spam": {
            "score": 30,
            "type": SpamRule.KEYWORD,
            "patterns": [
                r"exam\s+dumps?",
                r"braindumps?",
                r"practice\s+test",
                r"certification\s+exam",
                r"test\s+preparation",
                r"exam\s+questions?",
                r"study\s+guides?",
                r"pdf\s+\+\s+testing\s+engine",
                r"testing\s+engine",
                r"exam\s+prep",
                r"mock\s+exam",
                r"real\s+exam",
                r"dumps\s+pdf",
                r"braindump",
            ],
        },
        # Marketing call-to-action phrases
        "Promotional Spam": {
            "score": 25,
            "type": SpamRule.KEYWORD,
            "patterns": [
                r"click\s+here",
                r"join\s+now",
                r"limited\s+time",
                r"discount",
                r"coupon\s+code",
                r"20%\s+off",
                r"free\s+download",
                r"get\s+certified",
                r"unlock\s+your\s+career",
                r"master\s+the",
                r"boost\s+your\s+career",
                r"cert20",
                r"at\s+checkout",
                r"special\s+offer",
            ],
        },
        # Specific domains plus broad dumps*/cert*/exam* .com wildcards
        "Suspicious Domain": {
            "score": 35,
            "type": SpamRule.DOMAIN,
            "patterns": [
                r"dumpscafe\.com",
                r"certsout\.com",
                r"mycertshub\.com",
                r"vmexam\.com",
                r"kissnutra\.com",
                r"dumps.*\.com",
                r"cert.*\.com",
                r"exam.*\.com",
            ],
        },
        # Boilerplate career/credential phrasing (lowest score of all groups)
        "Business/Career Spam": {
            "score": 15,
            "type": SpamRule.KEYWORD,
            "patterns": [
                r"attests\s+to\s+your\s+proficiency",
                r"esteemed\s+(?:accreditation|certification|credential)",
                r"valuable\s+asset\s+to\s+companies",
                r"demonstrates\s+your\s+ability",
                r"comprehensive\s+study\s+(?:tools|materials)",
                r"interactive\s+practice\s+tests",
                r"real\s+exam\s+questions",
                r"actual\s+exam\s+questions",
                r"validated\s+by\s+.*certification",
                r"urgently\s+need\s+experts",
            ],
        },
        # Clicker-game promotion phrases
        "Gaming Spam": {
            "score": 20,
            "type": SpamRule.KEYWORD,
            "patterns": [
                r"spacebar\s+clicker",
                r"clicker\s+game",
                r"addictive\s+game",
                r"upgrades\s+available",
                r"instant\s+rewards",
            ],
        },
        # Health/supplement vocabulary (the terms listed here are German)
        "Health Spam": {
            "score": 22,
            "type": SpamRule.KEYWORD,
            "patterns": [
                r"vitalit[äa]t",
                r"nahrungserg[äa]nzungsmittel",
                r"libido",
                r"fruchtbarkeit",
                r"energie",
                r"hormonelle\s+balance",
                r"perforan",
            ],
        },
    }

    inserted = 0
    skipped = 0
    for category, spec in catalogue.items():
        rule_type = spec["type"]
        rule_score = spec["score"]
        for regex in spec["patterns"]:
            # A rule is identified by its (pattern, type) pair.
            already_present = SpamRule.objects.filter(
                pattern=regex, type=rule_type
            ).exists()
            if already_present:
                skipped += 1
                continue
            SpamRule.objects.create(
                type=rule_type,
                pattern=regex,
                score=rule_score,
                notes=category,
            )
            inserted += 1

    print(f"✅ Inserted {inserted} new rules, skipped {skipped} existing ones.")
# Run the seeding whenever this file is executed. The call is unconditional
# (no __main__ guard), so importing this module triggers it as well.
seed_spam_rules()