Skip to content

Commit 8d3ccec

Browse files
committed
Migrate vulnerability sync to the pipeline
Signed-off-by: Keshav Priyadarshi <git@keshav.space>
1 parent 6d01e09 commit 8d3ccec

1 file changed

Lines changed: 72 additions & 51 deletions

File tree

Lines changed: 72 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -9,83 +9,104 @@
99

1010
import logging
1111
import os.path
12-
from dataclasses import dataclass
1312
from itertools import zip_longest
1413

1514
import saneyaml
15+
from aboutcode.pipeline import LoopProgress
1616

1717
from fedcode.activitypub import Activity
1818
from fedcode.activitypub import UpdateActivity
1919
from fedcode.models import Note
2020
from fedcode.models import Package
2121
from fedcode.models import Repository
22-
from fedcode.models import Service
2322
from fedcode.models import Vulnerability
23+
from fedcode.pipelines import FederatedCodePipeline
2424
from fedcode.pipes import utils
2525

26-
logger = logging.getLogger(__name__)
2726

27+
class SyncVulnerableCode(FederatedCodePipeline):
    """Sync VulnerableCode data from FederatedCode git repositories."""

    pipeline_id = "sync_vulnerablecode"

    @classmethod
    def steps(cls):
        """Return the pipeline steps in the order they are listed."""
        ordered_steps = (
            cls.get_git_repos,
            cls.sync_vulnerablecode_repositories,
        )
        return ordered_steps

    def get_git_repos(self):
        """Store the queryset of all ``Repository`` objects on ``self.git_repos``."""
        all_repositories = Repository.objects.all()
        self.git_repos = all_repositories

    def sync_vulnerablecode_repositories(self):
        """Pull each repository from its ``origin`` remote, then sync its changes."""
        total = self.git_repos.count()
        self.log(f"Syncing vulnerability from {total:,d} repositories")

        tracker = LoopProgress(total_iterations=total, logger=self.log)
        repositories = self.git_repos.iterator(chunk_size=2000)
        for repo_record in tracker.iter(repositories):
            # Refresh the local checkout before computing what changed.
            origin = repo_record.git_repo_obj.remotes.origin
            origin.pull()
            sync_vulnerabilities(repository=repo_record, logger=self.log)
6353

64-
if os.path.split(diff.a_path)[1].startswith("VCID") or os.path.split(diff.b_path)[
65-
1
66-
].startswith("VCID"):
67-
vul_handler(
68-
diff.change_type,
69-
self.repo_obj,
70-
yaml_data_a_blob,
71-
yaml_data_b_blob,
72-
diff.a_path,
73-
diff.b_path,
74-
)
75-
continue
7654

77-
pkg_handler(
55+
def sync_vulnerabilities(repository, logger):
    """
    Import the YAML changes committed to ``repository`` since its
    ``last_imported_commit`` and record the current HEAD as imported.

    ``repository`` must expose ``git_repo_obj``, ``last_imported_commit``,
    ``url``, ``admin`` and ``save()``. ``logger`` is a callable taking a message
    and an optional ``level`` keyword.

    Each changed ``.yaml`` file whose basename starts with "VCID" is passed to
    ``vul_handler``; every other changed ``.yaml`` file is passed to
    ``pkg_handler``. Paths starting with "." are ignored.
    """
    # SHA-1 of git's empty tree; diffing against it lists every file in a commit.
    empty_tree_hash = "4b825dc642cb6eb9a060e54bf8d69288fbee4904"

    repo = repository.git_repo_obj
    latest_commit_hash = repo.head.commit.hexsha

    # Bail out before computing any diff when HEAD was already imported.
    if latest_commit_hash == repository.last_imported_commit:
        logger("Nothing to import!", level=logging.ERROR)
        return

    latest_commit = repo.commit(latest_commit_hash)
    if repository.last_imported_commit:
        previous_commit = repo.commit(repository.last_imported_commit)
        diffs = previous_commit.diff(latest_commit)
    else:
        # First import: diff between the empty tree and the latest commit.
        # R=True swaps the sides so that files show up as added.
        diffs = latest_commit.diff(empty_tree_hash, R=True)

    diff_count = len(diffs)

    logger(f"Syncing {diff_count:,d} vulnerability scan from {repository.url}")
    progress = LoopProgress(total_iterations=diff_count, logger=logger)
    for diff in progress.iter(diffs):
        if not diff.a_path.endswith(".yaml"):
            continue

        if diff.a_path.startswith("."):
            continue

        yaml_data_a_blob = saneyaml.load(diff.a_blob.data_stream.read()) if diff.a_blob else None
        yaml_data_b_blob = saneyaml.load(diff.b_blob.data_stream.read()) if diff.b_blob else None

        is_vulnerability_file = os.path.basename(diff.a_path).startswith(
            "VCID"
        ) or os.path.basename(diff.b_path).startswith("VCID")
        if is_vulnerability_file:
            vul_handler(
                diff.change_type,
                repository,
                yaml_data_a_blob,
                yaml_data_b_blob,
                logger,
            )
            continue

        # Every package file must be handled: the loop must not stop early,
        # because HEAD is recorded as fully imported right after it.
        pkg_handler(
            diff.change_type,
            repository.admin,
            yaml_data_a_blob,
            yaml_data_b_blob,
        )

    repository.last_imported_commit = latest_commit_hash
    repository.save()
    logger("The Importer run successfully")
86107

87108

88-
def vul_handler(change_type, repo_obj, yaml_data_a_blob, yaml_data_b_blob, a_path, b_path):
109+
def vul_handler(change_type, repo_obj, yaml_data_a_blob, yaml_data_b_blob, logger):
89110
if change_type == "A": # A for added paths
90111
Vulnerability.objects.get_or_create(
91112
id=yaml_data_b_blob.get("vulnerability_id"),
@@ -108,7 +129,7 @@ def vul_handler(change_type, repo_obj, yaml_data_a_blob, yaml_data_b_blob, a_pat
108129
)
109130
vul.delete()
110131
else:
111-
logger.error(f"Invalid Vulnerability File")
132+
logger(f"Invalid Vulnerability File", level=logging.ERROR)
112133

113134

114135
def pkg_handler(change_type, default_service, yaml_data_a_blob, yaml_data_b_blob):

0 commit comments

Comments
 (0)