Skip to content

Commit 6d01e09

Browse files
committed
Add pipeline to advertise scancode.io scans
Signed-off-by: Keshav Priyadarshi <git@keshav.space>
1 parent 12c30d3 commit 6d01e09

5 files changed

Lines changed: 295 additions & 2 deletions

File tree

fedcode/pipelines/__init__.py

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,97 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# FederatedCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/federatedcode for support or download.
7+
# See https://aboutcode.org for more information about AboutCode.org OSS projects.
8+
#
9+
10+
import logging
11+
from datetime import datetime
12+
from datetime import timezone
13+
from timeit import default_timer as timer
14+
15+
from aboutcode.pipeline import BasePipeline
16+
from aboutcode.pipeline import humanize_time
17+
18+
module_logger = logging.getLogger(__name__)
19+
20+
21+
class classproperty(object):
22+
def __init__(self, fget):
23+
self.fget = fget
24+
25+
def __get__(self, owner_self, owner_cls):
26+
return self.fget(owner_cls)
27+
28+
29+
class FederatedCodePipeline(BasePipeline):
30+
pipeline_id = None # Unique Pipeline ID
31+
32+
def on_failure(self):
33+
"""
34+
Tasks to run in the event that pipeline execution fails.
35+
36+
Implement cleanup or other tasks that need to be performed
37+
on pipeline failure, such as:
38+
- Removing cloned repositories.
39+
- Deleting downloaded archives.
40+
"""
41+
pass
42+
43+
def execute(self):
44+
"""Execute each steps in the order defined on this pipeline class."""
45+
self.log(f"Pipeline [{self.pipeline_name}] starting")
46+
47+
steps = self.pipeline_class.get_steps(groups=self.selected_groups)
48+
steps_count = len(steps)
49+
pipeline_start_time = timer()
50+
51+
for current_index, step in enumerate(steps, start=1):
52+
step_name = step.__name__
53+
54+
if self.selected_steps and step_name not in self.selected_steps:
55+
self.log(f"Step [{step_name}] skipped")
56+
continue
57+
58+
self.set_current_step(f"{current_index}/{steps_count} {step_name}")
59+
self.log(f"Step [{step_name}] starting")
60+
step_start_time = timer()
61+
62+
try:
63+
step(self)
64+
except Exception as exception:
65+
self.log("Pipeline failed")
66+
on_failure_start_time = timer()
67+
self.log(f"Running [on_failure] tasks")
68+
self.on_failure()
69+
on_failure_run_time = timer() - on_failure_start_time
70+
self.log(f"Completed [on_failure] tasks in {humanize_time(on_failure_run_time)}")
71+
72+
return 1, self.output_from_exception(exception)
73+
74+
step_run_time = timer() - step_start_time
75+
self.log(f"Step [{step_name}] completed in {humanize_time(step_run_time)}")
76+
77+
self.set_current_step("") # Reset the `current_step` field on completion
78+
pipeline_run_time = timer() - pipeline_start_time
79+
self.log(f"Pipeline completed in {humanize_time(pipeline_run_time)}")
80+
81+
return 0, ""
82+
83+
def log(self, message, level=logging.INFO):
84+
"""Log the given `message` to the current module logger and execution_log."""
85+
now_local = datetime.now(timezone.utc).astimezone()
86+
timestamp = now_local.strftime("%Y-%m-%d %H:%M:%S.%f")[:-3]
87+
message = f"{timestamp} {message}"
88+
module_logger.log(level, message)
89+
self.append_to_log(message)
90+
91+
@classproperty
92+
def pipeline_id(cls):
93+
"""Return unique pipeline_id set in cls.pipeline_id"""
94+
95+
if cls.pipeline_id is None or cls.pipeline_id == "":
96+
raise NotImplementedError("pipeline_id is not defined or is empty")
97+
return cls.pipeline_id
Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,113 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# FederatedCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/federatedcode for support or download.
7+
# See https://aboutcode.org for more information about AboutCode.org OSS projects.
8+
#
9+
10+
from pathlib import Path
11+
from traceback import format_exc as traceback_format_exc
12+
13+
from aboutcode.pipeline import LoopProgress
14+
15+
from fedcode.models import Package
16+
from fedcode.models import Repository
17+
from fedcode.pipelines import FederatedCodePipeline
18+
from fedcode.pipes import utils
19+
20+
21+
class SyncScanCodeScans(FederatedCodePipeline):
22+
"""Sync Package scans from FederatedCode git repositories."""
23+
24+
pipeline_id = "sync_scancode_scans"
25+
26+
@classmethod
27+
def steps(cls):
28+
return (
29+
cls.get_git_repos,
30+
cls.sync_scan_repositories,
31+
)
32+
33+
def get_git_repos(self):
34+
self.git_repos = Repository.objects.all()
35+
36+
def sync_scan_repositories(self):
37+
repositories_count = self.git_repos.count()
38+
self.log(f"Syncing package scans from {repositories_count:,d} repositories")
39+
40+
synced_package_scan_count = 0
41+
progress = LoopProgress(total_iterations=repositories_count, logger=self.log)
42+
for repo in progress.iter(self.git_repos.iterator(chunk_size=2000)):
43+
repository, _ = Repository.objects.get_or_create(url=repo)
44+
repository.git_repo_obj.remotes.origin.pull()
45+
synced_package_scan_count += sync_scancodeio_scan(
46+
repository=repository,
47+
logger=self.log,
48+
)
49+
50+
self.log(f"Successfully synced {synced_package_scan_count:,d} package scans")
51+
52+
53+
def sync_scancodeio_scan(repository, logger):
54+
repo = repository.git_repo_obj
55+
latest_commit_hash = repo.head.commit.hexsha
56+
latest_commit = repo.commit(latest_commit_hash)
57+
58+
if last_commit_hash := repository.last_imported_commit:
59+
last_imported_commit = repo.commit(last_commit_hash)
60+
diffs = last_imported_commit.diff(latest_commit)
61+
scans = [item for item in diffs if item.a_path.endswith("scancodeio.json")]
62+
scan_count = sync_scan_from_diff(diffs=scans, repository=repository, logger=logger)
63+
else:
64+
scan_count = sync_all_scan(repository=repository, logger=logger)
65+
66+
repository.last_imported_commit = latest_commit_hash
67+
repository.save()
68+
69+
return scan_count
70+
71+
72+
def sync_scan_from_diff(diffs, repository, logger):
73+
scans = [
74+
item
75+
for item in diffs
76+
if item.a_path.endswith("scancodeio.json") or item.b_path.endswith("scancodeio.json")
77+
]
78+
scan_count = len(scans)
79+
80+
logger(f"Syncing {scan_count:,d} package scan from {repository.url}")
81+
progress = LoopProgress(total_iterations=scan_count, logger=logger)
82+
for scan in progress.iter(scans):
83+
change_type = scan.change_type
84+
if change_type in ("A", "M", "R"):
85+
scan_path = scan.b_path
86+
action = utils.create_note
87+
elif change_type == "D":
88+
scan_path = scan.a_path
89+
action = utils.delete_note
90+
91+
purl = utils.package_metadata_path_to_purl(path=Path(scan_path), version=False)
92+
package, _ = Package.objects.get_or_create(purl=str(purl), service=repository.admin)
93+
note = utils.get_scan_note(path=Path(scan_path))
94+
action(pkg=package, note_dict=note)
95+
return scan_count
96+
97+
98+
def sync_all_scan(repository, logger):
99+
repo = repository.git_repo_obj
100+
root = Path(repo.working_dir)
101+
scan_count = sum(1 for _ in root.rglob("scancodeio.json"))
102+
103+
scans = root.rglob("scancodeio.json")
104+
logger(f"Syncing {scan_count:,d} package scan from {repo.remotes.origin.url}")
105+
106+
progress = LoopProgress(total_iterations=scan_count, logger=logger)
107+
for scan in progress.iter(scans):
108+
relative_path = scan.relative_to(root)
109+
purl = utils.package_metadata_path_to_purl(relative_path, version=False)
110+
package, _ = Package.objects.get_or_create(purl=str(purl), service=repository.admin)
111+
note = utils.get_scan_note(path=relative_path)
112+
utils.create_note(pkg=package, note_dict=note)
113+
return scan_count

fedcode/pipes/utils.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# FederatedCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/federatedcode for support or download.
7+
# See https://aboutcode.org for more information about AboutCode.org OSS projects.
8+
#
9+
10+
import saneyaml
11+
from packageurl import PackageURL
12+
13+
from fedcode.activitypub import Activity
14+
from fedcode.activitypub import CreateActivity
15+
from fedcode.activitypub import DeleteActivity
16+
from fedcode.models import Note
17+
18+
19+
def create_note(pkg, note_dict):
20+
note, _ = Note.objects.get_or_create(acct=pkg.acct, content=saneyaml.dump(note_dict))
21+
pkg.notes.add(note)
22+
create_activity = CreateActivity(actor=pkg.to_ap, object=note.to_ap)
23+
Activity.federate(
24+
targets=pkg.followers_inboxes,
25+
body=create_activity.to_ap(),
26+
key_id=pkg.key_id,
27+
)
28+
29+
30+
def delete_note(pkg, note_dict):
31+
note = Note.objects.get(acct=pkg.acct, content=saneyaml.dump(note_dict))
32+
note_ap = note.to_ap
33+
note.delete()
34+
pkg.notes.remove(note)
35+
36+
deleted_activity = DeleteActivity(actor=pkg.to_ap, object=note_ap)
37+
Activity.federate(
38+
targets=pkg.followers_inboxes,
39+
body=deleted_activity.to_ap,
40+
key_id=pkg.key_id,
41+
)
42+
43+
44+
def package_metadata_path_to_purl(path, version=True):
45+
"""
46+
Return PURL from relative metadata path.
47+
48+
49+
"""
50+
parts = path.parts
51+
if len(parts) < 4:
52+
ValueError("Not a valid package metadata path.")
53+
54+
purl = f"pkg:{'/'.join(parts[:-2])}"
55+
if version:
56+
purl = f"{purl}@{parts[-2]}"
57+
return PackageURL.from_string(purl=purl)
58+
59+
60+
def get_scan_note(path):
61+
"""Return Note for Package scan."""
62+
purl = package_metadata_path_to_purl(path=path)
63+
64+
# TODO: Use tool-alias.yml to get tool for corresponding tool
65+
# for scan https://github.com/aboutcode-org/federatedcode/issues/24
66+
return {
67+
"purl": str(purl),
68+
"scans": [
69+
{
70+
"tool": "pkg:pypi/scancode-toolkit",
71+
"file_name": "scancodeio.json",
72+
},
73+
],
74+
}

requirements.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
aboutcode.pipeline==0.1.0
12
aboutcode-toolkit==10.1.0
23
alabaster==0.7.13
34
anyio==4.1.0

setup.cfg

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,15 +51,13 @@ install_requires =
5151
django-rest-framework>=0.1.0
5252
djangorestframework>=3.14.0
5353
django-environ>=0.10.0
54-
django-ninja>=1.2.1
5554
gunicorn>=21.2.0
5655
GitPython>=3.1.31
5756
requests>=2.31.0
5857
saneyaml>=0.6.0
5958
#
6059
httpx>=0.24.1
6160
http-message-signatures>=0.4.4
62-
pydantic>=2.8.2
6361

6462
anyio>=4.1.0
6563
asgiref>=3.7.2
@@ -85,7 +83,10 @@ install_requires =
8583
packageurl-python>=0.11.1
8684
packaging>=23.1
8785
pathspec>=0.11.2
86+
87+
#??
8888
Pillow>=9.5.0
89+
8990
platformdirs>=3.10.0
9091
pluggy>=1.0.0
9192
pycparser>=2.21
@@ -105,6 +106,13 @@ install_requires =
105106
unidiff>=0.7.5
106107
urllib3>=2.0.3
107108
wrapt>=1.15.0
109+
110+
#schema
111+
django-ninja>=1.2.1
112+
pydantic>=2.8.2
113+
114+
#pipeline
115+
aboutcode.pipeline>=0.1.0
108116

109117

110118
[options.extras_require]

0 commit comments

Comments
 (0)