Skip to content

feat: improve VSA generation with digest for each subject #685

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 14 commits into from
Apr 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
124 changes: 124 additions & 0 deletions src/macaron/artifact/maven.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Copyright (c) 2024 - 2024, Oracle and/or its affiliates. All rights reserved.
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/.

"""This module declares types and utilities for Maven artifacts."""

from packageurl import PackageURL

from macaron.slsa_analyzer.provenance.intoto import InTotoPayload
from macaron.slsa_analyzer.provenance.intoto.v01 import InTotoV01Subject
from macaron.slsa_analyzer.provenance.intoto.v1 import InTotoV1ResourceDescriptor
from macaron.slsa_analyzer.provenance.witness import (
extract_build_artifacts_from_witness_subjects,
is_witness_provenance_payload,
load_witness_verifier_config,
)


class MavenSubjectPURLMatcher:
    """Matcher that pairs a Maven artifact PURL with a subject of a provenance."""

    @staticmethod
    def get_subject_in_provenance_matching_purl(
        provenance_payload: InTotoPayload, purl: PackageURL
    ) -> InTotoV01Subject | InTotoV1ResourceDescriptor | None:
        """Look up the provenance subject that corresponds to the given Maven PURL.

        The last path segment of each subject name is treated as an artifact
        filename and turned into a Maven PURL (reusing the namespace and version
        of ``purl``); the first subject whose PURL equals ``purl`` is returned.

        Parameters
        ----------
        provenance_payload : InTotoPayload
            The provenance payload whose subjects are searched.
        purl : PackageURL
            The PackageURL identifying the subject to look for.

        Returns
        -------
        InTotoV01Subject | InTotoV1ResourceDescriptor | None
            The matching subject, or ``None`` when the PURL is not a Maven PURL
            with both a namespace and a version, when the payload is not
            accepted by ``is_witness_provenance_payload``, or when no subject
            matches.
        """
        # Only Maven PURLs carrying both a group id and a version can be matched.
        if purl.type != "maven" or not purl.namespace or not purl.version:
            return None

        witness_config = load_witness_verifier_config()
        if not is_witness_provenance_payload(
            payload=provenance_payload,
            predicate_types=witness_config.predicate_types,
        ):
            return None

        for candidate in extract_build_artifacts_from_witness_subjects(provenance_payload):
            # Keep only the part after the last "/" (the whole name if there is none).
            filename = candidate["name"].rsplit("/", 1)[-1]
            candidate_purl = create_maven_purl_from_artifact_filename(
                artifact_filename=filename,
                group_id=purl.namespace,
                version=purl.version,
            )
            if candidate_purl == purl:
                return candidate

        return None


def create_maven_purl_from_artifact_filename(
    artifact_filename: str,
    group_id: str,
    version: str,
) -> PackageURL | None:
    """Create a Maven PackageURL given an artifact filename, a group id, and a version.

    For reference, see:
    - https://maven.apache.org/ref/3.9.6/maven-core/artifact-handlers.html
    - https://github.com/package-url/purl-spec/blob/master/PURL-TYPES.rst#maven
    Notes:
    - For the time being, we only support the ``"type"`` qualifier, although the
      Maven section in the PackageURL docs also mentions the ``"classifier"`` qualifier.
      This is because not all artifact types have a unique value of ``"classifier"``
      according to the Artifact Handlers table in the Maven Core reference. In addition,
      not supporting the ``"classifier"`` qualifier at the moment simplifies the
      implementation for PURL decoding and generation until there is a concrete use
      case for this additional qualifier.
    - We only support 4 artifact types: jar, pom, javadoc, and java-source.

    Parameters
    ----------
    artifact_filename : str
        The filename of the artifact.
    group_id : str
        The group id of the artifact.
    version : str
        The version of the artifact.

    Returns
    -------
    PackageURL | None
        A Maven artifact PackageURL, or ``None`` if the filename does not follow any
        of the supported artifact name patterns (including the case where the
        artifact id part of the filename is empty).
    """
    # Each artifact name should follow the pattern "<artifact-id><suffix>"
    # where "<suffix>" (which already starts with "-<version>") is one of the following.
    suffix_to_purl_qualifiers = {
        f"-{version}.jar": {"type": "jar"},
        f"-{version}.pom": {"type": "pom"},
        f"-{version}-javadoc.jar": {"type": "javadoc"},
        f"-{version}-sources.jar": {"type": "java-source"},
    }

    for suffix, purl_qualifiers in suffix_to_purl_qualifiers.items():
        if not artifact_filename.endswith(suffix):
            continue

        artifact_id = artifact_filename.removesuffix(suffix)
        if not artifact_id:
            # The filename is nothing but the suffix. PackageURL rejects an empty
            # name, so treat this as "pattern not followed" instead of raising.
            continue

        return PackageURL(
            type="maven",
            namespace=group_id,
            name=artifact_id,
            version=version,
            qualifiers=purl_qualifiers,
        )

    return None
79 changes: 78 additions & 1 deletion src/macaron/database/table_definitions.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import string
from datetime import datetime
from pathlib import Path
from typing import Any
from typing import Any, Self

from packageurl import PackageURL
from sqlalchemy import (
Expand All @@ -32,9 +32,11 @@
)
from sqlalchemy.orm import Mapped, mapped_column, relationship

from macaron.artifact.maven import MavenSubjectPURLMatcher
from macaron.database.database_manager import ORMBase
from macaron.database.rfc3339_datetime import RFC3339DateTime
from macaron.errors import InvalidPURLError
from macaron.slsa_analyzer.provenance.intoto import InTotoPayload, ProvenanceSubjectPURLMatcher
from macaron.slsa_analyzer.slsa_req import ReqName

logger: logging.Logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -168,6 +170,13 @@ class Component(PackageURLMixin, ORMBase):
secondaryjoin=components_association_table.c.child_component == id,
)

#: The optional one-to-one relationship with a provenance subject in case this
#: component represents a subject in a provenance.
provenance_subject: Mapped["ProvenanceSubject | None"] = relationship(
back_populates="component",
lazy="immediate",
)

def __init__(self, purl: str, analysis: Analysis, repository: "Repository | None"):
"""
Instantiate the software component using PURL identifier.
Expand Down Expand Up @@ -528,3 +537,71 @@ class HashDigest(ORMBase):

#: The many-to-one relationship with artifacts.
artifact: Mapped["ReleaseArtifact"] = relationship(back_populates="digests", lazy="immediate")


class ProvenanceSubject(ORMBase):
    """A subject in a provenance that matches the user-provided PackageURL.

    This subject may be later populated in VSAs during policy verification.
    """

    __tablename__ = "_provenance_subject"

    #: The primary key.
    id: Mapped[int] = mapped_column(Integer, primary_key=True, autoincrement=True)  # noqa: A003

    #: The component id of the provenance subject.
    component_id: Mapped[int] = mapped_column(
        Integer,
        ForeignKey("_component.id"),
        nullable=False,
    )

    #: The required one-to-one relationship with a component.
    component: Mapped[Component] = relationship(
        back_populates="provenance_subject",
        lazy="immediate",
    )

    #: The SHA256 hash of the subject.
    sha256: Mapped[str] = mapped_column(String, nullable=False)

    @classmethod
    def from_purl_and_provenance(
        cls,
        purl: PackageURL,
        provenance_payload: InTotoPayload,
    ) -> Self | None:
        """Create a ``ProvenanceSubject`` entry if there is a provenance subject matching the PURL.

        Each registered subject matcher is tried in order; the first one that finds
        a subject for the PURL decides the result. Only the ``sha256`` column is
        populated here; the caller is responsible for associating the returned
        entry with a component.

        Parameters
        ----------
        purl : PackageURL
            The PackageURL identifying the software component being analyzed.
        provenance_payload : InTotoPayload
            The provenance payload.

        Returns
        -------
        Self | None
            A ``ProvenanceSubject`` entry with the SHA256 digest of the provenance subject
            matching the given PURL, or ``None`` if no matcher finds a subject or the
            matched subject carries no SHA256 digest.
        """
        subject_artifact_types: list[ProvenanceSubjectPURLMatcher] = [MavenSubjectPURLMatcher]

        for subject_artifact_type in subject_artifact_types:
            subject = subject_artifact_type.get_subject_in_provenance_matching_purl(
                provenance_payload,
                purl,
            )
            if subject is None:
                # This matcher found nothing; give the remaining matchers a chance
                # instead of giving up after the first one.
                continue
            digest = subject["digest"]
            if digest is None:
                return None
            sha256 = digest.get("sha256")
            if not sha256:
                return None
            return cls(sha256=sha256)

        return None
34 changes: 26 additions & 8 deletions src/macaron/slsa_analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
from macaron.config.global_config import global_config
from macaron.config.target_config import Configuration
from macaron.database.database_manager import DatabaseManager, get_db_manager, get_db_session
from macaron.database.table_definitions import Analysis, Component, Repository
from macaron.database.table_definitions import Analysis, Component, ProvenanceSubject, Repository
from macaron.dependency_analyzer import DependencyAnalyzer, DependencyInfo
from macaron.errors import (
CloneError,
Expand Down Expand Up @@ -332,7 +332,12 @@ def run_single(
# Create the component.
component = None
try:
component = self.add_component(analysis, analysis_target, existing_records)
component = self.add_component(
analysis,
analysis_target,
existing_records,
provenance_payload,
)
except PURLNotFoundError as error:
logger.error(error)
return Record(
Expand Down Expand Up @@ -484,6 +489,7 @@ def add_component(
analysis: Analysis,
analysis_target: AnalysisTarget,
existing_records: dict[str, Record] | None = None,
provenance_payload: InTotoPayload | None = None,
) -> Component:
"""Add a software component if it does not exist in the DB already.

Expand Down Expand Up @@ -547,18 +553,30 @@ def add_component(
raise PURLNotFoundError(
f"The repository {analysis_target.repo_path} is not available and no PURL is provided from the user."
)

repo_snapshot_purl = PackageURL(
purl = PackageURL(
type=repository.type,
namespace=repository.owner,
name=repository.name,
version=repository.commit_sha,
)
return Component(purl=str(repo_snapshot_purl), analysis=analysis, repository=repository)
else:
# If the PURL is available, we always create the software component with it whether the repository is
# available or not.
purl = analysis_target.parsed_purl

component = Component(
purl=str(purl),
analysis=analysis,
repository=repository,
)

if provenance_payload:
component.provenance_subject = ProvenanceSubject.from_purl_and_provenance(
purl=purl,
provenance_payload=provenance_payload,
)

# If the PURL is available, we always create the software component with it whether the repository is
# available or not.
return Component(purl=str(analysis_target.parsed_purl), analysis=analysis, repository=repository)
return component

@staticmethod
def parse_purl(config: Configuration) -> PackageURL | None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ class ProvenanceAvailableFacts(CheckFacts):
id: Mapped[int] = mapped_column(ForeignKey("_check_facts.id"), primary_key=True) # noqa: A003

#: The provenance asset name.
asset_name: Mapped[str] = mapped_column(String, nullable=False, info={"justification": JustificationType.TEXT})
asset_name: Mapped[str] = mapped_column(String, nullable=True, info={"justification": JustificationType.TEXT})

#: The URL for the provenance asset.
asset_url: Mapped[str] = mapped_column(String, nullable=True, info={"justification": JustificationType.HREF})
Expand Down Expand Up @@ -504,6 +504,12 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
CheckResultData
The result of the check.
"""
if ctx.dynamic_data["provenance"]:
return CheckResultData(
result_tables=[ProvenanceAvailableFacts(confidence=Confidence.HIGH)],
result_type=CheckResultType.PASSED,
)

provenance_extensions = defaults.get_list(
"slsa.verifier",
"provenance_extensions",
Expand Down
11 changes: 11 additions & 0 deletions src/macaron/slsa_analyzer/checks/provenance_l3_content_check.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,17 @@ def run_check(self, ctx: AnalyzeContext) -> CheckResultData:
logger.info("%s check was unable to find any expectations.", self.check_info.check_id)
return CheckResultData(result_tables=[], result_type=CheckResultType.UNKNOWN)

if ctx.dynamic_data["provenance"]:
if expectation.validate(ctx.dynamic_data["provenance"]):
return CheckResultData(
result_tables=[expectation],
result_type=CheckResultType.PASSED,
)
return CheckResultData(
result_tables=[expectation],
result_type=CheckResultType.FAILED,
)

package_registry_info_entries = ctx.dynamic_data["package_registries"]
ci_services = ctx.dynamic_data["ci_services"]

Expand Down
Loading