@@ -312,6 +312,85 @@ class FrameworkModeMetadataExtractor extends string {
312312 }
313313}
314314
/**
 * Holds if `endpoint` qualifies as a modeling candidate for `extensibleType`.
 *
 * An endpoint qualifies when all of the following hold:
 * - `CharacteristicsImpl::isCandidate` holds for it;
 * - no `UninterestingToModelCharacteristic` applies to it;
 * - `AutomodelJavaUtil::includeAutomodelCandidate` accepts its `package`, `type`,
 *   `name` and `signature`;
 * - `alreadyAiModeled` is either the empty string or contains `ai-`.
 *
 * All string parameters are bound by `FrameworkModeMetadataExtractor.hasMetadata`
 * and are exposed so that callers can report them alongside the endpoint.
 */
predicate isCandidate(
  Endpoint endpoint, string package, string type, string subtypes, string name, string signature,
  string input, string output, string parameterName, string extensibleType, string alreadyAiModeled
) {
  // Bind the descriptive columns for this endpoint.
  exists(FrameworkModeMetadataExtractor extractor |
    extractor
        .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
          parameterName, alreadyAiModeled, extensibleType)
  ) and
  AutomodelJavaUtil::includeAutomodelCandidate(package, type, name, signature) and
  // Endpoints that already have a (non-AI) MaD model are not offered as candidates. Otherwise an
  // endpoint could be proposed as a candidate for query A while the model labels it as a sink of
  // a type belonging to query B, for which it is already a known sink, producing overlap between
  // newly detected sinks and the pre-existing modeling. The assumption is that an endpoint with an
  // existing MaD model does not belong to further sink types and need not be re-examined.
  // NOTE(review): presumably `""` means "no existing model" and a value containing `ai-` marks an
  // AI-generated model; confirm against `hasMetadata`.
  alreadyAiModeled.matches(["", "%ai-%"]) and
  CharacteristicsImpl::isCandidate(endpoint, _) and
  not any(CharacteristicsImpl::UninterestingToModelCharacteristic uninteresting)
      .appliesToEndpoint(endpoint)
}
339+
/**
 * Holds if `characteristic` marks `endpoint` as a negative example for `extensibleType`.
 *
 * This requires that:
 * - `characteristic` applies to `endpoint`;
 * - for every potential endpoint type of `endpoint`, `characteristic` implies that the
 *   endpoint is *not* of that type;
 * - `confidence`, the smallest confidence among those negative implications, is at least
 *   `SharedCharacteristics::highConfidence()`;
 * - no other characteristic that applies to `endpoint` positively implies one of its potential
 *   types with a confidence of at least `SharedCharacteristics::maximalConfidence()`.
 *
 * The string parameters are bound by `FrameworkModeMetadataExtractor.hasMetadata` and are
 * exposed so that callers can report them alongside the endpoint.
 */
predicate isNegativeExample(
  Endpoint endpoint, EndpointCharacteristic characteristic, float confidence, string package,
  string type, string subtypes, string name, string signature, string input, string output,
  string parameterName, string extensibleType
) {
  // Bind the descriptive columns for this endpoint.
  exists(FrameworkModeMetadataExtractor extractor |
    extractor
        .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
          parameterName, _, extensibleType)
  ) and
  characteristic.appliesToEndpoint(endpoint) and
  // There is no potential type that the characteristic fails to rule out, i.e. the endpoint is
  // known not to be of any appropriate type.
  not exists(AutomodelEndpointTypes::EndpointType potentialType |
    potentialType = CharacteristicsImpl::getAPotentialType(endpoint) and
    not characteristic.hasImplications(potentialType, false, _)
  ) and
  // Take the weakest negative implication over all potential types ...
  confidence =
    min(float negativeConfidence |
      characteristic
          .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), false,
            negativeConfidence)
    |
      negativeConfidence
    ) and
  // ... and require even that one to be at least high confidence.
  confidence >= SharedCharacteristics::highConfidence() and
  // A node may legitimately be both a potential source/sanitizer and a sink. Such ambiguous
  // nodes could confuse the model if shown as negative examples in the prompt, so drop any
  // endpoint that some other characteristic positively identifies with maximal confidence.
  not exists(EndpointCharacteristic otherCharacteristic, float positiveConfidence |
    otherCharacteristic != characteristic and
    otherCharacteristic.appliesToEndpoint(endpoint) and
    otherCharacteristic
        .hasImplications(CharacteristicsImpl::getAPotentialType(endpoint), true,
          positiveConfidence) and
    positiveConfidence >= SharedCharacteristics::maximalConfidence()
  )
}
378+
/**
 * Holds if `endpoint` is a positive example of `endpointType`, that is, if
 * `CharacteristicsImpl::isKnownAs` holds for the pair.
 *
 * The remaining string parameters are bound by `FrameworkModeMetadataExtractor.hasMetadata`
 * and are exposed so that callers can report them alongside the endpoint.
 */
predicate isPositiveExample(
  Endpoint endpoint, string endpointType, string package, string type, string subtypes, string name,
  string signature, string input, string output, string parameterName, string extensibleType
) {
  CharacteristicsImpl::isKnownAs(endpoint, endpointType, _) and
  exists(FrameworkModeMetadataExtractor extractor |
    extractor
        .hasMetadata(endpoint, package, type, subtypes, name, signature, input, output,
          parameterName, _, extensibleType)
  )
}
393+
315394/*
316395 * EndpointCharacteristic classes that are specific to Automodel for Java.
317396 */
0 commit comments