diff --git a/features.md b/features.md index 0b16169cfe9..c0f7c8eff47 100644 --- a/features.md +++ b/features.md @@ -30,7 +30,6 @@ | ProvisioningRequestAvailable| | | Enabled | Enabled | | | | | | AWSClusterHostedDNS| | | Enabled | Enabled | | | Enabled | Enabled | | AWSDedicatedHosts| | | Enabled | Enabled | | | Enabled | Enabled | -| AWSDualStackInstall| | | Enabled | Enabled | | | Enabled | Enabled | | AWSEuropeanSovereignCloudInstall| | | Enabled | Enabled | | | Enabled | Enabled | | AdditionalStorageConfig| | | Enabled | Enabled | | | Enabled | Enabled | | AutomatedEtcdBackup| | | Enabled | Enabled | | | Enabled | Enabled | @@ -93,6 +92,7 @@ | AWSServiceLBNetworkSecurityGroup| | Enabled | Enabled | Enabled | | Enabled | Enabled | Enabled | | OSStreams| | Enabled | Enabled | Enabled | | Enabled | Enabled | Enabled | | AWSClusterHostedDNSInstall| Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | +| AWSDualStackInstall| Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | | AzureClusterHostedDNSInstall| Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | | AzureWorkloadIdentity| Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | | BootImageSkewEnforcement| Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | Enabled | diff --git a/features/features.go b/features/features.go index 1d0f9bcce43..d359f947ab4 100644 --- a/features/features.go +++ b/features/features.go @@ -854,7 +854,7 @@ var ( contactPerson("sadasu"). productScope(ocpSpecific). enhancementPR("https://github.com/openshift/enhancements/pull/1806"). - enable(inTechPreviewNoUpgrade(), inDevPreviewNoUpgrade()). + enable(inDefault(), inOKD(), inTechPreviewNoUpgrade(), inDevPreviewNoUpgrade()). mustRegister() FeatureGateAzureDualStackInstall = newFeatureGate("AzureDualStackInstall"). diff --git a/payload-manifests/featuregates/featureGate-4-10-Hypershift-Default.yaml b/payload-manifests/featuregates/featureGate-4-10-Hypershift-Default.yaml index 389432b924e..b4cf444145f 100644 --- a/payload-manifests/featuregates/featureGate-4-10-Hypershift-Default.yaml +++ b/payload-manifests/featuregates/featureGate-4-10-Hypershift-Default.yaml @@ -20,9 +20,6 @@ { "name": "AWSDedicatedHosts" }, - { - "name": "AWSDualStackInstall" - }, { "name": "AWSEuropeanSovereignCloudInstall" }, @@ -292,6 +289,9 @@ { "name": "AWSClusterHostedDNSInstall" }, + { + "name": "AWSDualStackInstall" + }, { "name": "AzureClusterHostedDNSInstall" }, diff --git a/payload-manifests/featuregates/featureGate-4-10-Hypershift-OKD.yaml b/payload-manifests/featuregates/featureGate-4-10-Hypershift-OKD.yaml index 52a878dbfdd..94e4d0e6fb4 100644 --- a/payload-manifests/featuregates/featureGate-4-10-Hypershift-OKD.yaml +++ b/payload-manifests/featuregates/featureGate-4-10-Hypershift-OKD.yaml @@ -22,9 +22,6 @@ { "name": "AWSDedicatedHosts" }, - { - "name": "AWSDualStackInstall" - }, { "name": "AWSEuropeanSovereignCloudInstall" }, @@ -294,6 +291,9 @@ { "name": "AWSClusterHostedDNSInstall" }, + { + "name": "AWSDualStackInstall" + }, { "name": "AzureClusterHostedDNSInstall" }, diff --git a/payload-manifests/featuregates/featureGate-4-10-SelfManagedHA-Default.yaml b/payload-manifests/featuregates/featureGate-4-10-SelfManagedHA-Default.yaml index f0f1078fd29..e2f05430d1e 100644 --- a/payload-manifests/featuregates/featureGate-4-10-SelfManagedHA-Default.yaml +++ b/payload-manifests/featuregates/featureGate-4-10-SelfManagedHA-Default.yaml @@ -20,9 +20,6 @@ { "name": "AWSDedicatedHosts" }, - { - "name": "AWSDualStackInstall" - }, { "name": "AWSEuropeanSovereignCloudInstall" }, @@ -283,6 +280,9 @@ { "name": "AWSClusterHostedDNSInstall" }, + { + "name": "AWSDualStackInstall" + }, { "name": "AWSServiceLBNetworkSecurityGroup" }, diff --git a/payload-manifests/featuregates/featureGate-4-10-SelfManagedHA-OKD.yaml b/payload-manifests/featuregates/featureGate-4-10-SelfManagedHA-OKD.yaml index faa2bc84479..fc899ff19e5 100644 --- a/payload-manifests/featuregates/featureGate-4-10-SelfManagedHA-OKD.yaml +++ b/payload-manifests/featuregates/featureGate-4-10-SelfManagedHA-OKD.yaml @@ -22,9 +22,6 @@ { "name": "AWSDedicatedHosts" }, - { - "name": "AWSDualStackInstall" - }, { "name": "AWSEuropeanSovereignCloudInstall" }, @@ -285,6 +282,9 @@ { "name": "AWSClusterHostedDNSInstall" }, + { + "name": "AWSDualStackInstall" + }, { "name": "AWSServiceLBNetworkSecurityGroup" }, diff --git a/tools/codegen/cmd/featuregate-test-analyzer.go b/tools/codegen/cmd/featuregate-test-analyzer.go index 92e07bcece7..d05b2e1eb60 100644 --- a/tools/codegen/cmd/featuregate-test-analyzer.go +++ b/tools/codegen/cmd/featuregate-test-analyzer.go @@ -247,7 +247,7 @@ func (o *FeatureGateTestAnalyzerOptions) Run(ctx context.Context) error { summaryMarkdown := md.ExactBytes() if len(o.OutputDir) > 0 { filename := filepath.Join(o.OutputDir, "feature-promotion-summary.md") - if err := os.WriteFile(filename, summaryMarkdown, 0644); err != nil { + if err := os.WriteFile(filename, summaryMarkdown, 0o644); err != nil { errs = append(errs, err) } @@ -343,7 +343,6 @@ func buildHTMLFeatureGateData(name string, testingResults map[JobVariant]*Testin } func writeHTMLFromTemplate(filename string, featureGateHTMLData []utils.HTMLFeatureGate) error { - data := utils.HTMLTemplateData{ FeatureGates: featureGateHTMLData, } @@ -486,7 +485,6 @@ func writeTestingMarkDown(testingResults map[JobVariant]*TestingResults, md *uti } md.Text("") md.Text("") - } var ( @@ -649,7 +647,7 @@ func (a OrderedJobVariants) Less(i, j int) bool { // Map these to an ordered list of strings so that we can define the order // rather than them being alphabetical. - var networkStackOrder = map[string]string{ + networkStackOrder := map[string]string{ "": "0", "ipv4": "1", "ipv6": "2", @@ -870,7 +868,7 @@ func listTestResultForVariant(featureGate string, jobVariant JobVariant) (*Testi // Feature gates used by the installer don't need separate tests, use the overall install tests if strings.Contains(featureGate, "Install") { - testPattern = fmt.Sprintf("install should succeed") + return verifyJobBasedFeatureGatePromotion(featureGate, jobVariant) } fmt.Printf("Query sippy for all test run results for pattern %q on variant %#v\n", testPattern, jobVariant) @@ -991,3 +989,279 @@ func matchTwoNodeFeatureGates(featureGate string, topology string) bool { } return false } + +func verifyJobBasedFeatureGatePromotion(featureGate string, jobVariant JobVariant) (*TestingResults, error) { + ocpRelease, err := getRelease() + if err != nil { + return nil, fmt.Errorf("getting release version: %w", err) + } + + defaultTransport := &http.Transport{ + Proxy: http.ProxyFromEnvironment, + ForceAttemptHTTP2: true, + MaxIdleConns: 100, + IdleConnTimeout: 90 * time.Second, + TLSHandshakeTimeout: 10 * time.Second, + ExpectContinueTimeout: 1 * time.Second, + TLSClientConfig: &tls.Config{ + InsecureSkipVerify: true, + }, + } + + sippyClient := &http.Client{ + Timeout: 2 * time.Minute, + Transport: defaultTransport, + } + + jobs, err := getJobsForFeatureGateFromSippy(sippyClient, ocpRelease, featureGate, jobVariant) + if err != nil { + return nil, fmt.Errorf("getting jobs for feature-gate %q for variant %v : %w", featureGate, jobVariant, err) + } + + testResults := []TestResults{} + + for _, job := range jobs { + results, err := verifyJobPassRate(sippyClient, ocpRelease, job, jobVariant) + if err != nil { + return nil, fmt.Errorf("verifying job pass rate for job %q: %w", job.Name, err) + } + + testResults = append(testResults, *results) + } + + return &TestingResults{ + JobVariant: jobVariant, + TestResults: testResults, + }, nil +} + +func verifyJobPassRate(client *http.Client, release string, job sippy.SippyJob, variant JobVariant) (*TestResults, error) { + // Do an early check for 95% pass rate with at least 14 runs + runs := job.CurrentRuns + passes := job.CurrentPasses + + if runs < 14 { + runs += job.PreviousRuns + passes += job.PreviousPasses + + fmt.Println("current + previous runs", runs) + } + + // If we have less than 14 runs, return the current set of results as-is + // because it doesn't meet promotion criteria. + // + // This saves us from unnecessarily making calls out to Sippy to perform a more nuanced + // failures analysis of the job runs to see if failed runs are true failures or known regressions. + if runs < 14 { + return &TestResults{ + TestName: job.Name, + TotalRuns: runs, + SuccessfulRuns: passes, + FailedRuns: runs - passes, + }, nil + } + + // If we have greater than or equal to 14 runs AND they are passing at a rate of at least 95%, + // we can return early because this job has passed the promotion requirements. + // + // This saves us from unnecessarily making calls out to Sippy to perform a more nuanced + // failures analysis of the job runs to see if failed runs are true failures or known regressions. + if (passes * 100) / runs >= 95 { + return &TestResults{ + TestName: job.Name, + TotalRuns: runs, + SuccessfulRuns: passes, + FailedRuns: runs - passes, + }, nil + } + + // We haven't passed promotion requirements with this job, but jobs might be impacted + // by known regressed tests. While important to get fixed, many regressions are either + // release blockers or require an exception to not be a release blocker. + // + // We can be reasonably confident in promoting a feature if the tests that are failing + // on failed runs are only ones with known regressions for the platform being tested. + // + // From here on, we fetch the 14 most recent job runs for the job in question from Sippy, + // fetch the known regressions for the release + platform variant, and compare failing + // job runs failed tests with the known regressions - only counting failures that have + // unknown test failures as a true failure. + + jobRuns, err := getJobRunsFromSippy(client, release, job.Name) + if err != nil { + return nil, fmt.Errorf("getting job %q results from sippy: %w", job.Name, err) + } + + testResults := &TestResults{ + TestName: job.Name, + TotalRuns: len(jobRuns), + } + + triagedTestFailures, err := getTriagedTestFailuresFromSippy(client, release, variant) + if err != nil { + return nil, fmt.Errorf("getting triaged test failures from sippy: %q", err) + } + + for _, jobRun := range jobRuns { + if jobRun.OverallResult == "F" && !jobRun.KnownFailure { + + untriagedTestFailures := []string{} + for _, failure := range jobRun.FailedTestNames { + if !triagedTestFailures.Has(failure) { + untriagedTestFailures = append(untriagedTestFailures, failure) + } + } + + if len(untriagedTestFailures) > 0 { + var writer strings.Builder + writer.WriteString(fmt.Sprintf("job run %s has untriaged test failures:\n", jobRun.TestGridURL)) + for _, testFailure := range untriagedTestFailures { + writer.WriteString(fmt.Sprintf("\t- %s\n", testFailure)) + } + + fmt.Println(writer.String()) + testResults.FailedRuns++ + + continue + } + } + + testResults.SuccessfulRuns++ + } + + return testResults, nil +} + +func getJobsForFeatureGateFromSippy(client *http.Client, release, featureGate string, variant JobVariant) ([]sippy.SippyJob, error) { + // TODO: Remove this once done testing the other logic. + + out := []sippy.SippyJob{ + { + Name: "periodic-ci-openshift-release-main-nightly-5.0-e2e-aws-ovn-installer-dualstack-ipv4-primary-techpreview", + CurrentRuns: 25, + CurrentPasses: 3, + }, + { + Name: "periodic-ci-openshift-release-main-nightly-5.0-e2e-aws-ovn-installer-dualstack-ipv6-primary-techpreview", + CurrentRuns: 25, + CurrentPasses: 3, + }, + } + + return out, nil + + resp, err := client.Get(sippy.BuildSippyJobsForFeatureGateURL(featureGate, release, variant.Topology, variant.Cloud, variant.Architecture, variant.NetworkStack, variant.OS)) + if err != nil { + return nil, fmt.Errorf("getting job info: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("expected a 200 OK status code but got %s", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("reading response body: %w", err) + } + + defer resp.Body.Close() + + jobs := []sippy.SippyJob{} + err = json.Unmarshal(body, &jobs) + if err != nil { + return nil, fmt.Errorf("unmarshalling response body: %w", err) + } + + return jobs, nil +} + +func getJobRunsFromSippy(client *http.Client, release, jobName string) ([]sippy.SippyJobRun, error) { + resp, err := client.Get(sippy.BuildSippyJobRunsForJobURL(release, jobName, time.Now().Add(-1 * 14 * 24 * time.Hour))) + if err != nil { + return nil, fmt.Errorf("getting job info: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("expected a 200 OK status code but got %s", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("reading response body: %w", err) + } + + defer resp.Body.Close() + + runResults := &sippy.SippyJobRunsResult{} + err = json.Unmarshal(body, runResults) + if err != nil { + return nil, fmt.Errorf("unmarshalling response body: %w", err) + } + + return runResults.Rows, nil +} + +func getTriagedTestFailuresFromSippy(client *http.Client, release string, variant JobVariant) (sets.Set[string], error) { + reqURL, err := url.Parse("https://sippy.dptools.openshift.org/api/component_readiness/triages") + if err != nil { + panic(fmt.Sprintf("couldn't parse sippy triages url: %v", err)) + } + + resp, err := client.Get(reqURL.String()) + if err != nil { + return nil, fmt.Errorf("getting sippy triages: %w", err) + } + + if resp.StatusCode != http.StatusOK { + return nil, fmt.Errorf("expected a 200 OK status code but got %s", resp.StatusCode) + } + + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("reading response body: %w", err) + } + + defer resp.Body.Close() + + triageItems := []sippy.SippyTriageItem{} + err = json.Unmarshal(body, &triageItems) + if err != nil { + return nil, fmt.Errorf("unmarshalling response body: %w", err) + } + + regressedTests := sets.New[string]() + + for _, triageItem := range triageItems { + for _, regression := range triageItem.Regressions { + if regression.Release != release { + continue + } + + regressionVariants := sets.New(regression.Variants...) + + if !regressionVariants.Has(fmt.Sprintf("Platform:%s", variant.Cloud)) { + continue + } + + if !regressionVariants.Has(fmt.Sprintf("Topology:%s", variant.Topology)) { + continue + } + + if !regressionVariants.Has(fmt.Sprintf("Architecture:%s", variant.Architecture)) { + continue + } + + if variant.NetworkStack != "" && !regressionVariants.Has(fmt.Sprintf("NetworkStack:%s", variant.NetworkStack)) { + continue + } + + if variant.OS != "" && !regressionVariants.Has(fmt.Sprintf("OS:%s", variant.OS)) { + continue + } + + regressedTests.Insert(regression.TestName) + } + } + + return regressedTests, nil +} diff --git a/tools/codegen/pkg/sippy/json_types.go b/tools/codegen/pkg/sippy/json_types.go index 0795c2519a4..992cd141a30 100644 --- a/tools/codegen/pkg/sippy/json_types.go +++ b/tools/codegen/pkg/sippy/json_types.go @@ -5,6 +5,7 @@ import ( "fmt" "net/url" "strings" + "time" "k8s.io/apimachinery/pkg/util/sets" ) @@ -54,6 +55,38 @@ type SippyTestInfo struct { OpenBugs int `json:"open_bugs"` } +type SippyJob struct { + Name string `json:"name"` + Variants []string `json:"variants"` + CurrentRuns int `json:"current_runs"` + CurrentPasses int `json:"current_passes"` + PreviousRuns int `json:"previous_runs"` + PreviousPasses int `json:"previous_passes"` +} + +type SippyJobRun struct { + Variants []string `json:"variants"` + Failed bool `json:"failed"` + FailedTestNames []string `json:"failed_test_names"` + KnownFailure bool `json:"known_failure"` + OverallResult string `json:"overall_result"` + TestGridURL string `json:"test_grid_url"` +} + +type SippyJobRunsResult struct { + Rows []SippyJobRun `json:"rows"` +} + +type SippyTriageItem struct { + Regressions []SippyRegression `json:"regressions,omitempty"` +} + +type SippyRegression struct { + Release string `json:"release,omitempty"` + TestName string `json:"test_name,omitempty"` + Variants []string `json:"variants,omitempty"` +} + func QueriesFor(cloud, architecture, topology, networkStack, os, jobTiers, testPattern string) []*SippyQueryStruct { // Build base query items that are common to all JobTier queries baseItems := []SippyQueryItem{ @@ -219,3 +252,118 @@ func BuildSippyTestAnalysisURL(release, testName, topology, cloud, architecture, return u.String() } + +func BuildSippyJobsForFeatureGateURL(featureGate, release, topology, cloud, architecture, networkStack, os string) string { + filterItems := []SippyQueryItem{ + { + ColumnField: "variants", + Not: true, + OperatorValue: "has entry", + Value: "never-stable", + }, + { + ColumnField: "variants", + Not: true, + OperatorValue: "has entry", + Value: "aggregated", + }, + { + ColumnField: "variants", + OperatorValue: "contains", + Value: fmt.Sprintf("Capability:%s", featureGate), + }, + { + ColumnField: "variants", + OperatorValue: "has entry", + Value: fmt.Sprintf("Topology:%s", topology), + }, + { + ColumnField: "variants", + OperatorValue: "has entry", + Value: fmt.Sprintf("Platform:%s", cloud), + }, + { + ColumnField: "variants", + OperatorValue: "has entry", + Value: fmt.Sprintf("Architecture:%s", architecture), + }, + } + if networkStack != "" { + filterItems = append(filterItems, SippyQueryItem{ + ColumnField: "variants", + OperatorValue: "has entry", + Value: fmt.Sprintf("NetworkStack:%s", networkStack), + }) + } + if os != "" { + filterItems = append(filterItems, SippyQueryItem{ + ColumnField: "variants", + OperatorValue: "has entry", + Value: fmt.Sprintf("OS:%s", os), + }) + } + // Note: We don't filter by JobTier in the URL so the link shows all tiers + // The actual queries filter by each tier separately and merge results + + filterObj := SippyQueryStruct{ + Items: filterItems, + LinkOperator: "and", + } + filterJSON, err := json.Marshal(filterObj) + if err != nil { + return "" + } + + u := &url.URL{ + Scheme: "https", + Host: "sippy.dptools.openshift.org", + Path: "/api/jobs", + } + q := u.Query() + q.Set("filters", string(filterJSON)) + q.Set("release", release) + u.RawQuery = q.Encode() + + return u.String() +} + +func BuildSippyJobRunsForJobURL(release, jobName string, timestamp time.Time) string { + filterItems := []SippyQueryItem{ + { + ColumnField: "name", + OperatorValue: "equals", + Value: jobName, + }, + { + ColumnField: "timestamp", + OperatorValue: ">=", + Value: fmt.Sprintf("%d", timestamp.Unix()), + }, + } + + filterObj := SippyQueryStruct{ + Items: filterItems, + LinkOperator: "and", + } + filterJSON, err := json.Marshal(filterObj) + if err != nil { + return "" + } + + u := &url.URL{ + Scheme: "https", + Host: "sippy.dptools.openshift.org", + Path: "/api/jobs/runs", + } + q := u.Query() + q.Set("filters", string(filterJSON)) + q.Set("release", release) + q.Set("sortField", "timestamp") + q.Set("sort", "desc") + q.Set("perPage", "14") // hardcoded to 14 most recent job runs to prevent excessive "expensive" calculations of job results. + q.Set("page", "0") + u.RawQuery = q.Encode() + + return u.String() +} +