Skip to content

Commit df868ba

Browse files
committed
Introduce a dependency monitor for fences
This allows for userspace logging for a buffer and read/write dependencies on the buffer. Hook up SF to the dependency monitor. Right now this _does_ emit logs in SF when the primary display is powered down, which is likely indicative of SF being sloppy about release fences in situations where tearing won't be noticeable, but I did verify that manually making screenshotting forget to merge GPU work into a layer's release fence, which has been one way SF's torn the screen, triggers logcat, which is ultimately what we want. Bug: 360932099 Flag: com.android.graphics.surfaceflinger.flags.monitor_buffer_fences Test: manually remove screenshot fence handling and check logs Change-Id: Ica391dfa8a4f2924bb72664b9d9399e4ad9e1747
1 parent c28b463 commit df868ba

24 files changed

Lines changed: 398 additions & 10 deletions

libs/renderengine/Android.bp

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ cc_defaults {
2525
defaults: [
2626
"android.hardware.graphics.composer3-ndk_shared",
2727
"renderengine_defaults",
28+
"libsurfaceflinger_common_deps",
2829
],
2930
cflags: [
3031
"-DGL_GLEXT_PROTOTYPES",
@@ -117,7 +118,10 @@ filegroup {
117118
// possible if libskia_renderengine is just pulled into librenderengine via whole_static_libs.
118119
cc_defaults {
119120
name: "librenderengine_deps",
120-
defaults: ["skia_renderengine_deps"],
121+
defaults: [
122+
"skia_renderengine_deps",
123+
"libsurfaceflinger_common_deps",
124+
],
121125
static_libs: ["libskia_renderengine"],
122126
}
123127

libs/renderengine/benchmark/Android.bp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ cc_benchmark {
2828
"android.hardware.graphics.composer3-ndk_shared",
2929
"librenderengine_deps",
3030
"surfaceflinger_defaults",
31+
"libsurfaceflinger_common_deps",
3132
],
3233
srcs: [
3334
"main.cpp",
@@ -38,7 +39,6 @@ cc_benchmark {
3839
static_libs: [
3940
"librenderengine",
4041
"libshaders",
41-
"libsurfaceflinger_common",
4242
"libtonemap",
4343
],
4444
cflags: [

libs/renderengine/skia/SkiaRenderEngine.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1236,6 +1236,16 @@ void SkiaRenderEngine::drawLayersInternal(
12361236
LOG_ALWAYS_FATAL_IF(activeSurface != dstSurface);
12371237
auto drawFence = sp<Fence>::make(flushAndSubmit(context, dstSurface));
12381238
trace(drawFence);
1239+
FenceTimePtr fenceTime = FenceTime::makeValid(drawFence);
1240+
for (const auto& layer : layers) {
1241+
if (FlagManager::getInstance().monitor_buffer_fences()) {
1242+
if (layer.source.buffer.buffer) {
1243+
layer.source.buffer.buffer->getBuffer()
1244+
->getDependencyMonitor()
1245+
.addAccessCompletion(fenceTime, "RE");
1246+
}
1247+
}
1248+
}
12391249
resultPromise->set_value(std::move(drawFence));
12401250
}
12411251

libs/ui/Android.bp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ cc_library_shared {
122122

123123
srcs: [
124124
"DebugUtils.cpp",
125+
"DependencyMonitor.cpp",
125126
"DeviceProductInfo.cpp",
126127
"DisplayIdentification.cpp",
127128
"DynamicDisplayInfo.cpp",

libs/ui/DependencyMonitor.cpp

Lines changed: 144 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,144 @@
1+
/*
2+
* Copyright 2025 The Android Open Source Project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
// #define LOG_NDEBUG 0
18+
#undef LOG_TAG
19+
#define LOG_TAG "DependencyMonitor"
20+
21+
#include <ui/DependencyMonitor.h>
22+
#include <ui/Fence.h>
23+
#include <utils/Timers.h>
24+
25+
#include <inttypes.h>
26+
27+
namespace android {
28+
29+
void DependencyMonitor::addIngress(FenceTimePtr fence, std::string annotation) {
30+
std::lock_guard lock(mMutex);
31+
resolveLocked();
32+
if (mDependencies.isFull() && !mDependencies.front().updateSignalTimes(true)) {
33+
ALOGD("%s: Clobbering unresolved dependencies -- make me bigger!", mToken.c_str());
34+
}
35+
36+
auto& entry = mDependencies.next();
37+
entry.reset(mToken.c_str());
38+
ALOGV("%" PRId64 "/%s: addIngress at CPU time %" PRId64 " (%s)", mDependencies.back().id,
39+
mToken.c_str(), systemTime(), annotation.c_str());
40+
41+
mDependencies.back().ingress = {std::move(fence), std::move(annotation)};
42+
}
43+
44+
void DependencyMonitor::addAccessCompletion(FenceTimePtr fence, std::string annotation) {
45+
std::lock_guard lock(mMutex);
46+
if (mDependencies.size() == 0) {
47+
return;
48+
}
49+
ALOGV("%" PRId64 "/%s: addAccessCompletion at CPU time %" PRId64 " (%s)",
50+
mDependencies.back().id, mToken.c_str(), systemTime(), annotation.c_str());
51+
mDependencies.back().accessCompletions.emplace_back(std::move(fence), std::move(annotation));
52+
}
53+
54+
void DependencyMonitor::addEgress(FenceTimePtr fence, std::string annotation) {
55+
std::lock_guard lock(mMutex);
56+
if (mDependencies.size() == 0) {
57+
return;
58+
}
59+
ALOGV("%" PRId64 "/%s: addEgress at CPU time %" PRId64 " (%s)", mDependencies.back().id,
60+
mToken.c_str(), systemTime(), annotation.c_str());
61+
mDependencies.back().egress = {std::move(fence), std::move(annotation)};
62+
}
63+
64+
void DependencyMonitor::resolveLocked() {
65+
if (mDependencies.size() == 0) {
66+
return;
67+
}
68+
69+
for (size_t i = mDependencies.size(); i > 0; i--) {
70+
auto& dependencyBlock = mDependencies[i - 1];
71+
72+
if (dependencyBlock.validated) {
73+
continue;
74+
}
75+
76+
if (!dependencyBlock.updateSignalTimes(false)) {
77+
break;
78+
}
79+
80+
dependencyBlock.validated = true;
81+
dependencyBlock.checkUnsafeAccess();
82+
}
83+
}
84+
85+
bool DependencyMonitor::DependencyBlock::updateSignalTimes(bool excludeIngress) {
86+
if (egress.fence->getSignalTime() == Fence::SIGNAL_TIME_PENDING) {
87+
return false;
88+
}
89+
90+
if (!excludeIngress && ingress.fence->getSignalTime() == Fence::SIGNAL_TIME_PENDING) {
91+
return false;
92+
}
93+
94+
for (auto& accessCompletion : accessCompletions) {
95+
if (accessCompletion.fence->getSignalTime() == Fence::SIGNAL_TIME_PENDING) {
96+
return false;
97+
}
98+
}
99+
100+
return true;
101+
}
102+
103+
void DependencyMonitor::DependencyBlock::checkUnsafeAccess() const {
104+
const nsecs_t egressTime = egress.fence->getCachedSignalTime();
105+
const nsecs_t ingressTime = ingress.fence->getCachedSignalTime();
106+
107+
ALOGV_IF(egressTime != Fence::SIGNAL_TIME_INVALID,
108+
"%" PRId64 "/%s: Egress time: %" PRId64 " (%s)", token, id, egressTime,
109+
egress.annotation.c_str());
110+
ALOGV_IF(Fence::isValidTimestamp(egressTime) && Fence::isValidTimestamp(ingressTime) &&
111+
egressTime < ingressTime,
112+
"%" PRId64 "/%s: Detected egress before ingress!: %" PRId64 " (%s) < %" PRId64 " (%s)",
113+
id, token, egressTime, egress.annotation, ingressTime, ingress.annotation.c_str());
114+
115+
for (auto& accessCompletion : accessCompletions) {
116+
const nsecs_t accessCompletionTime = accessCompletion.fence->getCachedSignalTime();
117+
if (!Fence::isValidTimestamp(accessCompletionTime)) {
118+
ALOGI("%" PRId64 "/%s: Detected invalid access completion! <%s>", id, token,
119+
accessCompletion.annotation.c_str());
120+
continue;
121+
} else {
122+
ALOGV("%" PRId64 "/%s: Access completion time: %" PRId64 " <%s>", id, token,
123+
accessCompletionTime, accessCompletion.annotation.c_str());
124+
}
125+
126+
ALOGI_IF(Fence::isValidTimestamp(egressTime) && accessCompletionTime > egressTime,
127+
"%" PRId64 "/%s: Detected access completion after egress!: %" PRId64
128+
" (%s) > %" PRId64 " (%s)",
129+
id, token, accessCompletionTime, accessCompletion.annotation.c_str(), egressTime,
130+
egress.annotation.c_str());
131+
132+
ALOGI_IF(Fence::isValidTimestamp(ingressTime) && accessCompletionTime < ingressTime,
133+
"%" PRId64 "/%s: Detected access completion prior to ingress!: %" PRId64
134+
" (%s) < %" PRId64 " (%s)",
135+
id, token, accessCompletionTime, accessCompletion.annotation.c_str(), ingressTime,
136+
ingress.annotation.c_str());
137+
}
138+
139+
ALOGV_IF(ingressTime != Fence::SIGNAL_TIME_INVALID,
140+
"%" PRId64 "/%s: Ingress time: %" PRId64 " (%s)", id, token, ingressTime,
141+
ingress.annotation.c_str());
142+
}
143+
144+
} // namespace android

libs/ui/FenceTime.cpp

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,14 @@ FenceTime::FenceTime(nsecs_t signalTime)
5959
}
6060
}
6161

62+
FenceTimePtr FenceTime::makeValid(const sp<Fence>& fence) {
63+
if (fence && fence->isValid()) {
64+
return std::make_shared<FenceTime>(fence);
65+
} else {
66+
return std::make_shared<FenceTime>(systemTime());
67+
}
68+
}
69+
6270
void FenceTime::applyTrustedSnapshot(const Snapshot& src) {
6371
if (CC_UNLIKELY(src.state != Snapshot::State::SIGNAL_TIME)) {
6472
// Applying Snapshot::State::FENCE, could change the valid state of the
@@ -289,9 +297,10 @@ status_t FenceTime::Snapshot::unflatten(
289297
// ============================================================================
290298
void FenceTimeline::push(const std::shared_ptr<FenceTime>& fence) {
291299
std::lock_guard<std::mutex> lock(mMutex);
292-
while (mQueue.size() >= MAX_ENTRIES) {
300+
static constexpr size_t MAX_QUEUE_SIZE = 64;
301+
while (mQueue.size() >= MAX_QUEUE_SIZE) {
293302
// This is a sanity check to make sure the queue doesn't grow unbounded.
294-
// MAX_ENTRIES should be big enough not to trigger this path.
303+
// MAX_QUEUE_SIZE should be big enough not to trigger this path.
295304
// In case this path is taken though, users of FenceTime must make sure
296305
// not to rely solely on FenceTimeline to get the final timestamp and
297306
// should eventually call Fence::getSignalTime on their own.

libs/ui/GraphicBuffer.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,8 @@
2727
#include <ui/GraphicBufferMapper.h>
2828
#include <utils/Trace.h>
2929

30+
#include <string>
31+
3032
namespace android {
3133

3234
// ===========================================================================
@@ -104,6 +106,7 @@ GraphicBuffer::GraphicBuffer()
104106
usage = 0;
105107
layerCount = 0;
106108
handle = nullptr;
109+
mDependencyMonitor.setToken(std::to_string(mId));
107110
}
108111

109112
// deprecated
@@ -155,6 +158,8 @@ GraphicBuffer::GraphicBuffer(const GraphicBufferAllocator::AllocationRequest& re
155158
layerCount = request.layerCount;
156159
usage = request.usage;
157160
usage_deprecated = int(usage);
161+
std::string name = request.requestorName;
162+
mDependencyMonitor.setToken(name.append(":").append(std::to_string(mId)));
158163
}
159164
}
160165

@@ -252,6 +257,7 @@ status_t GraphicBuffer::initWithSize(uint32_t inWidth, uint32_t inHeight,
252257
usage = inUsage;
253258
usage_deprecated = int(usage);
254259
stride = static_cast<int>(outStride);
260+
mDependencyMonitor.setToken(requestorName.append(":").append(std::to_string(mId)));
255261
}
256262
return err;
257263
}
@@ -609,6 +615,14 @@ status_t GraphicBuffer::unflatten(void const*& buffer, size_t& size, int const*&
609615
mBufferMapper.getTransportSize(handle, &mTransportNumFds, &mTransportNumInts);
610616
}
611617

618+
std::string name;
619+
status_t err = mBufferMapper.getName(handle, &name);
620+
if (err != NO_ERROR) {
621+
name = "<Unknown>";
622+
}
623+
624+
mDependencyMonitor.setToken(name.append(":").append(std::to_string(mId)));
625+
612626
buffer = static_cast<void const*>(static_cast<uint8_t const*>(buffer) + sizeNeeded);
613627
size -= sizeNeeded;
614628
fds += numFds;
Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
/*
2+
* Copyright 2025 The Android Open Source Project
3+
*
4+
* Licensed under the Apache License, Version 2.0 (the "License");
5+
* you may not use this file except in compliance with the License.
6+
* You may obtain a copy of the License at
7+
*
8+
* http://www.apache.org/licenses/LICENSE-2.0
9+
*
10+
* Unless required by applicable law or agreed to in writing, software
11+
* distributed under the License is distributed on an "AS IS" BASIS,
12+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
* See the License for the specific language governing permissions and
14+
* limitations under the License.
15+
*/
16+
17+
#pragma once
18+
19+
#include <ui/FatVector.h>
20+
#include <ui/FenceTime.h>
21+
#include <ui/RingBuffer.h>
22+
23+
namespace android {
24+
25+
// Debugging class for that tries to add userspace logging for fence depencencies.
26+
// The model that a DependencyMonitor tries to follow is, for each access of some resource:
27+
// 1. There is a single ingress fence, that guards whether a resource is now safe to read from
28+
// another system.
29+
// 2. There are multiple access fences, that are fired when a resource is read.
30+
// 3. There is a single egress fence, that is fired when a resource is released and sent to another
31+
// system.
32+
//
33+
// Note that there can be repeated ingress and egress of a resource, but the assumption is that
34+
// there is exactly one egress for every ingress, unless the resource is destroyed rather than
35+
// released.
36+
//
37+
// The DependencyMonitor will log if there is an anomaly in the fences tracked for some resource.
38+
// This includes:
39+
// * If (2) happens before (1)
40+
// * If (2) happens after (3)
41+
//
42+
// Note that this class has no knowledge of the "other system". I.e., if the other system ignores
43+
// the fence reported in (3), but still takes a long time to write to the resource and produce (1),
44+
// then nothing will be logged. That other system must have its own DependencyMonitor. Conversely,
45+
// this class has imperfect knowledge of the system it is monitoring. For example, this class does
46+
// not know the precise start times of reading from a resource, the exact time that a read might
47+
// occur from a hardware unit is not known to userspace.
48+
//
49+
// In other words, this class logs specific classes of fence violations, but is not sensitive to
50+
// *all* violations. One property of this is that unless the system tracked by a DependencyMonitor
51+
// is feeding in literally incorrect fences, then there is no chance of a false positive.
52+
//
53+
// This class is thread safe.
54+
class DependencyMonitor {
55+
public:
56+
// Sets a debug token identifying the resource this monitor is tracking.
57+
void setToken(std::string token) { mToken = std::move(token); }
58+
59+
// Adds a fence that is fired when the resource ready to be ingested by the system using the
60+
// DependencyMonitor.
61+
void addIngress(FenceTimePtr fence, std::string annotation);
62+
// Adds a fence that is fired when the resource is accessed.
63+
void addAccessCompletion(FenceTimePtr fence, std::string annotation);
64+
// Adds a fence that is fired when the resource is released to another system.
65+
void addEgress(FenceTimePtr fence, std::string annotation);
66+
67+
private:
68+
struct AnnotatedFenceTime {
69+
FenceTimePtr fence;
70+
std::string annotation;
71+
};
72+
73+
struct DependencyBlock {
74+
int64_t id = -1;
75+
AnnotatedFenceTime ingress = {FenceTime::NO_FENCE, ""};
76+
FatVector<AnnotatedFenceTime> accessCompletions;
77+
AnnotatedFenceTime egress = {FenceTime::NO_FENCE, ""};
78+
bool validated = false;
79+
const char* token = nullptr;
80+
81+
void reset(const char* newToken) {
82+
static std::atomic<int64_t> counter = 0;
83+
id = counter++;
84+
ingress = {FenceTime::NO_FENCE, ""};
85+
accessCompletions.clear();
86+
egress = {FenceTime::NO_FENCE, ""};
87+
validated = false;
88+
token = newToken;
89+
}
90+
91+
// Returns true if all fences in this block have valid signal times.
92+
bool updateSignalTimes(bool excludeIngress);
93+
94+
void checkUnsafeAccess() const;
95+
};
96+
97+
void resolveLocked() REQUIRES(mMutex);
98+
99+
std::string mToken;
100+
std::mutex mMutex;
101+
ui::RingBuffer<DependencyBlock, 10> mDependencies GUARDED_BY(mMutex);
102+
};
103+
104+
} // namespace android

0 commit comments

Comments
 (0)