写这篇文章的原因是:在性能分析时发现HWC HAL频繁调用mapper,导致了性能/功耗的回退。
分析发现,HWC HAL在获取dataspace的时候使用了mapper去拿,如果Layer层数过多,那就会产生多次IPC通信。自测发现每个Layer会增加100~250微秒的耗时。
1. Surface类成员mDataSpace
1.1 native_window_set_buffers_data_space设置mDataSpace
726 static inline int native_window_set_buffers_data_space(
727 struct ANativeWindow* window,
728 android_dataspace_t dataSpace)
729 {
730 return window->perform(window, NATIVE_WINDOW_SET_BUFFERS_DATASPACE,
731 dataSpace);
732 }
1359 case NATIVE_WINDOW_SET_BUFFERS_DATASPACE:
1360 res = dispatchSetBuffersDataSpace(args);
1361 break;
1555 int Surface::dispatchSetBuffersDataSpace(va_list args) {
1556 Dataspace dataspace = static_cast<Dataspace>(va_arg(args, int));
1557 return setBuffersDataSpace(dataspace);
1558 }
2179 int Surface::setBuffersDataSpace(Dataspace dataSpace)
2180 {
2181 ALOGV("Surface::setBuffersDataSpace");
2182 Mutex::Autolock lock(mMutex); //有锁
2183 mDataSpace = dataSpace; //保存到成员变量中
2184 return NO_ERROR;
2185 }
1.2 使用mDataSpace
初始化Surface以及disconnect时:mDataSpace = Dataspace::UNKNOWN;
query查询的时候取走mDataSpace:
1288 case NATIVE_WINDOW_DATASPACE: {
1289 *value = static_cast<int>(mDataSpace);
1290 return NO_ERROR;
1291 }
在getQueueBufferInputLocked函数中会取走mDataSpace,并继续往下传递,可想而知,我们接下来的主线就都是它了:
945 void Surface::getQueueBufferInputLocked(android_native_buffer_t* buffer, int fenceFd,
946 nsecs_t timestamp, IGraphicBufferProducer::QueueBufferInput* out) {
961 IGraphicBufferProducer::QueueBufferInput input(timestamp, isAutoTimestamp,
962 static_cast<android_dataspace>(mDataSpace), crop, mScalingMode,
963 mTransform ^ mStickyTransform, fence, mStickyTransform,
964 mEnableFrameTimestamps);
1031 *out = input; //保存input并返回
1.3 传递mDataSpace
1096 int Surface::queueBuffer(android_native_buffer_t* buffer, int fenceFd) {
1097 ATRACE_CALL(); //有标签
1115 IGraphicBufferProducer::QueueBufferOutput output;
1116 IGraphicBufferProducer::QueueBufferInput input;
1117 getQueueBufferInputLocked(buffer, fenceFd, mTimestamp, &input); //取走mDataSpace
1118 applyGrallocMetadataLocked(buffer, input);//关键函数1,往下传递mDataSpace
1123 status_t err = mGraphicBufferProducer->queueBuffer(i, input, &output);//关键函数2,往下传递mDataSpace
1.3.1 applyGrallocMetadataLocked
设置到mapper中去,最终到metadata中,后面使用mapper都会取到,如ARM GPU在使用该dataspace做合成时就可以拿到这个值。
1034 void Surface::applyGrallocMetadataLocked(
1035 android_native_buffer_t* buffer,
1036 const IGraphicBufferProducer::QueueBufferInput& queueBufferInput) {
1037 ATRACE_CALL();
1038 auto& mapper = GraphicBufferMapper::get();
1039 mapper.setDataspace(buffer->handle, static_cast<ui::Dataspace>(queueBufferInput.dataSpace));
1.3.2 queueBuffer
首先传递到BufferItem中,供BLASTBufferQueue在acquireBuffer时取走。至于生产者-消费者模型就请读者自行查阅了。
831 status_t BufferQueueProducer::queueBuffer(int slot,
832 const QueueBufferInput &input, QueueBufferOutput *output) {
833 ATRACE_CALL();
834 ATRACE_BUFFER_INDEX(slot);
838 android_dataspace dataSpace;
845 input.deflate(&requestedPresentTimestamp, &isAutoTimestamp, &dataSpace,
846 &crop, &scalingMode, &transform, &acquireFence, &stickyTransform,
847 &getFrameTimestamps); //取走mDataSpace
926 // Override UNKNOWN dataspace with consumer default
927 if (dataSpace == HAL_DATASPACE_UNKNOWN) { //如果没有设置那么会取consumer端的
928 dataSpace = mCore->mDefaultBufferDataSpace;
929 }
950 item.mDataSpace = dataSpace; //最后放到item中供acquireBuffer取走
可以看到BLASTBufferQueue取走mDataSpace向SurfaceFlinger传递。
505 status_t BLASTBufferQueue::acquireNextBufferLocked(
506 const std::optional<SurfaceComposerClient::Transaction*> transaction) {
537 status_t status =
538 mBufferItemConsumer->acquireBuffer(&bufferItem, 0 /* expectedPresent */, false);
600 t->setDataspace(mSurfaceControl, static_cast<ui::Dataspace>(bufferItem.mDataSpace));
继续往下传递到HWC hal,同时hwc hal层可以通过getDataspace接口拿到对应layer的dataspace信息进行合成操作。
1763 SurfaceComposerClient::Transaction& SurfaceComposerClient::Transaction::setDataspace(
1764 const sp<SurfaceControl>& sc, ui::Dataspace dataspace) {
1770 s->what |= layer_state_t::eDataspaceChanged;
1771 s->dataspace = dataspace;
4799 uint32_t SurfaceFlinger::setClientStateLocked(const FrameTimelineInfo& frameTimelineInfo,
4800 ResolvedComposerState& composerState,
4801 int64_t desiredPresentTime, bool isAutoTimestamp,
4802 int64_t postTime, uint64_t transactionId) {
4965 if (what & layer_state_t::eDataspaceChanged) {
4966 if (layer->setDataspace(s.dataspace)) flags |= eTraversalNeeded;
4967 }
3187 bool Layer::setDataspace(ui::Dataspace dataspace) {
3188 if (mDrawingState.dataspace == dataspace) return false;
3189 mDrawingState.dataspace = dataspace;
3190 mDrawingState.modified = true;
3191 setTransactionFlags(eTransactionNeeded);
3192 return true;
3193 }
502 void OutputLayer::writeOutputDependentPerFrameStateToHWC(HWC2::Layer* hwcLayer) {
524 const auto dataspace = outputDependentState.overrideInfo.buffer
525 ? outputDependentState.overrideInfo.dataspace
526 : outputDependentState.dataspace;
528 if (auto error = hwcLayer->setDataspace(dataspace); error != hal::Error::NONE) {
785 Error Layer::setDataspace(Dataspace dataspace)
786 {
794 mDataSpace = dataspace;
795 auto intError = mComposer.setLayerDataspace(mDisplay->getId(), mId, mDataSpace); //往HWC hal层设置
796 return static_cast<Error>(intError);
797 }
hardware/interfaces/graphics/composer/2.1/utils/passthrough/include/composer-passthrough/2.1/HwcHal.h
551 !initDispatch(HWC2_FUNCTION_SET_LAYER_DATASPACE, &mDispatch.setLayerDataspace) ||
446 Error setLayerDataspace(Display display, Layer layer, int32_t dataspace) override {
447 int32_t err = mDispatch.setLayerDataspace(mDevice, display, layer, dataspace);
448 return static_cast<Error>(err);
449 }
看到这,是不是觉得恍然大悟,gralloc和hwc hal都拿到了相同的dataspace,那么GPU和HWC合成应该拿到的dataspace是一样的了,但是。。。
2. 真的是这么回事吗?
理论很精彩,但是现实很残酷,加log发现在video播放时压根没有走Surface::queueBuffer 函数,怎么回事?怎么回事?怎么回事呢?
那么我们来揭开它神秘的面纱。
/frameworks/av/media/libstagefright/
H A D MediaCodec.cpp 5381 int err = native_window_set_buffers_data_space( in handleOutputFormatChangeIfNeeded()
H A D ACodec.cpp 6749 status_t err = native_window_set_buffers_data_space( in onOutputBufferDrained()
/frameworks/base/media/jni/
H A D android_media_ImageWriter.cpp 479 res = native_window_set_buffers_data_space(anw.get(), nativeDataspace); in ImageWriter_init()
678 res = native_window_set_buffers_data_space( in ImageWriter_queueImage()
766 res = native_window_set_buffers_data_space( in attachAndQeueuGraphicBuffer()
/frameworks/av/media/libstagefright/colorconversion/
H A D SoftwareRenderer.cpp 97 // when we do native_window_set_buffers_data_space(). in resetFormatIfChanged()
445 if ((err = native_window_set_buffers_data_space(mNativeWindow.get(), dataSpace))) { in render()
看看上面代码,是不是发现这些地方都会调用native_window_set_buffers_data_space函数将dataspace传递到Surface中?但是这些调用并不总会执行,它们是有条件的:
xref: /frameworks/av/media/libstagefright/MediaCodec.cpp
5368 if (mSoftRenderer == NULL &&
5369 mSurface != NULL &&
5370 (mFlags & kFlagUsesSoftwareRenderer)) { //有条件的kFlagUsesSoftwareRenderer
5371 AString mime;
5372 CHECK(mOutputFormat->findString("mime", &mime));
5373
5374 // TODO: propagate color aspects to software renderer to allow better
5375 // color conversion to RGB. For now, just mark dataspace for YUV
5376 // rendering.
5377 int32_t dataSpace;
5378 if (mOutputFormat->findInt32("android._dataspace", &dataSpace)) {
5379 ALOGD("[%s] setting dataspace on output surface to %#x",
5380 mComponentName.c_str(), dataSpace);
//有条件调用呢!!
5381 int err = native_window_set_buffers_data_space(
5382 mSurface.get(), (android_dataspace)dataSpace);
5384 }
那么我们看video播放视频的时候怎么把dataspace传递过来的呢?
读完下面代码你就会明白:Surface是什么?为什么要借助Surface呢?
xref: /frameworks/av/media/codec2/sfplugin/CCodecBufferChannel.cpp
767 status_t CCodecBufferChannel::renderOutputBuffer(
768 const sp<MediaCodecBuffer> &buffer, int64_t timestampNs) {
921 IGraphicBufferProducer::QueueBufferInput qbi(
922 timestampNs,
923 false, // droppable
924 dataSpace,
925 Rect(blocks.front().crop().left,
926 blocks.front().crop().top,
927 blocks.front().crop().right(),
928 blocks.front().crop().bottom()),
929 videoScalingMode,
930 transform,
931 fence, 0);
983 SetMetadataToGralloc4Handle(dataSpace, hdrStaticInfo, hdrDynamicInfo, block.handle()); //很关键的函数
987 IGraphicBufferProducer::QueueBufferOutput qbo;
988 status_t result = mComponent->queueToOutputSurface(block, qbi, &qbo);
xref: /frameworks/av/media/codec2/hal/client/client.cpp
1576 status_t Codec2Client::Component::queueToOutputSurface(
1577 const C2ConstGraphicBlock& block,
1578 const QueueBufferInput& input,
1579 QueueBufferOutput* output) {
1580 return mOutputBufferQueue->outputBuffer(block, input, output);
1581 }
xref: /frameworks/av/media/codec2/hal/client/output.cpp
384 status_t OutputBufferQueue::outputBuffer(
385 const C2ConstGraphicBlock& block,
386 const BnGraphicBufferProducer::QueueBufferInput& input,
387 BnGraphicBufferProducer::QueueBufferOutput* output) {
399 sp<IGraphicBufferProducer> outputIgbp = mIgbp;
//接下来直接调用BufferQueueProducer的queueBuffer,不经过Surface
417 auto syncVar = syncMem ? syncMem->mem() : nullptr;
418 if(syncVar) {
419 syncVar->lock();
420 status = outputIgbp->queueBuffer(static_cast<int>(bqSlot),
421 input, output);
422 if (status == OK) {
423 syncVar->notifyQueuedLocked();
424 }
425 syncVar->unlock();
426 } else {
427 status = outputIgbp->queueBuffer(static_cast<int>(bqSlot),
428 input, output);
429 }
由以上代码可见,video场景直接调用BufferQueueProducer的queueBuffer函数,并不经过Surface::queueBuffer。根据前面的分析,HWC HAL依然能拿到对应的dataspace。
那么mapper怎么拿呢?
xref: /frameworks/av/media/codec2/sfplugin/Codec2Buffer.cpp
1170 c2_status_t SetMetadataToGralloc4Handle(
1171 android_dataspace_t dataSpace,
1172 const std::shared_ptr<const C2StreamHdrStaticMetadataInfo::output> &staticInfo,
1173 const std::shared_ptr<const C2StreamHdrDynamicMetadataInfo::output> &dynamicInfo,
1174 const C2Handle *const handle) {
1175 c2_status_t err = C2_OK;
1176 sp<IMapper4> mapper = GetMapper4();
1177 Gralloc4Buffer buffer(handle);
1178 if (!mapper || !buffer) {
1179 // Gralloc4 not supported; nothing to do
1180 return err;
1181 }
1182 {
1183 hidl_vec<uint8_t> metadata;
1184 if (gralloc4::encodeDataspace(static_cast<Dataspace>(dataSpace), &metadata) == OK) {
1185 Return<Error4> ret = mapper->set(buffer.get(), MetadataType_Dataspace, metadata);
根据上面代码,在renderOutputBuffer中会先通过SetMetadataToGralloc4Handle把dataspace设置到mapper的metadata中,然后再把QueueBufferInput传递到BufferQueueProducer中。