Skip to content

Commit

Permalink
[CPU] optimize PagedAttention's shape inference (openvinotoolkit#23603)
Browse files Browse the repository at this point in the history
### Details:
 - *Specific shape inference for PagedAttention*
 - *...*

### Tickets:
 - *ticket-id*
  • Loading branch information
luo-cheng2021 authored and bbielawx committed Apr 12, 2024
1 parent 6ae7a10 commit c4af202
Showing 1 changed file with 19 additions and 0 deletions.
19 changes: 19 additions & 0 deletions src/plugins/intel_cpu/src/shape_inference/custom/scaled_attn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,26 @@ class SDPAShapeInfer : public ShapeInferEmptyPads {
ScaledDotProductAttentionWithKVCache::Config m_config;
};

// Shape inference for the PagedAttentionExtension op.
//
// PagedAttention's output shape is identical to the shape of its first
// ("query") input, so inference is a straight pass-through of that shape.
// No input *data* is inspected (only shapes), hence the empty port mask.
// NOTE(review): assumes output 0 is the only output this inference needs to
// produce — matches the single-element result returned here; confirm against
// the op's output count if it ever grows.
class PAShapeInfer final : public ShapeInferEmptyPads {
public:
    PAShapeInfer() = default;  // stateless; defaulted per modernize-use-equals-default

    /// @param input_shapes  per-port input shapes; front() is the query shape.
    /// @param data_dependency unused — this op's output shape never depends on input values.
    /// @return the query shape as the sole output shape, with success status.
    IShapeInfer::Result infer(const std::vector<std::reference_wrapper<const VectorDims>>& input_shapes,
                              const std::unordered_map<size_t, MemoryPtr>& data_dependency) override {
        const auto& query_dims = input_shapes.front().get();

        return {{query_dims}, ShapeInferStatus::success};
    }

    // No port carries data the shape inference must read.
    port_mask_t get_port_mask() const override {
        return EMPTY_PORT_MASK;
    }
};

ShapeInferPtr SDPAShapeInferFactory::makeShapeInfer() const {
if (m_op->get_type_name() == std::string("PagedAttentionExtension")) {
return std::make_shared<PAShapeInfer>();
}
if (auto sdpa = std::dynamic_pointer_cast<const ScaledDotProductAttentionWithKVCache>(m_op)) {
const auto& config = sdpa->get_config();
if (config.output_BLHxS == false)
Expand Down

0 comments on commit c4af202

Please sign in to comment.