Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Why do I get an IndexError when running the inference.ipynb code for inference? #563

Open
cat-and-tomato opened this issue Jan 17, 2025 · 3 comments

Comments

@cat-and-tomato
Copy link

cat-and-tomato commented Jan 17, 2025

Just the code as presented, with the error:

  Traceback (most recent call last):
    File "/data/qp/project/Yolo-World/inference.py", line 131, in <module>
      img = run_image(runner,"/data/qp/data/dataset/dog.jpeg")
    File "/data/qp/project/Yolo-World/inference.py", line 65, in run_image
      output = runner.model.test_step(data_batch)[0]
    File "/data/qp/anaconda3/envs/yw/lib/python3.9/site-packages/mmengine/model/base_model/base_model.py", line 145, in test_step
      return self._run_forward(data, mode='predict')  # type: ignore
    File "/data/qp/anaconda3/envs/yw/lib/python3.9/site-packages/mmengine/model/base_model/base_model.py", line 361, in _run_forward
      results = self(**data, mode=mode)
    File "/data/qp/anaconda3/envs/yw/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
      return self._call_impl(*args, **kwargs)
    File "/data/qp/anaconda3/envs/yw/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
      return forward_call(*args, **kwargs)
    File "/data/qp/anaconda3/envs/yw/lib/python3.9/site-packages/mmdet/models/detectors/base.py", line 94, in forward
      return self.predict(inputs, data_samples)
    File "/data/qp/project/Yolo-World/yolo_world/models/detectors/yolo_world.py", line 48, in predict
      results_list = self.bbox_head.predict(img_feats,
    File "/data/qp/project/Yolo-World/yolo_world/models/dense_heads/yolo_world_head.py", line 408, in predict
      predictions = self.predict_by_feat(*outs,
    File "/data/qp/project/Yolo-World/yolo_world/models/dense_heads/yolo_world_head.py", line 691, in predict_by_feat
      bboxes = bboxes[conf_inds, :]
  IndexError: The shape of the mask [2150400] at index 0 does not match the shape of the indexed tensor [33600, 4] at index 0
--------------------------------------------------------------------------------------------
import os

import numpy as np
import torch
from mmengine.config import Config
from mmengine.dataset import Compose
from mmengine.runner import Runner
from mmengine.runner.amp import autocast
from mmyolo.registry import RUNNERS
from torchvision.ops import nms




def colorstr(*args):
    """Wrap *string* in ANSI style escape codes for terminal logging.

    Called either as ``colorstr("text")`` (defaults to bold) or as
    ``colorstr("bold", ..., "text")`` where every argument but the last
    names a style.  Arguments are positional-only, so renaming the
    star-parameter (previously ``*input``, which shadowed the builtin)
    is backward compatible.

    Note: no reset code ("\033[0m") is appended, matching the original
    behavior — styling persists until the terminal is reset.
    """
    # Last positional argument is the payload; everything before it is a style.
    *styles, string = args if len(args) > 1 else ("bold", args[0])
    colors = {"bold": "\033[1m"}

    return "".join(colors[s] for s in styles) + f"{string}"


import PIL.Image
import cv2
import supervision as sv

# Module-level annotators from the `supervision` package, reused by every
# run_image() call to draw boxes, centered class labels, and (optionally) masks.
bounding_box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)
mask_annotator = sv.MaskAnnotator()

# Comma-separated COCO-80 vocabulary; run_image() splits this on "," to build
# the per-class text prompts fed to the open-vocabulary detector.
class_names = ("person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, "
               "traffic light, fire hydrant, stop sign, parking meter, bench, bird, "
               "cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe, "
               "backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard, "
               "sports ball, kite, baseball bat, baseball glove, skateboard, "
               "surfboard, tennis racket, bottle, wine glass, cup, fork, knife, "
               "spoon, bowl, banana, apple, sandwich, orange, broccoli, carrot, "
               "hot dog, pizza, donut, cake, chair, couch, potted plant, bed, "
               "dining table, toilet, tv, laptop, mouse, remote, keyboard, "
               "cell phone, microwave, oven, toaster, sink, refrigerator, book, "
               "clock, vase, scissors, teddy bear, hair drier, toothbrush")

# Example of a custom open-vocabulary prompt set; currently unused in this script.
class_names2 = ("dog, eye, tongue, ear, leash")


def run_image(
        runner,
        input_image,
        max_num_boxes=100,
        score_thr=0.05,
        nms_thr=0.5,
        output_image="output.png",
):
    """Run open-vocabulary detection on one image and save an annotated copy.

    Args:
        runner: mmengine Runner with ``.pipeline`` (test pipeline) and
            ``.model`` already attached (see the ``__main__`` block).
        input_image: path to the input image file.
        max_num_boxes: keep at most this many highest-scoring detections.
        score_thr: confidence threshold applied after NMS.
        nms_thr: IoU threshold for torchvision NMS.
        output_image: output file name; saved under ``runs/detect/``.

    Returns:
        The annotated image as a BGR numpy array (same pixels written to disk).
    """
    output_image = "runs/detect/" + output_image
    # cv2.imwrite fails silently when the target directory is missing,
    # so make sure runs/detect/ exists before writing.
    os.makedirs(os.path.dirname(output_image), exist_ok=True)
    # One single-word prompt per class, plus a trailing padding prompt.
    texts = [[t.strip()] for t in class_names.split(",")] + [[" "]]
    data_info = runner.pipeline(dict(img_id=0, img_path=input_image,
                                     texts=texts))

    data_batch = dict(
        inputs=data_info["inputs"].unsqueeze(0),
        data_samples=[data_info["data_samples"]],
    )

    with autocast(enabled=False), torch.no_grad():
        output = runner.model.test_step(data_batch)[0]
        runner.model.class_names = texts
        pred_instances = output.pred_instances

    # Post-process: NMS, then confidence filter, then cap the box count.
    keep_idxs = nms(pred_instances.bboxes, pred_instances.scores, iou_threshold=nms_thr)
    pred_instances = pred_instances[keep_idxs]
    pred_instances = pred_instances[pred_instances.scores.float() > score_thr]

    if len(pred_instances.scores) > max_num_boxes:
        indices = pred_instances.scores.float().topk(max_num_boxes)[1]
        pred_instances = pred_instances[indices]
    output.pred_instances = pred_instances

    # Move predictions to CPU/numpy for the supervision annotators.
    pred_instances = pred_instances.cpu().numpy()

    # Segmentation masks are only present for seg-capable configs.
    masks = pred_instances['masks'] if 'masks' in pred_instances else None

    detections = sv.Detections(
        xyxy=pred_instances['bboxes'],
        class_id=pred_instances['labels'],
        confidence=pred_instances['scores']
    )

    # Label ids with confidence scores, e.g. "16 0.87".
    labels = [
        f"{class_id} {confidence:0.2f}"
        for class_id, confidence
        in zip(detections.class_id, detections.confidence)
    ]

    # Draw bounding boxes and labels onto an RGB numpy canvas.
    image = PIL.Image.open(input_image)
    svimage = np.array(image)
    svimage = bounding_box_annotator.annotate(svimage, detections)
    svimage = label_annotator.annotate(svimage, detections, labels)
    if masks is not None:
        # BUG FIX: the original passed the PIL `image` here, which both mixed
        # PIL/numpy types and threw away the box/label drawings already made.
        # Annotate the running numpy canvas instead.
        svimage = mask_annotator.annotate(svimage, detections)

    # Save output image (RGB -> BGR for OpenCV).
    cv2.imwrite(output_image, svimage[:, :, ::-1])
    print(f"Results saved to {colorstr('bold', output_image)}")

    return svimage[:, :, ::-1]




if __name__ == "__main__":
    # Load the YOLO-World config.
    # NOTE(review): the reported IndexError in predict_by_feat suggests a
    # mismatch between this 1280ft LVIS config and the loaded checkpoint /
    # prompt count — confirm the config matches the weights below.
    cfg = Config.fromfile(
        "configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py"
    )
    cfg.work_dir = "."
    # Hard-coded absolute path to the pretrained checkpoint.
    cfg.load_from = "/data/qp/project/Yolo-World/pretrained_weights/yolo_world_v2_l_obj365v1_goldg_pretrain_1280ft-9babe3f6.pth"
    runner = Runner.from_cfg(cfg)
    runner.call_hook("before_run")
    runner.load_or_resume()
    # Attach the test-time data pipeline so run_image() can preprocess inputs.
    pipeline = cfg.test_dataloader.dataset.pipeline
    runner.pipeline = Compose(pipeline)

    # img = run_image(runner,"/data/qp/data/dataset/blood_mark/blood_mark_8.png")
    img = run_image(runner,"/data/qp/data/dataset/dog.jpeg")

    sv.plot_image(img)
@Kenneth-X
Copy link

same problem
did you solve it ?

@cat-and-tomato
Copy link
Author

same problem did you solve it ?

not yet

@Kenneth-X
Copy link

same problem did you solve it ?

not yet
just run simple_demo.py or image_demo.py, it works

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants