Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Why do I get an IndexError when running the inference.ipynb code for inference? #563

Open
cat-and-tomato opened this issue Jan 17, 2025 · 3 comments

Comments

@cat-and-tomato
Copy link

cat-and-tomato commented Jan 17, 2025

Just the code as presented, with the error:

  Traceback (most recent call last):
    File "/data/qp/project/Yolo-World/inference.py", line 131, in <module>
      img = run_image(runner,"/data/qp/data/dataset/dog.jpeg")
    File "/data/qp/project/Yolo-World/inference.py", line 65, in run_image
      output = runner.model.test_step(data_batch)[0]
    File "/data/qp/anaconda3/envs/yw/lib/python3.9/site-packages/mmengine/model/base_model/base_model.py", line 145, in test_step
      return self._run_forward(data, mode='predict')  # type: ignore
    File "/data/qp/anaconda3/envs/yw/lib/python3.9/site-packages/mmengine/model/base_model/base_model.py", line 361, in _run_forward
      results = self(**data, mode=mode)
    File "/data/qp/anaconda3/envs/yw/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
      return self._call_impl(*args, **kwargs)
    File "/data/qp/anaconda3/envs/yw/lib/python3.9/site-packages/torch/nn/modules/module.py", line 1527, in _call_impl
      return forward_call(*args, **kwargs)
    File "/data/qp/anaconda3/envs/yw/lib/python3.9/site-packages/mmdet/models/detectors/base.py", line 94, in forward
      return self.predict(inputs, data_samples)
    File "/data/qp/project/Yolo-World/yolo_world/models/detectors/yolo_world.py", line 48, in predict
      results_list = self.bbox_head.predict(img_feats,
    File "/data/qp/project/Yolo-World/yolo_world/models/dense_heads/yolo_world_head.py", line 408, in predict
      predictions = self.predict_by_feat(*outs,
    File "/data/qp/project/Yolo-World/yolo_world/models/dense_heads/yolo_world_head.py", line 691, in predict_by_feat
      bboxes = bboxes[conf_inds, :]
  IndexError: The shape of the mask [2150400] at index 0 does not match the shape of the indexed tensor [33600, 4] at index 0
--------------------------------------------------------------------------------------------
import os

import numpy as np
import torch
from mmengine.config import Config
from mmengine.dataset import Compose
from mmengine.runner import Runner
from mmengine.runner.amp import autocast
from mmyolo.registry import RUNNERS
from torchvision.ops import nms




def colorstr(*args):
    """Wrap *string* in ANSI style escape codes for terminal logging.

    Called either as ``colorstr("text")`` (defaults to bold) or as
    ``colorstr("bold", ..., "text")`` where every argument but the last
    names a style.  Arguments are positional-only, so renaming the
    star-parameter (previously ``*input``, which shadowed the builtin)
    is backward compatible.

    Note: no reset code ("\033[0m") is appended, matching the original
    behavior — styling persists until the terminal is reset.
    """
    # Last positional argument is the payload; everything before it is a style.
    *styles, string = args if len(args) > 1 else ("bold", args[0])
    colors = {"bold": "\033[1m"}

    return "".join(colors[s] for s in styles) + f"{string}"


import PIL.Image
import cv2
import supervision as sv

# Module-level annotators from the `supervision` package, reused by every
# run_image() call to draw boxes, centered class labels, and (optionally) masks.
bounding_box_annotator = sv.BoxAnnotator()
label_annotator = sv.LabelAnnotator(text_position=sv.Position.CENTER)
mask_annotator = sv.MaskAnnotator()

# Comma-separated COCO-80 vocabulary; run_image() splits this on "," to build
# the per-class text prompts fed to the open-vocabulary detector.
class_names = ("person, bicycle, car, motorcycle, airplane, bus, train, truck, boat, "
               "traffic light, fire hydrant, stop sign, parking meter, bench, bird, "
               "cat, dog, horse, sheep, cow, elephant, bear, zebra, giraffe, "
               "backpack, umbrella, handbag, tie, suitcase, frisbee, skis, snowboard, "
               "sports ball, kite, baseball bat, baseball glove, skateboard, "
               "surfboard, tennis racket, bottle, wine glass, cup, fork, knife, "
               "spoon, bowl, banana, apple, sandwich, orange, broccoli, carrot, "
               "hot dog, pizza, donut, cake, chair, couch, potted plant, bed, "
               "dining table, toilet, tv, laptop, mouse, remote, keyboard, "
               "cell phone, microwave, oven, toaster, sink, refrigerator, book, "
               "clock, vase, scissors, teddy bear, hair drier, toothbrush")

# Example of a custom open-vocabulary prompt set; currently unused in this script.
class_names2 = ("dog, eye, tongue, ear, leash")


def run_image(
        runner,
        input_image,
        max_num_boxes=100,
        score_thr=0.05,
        nms_thr=0.5,
        output_image="output.png",
):
    """Run open-vocabulary detection on one image and save an annotated copy.

    Args:
        runner: mmengine Runner with ``.pipeline`` (test pipeline) and
            ``.model`` already attached (see the ``__main__`` block).
        input_image: path to the input image file.
        max_num_boxes: keep at most this many highest-scoring detections.
        score_thr: confidence threshold applied after NMS.
        nms_thr: IoU threshold for torchvision NMS.
        output_image: output file name; saved under ``runs/detect/``.

    Returns:
        The annotated image as a BGR numpy array (same pixels written to disk).
    """
    output_image = "runs/detect/" + output_image
    # cv2.imwrite fails silently when the target directory is missing,
    # so make sure runs/detect/ exists before writing.
    os.makedirs(os.path.dirname(output_image), exist_ok=True)
    # One single-word prompt per class, plus a trailing padding prompt.
    texts = [[t.strip()] for t in class_names.split(",")] + [[" "]]
    data_info = runner.pipeline(dict(img_id=0, img_path=input_image,
                                     texts=texts))

    data_batch = dict(
        inputs=data_info["inputs"].unsqueeze(0),
        data_samples=[data_info["data_samples"]],
    )

    with autocast(enabled=False), torch.no_grad():
        output = runner.model.test_step(data_batch)[0]
        runner.model.class_names = texts
        pred_instances = output.pred_instances

    # Post-process: NMS, then confidence filter, then cap the box count.
    keep_idxs = nms(pred_instances.bboxes, pred_instances.scores, iou_threshold=nms_thr)
    pred_instances = pred_instances[keep_idxs]
    pred_instances = pred_instances[pred_instances.scores.float() > score_thr]

    if len(pred_instances.scores) > max_num_boxes:
        indices = pred_instances.scores.float().topk(max_num_boxes)[1]
        pred_instances = pred_instances[indices]
    output.pred_instances = pred_instances

    # Move predictions to CPU/numpy for the supervision annotators.
    pred_instances = pred_instances.cpu().numpy()

    # Segmentation masks are only present for seg-capable configs.
    masks = pred_instances['masks'] if 'masks' in pred_instances else None

    detections = sv.Detections(
        xyxy=pred_instances['bboxes'],
        class_id=pred_instances['labels'],
        confidence=pred_instances['scores']
    )

    # Label ids with confidence scores, e.g. "16 0.87".
    labels = [
        f"{class_id} {confidence:0.2f}"
        for class_id, confidence
        in zip(detections.class_id, detections.confidence)
    ]

    # Draw bounding boxes and labels onto an RGB numpy canvas.
    image = PIL.Image.open(input_image)
    svimage = np.array(image)
    svimage = bounding_box_annotator.annotate(svimage, detections)
    svimage = label_annotator.annotate(svimage, detections, labels)
    if masks is not None:
        # BUG FIX: the original passed the PIL `image` here, which both mixed
        # PIL/numpy types and threw away the box/label drawings already made.
        # Annotate the running numpy canvas instead.
        svimage = mask_annotator.annotate(svimage, detections)

    # Save output image (RGB -> BGR for OpenCV).
    cv2.imwrite(output_image, svimage[:, :, ::-1])
    print(f"Results saved to {colorstr('bold', output_image)}")

    return svimage[:, :, ::-1]




if __name__ == "__main__":
    # Load the YOLO-World config.
    # NOTE(review): the reported IndexError in predict_by_feat suggests a
    # mismatch between this 1280ft LVIS config and the loaded checkpoint /
    # prompt count — confirm the config matches the weights below.
    cfg = Config.fromfile(
        "configs/pretrain/yolo_world_v2_l_vlpan_bn_2e-3_100e_4x8gpus_obj365v1_goldg_train_1280ft_lvis_minival.py"
    )
    cfg.work_dir = "."
    # Hard-coded absolute path to the pretrained checkpoint.
    cfg.load_from = "/data/qp/project/Yolo-World/pretrained_weights/yolo_world_v2_l_obj365v1_goldg_pretrain_1280ft-9babe3f6.pth"
    runner = Runner.from_cfg(cfg)
    runner.call_hook("before_run")
    runner.load_or_resume()
    # Attach the test-time data pipeline so run_image() can preprocess inputs.
    pipeline = cfg.test_dataloader.dataset.pipeline
    runner.pipeline = Compose(pipeline)

    # img = run_image(runner,"/data/qp/data/dataset/blood_mark/blood_mark_8.png")
    img = run_image(runner,"/data/qp/data/dataset/dog.jpeg")

    sv.plot_image(img)
@Kenneth-X
Copy link

same problem
did you solve it ?

@cat-and-tomato
Copy link
Author

same problem did you solve it ?

not yet

@Kenneth-X
Copy link

same problem did you solve it ?

not yet
just run simple_demo.py or image_demo.py, it works

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

2 participants