#35 master

Open
ZezhengZ wants to merge 80 commits from ZezhengZ/yolov5:master into wer
  1. .github/dependabot.yml (+12 -0)
  2. .github/workflows/ci-testing.yml (+5 -4)
  3. .github/workflows/greetings.yml (+10 -9)
  4. .github/workflows/stale.yml (+1 -1)
  5. Dockerfile (+5 -6)
  6. README.md (+24 -19)
  7. data/coco.yaml (+9 -9)
  8. data/coco128.yaml (+9 -9)
  9. data/hyp.finetune.yaml (+33 -0)
  10. data/scripts/get_coco.sh (+3 -1)
  11. data/scripts/get_voc.sh (+4 -2)
  12. data/voc.yaml (+2 -2)
  13. detect.py (+9 -15)
  14. hubconf.py (+1 -1)
  15. models/common.py (+48 -18)
  16. models/experimental.py (+4 -4)
  17. models/hub/anchors.yaml (+58 -0)
  18. models/hub/yolov3-tiny.yaml (+1 -1)
  19. models/hub/yolov5-p2.yaml (+54 -0)
  20. models/hub/yolov5-p6.yaml (+56 -0)
  21. models/hub/yolov5-p7.yaml (+67 -0)
  22. models/hub/yolov5l6.yaml (+60 -0)
  23. models/hub/yolov5m6.yaml (+60 -0)
  24. models/hub/yolov5s6.yaml (+60 -0)
  25. models/hub/yolov5x6.yaml (+60 -0)
  26. models/yolo.py (+14 -13)
  27. models/yolov5l.yaml (+8 -8)
  28. models/yolov5m.yaml (+8 -8)
  29. models/yolov5s.yaml (+8 -8)
  30. models/yolov5x.yaml (+8 -8)
  31. requirements.txt (+1 -1)
  32. setup.py (+42 -0)
  33. test.py (+27 -22)
  34. train.py (+309 -585)
  35. train_with_func.py (+457 -0)
  36. tutorial.ipynb (+153 -133)
  37. utils/activations.py (+11 -1)
  38. utils/autoanchor.py (+16 -23)
  39. utils/datasets.py (+161 -65)
  40. utils/general.py (+87 -12)
  41. utils/google_utils.py (+54 -54)
  42. utils/loss.py (+129 -117)
  43. utils/metrics.py (+38 -15)
  44. utils/plots.py (+6 -5)
  45. utils/torch_utils.py (+32 -24)

.github/dependabot.yml (+12 -0)

@@ -0,0 +1,12 @@
version: 2
updates:
- package-ecosystem: pip
directory: "/"
schedule:
interval: weekly
time: "04:00"
open-pull-requests-limit: 10
reviewers:
- glenn-jocher
labels:
- dependencies

.github/workflows/ci-testing.yml (+5 -4)

@@ -66,14 +66,15 @@ jobs:
di=cpu # inference devices

# train
python train.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --cfg models/${{ matrix.model }}.yaml --epochs 1 --device $di
python train.py --img 128 --batch 16 --weights weights/${{ matrix.model }}.pt --cfg models/${{ matrix.model }}.yaml --epochs 1 --device $di
# detect
python detect.py --weights weights/${{ matrix.model }}.pt --device $di
python detect.py --weights runs/train/exp/weights/last.pt --device $di
# test
python test.py --img 256 --batch 8 --weights weights/${{ matrix.model }}.pt --device $di
python test.py --img 256 --batch 8 --weights runs/train/exp/weights/last.pt --device $di
python test.py --img 128 --batch 16 --weights weights/${{ matrix.model }}.pt --device $di
python test.py --img 128 --batch 16 --weights runs/train/exp/weights/last.pt --device $di

python hubconf.py # hub
python models/yolo.py --cfg models/${{ matrix.model }}.yaml # inspect
python models/export.py --img 256 --batch 1 --weights weights/${{ matrix.model }}.pt # export
python models/export.py --img 128 --batch 1 --weights weights/${{ matrix.model }}.pt # export
shell: bash

.github/workflows/greetings.yml (+10 -9)

@@ -10,8 +10,8 @@ jobs:
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
pr-message: |
Hello @${{ github.actor }}, thank you for submitting a PR! To allow your work to be integrated as seamlessly as possible, we advise you to:
- Verify your PR is **up-to-date with origin/master.** If your PR is behind origin/master update by running the following, replacing 'feature' with the name of your local branch:
👋 Hello @${{ github.actor }}, thank you for submitting a 🚀 PR! To allow your work to be integrated as seamlessly as possible, we advise you to:
- Verify your PR is **up-to-date with origin/master.** If your PR is behind origin/master update by running the following, replacing 'feature' with the name of your local branch:
```bash
git remote add upstream https://github.com/ultralytics/yolov5.git
git fetch upstream
@@ -19,11 +19,11 @@ jobs:
git rebase upstream/master
git push -u origin -f
```
- Verify all Continuous Integration (CI) **checks are passing**.
- Reduce changes to the absolute **minimum** required for your bug fix or feature addition. _"It is not daily increase but daily decrease, hack away the unessential. The closer to the source, the less wastage there is."_ -Bruce Lee
- Verify all Continuous Integration (CI) **checks are passing**.
- Reduce changes to the absolute **minimum** required for your bug fix or feature addition. _"It is not daily increase but daily decrease, hack away the unessential. The closer to the source, the less wastage there is."_ -Bruce Lee

issue-message: |
Hello @${{ github.actor }}, thank you for your interest in 🚀 YOLOv5! Please visit our ⭐️ [Tutorials](https://github.com/ultralytics/yolov5/wiki#tutorials) to get started, where you can find quickstart guides for simple tasks like [Custom Data Training](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data) all the way to advanced concepts like [Hyperparameter Evolution](https://github.com/ultralytics/yolov5/issues/607).
👋 Hello @${{ github.actor }}, thank you for your interest in 🚀 YOLOv5! Please visit our ⭐️ [Tutorials](https://github.com/ultralytics/yolov5/wiki#tutorials) to get started, where you can find quickstart guides for simple tasks like [Custom Data Training](https://github.com/ultralytics/yolov5/wiki/Train-Custom-Data) all the way to advanced concepts like [Hyperparameter Evolution](https://github.com/ultralytics/yolov5/issues/607).

If this is a 🐛 Bug Report, please provide screenshots and **minimum viable code to reproduce your issue**, otherwise we can not help you.

@@ -42,10 +42,11 @@ jobs:
YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):
- **Google Colab Notebook** with free GPU: <a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
- **Kaggle Notebook** with free GPU: [https://www.kaggle.com/ultralytics/yolov5](https://www.kaggle.com/ultralytics/yolov5)
- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
- **Docker Image** https://hub.docker.com/r/ultralytics/yolov5. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) ![Docker Pulls](https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker)
- **Google Colab and Kaggle** notebooks with free GPU: <a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a> <a href="https://www.kaggle.com/ultralytics/yolov5"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open In Kaggle"></a>
- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/AWS-Quickstart)
- **Docker Image**. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) <a href="https://hub.docker.com/r/ultralytics/yolov5"><img src="https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker" alt="Docker Pulls"></a>
## Status


.github/workflows/stale.yml (+1 -1)

@@ -7,7 +7,7 @@ jobs:
stale:
runs-on: ubuntu-latest
steps:
- uses: actions/stale@v1
- uses: actions/stale@v3
with:
repo-token: ${{ secrets.GITHUB_TOKEN }}
stale-issue-message: 'This issue has been automatically marked as stale because it has not had recent activity. It will be closed if no further activity occurs. Thank you for your contributions.'


Dockerfile (+5 -6)

@@ -5,10 +5,9 @@ FROM nvcr.io/nvidia/pytorch:20.12-py3
RUN apt update && apt install -y screen libgl1-mesa-glx

# Install python dependencies
RUN pip install --upgrade pip
RUN python -m pip install --upgrade pip
COPY requirements.txt .
RUN pip install -r requirements.txt
RUN pip install gsutil
RUN pip install -r requirements.txt gsutil

# Create working directory
RUN mkdir -p /usr/src/app
@@ -40,13 +39,13 @@ COPY . /usr/src/app
# sudo docker kill $(sudo docker ps -q)

# Kill all image-based
# sudo docker kill $(sudo docker ps -a -q --filter ancestor=ultralytics/yolov5:latest)
# sudo docker kill $(sudo docker ps -qa --filter ancestor=ultralytics/yolov5:latest)

# Bash into running container
# sudo docker container exec -it ba65811811ab bash
# sudo docker exec -it 5a9b5863d93d bash

# Bash into stopped container
# sudo docker commit 092b16b25c5b usr/resume && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco --entrypoint=sh usr/resume
# id=$(sudo docker ps -qa) && sudo docker start $id && sudo docker exec -it $id bash

# Send weights to GCP
# python -c "from utils.general import *; strip_optimizer('runs/train/exp0_*/weights/best.pt', 'tmp.pt')" && gsutil cp tmp.pt gs://*.pt


README.md (+24 -19)

@@ -4,28 +4,32 @@

![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg)

This repository represents Ultralytics open-source research into future object detection methods, and incorporates our lessons learned and best practices evolved over training thousands of models on custom client datasets with our previous YOLO repository https://github.com/ultralytics/yolov3. **All code and models are under active development, and are subject to modification or deletion without notice.** Use at your own risk.
This repository represents Ultralytics open-source research into future object detection methods, and incorporates lessons learned and best practices evolved over thousands of hours of training and evolution on anonymized client datasets. **All code and models are under active development, and are subject to modification or deletion without notice.** Use at your own risk.

<img src="https://user-images.githubusercontent.com/26833433/90187293-6773ba00-dd6e-11ea-8f90-cd94afc0427f.png" width="1000">** GPU Speed measures end-to-end time per image averaged over 5000 COCO val2017 images using a V100 GPU with batch size 32, and includes image preprocessing, PyTorch FP16 inference, postprocessing and NMS. EfficientDet data from [google/automl](https://github.com/google/automl) at batch size 8.
<img src="https://user-images.githubusercontent.com/26833433/103594689-455e0e00-4eae-11eb-9cdf-7d753e2ceeeb.png" width="1000">** GPU Speed measures end-to-end time per image averaged over 5000 COCO val2017 images using a V100 GPU with batch size 32, and includes image preprocessing, PyTorch FP16 inference, postprocessing and NMS. EfficientDet data from [google/automl](https://github.com/google/automl) at batch size 8.

- **January 5, 2021**: [v4.0 release](https://github.com/ultralytics/yolov5/releases/tag/v4.0): nn.SiLU() activations, [Weights & Biases](https://wandb.ai/) logging, [PyTorch Hub](https://pytorch.org/hub/ultralytics_yolov5/) integration.
- **August 13, 2020**: [v3.0 release](https://github.com/ultralytics/yolov5/releases/tag/v3.0): nn.Hardswish() activations, data autodownload, native AMP.
- **July 23, 2020**: [v2.0 release](https://github.com/ultralytics/yolov5/releases/tag/v2.0): improved model definition, training and mAP.
- **June 22, 2020**: [PANet](https://arxiv.org/abs/1803.01534) updates: new heads, reduced parameters, improved speed and mAP [364fcfd](https://github.com/ultralytics/yolov5/commit/364fcfd7dba53f46edd4f04c037a039c0a287972).
- **June 19, 2020**: [FP16](https://pytorch.org/docs/stable/nn.html#torch.nn.Module.half) as new default for smaller checkpoints and faster inference [d4c6674](https://github.com/ultralytics/yolov5/commit/d4c6674c98e19df4c40e33a777610a18d1961145).
- **June 9, 2020**: [CSP](https://github.com/WongKinYiu/CrossStagePartialNetworks) updates: improved speed, size, and accuracy (credit to @WongKinYiu for CSP).
- **May 27, 2020**: Public release. YOLOv5 models are SOTA among all known YOLO implementations.


## Pretrained Checkpoints

| Model | AP<sup>val</sup> | AP<sup>test</sup> | AP<sub>50</sub> | Speed<sub>GPU</sub> | FPS<sub>GPU</sub> || params | GFLOPS |
|---------- |------ |------ |------ | -------- | ------| ------ |------ | :------: |
| [YOLOv5s](https://github.com/ultralytics/yolov5/releases) | 37.0 | 37.0 | 56.2 | **2.4ms** | **416** || 7.5M | 17.5
| [YOLOv5m](https://github.com/ultralytics/yolov5/releases) | 44.3 | 44.3 | 63.2 | 3.4ms | 294 || 21.8M | 52.3
| [YOLOv5l](https://github.com/ultralytics/yolov5/releases) | 47.7 | 47.7 | 66.5 | 4.4ms | 227 || 47.8M | 117.2
| [YOLOv5x](https://github.com/ultralytics/yolov5/releases) | **49.2** | **49.2** | **67.7** | 6.9ms | 145 || 89.0M | 221.5
| | | | | | || |
| [YOLOv5x](https://github.com/ultralytics/yolov5/releases) + TTA|**50.8**| **50.8** | **68.9** | 25.5ms | 39 || 89.0M | 801.0
| Model | size | AP<sup>val</sup> | AP<sup>test</sup> | AP<sub>50</sub> | Speed<sub>V100</sub> | FPS<sub>V100</sub> || params | GFLOPS |
|---------- |------ |------ |------ |------ | -------- | ------| ------ |------ | :------: |
| [YOLOv5s](https://github.com/ultralytics/yolov5/releases) |640 |36.8 |36.8 |55.6 |**2.2ms** |**455** ||7.3M |17.0
| [YOLOv5m](https://github.com/ultralytics/yolov5/releases) |640 |44.5 |44.5 |63.1 |2.9ms |345 ||21.4M |51.3
| [YOLOv5l](https://github.com/ultralytics/yolov5/releases) |640 |48.1 |48.1 |66.4 |3.8ms |264 ||47.0M |115.4
| [YOLOv5x](https://github.com/ultralytics/yolov5/releases) |640 |**50.1** |**50.1** |**68.7** |6.0ms |167 ||87.7M |218.8
| | | | | | | || |
| [YOLOv5x](https://github.com/ultralytics/yolov5/releases) + TTA |832 |**51.9** |**51.9** |**69.6** |24.9ms |40 ||87.7M |1005.3

<!---
| [YOLOv5l6](https://github.com/ultralytics/yolov5/releases) |640 |49.0 |49.0 |67.4 |4.1ms |244 ||77.2M |117.7
| [YOLOv5l6](https://github.com/ultralytics/yolov5/releases) |1280 |53.0 |53.0 |70.8 |12.3ms |81 ||77.2M |117.7
--->

** AP<sup>test</sup> denotes COCO [test-dev2017](http://cocodataset.org/#upload) server results, all other AP results denote val2017 accuracy.
** All AP numbers are for single-model single-scale without ensemble or TTA. **Reproduce mAP** by `python test.py --data coco.yaml --img 640 --conf 0.001 --iou 0.65`
@@ -33,6 +37,7 @@ This repository represents Ultralytics open-source research into future object d
** All checkpoints are trained to 300 epochs with default settings and hyperparameters (no autoaugmentation).
** Test Time Augmentation ([TTA](https://github.com/ultralytics/yolov5/issues/303)) runs at 3 image sizes. **Reproduce TTA** by `python test.py --data coco.yaml --img 832 --iou 0.65 --augment`


## Requirements

Python 3.8 or later with all [requirements.txt](https://github.com/ultralytics/yolov5/blob/master/requirements.txt) dependencies installed, including `torch>=1.7`. To install run:
@@ -60,10 +65,10 @@ $ pip install -r requirements.txt

YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):

- **Google Colab Notebook** with free GPU: <a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
- **Kaggle Notebook** with free GPU: [https://www.kaggle.com/ultralytics/yolov5](https://www.kaggle.com/ultralytics/yolov5)
- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
- **Docker Image** https://hub.docker.com/r/ultralytics/yolov5. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) ![Docker Pulls](https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker)
- **Google Colab and Kaggle** notebooks with free GPU: <a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a> <a href="https://www.kaggle.com/ultralytics/yolov5"><img src="https://kaggle.com/static/images/open-in-kaggle.svg" alt="Open In Kaggle"></a>
- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)
- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/AWS-Quickstart)
- **Docker Image**. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) <a href="https://hub.docker.com/r/ultralytics/yolov5"><img src="https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker" alt="Docker Pulls"></a>


## Inference
@@ -106,7 +111,7 @@ import torch
from PIL import Image

# Model
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True) # for PIL/cv2/np inputs and NMS
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

# Images
img1 = Image.open('zidane.jpg')
@@ -114,13 +119,13 @@ img2 = Image.open('bus.jpg')
imgs = [img1, img2] # batched list of images

# Inference
prediction = model(imgs, size=640) # includes NMS
result = model(imgs)
```


## Training

Download [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) and run command below. Training times for YOLOv5s/m/l/x are 2/4/6/8 days on a single V100 (multi-GPU times faster). Use the largest `--batch-size` your GPU allows (batch sizes shown for 16 GB devices).
Run commands below to reproduce results on [COCO](https://github.com/ultralytics/yolov5/blob/master/data/scripts/get_coco.sh) dataset (dataset auto-downloads on first use). Training times for YOLOv5s/m/l/x are 2/4/6/8 days on a single V100 (multi-GPU times faster). Use the largest `--batch-size` your GPU allows (batch sizes shown for 16 GB devices).
```bash
$ python train.py --data coco.yaml --cfg yolov5s.yaml --weights '' --batch-size 64
                                         yolov5m                                40
```


data/coco.yaml (+9 -9)

@@ -18,15 +18,15 @@ test: ../coco/test-dev2017.txt # 20288 of 40670 images, submit to https://compe
nc: 80

# class names
names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush']
names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush' ]

# Print classes
# with open('data/coco.yaml') as f:


data/coco128.yaml (+9 -9)

@@ -17,12 +17,12 @@ val: ../coco128/images/train2017/ # 128 images
nc: 80

# class names
names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush']
names: [ 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
'hair drier', 'toothbrush' ]
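
A quick way to sanity-check a dataset YAML after editing is to load it and confirm the class list matches `nc`; a minimal sketch (assumes the repo root as the working directory):

```python
import yaml

with open('data/coco128.yaml') as f:
    data = yaml.load(f, Loader=yaml.SafeLoader)

assert len(data['names']) == data['nc'] == 80  # class list must match nc
print(data['train'], data['val'])
```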

data/hyp.finetune.yaml (+33 -0)

@@ -36,3 +36,36 @@ flipud: 0.00856
fliplr: 0.5
mosaic: 1.0
mixup: 0.243

# Generations: 306
# P R mAP.5 mAP.5:.95 box obj cls
# Metrics: 0.6 0.936 0.896 0.684 0.0115 0.00805 0.00146

lr0: 0.0032
lrf: 0.12
momentum: 0.843
weight_decay: 0.00036
warmup_epochs: 2.0
warmup_momentum: 0.5
warmup_bias_lr: 0.05
box: 0.0296
cls: 0.243
cls_pw: 0.631
obj: 0.301
obj_pw: 0.911
iou_t: 0.2
anchor_t: 2.91
# anchors: 3.63
fl_gamma: 0.0
hsv_h: 0.0138
hsv_s: 0.664
hsv_v: 0.464
degrees: 0.373
translate: 0.245
scale: 0.898
shear: 0.602
perspective: 0.0
flipud: 0.00856
fliplr: 0.5
mosaic: 1.0
mixup: 0.243

data/scripts/get_coco.sh (+3 -1)

@@ -20,5 +20,7 @@ f1='train2017.zip' # 19G, 118k images
f2='val2017.zip' # 1G, 5k images
f3='test2017.zip' # 7G, 41k images (optional)
for f in $f1 $f2; do
echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
echo 'Downloading' $url$f '...' && curl -L $url$f -o $f # download, (unzip, remove in background)
unzip -q $f -d $d && rm $f &
done
wait # finish background tasks

data/scripts/get_voc.sh (+4 -2)

@@ -17,9 +17,11 @@ url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images
f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
for f in $f1 $f2 $f3; do
echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
for f in $f3 $f2 $f1; do
echo 'Downloading' $url$f '...' && curl -L $url$f -o $f # download, (unzip, remove in background)
unzip -q $f -d $d && rm $f &
done
wait # finish background tasks

end=$(date +%s)
runtime=$((end - start))


data/voc.yaml (+2 -2)

@@ -17,5 +17,5 @@ val: ../VOC/images/val/ # 4952 images
nc: 20

# class names
names: ['aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor']
names: [ 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog',
'horse', 'motorbike', 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor' ]

detect.py (+9 -15)

@@ -9,8 +9,8 @@ from numpy import random

from models.experimental import attempt_load
from utils.datasets import LoadStreams, LoadImages
from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \
strip_optimizer, set_logging, increment_path
from utils.general import check_img_size, check_requirements, non_max_suppression, apply_classifier, scale_coords, \
xyxy2xywh, strip_optimizer, set_logging, increment_path
from utils.plots import plot_one_box
from utils.torch_utils import select_device, load_classifier, time_synchronized

@@ -31,7 +31,8 @@ def detect(save_img=False):

# Load model
model = attempt_load(weights, map_location=device) # load FP32 model
imgsz = check_img_size(imgsz, s=model.stride.max()) # check img_size
stride = int(model.stride.max()) # model stride
imgsz = check_img_size(imgsz, s=stride) # check img_size
if half:
model.half() # to FP16
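
The requested `--img-size` must be a multiple of the model stride, which `check_img_size` enforces by rounding up. A sketch of that rounding, assuming it mirrors `make_divisible` in `utils.general` (the real helper also warns when it adjusts the value):

```python
import math

def check_img_size(img_size, s=32):
    # Round img_size up to the nearest multiple of stride s (sketch)
    new_size = math.ceil(img_size / s) * s
    if new_size != img_size:
        print(f'WARNING: --img-size {img_size} must be a multiple of {s}, updating to {new_size}')
    return new_size

print(check_img_size(640, s=64))  # 640 (already divisible)
print(check_img_size(630, s=64))  # 640 (rounded up)
```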

@@ -46,23 +47,17 @@ def detect(save_img=False):
if webcam:
view_img = True
cudnn.benchmark = True # set True to speed up constant image size inference
dataset = LoadStreams(source, img_size=imgsz)
dataset = LoadStreams(source, img_size=imgsz, stride=stride)
else:
save_img = True
dataset = LoadImages(source, img_size=imgsz)
dataset = LoadImages(source, img_size=imgsz, stride=stride)

# Get names and colors
names = model.module.names if hasattr(model, 'module') else model.names
colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]

# Run inference
t0 = time.time()
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
for path, img, im0s, vid_cap in dataset:
img = torch.from_numpy(img).to(device)
img = img.half() if half else img.float() # uint8 to fp16/32
img /= 255.0 # 0 - 255 to 0.0 - 1.0
if img.ndimension() == 3:
img = img.unsqueeze(0)

@@ -97,7 +92,7 @@ def detect(save_img=False):
# Print results
for c in det[:, -1].unique():
n = (det[:, -1] == c).sum() # detections per class
s += f'{n} {names[int(c)]}s, ' # add to string
s += f"{n} {names[int(c)]}{'s' * (n > 1)}, " # add to string

# Write results
for *xyxy, conf, cls in reversed(det):
@@ -117,8 +112,6 @@ def detect(save_img=False):
# Stream results
if view_img:
cv2.imshow(str(p), im0)
if cv2.waitKey(1) == ord('q'): # q to quit
raise StopIteration

# Save results (image with detections)
if save_img:
@@ -164,6 +157,7 @@ if __name__ == '__main__':
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
opt = parser.parse_args()
print(opt)
check_requirements()

with torch.no_grad():
if opt.update: # update all models (to fix SourceChangeWarning)


hubconf.py (+1 -1)

@@ -137,5 +137,5 @@ if __name__ == '__main__':

imgs = [Image.open(x) for x in Path('data/images').glob('*.jpg')]
results = model(imgs)
results.show()
results.print()
results.save()

models/common.py (+48 -18)

@@ -1,6 +1,7 @@
# This file contains modules common to various models

import math

import numpy as np
import requests
import torch
@@ -30,7 +31,7 @@ class Conv(nn.Module):
super(Conv, self).__init__()
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False)
self.bn = nn.BatchNorm2d(c2)
self.act = nn.Hardswish() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())
self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity())

def forward(self, x):
return self.act(self.bn(self.conv(x)))
@@ -105,9 +106,39 @@ class Focus(nn.Module):
def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups
super(Focus, self).__init__()
self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
# self.contract = Contract(gain=2)

def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2)
return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
# return self.conv(self.contract(x))


class Contract(nn.Module):
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
def __init__(self, gain=2):
super().__init__()
self.gain = gain

def forward(self, x):
N, C, H, W = x.size() # assert (H % s == 0) and (W % s == 0), 'Indivisible gain'
s = self.gain
x = x.view(N, C, H // s, s, W // s, s) # x(1,64,40,2,40,2)
x = x.permute(0, 3, 5, 1, 2, 4).contiguous() # x(1,2,2,64,40,40)
return x.view(N, C * s * s, H // s, W // s) # x(1,256,40,40)


class Expand(nn.Module):
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
def __init__(self, gain=2):
super().__init__()
self.gain = gain

def forward(self, x):
N, C, H, W = x.size() # assert C % s ** 2 == 0, 'Indivisible gain'
s = self.gain
x = x.view(N, s, s, C // s ** 2, H, W) # x(1,2,2,16,80,80)
x = x.permute(0, 3, 4, 1, 5, 2).contiguous() # x(1,16,80,2,80,2)
return x.view(N, C // s ** 2, H * s, W * s) # x(1,16,160,160)
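
A quick shape check for the new Contract and Expand modules (a standalone sketch, assuming the two classes above are in scope):

```python
import torch

x = torch.zeros(1, 64, 80, 80)
print(Contract(gain=2)(x).shape)  # torch.Size([1, 256, 40, 40])  width-height into channels
print(Expand(gain=2)(x).shape)    # torch.Size([1, 16, 160, 160])  channels into width-height
```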


class Concat(nn.Module):
@@ -210,27 +241,29 @@ class Detections:
self.xywhn = [x / g for x, g in zip(self.xywh, gn)] # xywh normalized
self.n = len(self.pred)

def display(self, pprint=False, show=False, save=False):
def display(self, pprint=False, show=False, save=False, render=False):
colors = color_list()
for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
str = f'Image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
str = f'image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
if pred is not None:
for c in pred[:, -1].unique():
n = (pred[:, -1] == c).sum() # detections per class
str += f'{n} {self.names[int(c)]}s, ' # add to string
if show or save:
str += f"{n} {self.names[int(c)]}{'s' * (n > 1)}, " # add to string
if show or save or render:
img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img # from np
for *box, conf, cls in pred: # xyxy, confidence, class
# str += '%s %.2f, ' % (names[int(cls)], conf) # label
ImageDraw.Draw(img).rectangle(box, width=4, outline=colors[int(cls) % 10]) # plot
if pprint:
print(str.rstrip(', '))
if show:
img.show(f'image {i}') # show
if save:
f = f'results{i}.jpg'
str += f"saved to '{f}'"
img.save(f) # save
if show:
img.show(f'Image {i}') # show
if pprint:
print(str)
print(f"{'Saving' * (i == 0)} {f},", end='' if i < self.n - 1 else ' done.\n')
if render:
self.imgs[i] = np.asarray(img)

def print(self):
self.display(pprint=True) # print results
@@ -241,6 +274,10 @@ class Detections:
def save(self):
self.display(save=True) # save results

def render(self):
self.display(render=True) # render results
return self.imgs

def __len__(self):
return self.n
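
Taken together, `print()`, `save()` and the new `render()` give three ways to consume a Detections object; a minimal usage sketch via PyTorch Hub (assumes network access and the two demo images on disk):

```python
import torch

# Load a pretrained model through PyTorch Hub (downloads weights on first use)
model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True)

results = model(['zidane.jpg', 'bus.jpg'])  # autoShape returns a Detections object
results.print()               # per-image summary to stdout
results.save()                # writes results0.jpg, results1.jpg, ...
annotated = results.render()  # list of numpy arrays with boxes drawn in place
```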

@@ -253,20 +290,13 @@ class Detections:
return x


class Flatten(nn.Module):
# Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions
@staticmethod
def forward(x):
return x.view(x.size(0), -1)


class Classify(nn.Module):
# Classification head, i.e. x(b,c1,20,20) to x(b,c2)
def __init__(self, c1, c2, k=1, s=1, p=None, g=1): # ch_in, ch_out, kernel, stride, padding, groups
super(Classify, self).__init__()
self.aap = nn.AdaptiveAvgPool2d(1) # to x(b,c1,1,1)
self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g) # to x(b,c2,1,1)
self.flat = Flatten()
self.flat = nn.Flatten()

def forward(self, x):
z = torch.cat([self.aap(y) for y in (x if isinstance(x, list) else [x])], 1) # cat if list


models/experimental.py (+4 -4)

@@ -58,7 +58,7 @@ class GhostConv(nn.Module):

class GhostBottleneck(nn.Module):
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
def __init__(self, c1, c2, k, s):
def __init__(self, c1, c2, k=3, s=1): # ch_in, ch_out, kernel, stride
super(GhostBottleneck, self).__init__()
c_ = c2 // 2
self.conv = nn.Sequential(GhostConv(c1, c_, 1, 1), # pw
@@ -105,8 +105,8 @@ class Ensemble(nn.ModuleList):
for module in self:
y.append(module(x, augment)[0])
# y = torch.stack(y).max(0)[0] # max ensemble
# y = torch.cat(y, 1) # nms ensemble
y = torch.stack(y).mean(0) # mean ensemble
# y = torch.stack(y).mean(0) # mean ensemble
y = torch.cat(y, 1) # nms ensemble
return y, None # inference, train output
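
The default ensembling strategy flips from mean to NMS here; the difference in a toy sketch (shapes assumed to be (batch, boxes, outputs)):

```python
import torch

# Toy outputs from 3 models, each shaped (batch, boxes, outputs)
y = [torch.rand(1, 100, 85) for _ in range(3)]

mean_ensemble = torch.stack(y).mean(0)  # average predictions elementwise
nms_ensemble = torch.cat(y, 1)          # pool all boxes, let NMS deduplicate
print(mean_ensemble.shape, nms_ensemble.shape)  # (1, 100, 85) (1, 300, 85)
```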


@@ -119,7 +119,7 @@ def attempt_load(weights, map_location=None):

# Compatibility updates
for m in model.modules():
if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6, nn.SiLU]:
m.inplace = True # pytorch 1.7.0 compatibility
elif type(m) is Conv:
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility


models/hub/anchors.yaml (+58 -0)

@@ -0,0 +1,58 @@
# Default YOLOv5 anchors for COCO data


# P5 -------------------------------------------------------------------------------------------------------------------
# P5-640:
anchors_p5_640:
- [ 10,13, 16,30, 33,23 ] # P3/8
- [ 30,61, 62,45, 59,119 ] # P4/16
- [ 116,90, 156,198, 373,326 ] # P5/32


# P6 -------------------------------------------------------------------------------------------------------------------
# P6-640: thr=0.25: 0.9964 BPR, 5.54 anchors past thr, n=12, img_size=640, metric_all=0.281/0.716-mean/best, past_thr=0.469-mean: 9,11, 21,19, 17,41, 43,32, 39,70, 86,64, 65,131, 134,130, 120,265, 282,180, 247,354, 512,387
anchors_p6_640:
- [ 9,11, 21,19, 17,41 ] # P3/8
- [ 43,32, 39,70, 86,64 ] # P4/16
- [ 65,131, 134,130, 120,265 ] # P5/32
- [ 282,180, 247,354, 512,387 ] # P6/64

# P6-1280: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1280, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 19,27, 44,40, 38,94, 96,68, 86,152, 180,137, 140,301, 303,264, 238,542, 436,615, 739,380, 925,792
anchors_p6_1280:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64

# P6-1920: thr=0.25: 0.9950 BPR, 5.55 anchors past thr, n=12, img_size=1920, metric_all=0.281/0.714-mean/best, past_thr=0.468-mean: 28,41, 67,59, 57,141, 144,103, 129,227, 270,205, 209,452, 455,396, 358,812, 653,922, 1109,570, 1387,1187
anchors_p6_1920:
- [ 28,41, 67,59, 57,141 ] # P3/8
- [ 144,103, 129,227, 270,205 ] # P4/16
- [ 209,452, 455,396, 358,812 ] # P5/32
- [ 653,922, 1109,570, 1387,1187 ] # P6/64


# P7 -------------------------------------------------------------------------------------------------------------------
# P7-640: thr=0.25: 0.9962 BPR, 6.76 anchors past thr, n=15, img_size=640, metric_all=0.275/0.733-mean/best, past_thr=0.466-mean: 11,11, 13,30, 29,20, 30,46, 61,38, 39,92, 78,80, 146,66, 79,163, 149,150, 321,143, 157,303, 257,402, 359,290, 524,372
anchors_p7_640:
- [ 11,11, 13,30, 29,20 ] # P3/8
- [ 30,46, 61,38, 39,92 ] # P4/16
- [ 78,80, 146,66, 79,163 ] # P5/32
- [ 149,150, 321,143, 157,303 ] # P6/64
- [ 257,402, 359,290, 524,372 ] # P7/128

# P7-1280: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1280, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 19,22, 54,36, 32,77, 70,83, 138,71, 75,173, 165,159, 148,334, 375,151, 334,317, 251,626, 499,474, 750,326, 534,814, 1079,818
anchors_p7_1280:
- [ 19,22, 54,36, 32,77 ] # P3/8
- [ 70,83, 138,71, 75,173 ] # P4/16
- [ 165,159, 148,334, 375,151 ] # P5/32
- [ 334,317, 251,626, 499,474 ] # P6/64
- [ 750,326, 534,814, 1079,818 ] # P7/128

# P7-1920: thr=0.25: 0.9968 BPR, 6.71 anchors past thr, n=15, img_size=1920, metric_all=0.273/0.732-mean/best, past_thr=0.463-mean: 29,34, 81,55, 47,115, 105,124, 207,107, 113,259, 247,238, 222,500, 563,227, 501,476, 376,939, 749,711, 1126,489, 801,1222, 1618,1227
anchors_p7_1920:
- [ 29,34, 81,55, 47,115 ] # P3/8
- [ 105,124, 207,107, 113,259 ] # P4/16
- [ 247,238, 222,500, 563,227 ] # P5/32
- [ 501,476, 376,939, 749,711 ] # P6/64
- [ 1126,489, 801,1222, 1618,1227 ] # P7/128
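
The named anchor sets can be pulled out of this registry directly; a minimal sketch (assumes the repo root as the working directory, and the same SafeLoader the models now use):

```python
import yaml

with open('models/hub/anchors.yaml') as f:
    registry = yaml.load(f, Loader=yaml.SafeLoader)

anchors = registry['anchors_p6_640']
print(len(anchors), anchors[0])  # 4 output layers, first set [9, 11, 21, 19, 17, 41]
```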

models/hub/yolov3-tiny.yaml (+1 -1)

@@ -22,7 +22,7 @@ backbone:
[-1, 1, Conv, [256, 3, 1]],
[-1, 1, nn.MaxPool2d, [2, 2, 0]], # 9-P5/32
[-1, 1, Conv, [512, 3, 1]],
[-1, 1, nn.ZeroPad2d, [0, 1, 0, 1]], # 11
[-1, 1, nn.ZeroPad2d, [[0, 1, 0, 1]]], # 11
[-1, 1, nn.MaxPool2d, [2, 1, 0]], # 12
]



models/hub/yolov5-p2.yaml (+54 -0)

@@ -0,0 +1,54 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# anchors
anchors: 3

# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 7-P5/32
[ -1, 1, SPP, [ 1024, [ 5, 9, 13 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 9
]

# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 13

[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 17 (P3/8-small)

[ -1, 1, Conv, [ 128, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 2 ], 1, Concat, [ 1 ] ], # cat backbone P2
[ -1, 1, C3, [ 128, False ] ], # 21 (P2/4-xsmall)

[ -1, 1, Conv, [ 128, 3, 2 ] ],
[ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P3
[ -1, 3, C3, [ 256, False ] ], # 24 (P3/8-small)

[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 27 (P4/16-medium)

[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 1024, False ] ], # 30 (P5/32-large)

[ [ 24, 27, 30 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5)
]

models/hub/yolov5-p6.yaml (+56 -0)

@@ -0,0 +1,56 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# anchors
anchors: 3

# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]

# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15

[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19

[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)

[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)

[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)

[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)

[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

models/hub/yolov5-p7.yaml (+67 -0)

@@ -0,0 +1,67 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# anchors
anchors: 3

# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 3, C3, [ 1024 ] ],
[ -1, 1, Conv, [ 1280, 3, 2 ] ], # 11-P7/128
[ -1, 1, SPP, [ 1280, [ 3, 5 ] ] ],
[ -1, 3, C3, [ 1280, False ] ], # 13
]

# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 1024, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 10 ], 1, Concat, [ 1 ] ], # cat backbone P6
[ -1, 3, C3, [ 1024, False ] ], # 17

[ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 21

[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 25

[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 29 (P3/8-small)

[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 26 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 32 (P4/16-medium)

[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 22 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 35 (P5/32-large)

[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 18 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 38 (P6/64-xlarge)

[ -1, 1, Conv, [ 1024, 3, 2 ] ],
[ [ -1, 14 ], 1, Concat, [ 1 ] ], # cat head P7
[ -1, 3, C3, [ 1280, False ] ], # 41 (P7/128-xxlarge)

[ [ 29, 32, 35, 38, 41 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6, P7)
]

models/hub/yolov5l6.yaml (+60 -0)

@@ -0,0 +1,60 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.0 # model depth multiple
width_multiple: 1.0 # layer channel multiple

# anchors
anchors:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64

# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]

# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15

[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19

[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)

[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)

[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)

[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)

[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

models/hub/yolov5m6.yaml (+60 -0)

@@ -0,0 +1,60 @@
# parameters
nc: 80 # number of classes
depth_multiple: 0.67 # model depth multiple
width_multiple: 0.75 # layer channel multiple

# anchors
anchors:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64

# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]

# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15

[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19

[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)

[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)

[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)

[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)

[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

models/hub/yolov5s6.yaml (+60 -0)

@@ -0,0 +1,60 @@
# parameters
nc: 80 # number of classes
depth_multiple: 0.33 # model depth multiple
width_multiple: 0.50 # layer channel multiple

# anchors
anchors:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64

# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]

# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15

[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19

[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)

[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)

[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)

[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)

[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

models/hub/yolov5x6.yaml (+60 -0)

@@ -0,0 +1,60 @@
# parameters
nc: 80 # number of classes
depth_multiple: 1.33 # model depth multiple
width_multiple: 1.25 # layer channel multiple

# anchors
anchors:
- [ 19,27, 44,40, 38,94 ] # P3/8
- [ 96,68, 86,152, 180,137 ] # P4/16
- [ 140,301, 303,264, 238,542 ] # P5/32
- [ 436,615, 739,380, 925,792 ] # P6/64

# YOLOv5 backbone
backbone:
# [from, number, module, args]
[ [ -1, 1, Focus, [ 64, 3 ] ], # 0-P1/2
[ -1, 1, Conv, [ 128, 3, 2 ] ], # 1-P2/4
[ -1, 3, C3, [ 128 ] ],
[ -1, 1, Conv, [ 256, 3, 2 ] ], # 3-P3/8
[ -1, 9, C3, [ 256 ] ],
[ -1, 1, Conv, [ 512, 3, 2 ] ], # 5-P4/16
[ -1, 9, C3, [ 512 ] ],
[ -1, 1, Conv, [ 768, 3, 2 ] ], # 7-P5/32
[ -1, 3, C3, [ 768 ] ],
[ -1, 1, Conv, [ 1024, 3, 2 ] ], # 9-P6/64
[ -1, 1, SPP, [ 1024, [ 3, 5, 7 ] ] ],
[ -1, 3, C3, [ 1024, False ] ], # 11
]

# YOLOv5 head
head:
[ [ -1, 1, Conv, [ 768, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 8 ], 1, Concat, [ 1 ] ], # cat backbone P5
[ -1, 3, C3, [ 768, False ] ], # 15

[ -1, 1, Conv, [ 512, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 6 ], 1, Concat, [ 1 ] ], # cat backbone P4
[ -1, 3, C3, [ 512, False ] ], # 19

[ -1, 1, Conv, [ 256, 1, 1 ] ],
[ -1, 1, nn.Upsample, [ None, 2, 'nearest' ] ],
[ [ -1, 4 ], 1, Concat, [ 1 ] ], # cat backbone P3
[ -1, 3, C3, [ 256, False ] ], # 23 (P3/8-small)

[ -1, 1, Conv, [ 256, 3, 2 ] ],
[ [ -1, 20 ], 1, Concat, [ 1 ] ], # cat head P4
[ -1, 3, C3, [ 512, False ] ], # 26 (P4/16-medium)

[ -1, 1, Conv, [ 512, 3, 2 ] ],
[ [ -1, 16 ], 1, Concat, [ 1 ] ], # cat head P5
[ -1, 3, C3, [ 768, False ] ], # 29 (P5/32-large)

[ -1, 1, Conv, [ 768, 3, 2 ] ],
[ [ -1, 12 ], 1, Concat, [ 1 ] ], # cat head P6
[ -1, 3, C3, [ 1024, False ] ], # 32 (P6/64-xlarge)

[ [ 23, 26, 29, 32 ], 1, Detect, [ nc, anchors ] ], # Detect(P3, P4, P5, P6)
]

models/yolo.py (+14 -13)

@@ -1,18 +1,14 @@
import argparse
import logging
import math
import sys
from copy import deepcopy
from pathlib import Path

import torch
import torch.nn as nn

sys.path.append('./') # to run '$ python *.py' files in subdirectories
logger = logging.getLogger(__name__)

from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, C3, Concat, NMS, autoShape
from models.experimental import MixConv2d, CrossConv
from models.common import *
from models.experimental import *
from utils.autoanchor import check_anchor_order
from utils.general import make_divisible, check_file, set_logging
from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
@@ -75,7 +71,7 @@ class Model(nn.Module):
import yaml # for torch hub
self.yaml_file = Path(cfg).name
with open(cfg) as f:
self.yaml = yaml.load(f, Loader=yaml.FullLoader) # model dict
self.yaml = yaml.load(f, Loader=yaml.SafeLoader) # model dict

# Define model
ch = self.yaml['ch'] = self.yaml.get('ch', ch) # input channels
@@ -89,7 +85,7 @@ class Model(nn.Module):
# Build strides, anchors
m = self.model[-1] # Detect()
if isinstance(m, Detect):
s = 128 # 2x min stride
s = 256 # 2x min stride
m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))]) # forward
m.anchors /= m.stride.view(-1, 1, 1)
check_anchor_order(m)
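
Raising the dummy-forward size from 128 to 256 keeps the smallest output grid non-degenerate for the new P6/P7 heads (strides up to 64 and 128). The stride derivation in isolation (a sketch, assuming `model` is a freshly built Model with 3 input channels and Detect still in training mode, so forward returns one map per output layer):

```python
import torch

s = 256  # dummy input size, divisible by the largest stride (up to P7/128)
outs = model.forward(torch.zeros(1, 3, s, s))  # one detection map per layer
strides = [s / o.shape[-2] for o in outs]      # e.g. [8.0, 16.0, 32.0]
print(strides)
```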
@@ -109,9 +105,9 @@ class Model(nn.Module):
f = [None, 3, None] # flips (2-ud, 3-lr)
y = [] # outputs
for si, fi in zip(s, f):
xi = scale_img(x.flip(fi) if fi else x, si)
xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max()))
yi = self.forward_once(xi)[0] # forward
# cv2.imwrite('img%g.jpg' % s, 255 * xi[0].numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
# cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save
yi[..., :4] /= si # de-scale
if fi == 2:
yi[..., 1] = img_size[0] - yi[..., 1] # de-flip ud
@@ -214,7 +210,8 @@ def parse_model(d, ch): # model_dict, input_channels(3)
pass

n = max(round(n * gd), 1) if n > 1 else n # depth gain
if m in [Conv, Bottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP, C3]:
if m in [Conv, GhostConv, Bottleneck, GhostBottleneck, SPP, DWConv, MixConv2d, Focus, CrossConv, BottleneckCSP,
C3]:
c1, c2 = ch[f], args[0]

# Normal
@@ -242,13 +239,17 @@ def parse_model(d, ch): # model_dict, input_channels(3)
elif m is nn.BatchNorm2d:
args = [ch[f]]
elif m is Concat:
c2 = sum([ch[-1 if x == -1 else x + 1] for x in f])
c2 = sum([ch[x if x < 0 else x + 1] for x in f])
elif m is Detect:
args.append([ch[x + 1] for x in f])
if isinstance(args[1], int): # number of anchors
args[1] = [list(range(args[1] * 2))] * len(f)
elif m is Contract:
c2 = ch[f if f < 0 else f + 1] * args[0] ** 2
elif m is Expand:
c2 = ch[f if f < 0 else f + 1] // args[0] ** 2
else:
c2 = ch[f]
c2 = ch[f if f < 0 else f + 1]

m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args) # module
t = str(m)[8:-2].replace('__main__.', '') # module type
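
The new `anchors: 3` shorthand used by the P2/P6/P7 hub configs is handled in the Detect branch above: the integer is expanded into placeholder anchor lists, one per input layer, so the head is built with the right shapes before autoanchor supplies real values. The expansion in isolation (illustrative values, not from a specific config):

```python
f = [17, 20, 23]  # layers feeding Detect
args = [80, 3]    # [nc, anchors], with anchors given as an integer
if isinstance(args[1], int):
    args[1] = [list(range(args[1] * 2))] * len(f)
print(args[1])    # three placeholder sets: [[0, 1, 2, 3, 4, 5]] * 3
```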


models/yolov5l.yaml (+8 -8)

@@ -14,14 +14,14 @@ backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, BottleneckCSP, [128]],
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
[-1, 3, C3, [1024, False]], # 9
]

# YOLOv5 head
@@ -29,20 +29,20 @@ head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 3, C3, [512, False]], # 13

[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 3, C3, [256, False]], # 17 (P3/8-small)

[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)

[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)

[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

models/yolov5m.yaml (+8 -8)

@@ -14,14 +14,14 @@ backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, BottleneckCSP, [128]],
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
[-1, 3, C3, [1024, False]], # 9
]

# YOLOv5 head
@@ -29,20 +29,20 @@ head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 3, C3, [512, False]], # 13

[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 3, C3, [256, False]], # 17 (P3/8-small)

[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)

[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)

[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

models/yolov5s.yaml (+8 -8)

@@ -14,14 +14,14 @@ backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, BottleneckCSP, [128]],
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
[-1, 3, C3, [1024, False]], # 9
]

# YOLOv5 head
@@ -29,20 +29,20 @@ head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 3, C3, [512, False]], # 13

[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 3, C3, [256, False]], # 17 (P3/8-small)

[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)

[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)

[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

+ 8
- 8
models/yolov5x.yaml View File

@@ -14,14 +14,14 @@ backbone:
# [from, number, module, args]
[[-1, 1, Focus, [64, 3]], # 0-P1/2
[-1, 1, Conv, [128, 3, 2]], # 1-P2/4
[-1, 3, BottleneckCSP, [128]],
[-1, 3, C3, [128]],
[-1, 1, Conv, [256, 3, 2]], # 3-P3/8
[-1, 9, BottleneckCSP, [256]],
[-1, 9, C3, [256]],
[-1, 1, Conv, [512, 3, 2]], # 5-P4/16
[-1, 9, BottleneckCSP, [512]],
[-1, 9, C3, [512]],
[-1, 1, Conv, [1024, 3, 2]], # 7-P5/32
[-1, 1, SPP, [1024, [5, 9, 13]]],
[-1, 3, BottleneckCSP, [1024, False]], # 9
[-1, 3, C3, [1024, False]], # 9
]

# YOLOv5 head
@@ -29,20 +29,20 @@ head:
[[-1, 1, Conv, [512, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 6], 1, Concat, [1]], # cat backbone P4
[-1, 3, BottleneckCSP, [512, False]], # 13
[-1, 3, C3, [512, False]], # 13

[-1, 1, Conv, [256, 1, 1]],
[-1, 1, nn.Upsample, [None, 2, 'nearest']],
[[-1, 4], 1, Concat, [1]], # cat backbone P3
[-1, 3, BottleneckCSP, [256, False]], # 17 (P3/8-small)
[-1, 3, C3, [256, False]], # 17 (P3/8-small)

[-1, 1, Conv, [256, 3, 2]],
[[-1, 14], 1, Concat, [1]], # cat head P4
[-1, 3, BottleneckCSP, [512, False]], # 20 (P4/16-medium)
[-1, 3, C3, [512, False]], # 20 (P4/16-medium)

[-1, 1, Conv, [512, 3, 2]],
[[-1, 10], 1, Concat, [1]], # cat head P5
[-1, 3, BottleneckCSP, [1024, False]], # 23 (P5/32-large)
[-1, 3, C3, [1024, False]], # 23 (P5/32-large)

[[17, 20, 23], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5)
]

+ 1
- 1
requirements.txt View File

@@ -6,7 +6,7 @@ matplotlib>=3.2.2
numpy>=1.18.5
opencv-python>=4.1.2
Pillow
PyYAML>=5.3
PyYAML>=5.3.1
scipy>=1.4.1
tensorboard>=2.2
torch>=1.7.0
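
A one-line bump, but a meaningful one: 5.3.1 is, to the best of my knowledge, the PyYAML release that patched CVE-2020-1747, an arbitrary-code-execution issue reachable through FullLoader. It pairs with the loader switch in test.py below, where yaml.FullLoader becomes yaml.SafeLoader. A small sketch of the pattern the PR moves toward (the file path is illustrative):

import yaml

# SafeLoader only constructs plain Python types (dict/list/str/num);
# FullLoader could be coaxed into instantiating arbitrary objects from a
# crafted document before the 5.3.1 fix.
with open('data/coco128.yaml') as f:
    data = yaml.load(f, Loader=yaml.SafeLoader)  # equivalent to yaml.safe_load(f)
print(data['nc'], data['names'][0])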


+ 42
- 0
setup.py View File

@@ -0,0 +1,42 @@
#!/usr/bin/env python

from setuptools import setup, find_packages

exec(open('mindcv/version.py').read())

setup(
name="mindcv",
author="MindSpore Ecosystem",
author_email='mindspore-ecosystem@example.com',
url="https://github.com/mindspore-lab/mindcv",
project_urls={
'Sources': 'https://github.com/mindspore-lab/mindcv',
'Issue Tracker': 'https://github.com/mindspore-lab/mindcv/issues',
},
description="A toolbox of vision models and algorithms based on MindSpore.",
license="Apache Software License 2.0",
include_package_data=True,
packages=find_packages(include=['mindcv', 'mindcv.*']),
install_requires=[
'numpy >= 1.17.0',
'PyYAML >= 5.3',
'tqdm'
],
python_requires='>=3.7',
classifiers=[
'Development Status :: 2 - Pre-Alpha',
'Intended Audience :: Developers',
'License :: OSI Approved :: Apache Software License',
'Natural Language :: English',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
],
test_suite='tests',
tests_require=[
'pytest',
],
version=__version__,
zip_safe=False
)
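
The new setup.py makes the package pip-installable, e.g. pip install -e . for development. A hedged usage sketch once installed; the keyword names mirror the create_model() call in the new train.py further down rather than an independently verified API, and 'resnet50' is an illustrative model name:

from mindcv.models import create_model

network = create_model(model_name='resnet50', num_classes=1000, pretrained=False)
num_params = sum(p.size for p in network.get_parameters())  # same count train.py logs below
print(f'parameters: {num_params}')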

+ 27
- 22
test.py View File

@@ -11,9 +11,8 @@ from tqdm import tqdm

from models.experimental import attempt_load
from utils.datasets import create_dataloader
from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, box_iou, \
non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, set_logging, increment_path
from utils.loss import compute_loss
from utils.general import coco80_to_coco91_class, check_dataset, check_file, check_img_size, check_requirements, \
box_iou, non_max_suppression, scale_coords, xyxy2xywh, xywh2xyxy, set_logging, increment_path, colorstr
from utils.metrics import ap_per_class, ConfusionMatrix
from utils.plots import plot_images, output_to_target, plot_study_txt
from utils.torch_utils import select_device, time_synchronized
@@ -36,8 +35,8 @@ def test(data,
save_hybrid=False, # for hybrid auto-labelling
save_conf=False, # save auto-label confidences
plots=True,
log_imgs=0): # number of logged images
log_imgs=0, # number of logged images
compute_loss=None):
# Initialize/load model and set device
training = model is not None
if training: # called by train.py
@@ -68,7 +67,7 @@ def test(data,
model.eval()
is_coco = data.endswith('coco.yaml') # is COCO dataset
with open(data) as f:
data = yaml.load(f, Loader=yaml.FullLoader) # model dict
data = yaml.load(f, Loader=yaml.SafeLoader) # model dict
check_dataset(data) # check
nc = 1 if single_cls else int(data['nc']) # number of classes
iouv = torch.linspace(0.5, 0.95, 10).to(device) # iou vector for mAP@0.5:0.95
@@ -83,10 +82,11 @@ def test(data,

# Dataloader
if not training:
img = torch.zeros((1, 3, imgsz, imgsz), device=device) # init img
_ = model(img.half() if half else img) if device.type != 'cpu' else None # run once
if device.type != 'cpu':
model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once
path = data['test'] if opt.task == 'test' else data['val'] # path to val/test images
dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, pad=0.5, rect=True)[0]
dataloader = create_dataloader(path, imgsz, batch_size, model.stride.max(), opt, pad=0.5, rect=True,
prefix=colorstr('test: ' if opt.task == 'test' else 'val: '))[0]

seen = 0
confusion_matrix = ConfusionMatrix(nc=nc)
@@ -110,8 +110,8 @@ def test(data,
t0 += time_synchronized() - t

# Compute loss
if training:
loss += compute_loss([x.float() for x in train_out], targets, model)[1][:3] # box, obj, cls
if compute_loss:
loss += compute_loss([x.float() for x in train_out], targets)[1][:3] # box, obj, cls

# Run NMS
targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels
@@ -178,7 +178,7 @@ def test(data,
tbox = xywh2xyxy(labels[:, 1:5])
scale_coords(img[si].shape[1:], tbox, shapes[si][0], shapes[si][1]) # native-space labels
if plots:
confusion_matrix.process_batch(pred, torch.cat((labels[:, 0:1], tbox), 1))
confusion_matrix.process_batch(predn, torch.cat((labels[:, 0:1], tbox), 1))

# Per target class
for cls in torch.unique(tcls_tensor):
@@ -215,7 +215,7 @@ def test(data,
stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy
if len(stats) and stats[0].any():
p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
p, r, ap50, ap = p[:, 0], r[:, 0], ap[:, 0], ap.mean(1) # [P, R, AP@0.5, AP@0.5:0.95]
ap50, ap = ap[:, 0], ap.mean(1) # AP@0.5, AP@0.5:0.95
mp, mr, map50, map = p.mean(), r.mean(), ap50.mean(), ap.mean()
nt = np.bincount(stats[3].astype(np.int64), minlength=nc) # number of targets per class
else:
@@ -226,7 +226,7 @@ def test(data,
print(pf % ('all', seen, nt.sum(), mp, mr, map50, map))

# Print results per class
if verbose and nc > 1 and len(stats):
if (verbose or (nc < 50 and not training)) and nc > 1 and len(stats):
for i, c in enumerate(ap_class):
print(pf % (names[c], seen, nt[c], p[i], r[i], ap50[i], ap[i]))

@@ -239,8 +239,8 @@ def test(data,
if plots:
confusion_matrix.plot(save_dir=save_dir, names=list(names.values()))
if wandb and wandb.run:
wandb.log({"Images": wandb_images})
wandb.log({"Validation": [wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]})
val_batches = [wandb.Image(str(f), caption=f.name) for f in sorted(save_dir.glob('test*.jpg'))]
wandb.log({"Images": wandb_images, "Validation": val_batches}, commit=False)

# Save JSON
if save_json and len(jdict):
@@ -302,6 +302,7 @@ if __name__ == '__main__':
opt.save_json |= opt.data.endswith('coco.yaml')
opt.data = check_file(opt.data) # check file
print(opt)
check_requirements()

if opt.task in ['val', 'test']: # run normally
test(opt.data,
@@ -319,16 +320,20 @@ if __name__ == '__main__':
save_conf=opt.save_conf,
)

elif opt.task == 'speed': # speed benchmarks
for w in opt.weights:
test(opt.data, w, opt.batch_size, opt.img_size, 0.25, 0.45, save_json=False, plots=False)

elif opt.task == 'study': # run over a range of settings and save/plot
for weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
f = 'study_%s_%s.txt' % (Path(opt.data).stem, Path(weights).stem) # filename to save to
x = list(range(320, 800, 64)) # x axis
x = list(range(256, 1536 + 128, 128)) # x axis (image sizes)
for w in opt.weights:
f = f'study_{Path(opt.data).stem}_{Path(w).stem}.txt' # filename to save to
y = [] # y axis
for i in x: # img-size
print('\nRunning %s point %s...' % (f, i))
r, _, t = test(opt.data, weights, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json,
print(f'\nRunning {f} point {i}...')
r, _, t = test(opt.data, w, opt.batch_size, i, opt.conf_thres, opt.iou_thres, opt.save_json,
plots=False)
y.append(r + t) # results and times
np.savetxt(f, y, fmt='%10.4g') # save
os.system('zip -r study.zip study_*.txt')
plot_study_txt(f, x) # plot
plot_study_txt(x=x) # plot
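
The test.py changes amount to dependency inversion: instead of importing compute_loss and gating on `training`, test() now takes an optional compute_loss callable and accumulates validation loss only when one is supplied. A hedged sketch of the new contract as inferred from the hunks above; `ComputeLoss` is assumed from the reworked utils/loss.py in this PR's file list (+129 -117), and total_batch_size/imgsz_test/ema/testloader are the usual train.py locals:

import test
from utils.loss import ComputeLoss  # assumed export of the reworked utils/loss.py

compute_loss = ComputeLoss(model)  # closes over the model's hyp/anchors
# the callable must satisfy: compute_loss(train_out, targets) -> (loss, loss_items),
# with loss_items[:3] being the box/obj/cls terms accumulated above
results, maps, times = test.test(opt.data,
                                 batch_size=total_batch_size,
                                 imgsz=imgsz_test,
                                 model=ema.ema,
                                 dataloader=testloader,
                                 compute_loss=compute_loss)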

+ 309
- 585
train.py View File

@@ -1,594 +1,318 @@
import argparse
import logging
import math
''' Model training pipeline '''
import os
import logging
import mindspore as ms
import random
import time
from pathlib import Path
from threading import Thread
from warnings import warn

import numpy as np
import torch.distributed as dist
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.optim.lr_scheduler as lr_scheduler
import torch.utils.data
import yaml
from torch.cuda import amp
from torch.nn.parallel import DistributedDataParallel as DDP
from torch.utils.tensorboard import SummaryWriter
from tqdm import tqdm

import test # import test.py to get mAP after each epoch
from models.experimental import attempt_load
from models.yolo import Model
from utils.autoanchor import check_anchors
from utils.datasets import create_dataloader
from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
fitness, strip_optimizer, get_latest_run, check_dataset, check_file, check_git_status, check_img_size, \
print_mutation, set_logging
from utils.google_utils import attempt_download
from utils.loss import compute_loss
from utils.plots import plot_images, plot_labels, plot_results, plot_evolution
from utils.torch_utils import ModelEMA, select_device, intersect_dicts, torch_distributed_zero_first

logger = logging.getLogger(__name__)

try:
import wandb
except ImportError:
wandb = None
logger.info("Install Weights & Biases for experiment logging via 'pip install wandb' (recommended)")


def train(hyp, opt, device, tb_writer=None, wandb=None):
logger.info(f'Hyperparameters {hyp}')
save_dir, epochs, batch_size, total_batch_size, weights, rank = \
Path(opt.save_dir), opt.epochs, opt.batch_size, opt.total_batch_size, opt.weights, opt.global_rank

# Directories
wdir = save_dir / 'weights'
wdir.mkdir(parents=True, exist_ok=True) # make dir
last = wdir / 'last.pt'
best = wdir / 'best.pt'
results_file = save_dir / 'results.txt'

# Save run settings
with open(save_dir / 'hyp.yaml', 'w') as f:
yaml.dump(hyp, f, sort_keys=False)
with open(save_dir / 'opt.yaml', 'w') as f:
yaml.dump(vars(opt), f, sort_keys=False)

# Configure
plots = not opt.evolve # create plots
cuda = device.type != 'cpu'
init_seeds(2 + rank)
with open(opt.data) as f:
data_dict = yaml.load(f, Loader=yaml.FullLoader) # data dict
with torch_distributed_zero_first(rank):
check_dataset(data_dict) # check
train_path = data_dict['train']
test_path = data_dict['val']
nc = 1 if opt.single_cls else int(data_dict['nc']) # number of classes
names = ['item'] if opt.single_cls and len(data_dict['names']) != 1 else data_dict['names'] # class names
assert len(names) == nc, '%g names found for nc=%g dataset in %s' % (len(names), nc, opt.data) # check

# Model
pretrained = weights.endswith('.pt')
if pretrained:
with torch_distributed_zero_first(rank):
attempt_download(weights) # download if not found locally
ckpt = torch.load(weights, map_location=device) # load checkpoint
if hyp.get('anchors'):
ckpt['model'].yaml['anchors'] = round(hyp['anchors']) # force autoanchor
model = Model(opt.cfg or ckpt['model'].yaml, ch=3, nc=nc).to(device) # create
exclude = ['anchor'] if opt.cfg or hyp.get('anchors') else [] # exclude keys
state_dict = ckpt['model'].float().state_dict() # to FP32
state_dict = intersect_dicts(state_dict, model.state_dict(), exclude=exclude) # intersect
model.load_state_dict(state_dict, strict=False) # load
logger.info('Transferred %g/%g items from %s' % (len(state_dict), len(model.state_dict()), weights)) # report
from mindspore import nn, Tensor
from mindspore import FixedLossScaleManager, Model
from mindspore.communication import init, get_rank, get_group_size
import mindspore.dataset.transforms as transforms

from mindcv.models import create_model
from mindcv.data import create_dataset, create_transforms, create_loader
from mindcv.loss import create_loss
from mindcv.optim import create_optimizer
from mindcv.scheduler import create_scheduler
from mindcv.utils import StateMonitor, AllReduceSum, TrainOneStepWithEMA
from mindcv.utils.random import set_seed
from config import parse_args

import moxing as mox


# TODO: arg parser already has a logger
logger = logging.getLogger('train')
logger.setLevel(logging.INFO)
h1 = logging.StreamHandler()
formatter1 = logging.Formatter('%(message)s',)
logger.addHandler(h1)
h1.setFormatter(formatter1)

def train(args):
''' main train function'''

ms.set_context(mode=args.mode)
if args.distribute:
init()
device_num = get_group_size()
rank_id = get_rank()
ms.set_auto_parallel_context(device_num=device_num,
parallel_mode='data_parallel',
gradients_mean=True)
else:
model = Model(opt.cfg, ch=3, nc=nc).to(device) # create

# Freeze
freeze = [] # parameter names to freeze (full or partial)
for k, v in model.named_parameters():
v.requires_grad = True # train all layers
if any(x in k for x in freeze):
print('freezing %s' % k)
v.requires_grad = False

# Optimizer
nbs = 64 # nominal batch size
accumulate = max(round(nbs / total_batch_size), 1) # accumulate loss before optimizing
hyp['weight_decay'] *= total_batch_size * accumulate / nbs # scale weight_decay

pg0, pg1, pg2 = [], [], [] # optimizer parameter groups
for k, v in model.named_modules():
if hasattr(v, 'bias') and isinstance(v.bias, nn.Parameter):
pg2.append(v.bias) # biases
if isinstance(v, nn.BatchNorm2d):
pg0.append(v.weight) # no decay
elif hasattr(v, 'weight') and isinstance(v.weight, nn.Parameter):
pg1.append(v.weight) # apply decay

if opt.adam:
optimizer = optim.Adam(pg0, lr=hyp['lr0'], betas=(hyp['momentum'], 0.999)) # adjust beta1 to momentum
device_num = None
rank_id = None

set_seed(args.seed, rank_id)

# create dataset
dataset_train = create_dataset(
name=args.dataset,
root=args.data_dir,
split=args.train_split,
shuffle=args.shuffle,
num_samples=args.num_samples,
num_shards=device_num,
shard_id=rank_id,
num_parallel_workers=args.num_parallel_workers,
download=args.dataset_download,
num_aug_repeats=args.aug_repeats)

if args.num_classes is None:
num_classes = dataset_train.num_classes()
else:
optimizer = optim.SGD(pg0, lr=hyp['lr0'], momentum=hyp['momentum'], nesterov=True)

optimizer.add_param_group({'params': pg1, 'weight_decay': hyp['weight_decay']}) # add pg1 with weight_decay
optimizer.add_param_group({'params': pg2}) # add pg2 (biases)
logger.info('Optimizer groups: %g .bias, %g conv.weight, %g other' % (len(pg2), len(pg1), len(pg0)))
del pg0, pg1, pg2

# Scheduler https://arxiv.org/pdf/1812.01187.pdf
# https://pytorch.org/docs/stable/_modules/torch/optim/lr_scheduler.html#OneCycleLR
lf = lambda x: ((1 + math.cos(x * math.pi / epochs)) / 2) * (1 - hyp['lrf']) + hyp['lrf'] # cosine
scheduler = lr_scheduler.LambdaLR(optimizer, lr_lambda=lf)
# plot_lr_scheduler(optimizer, scheduler, epochs)

# Logging
if wandb and wandb.run is None:
opt.hyp = hyp # add hyperparameters
wandb_run = wandb.init(config=opt, resume="allow",
project='YOLOv5' if opt.project == 'runs/train' else Path(opt.project).stem,
name=save_dir.stem,
id=ckpt.get('wandb_id') if 'ckpt' in locals() else None)
loggers = {'wandb': wandb} # loggers dict

# Resume
start_epoch, best_fitness = 0, 0.0
if pretrained:
# Optimizer
if ckpt['optimizer'] is not None:
optimizer.load_state_dict(ckpt['optimizer'])
best_fitness = ckpt['best_fitness']

# Results
if ckpt.get('training_results') is not None:
with open(results_file, 'w') as file:
file.write(ckpt['training_results']) # write results.txt

# Epochs
start_epoch = ckpt['epoch'] + 1
if opt.resume:
assert start_epoch > 0, '%s training to %g epochs is finished, nothing to resume.' % (weights, epochs)
if epochs < start_epoch:
logger.info('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' %
(weights, ckpt['epoch'], epochs))
epochs += ckpt['epoch'] # finetune additional epochs

del ckpt, state_dict

# Image sizes
gs = int(max(model.stride)) # grid size (max stride)
imgsz, imgsz_test = [check_img_size(x, gs) for x in opt.img_size] # verify imgsz are gs-multiples

# DP mode
if cuda and rank == -1 and torch.cuda.device_count() > 1:
model = torch.nn.DataParallel(model)

# SyncBatchNorm
if opt.sync_bn and cuda and rank != -1:
model = torch.nn.SyncBatchNorm.convert_sync_batchnorm(model).to(device)
logger.info('Using SyncBatchNorm()')

# EMA
ema = ModelEMA(model) if rank in [-1, 0] else None

# DDP mode
if cuda and rank != -1:
model = DDP(model, device_ids=[opt.local_rank], output_device=opt.local_rank)

# Trainloader
dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt,
hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect, rank=rank,
world_size=opt.world_size, workers=opt.workers,
image_weights=opt.image_weights)
mlc = np.concatenate(dataset.labels, 0)[:, 0].max() # max label class
nb = len(dataloader) # number of batches
assert mlc < nc, 'Label class %g exceeds nc=%g in %s. Possible class labels are 0-%g' % (mlc, nc, opt.data, nc - 1)

# Process 0
if rank in [-1, 0]:
ema.updates = start_epoch * nb // accumulate # set EMA updates
testloader = create_dataloader(test_path, imgsz_test, total_batch_size, gs, opt, # testloader
hyp=hyp, cache=opt.cache_images and not opt.notest, rect=True,
rank=-1, world_size=opt.world_size, workers=opt.workers, pad=0.5)[0]

if not opt.resume:
labels = np.concatenate(dataset.labels, 0)
c = torch.tensor(labels[:, 0]) # classes
# cf = torch.bincount(c.long(), minlength=nc) + 1. # frequency
# model._initialize_biases(cf.to(device))
if plots:
plot_labels(labels, save_dir, loggers)
if tb_writer:
tb_writer.add_histogram('classes', c, 0)

# Anchors
if not opt.noautoanchor:
check_anchors(dataset, model=model, thr=hyp['anchor_t'], imgsz=imgsz)

# Model parameters
hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset
model.nc = nc # attach number of classes to model
model.hyp = hyp # attach hyperparameters to model
model.gr = 1.0 # iou loss ratio (obj_loss = 1.0 or iou)
model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) * nc # attach class weights
model.names = names

# Start training
t0 = time.time()
nw = max(round(hyp['warmup_epochs'] * nb), 1000) # number of warmup iterations, max(3 epochs, 1k iterations)
# nw = min(nw, (epochs - start_epoch) / 2 * nb) # limit warmup to < 1/2 of training
maps = np.zeros(nc) # mAP per class
results = (0, 0, 0, 0, 0, 0, 0) # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
scheduler.last_epoch = start_epoch - 1 # do not move
scaler = amp.GradScaler(enabled=cuda)
logger.info('Image sizes %g train, %g test\n'
'Using %g dataloader workers\nLogging results to %s\n'
'Starting training for %g epochs...' % (imgsz, imgsz_test, dataloader.num_workers, save_dir, epochs))
for epoch in range(start_epoch, epochs): # epoch ------------------------------------------------------------------
model.train()

# Update image weights (optional)
if opt.image_weights:
# Generate indices
if rank in [-1, 0]:
cw = model.class_weights.cpu().numpy() * (1 - maps) ** 2 / nc # class weights
iw = labels_to_image_weights(dataset.labels, nc=nc, class_weights=cw) # image weights
dataset.indices = random.choices(range(dataset.n), weights=iw, k=dataset.n) # rand weighted idx
# Broadcast if DDP
if rank != -1:
indices = (torch.tensor(dataset.indices) if rank == 0 else torch.zeros(dataset.n)).int()
dist.broadcast(indices, 0)
if rank != 0:
dataset.indices = indices.cpu().numpy()

# Update mosaic border
# b = int(random.uniform(0.25 * imgsz, 0.75 * imgsz + gs) // gs * gs)
# dataset.mosaic_border = [b - imgsz, -b] # height, width borders

mloss = torch.zeros(4, device=device) # mean losses
if rank != -1:
dataloader.sampler.set_epoch(epoch)
pbar = enumerate(dataloader)
logger.info(('\n' + '%10s' * 8) % ('Epoch', 'gpu_mem', 'box', 'obj', 'cls', 'total', 'targets', 'img_size'))
if rank in [-1, 0]:
pbar = tqdm(pbar, total=nb) # progress bar
optimizer.zero_grad()
for i, (imgs, targets, paths, _) in pbar: # batch -------------------------------------------------------------
ni = i + nb * epoch # number integrated batches (since train start)
imgs = imgs.to(device, non_blocking=True).float() / 255.0 # uint8 to float32, 0-255 to 0.0-1.0

# Warmup
if ni <= nw:
xi = [0, nw] # x interp
# model.gr = np.interp(ni, xi, [0.0, 1.0]) # iou loss ratio (obj_loss = 1.0 or iou)
accumulate = max(1, np.interp(ni, xi, [1, nbs / total_batch_size]).round())
for j, x in enumerate(optimizer.param_groups):
# bias lr falls from 0.1 to lr0, all other lrs rise from 0.0 to lr0
x['lr'] = np.interp(ni, xi, [hyp['warmup_bias_lr'] if j == 2 else 0.0, x['initial_lr'] * lf(epoch)])
if 'momentum' in x:
x['momentum'] = np.interp(ni, xi, [hyp['warmup_momentum'], hyp['momentum']])

# Multi-scale
if opt.multi_scale:
sz = random.randrange(imgsz * 0.5, imgsz * 1.5 + gs) // gs * gs # size
sf = sz / max(imgs.shape[2:]) # scale factor
if sf != 1:
ns = [math.ceil(x * sf / gs) * gs for x in imgs.shape[2:]] # new shape (stretched to gs-multiple)
imgs = F.interpolate(imgs, size=ns, mode='bilinear', align_corners=False)

# Forward
with amp.autocast(enabled=cuda):
pred = model(imgs) # forward
loss, loss_items = compute_loss(pred, targets.to(device), model) # loss scaled by batch_size
if rank != -1:
loss *= opt.world_size # gradient averaged between devices in DDP mode

# Backward
scaler.scale(loss).backward()

# Optimize
if ni % accumulate == 0:
scaler.step(optimizer) # optimizer.step
scaler.update()
optimizer.zero_grad()
if ema:
ema.update(model)

# Print
if rank in [-1, 0]:
mloss = (mloss * i + loss_items) / (i + 1) # update mean losses
mem = '%.3gG' % (torch.cuda.memory_reserved() / 1E9 if torch.cuda.is_available() else 0) # (GB)
s = ('%10s' * 2 + '%10.4g' * 6) % (
'%g/%g' % (epoch, epochs - 1), mem, *mloss, targets.shape[0], imgs.shape[-1])
pbar.set_description(s)

# Plot
if plots and ni < 3:
f = save_dir / f'train_batch{ni}.jpg' # filename
Thread(target=plot_images, args=(imgs, targets, paths, f), daemon=True).start()
# if tb_writer:
# tb_writer.add_image(f, result, dataformats='HWC', global_step=epoch)
# tb_writer.add_graph(model, imgs) # add model to tensorboard
elif plots and ni == 3 and wandb:
wandb.log({"Mosaics": [wandb.Image(str(x), caption=x.name) for x in save_dir.glob('train*.jpg')]})

# end batch ------------------------------------------------------------------------------------------------
# end epoch ----------------------------------------------------------------------------------------------------

# Scheduler
lr = [x['lr'] for x in optimizer.param_groups] # for tensorboard
scheduler.step()

# DDP process 0 or single-GPU
if rank in [-1, 0]:
# mAP
if ema:
ema.update_attr(model, include=['yaml', 'nc', 'hyp', 'gr', 'names', 'stride', 'class_weights'])
final_epoch = epoch + 1 == epochs
if not opt.notest or final_epoch: # Calculate mAP
results, maps, times = test.test(opt.data,
batch_size=total_batch_size,
imgsz=imgsz_test,
model=ema.ema,
single_cls=opt.single_cls,
dataloader=testloader,
save_dir=save_dir,
plots=plots and final_epoch,
log_imgs=opt.log_imgs if wandb else 0)

# Write
with open(results_file, 'a') as f:
f.write(s + '%10.4g' * 7 % results + '\n') # P, R, mAP@.5, mAP@.5-.95, val_loss(box, obj, cls)
if len(opt.name) and opt.bucket:
os.system('gsutil cp %s gs://%s/results/results%s.txt' % (results_file, opt.bucket, opt.name))

# Log
tags = ['train/box_loss', 'train/obj_loss', 'train/cls_loss', # train loss
'metrics/precision', 'metrics/recall', 'metrics/mAP_0.5', 'metrics/mAP_0.5:0.95',
'val/box_loss', 'val/obj_loss', 'val/cls_loss', # val loss
'x/lr0', 'x/lr1', 'x/lr2'] # params
for x, tag in zip(list(mloss[:-1]) + list(results) + lr, tags):
if tb_writer:
tb_writer.add_scalar(tag, x, epoch) # tensorboard
if wandb:
wandb.log({tag: x}) # W&B

# Update best mAP
fi = fitness(np.array(results).reshape(1, -1)) # weighted combination of [P, R, mAP@.5, mAP@.5-.95]
if fi > best_fitness:
best_fitness = fi

# Save model
save = (not opt.nosave) or (final_epoch and not opt.evolve)
if save:
with open(results_file, 'r') as f: # create checkpoint
ckpt = {'epoch': epoch,
'best_fitness': best_fitness,
'training_results': f.read(),
'model': ema.ema,
'optimizer': None if final_epoch else optimizer.state_dict(),
'wandb_id': wandb_run.id if wandb else None}

# Save last, best and delete
torch.save(ckpt, last)
if best_fitness == fi:
torch.save(ckpt, best)
del ckpt
# end epoch ----------------------------------------------------------------------------------------------------
# end training

if rank in [-1, 0]:
# Strip optimizers
final = best if best.exists() else last # final model
for f in [last, best]:
if f.exists():
strip_optimizer(f) # strip optimizers
if opt.bucket:
os.system(f'gsutil cp {final} gs://{opt.bucket}/weights') # upload

# Plots
if plots:
plot_results(save_dir=save_dir) # save as results.png
if wandb:
files = ['results.png', 'precision_recall_curve.png', 'confusion_matrix.png']
wandb.log({"Results": [wandb.Image(str(save_dir / f), caption=f) for f in files
if (save_dir / f).exists()]})
if opt.log_artifacts:
wandb.log_artifact(artifact_or_path=str(final), type='model', name=save_dir.stem)

# Test best.pt
logger.info('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
if opt.data.endswith('coco.yaml') and nc == 80: # if COCO
for conf, iou, save_json in ([0.25, 0.45, False], [0.001, 0.65, True]): # speed, mAP tests
results, _, _ = test.test(opt.data,
batch_size=total_batch_size,
imgsz=imgsz_test,
conf_thres=conf,
iou_thres=iou,
model=attempt_load(final, device).half(),
single_cls=opt.single_cls,
dataloader=testloader,
save_dir=save_dir,
save_json=save_json,
plots=False)

num_classes = args.num_classes

# create transforms
transform_list = create_transforms(
dataset_name=args.dataset,
is_training=True,
image_resize=args.image_resize,
scale=args.scale,
ratio=args.ratio,
hflip=args.hflip,
vflip=args.vflip,
color_jitter=args.color_jitter,
interpolation=args.interpolation,
auto_augment=args.auto_augment,
mean=args.mean,
std=args.std,
re_prob=args.re_prob,
re_scale=args.re_scale,
re_ratio=args.re_ratio,
re_value=args.re_value,
re_max_attempts=args.re_max_attempts
)
target_transform = transforms.OneHot(num_classes) if args.loss == 'BCE' else None

# load dataset
loader_train = create_loader(
dataset=dataset_train,
batch_size=args.batch_size,
drop_remainder=args.drop_remainder,
is_training=True,
mixup=args.mixup,
cutmix=args.cutmix,
cutmix_prob=args.cutmix_prob,
num_classes=num_classes,
transform=transform_list,
target_transform=target_transform,
num_parallel_workers=args.num_parallel_workers,
)

if args.val_while_train:
dataset_eval = create_dataset(
name=args.dataset,
root=args.data_dir,
split=args.val_split,
num_shards=device_num,
shard_id=rank_id,
num_parallel_workers=args.num_parallel_workers,
download=args.dataset_download)

transform_list_eval = create_transforms(
dataset_name=args.dataset,
is_training=False,
image_resize=args.image_resize,
crop_pct=args.crop_pct,
interpolation=args.interpolation,
mean=args.mean,
std=args.std
)

loader_eval = create_loader(
dataset=dataset_eval,
batch_size=args.batch_size,
drop_remainder=False,
is_training=False,
transform=transform_list_eval,
target_transform=target_transform,
num_parallel_workers=args.num_parallel_workers,
)
# validation dataset count
eval_count = dataset_eval.get_dataset_size()
if args.distribute:
all_reduce = AllReduceSum()
eval_count = all_reduce(Tensor(eval_count, ms.int32))
else:
dist.destroy_process_group()

wandb.run.finish() if wandb and wandb.run else None
torch.cuda.empty_cache()
return results


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--weights', type=str, default='yolov5s.pt', help='initial weights path')
parser.add_argument('--cfg', type=str, default='', help='model.yaml path')
parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path')
parser.add_argument('--hyp', type=str, default='data/hyp.scratch.yaml', help='hyperparameters path')
parser.add_argument('--epochs', type=int, default=300)
parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs')
parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='[train, test] image sizes')
parser.add_argument('--rect', action='store_true', help='rectangular training')
parser.add_argument('--resume', nargs='?', const=True, default=False, help='resume most recent training')
parser.add_argument('--nosave', action='store_true', help='only save final checkpoint')
parser.add_argument('--notest', action='store_true', help='only test final epoch')
parser.add_argument('--noautoanchor', action='store_true', help='disable autoanchor check')
parser.add_argument('--evolve', action='store_true', help='evolve hyperparameters')
parser.add_argument('--bucket', type=str, default='', help='gsutil bucket')
parser.add_argument('--cache-images', action='store_true', help='cache images for faster training')
parser.add_argument('--image-weights', action='store_true', help='use weighted image selection for training')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%%')
parser.add_argument('--single-cls', action='store_true', help='train multi-class data as single-class')
parser.add_argument('--adam', action='store_true', help='use torch.optim.Adam() optimizer')
parser.add_argument('--sync-bn', action='store_true', help='use SyncBatchNorm, only available in DDP mode')
parser.add_argument('--local_rank', type=int, default=-1, help='DDP parameter, do not modify')
parser.add_argument('--log-imgs', type=int, default=16, help='number of images for W&B logging, max 100')
parser.add_argument('--log-artifacts', action='store_true', help='log artifacts, i.e. final trained model')
parser.add_argument('--workers', type=int, default=8, help='maximum number of dataloader workers')
parser.add_argument('--project', default='runs/train', help='save to project/name')
parser.add_argument('--name', default='exp', help='save to project/name')
parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
opt = parser.parse_args()

# Set DDP variables
opt.total_batch_size = opt.batch_size
opt.world_size = int(os.environ['WORLD_SIZE']) if 'WORLD_SIZE' in os.environ else 1
opt.global_rank = int(os.environ['RANK']) if 'RANK' in os.environ else -1
set_logging(opt.global_rank)
if opt.global_rank in [-1, 0]:
check_git_status()

# Resume
if opt.resume: # resume an interrupted run
ckpt = opt.resume if isinstance(opt.resume, str) else get_latest_run() # specified or most recent path
assert os.path.isfile(ckpt), 'ERROR: --resume checkpoint does not exist'
with open(Path(ckpt).parent.parent / 'opt.yaml') as f:
opt = argparse.Namespace(**yaml.load(f, Loader=yaml.FullLoader)) # replace
opt.cfg, opt.weights, opt.resume = '', ckpt, True
logger.info('Resuming training from %s' % ckpt)
loader_eval = None

num_batches = loader_train.get_dataset_size()
# Train dataset count
train_count = dataset_train.get_dataset_size()
if args.distribute:
all_reduce = AllReduceSum()
train_count = all_reduce(Tensor(train_count, ms.int32))

# create model
network = create_model(model_name=args.model,
num_classes=num_classes,
in_channels=args.in_channels,
drop_rate=args.drop_rate,
drop_path_rate=args.drop_path_rate,
pretrained=args.pretrained,
checkpoint_path=args.ckpt_path,
use_ema=args.use_ema)
num_params = sum([param.size for param in network.get_parameters()])

# create loss
loss = create_loss(name=args.loss,
reduction=args.reduction,
label_smoothing=args.label_smoothing,
aux_factor=args.aux_factor)

# create learning rate schedule
lr_scheduler = create_scheduler(num_batches,
scheduler=args.scheduler,
lr=args.lr,
min_lr=args.min_lr,
warmup_epochs=args.warmup_epochs,
warmup_factor=args.warmup_factor,
decay_epochs=args.decay_epochs,
decay_rate=args.decay_rate,
milestones=args.multi_step_decay_milestones,
num_epochs=args.epoch_size,
lr_epoch_stair=args.lr_epoch_stair,
num_cycles=args.num_cycles,
cycle_decay=args.cycle_decay)
# resume training if ckpt_path is given
if args.ckpt_path != '' and args.resume_opt:
opt_ckpt_path = os.path.join(args.ckpt_save_dir, f'optim_{args.model}.ckpt')
else:
# opt.hyp = opt.hyp or ('hyp.finetune.yaml' if opt.weights else 'hyp.scratch.yaml')
opt.data, opt.cfg, opt.hyp = check_file(opt.data), check_file(opt.cfg), check_file(opt.hyp) # check files
assert len(opt.cfg) or len(opt.weights), 'either --cfg or --weights must be specified'
opt.img_size.extend([opt.img_size[-1]] * (2 - len(opt.img_size))) # extend to 2 sizes (train, test)
opt.name = 'evolve' if opt.evolve else opt.name
opt.save_dir = increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok | opt.evolve) # increment run

# DDP mode
device = select_device(opt.device, batch_size=opt.batch_size)
if opt.local_rank != -1:
assert torch.cuda.device_count() > opt.local_rank
torch.cuda.set_device(opt.local_rank)
device = torch.device('cuda', opt.local_rank)
dist.init_process_group(backend='nccl', init_method='env://') # distributed backend
assert opt.batch_size % opt.world_size == 0, '--batch-size must be multiple of CUDA device count'
opt.batch_size = opt.total_batch_size // opt.world_size

# Hyperparameters
with open(opt.hyp) as f:
hyp = yaml.load(f, Loader=yaml.FullLoader) # load hyps
if 'box' not in hyp:
warn('Compatibility: %s missing "box" which was renamed from "giou" in %s' %
(opt.hyp, 'https://github.com/ultralytics/yolov5/pull/1120'))
hyp['box'] = hyp.pop('giou')

# Train
logger.info(opt)
if not opt.evolve:
tb_writer = None # init loggers
if opt.global_rank in [-1, 0]:
logger.info(f'Start Tensorboard with "tensorboard --logdir {opt.project}", view at http://localhost:6006/')
tb_writer = SummaryWriter(opt.save_dir) # Tensorboard
train(hyp, opt, device, tb_writer, wandb)

# Evolve hyperparameters (optional)
opt_ckpt_path = ''

# create optimizer
# TODO: make the naming consistent: opt, name, dataset_name
if args.use_ema:
optimizer = create_optimizer(network.trainable_params(),
opt=args.opt,
lr=lr_scheduler,
weight_decay=args.weight_decay,
momentum=args.momentum,
nesterov=args.use_nesterov,
filter_bias_and_bn=args.filter_bias_and_bn,
checkpoint_path=opt_ckpt_path,
eps=args.eps)
else:
# Hyperparameter evolution metadata (mutation scale 0-1, lower_limit, upper_limit)
meta = {'lr0': (1, 1e-5, 1e-1), # initial learning rate (SGD=1E-2, Adam=1E-3)
'lrf': (1, 0.01, 1.0), # final OneCycleLR learning rate (lr0 * lrf)
'momentum': (0.3, 0.6, 0.98), # SGD momentum/Adam beta1
'weight_decay': (1, 0.0, 0.001), # optimizer weight decay
'warmup_epochs': (1, 0.0, 5.0), # warmup epochs (fractions ok)
'warmup_momentum': (1, 0.0, 0.95), # warmup initial momentum
'warmup_bias_lr': (1, 0.0, 0.2), # warmup initial bias lr
'box': (1, 0.02, 0.2), # box loss gain
'cls': (1, 0.2, 4.0), # cls loss gain
'cls_pw': (1, 0.5, 2.0), # cls BCELoss positive_weight
'obj': (1, 0.2, 4.0), # obj loss gain (scale with pixels)
'obj_pw': (1, 0.5, 2.0), # obj BCELoss positive_weight
'iou_t': (0, 0.1, 0.7), # IoU training threshold
'anchor_t': (1, 2.0, 8.0), # anchor-multiple threshold
'anchors': (2, 2.0, 10.0), # anchors per output grid (0 to ignore)
'fl_gamma': (0, 0.0, 2.0), # focal loss gamma (efficientDet default gamma=1.5)
'hsv_h': (1, 0.0, 0.1), # image HSV-Hue augmentation (fraction)
'hsv_s': (1, 0.0, 0.9), # image HSV-Saturation augmentation (fraction)
'hsv_v': (1, 0.0, 0.9), # image HSV-Value augmentation (fraction)
'degrees': (1, 0.0, 45.0), # image rotation (+/- deg)
'translate': (1, 0.0, 0.9), # image translation (+/- fraction)
'scale': (1, 0.0, 0.9), # image scale (+/- gain)
'shear': (1, 0.0, 10.0), # image shear (+/- deg)
'perspective': (0, 0.0, 0.001), # image perspective (+/- fraction), range 0-0.001
'flipud': (1, 0.0, 1.0), # image flip up-down (probability)
'fliplr': (0, 0.0, 1.0), # image flip left-right (probability)
'mosaic': (1, 0.0, 1.0), # image mosaic (probability)
'mixup': (1, 0.0, 1.0)} # image mixup (probability)

assert opt.local_rank == -1, 'DDP mode not implemented for --evolve'
opt.notest, opt.nosave = True, True # only test/save final epoch
# ei = [isinstance(x, (int, float)) for x in hyp.values()] # evolvable indices
yaml_file = Path(opt.save_dir) / 'hyp_evolved.yaml' # save best result here
if opt.bucket:
os.system('gsutil cp gs://%s/evolve.txt .' % opt.bucket) # download evolve.txt if exists

for _ in range(300): # generations to evolve
if Path('evolve.txt').exists(): # if evolve.txt exists: select best hyps and mutate
# Select parent(s)
parent = 'single' # parent selection method: 'single' or 'weighted'
x = np.loadtxt('evolve.txt', ndmin=2)
n = min(5, len(x)) # number of previous results to consider
x = x[np.argsort(-fitness(x))][:n] # top n mutations
w = fitness(x) - fitness(x).min() # weights
if parent == 'single' or len(x) == 1:
# x = x[random.randint(0, n - 1)] # random selection
x = x[random.choices(range(n), weights=w)[0]] # weighted selection
elif parent == 'weighted':
x = (x * w.reshape(n, 1)).sum(0) / w.sum() # weighted combination

# Mutate
mp, s = 0.8, 0.2 # mutation probability, sigma
npr = np.random
npr.seed(int(time.time()))
g = np.array([x[0] for x in meta.values()]) # gains 0-1
ng = len(meta)
v = np.ones(ng)
while all(v == 1): # mutate until a change occurs (prevent duplicates)
v = (g * (npr.random(ng) < mp) * npr.randn(ng) * npr.random() * s + 1).clip(0.3, 3.0)
for i, k in enumerate(hyp.keys()): # plt.hist(v.ravel(), 300)
hyp[k] = float(x[i + 7] * v[i]) # mutate

# Constrain to limits
for k, v in meta.items():
hyp[k] = max(hyp[k], v[1]) # lower limit
hyp[k] = min(hyp[k], v[2]) # upper limit
hyp[k] = round(hyp[k], 5) # significant digits

# Train mutation
results = train(hyp.copy(), opt, device, wandb=wandb)

# Write mutation results
print_mutation(hyp.copy(), results, yaml_file, opt.bucket)
optimizer = create_optimizer(network.trainable_params(),
opt=args.opt,
lr=lr_scheduler,
weight_decay=args.weight_decay,
momentum=args.momentum,
nesterov=args.use_nesterov,
filter_bias_and_bn=args.filter_bias_and_bn,
loss_scale=args.loss_scale,
checkpoint_path=opt_ckpt_path,
eps=args.eps)

# Define eval metrics.
if num_classes >= 5:
eval_metrics = {'Top_1_Accuracy': nn.Top1CategoricalAccuracy(),
'Top_5_Accuracy': nn.Top5CategoricalAccuracy()}
else:
eval_metrics = {'Top_1_Accuracy': nn.Top1CategoricalAccuracy()}

# init model
if args.use_ema:
net_with_loss = nn.WithLossCell(network, loss)

if args.dynamic_loss_scale:
loss_scale_manager = nn.DynamicLossScaleUpdateCell(loss_scale_value=args.loss_scale, scale_factor=2,
scale_window=1000)
else:
loss_scale_manager = nn.FixedLossScaleUpdateCell(loss_scale_value=args.loss_scale)
ms.amp.auto_mixed_precision(net_with_loss, amp_level=args.amp_level)
net_with_loss = TrainOneStepWithEMA(net_with_loss, optimizer, scale_sense=loss_scale_manager,
use_ema=args.use_ema, ema_decay=args.ema_decay)
eval_network = nn.WithEvalCell(network, loss, args.amp_level in ["O2", "O3", "auto"])
model = Model(net_with_loss, eval_network=eval_network, metrics=eval_metrics, eval_indexes=[0, 1, 2])
else:
if args.dynamic_loss_scale:
loss_scale_manager = ms.amp.DynamicLossScaleManager(init_loss_scale=args.loss_scale, scale_factor=2,
scale_window=1000)
else:
loss_scale_manager = FixedLossScaleManager(loss_scale=args.loss_scale, drop_overflow_update=False)
model = Model(network, loss_fn=loss, optimizer=optimizer, metrics=eval_metrics, amp_level=args.amp_level,
loss_scale_manager=loss_scale_manager)

# callback
# save checkpoints and summarize the training loss;
# record val acc and do model selection if a val dataset is available
begin_epoch = 0
if args.ckpt_path != '':
begin_step = optimizer.global_step.asnumpy()[0]
begin_epoch = int(args.ckpt_path.split('/')[-1].split('-')[1].split('_')[0])

summary_dir = f"./{args.ckpt_save_dir}/summary"
assert (args.ckpt_save_policy != 'top_k' or args.val_while_train), \
"when ckpt_save_policy is 'top_k', val_while_train must be True."
state_cb = StateMonitor(model, summary_dir=summary_dir,
dataset_val=loader_eval,
val_interval=args.val_interval,
metric_name=list(eval_metrics.keys()),
ckpt_dir=args.ckpt_save_dir,
ckpt_save_interval=args.ckpt_save_interval,
best_ckpt_name=args.model + '_best.ckpt',
rank_id=rank_id,
device_num=device_num,
log_interval=args.log_interval,
keep_checkpoint_max=args.keep_checkpoint_max,
model_name=args.model,
last_epoch=begin_epoch,
ckpt_save_policy=args.ckpt_save_policy)

callbacks = [state_cb]
# log
if rank_id in [None, 0]:
logger.info(f"-" * 40)
logger.info(f"Num devices: {device_num if device_num is not None else 1} \n"
f"Distributed mode: {args.distribute} \n"
f"Num training samples: {train_count}")
if args.val_while_train:
logger.info(f"Num validation samples: {eval_count}")
logger.info(f"Num classes: {num_classes} \n"
f"Num batches: {num_batches} \n"
f"Batch size: {args.batch_size} \n"
f"Auto augment: {args.auto_augment} \n"
f"Model: {args.model} \n"
f"Model param: {num_params} \n"
f"Num epochs: {args.epoch_size} \n"
f"Optimizer: {args.opt} \n"
f"LR: {args.lr} \n"
f"LR Scheduler: {args.scheduler}")
logger.info(f"-" * 40)

if args.ckpt_path != '':
logger.info(f"Resume training from {args.ckpt_path}, last step: {begin_step}, last epoch: {begin_epoch}")
else:
logger.info('Start training')

model.train(args.epoch_size, loader_train, callbacks=callbacks, dataset_sink_mode=args.dataset_sink_mode)

# Plot results
plot_evolution(yaml_file)
print(f'Hyperparameter evolution complete. Best results saved as: {yaml_file}\n'
f'Command to train a new model with these hyperparameters: $ python train.py --hyp {yaml_file}')
if __name__ == '__main__':
args = parse_args()
obs_data_url = os.path.join(args.data_url, 'imagenet')
try:
mox.file.copy_parallel(src_url=obs_data_url, dst_url=args.data_dir)
print(f"Successfully Download {obs_data_url} to {args.data_dir}")
except Exception as e:
print(f"moxing download {obs_data_url} to {args.data_dir} failed: {e}")
# args.data_dir = "/cache/sfs/data/imagenet"
# args.ckpt_save_dir = "/cache/output"
train(args)

# modelarts
obs_train_url = os.path.join(args.train_url, 'ckpt')
try:
mox.file.copy_parallel(src_url=args.ckpt_save_dir, dst_url=obs_train_url)
print(f"Successfully Upload {args.ckpt_save_dir} to {obs_train_url}")
except Exception as e:
print(f"moxing upload {args.ckpt_save_dir} to {obs_train_url} failed: {e}")

+ 457
- 0
train_with_func.py View File

@@ -0,0 +1,457 @@
"""Use the PYNATIVE mode to train the network"""
import os
import logging
from time import time
import numpy as np
from tqdm import tqdm

import mindspore as ms
from mindspore import nn, Tensor, ops, SummaryRecord
from mindspore.communication import init, get_rank, get_group_size
from mindspore.parallel._utils import _get_device_num, _get_gradients_mean

from mindcv.models import create_model
from mindcv.data import create_dataset, create_transforms, create_loader
from mindcv.loss import create_loss
from mindcv.optim import create_optimizer
from mindcv.scheduler import create_scheduler
from mindcv.utils import CheckpointManager, AllReduceSum, NoLossScaler
from mindcv.utils.random import set_seed
from config import parse_args

logger = logging.getLogger('train')
logger.setLevel(logging.INFO)
h1 = logging.StreamHandler()
formatter1 = logging.Formatter('%(message)s',)
logger.addHandler(h1)
h1.setFormatter(formatter1)


def train(args):
"""Train network."""

ms.set_context(mode=args.mode)

if args.distribute:
init()
device_num = get_group_size()
rank_id = get_rank()
ms.set_auto_parallel_context(device_num=device_num,
parallel_mode='data_parallel',
gradients_mean=True)
dist_sum = AllReduceSum()
else:
device_num = None
rank_id = None
dist_sum = None

set_seed(args.seed, rank_id)

# create dataset
dataset_train = create_dataset(
name=args.dataset,
root=args.data_dir,
split=args.train_split,
shuffle=args.shuffle,
num_samples=args.num_samples,
num_shards=device_num,
shard_id=rank_id,
num_parallel_workers=args.num_parallel_workers,
download=args.dataset_download,
num_aug_repeats=args.aug_repeats)
if args.num_classes is None:
num_classes = dataset_train.num_classes()
else:
num_classes = args.num_classes

# create transforms
transform_list = create_transforms(
dataset_name=args.dataset,
is_training=True,
image_resize=args.image_resize,
scale=args.scale,
ratio=args.ratio,
hflip=args.hflip,
vflip=args.vflip,
color_jitter=args.color_jitter,
interpolation=args.interpolation,
auto_augment=args.auto_augment,
mean=args.mean,
std=args.std,
re_prob=args.re_prob,
re_scale=args.re_scale,
re_ratio=args.re_ratio,
re_value=args.re_value,
re_max_attempts=args.re_max_attempts
)

# load dataset
loader_train = create_loader(
dataset=dataset_train,
batch_size=args.batch_size,
drop_remainder=False,
is_training=True,
mixup=args.mixup,
cutmix=args.cutmix,
cutmix_prob=args.cutmix_prob,
num_classes=num_classes,
transform=transform_list,
num_parallel_workers=args.num_parallel_workers,
)

if args.val_while_train:
dataset_eval = create_dataset(
name=args.dataset,
root=args.data_dir,
split=args.val_split,
num_shards=device_num,
shard_id=rank_id,
num_parallel_workers=args.num_parallel_workers,
download=args.dataset_download)

transform_list_eval = create_transforms(
dataset_name=args.dataset,
is_training=False,
image_resize=args.image_resize,
crop_pct=args.crop_pct,
interpolation=args.interpolation,
mean=args.mean,
std=args.std
)

loader_eval = create_loader(
dataset=dataset_eval,
batch_size=args.batch_size,
drop_remainder=False,
is_training=False,
transform=transform_list_eval,
num_parallel_workers=args.num_parallel_workers,
)
# validation dataset count
eval_count = dataset_eval.get_dataset_size()
if args.distribute:
eval_count = dist_sum(Tensor(eval_count, ms.int32))

num_batches = loader_train.get_dataset_size()
# Train dataset count
train_count = dataset_train.get_dataset_size()
if args.distribute:
train_count = dist_sum(Tensor(train_count, ms.int32))

# create model
network = create_model(model_name=args.model,
num_classes=num_classes,
in_channels=args.in_channels,
drop_rate=args.drop_rate,
drop_path_rate=args.drop_path_rate,
pretrained=args.pretrained,
checkpoint_path=args.ckpt_path)

num_params = sum([param.size for param in network.get_parameters()])

# create loss
ms.amp.auto_mixed_precision(network, amp_level=args.amp_level)
loss = create_loss(name=args.loss,
reduction=args.reduction,
label_smoothing=args.label_smoothing,
aux_factor=args.aux_factor)

# create learning rate schedule
lr_scheduler = create_scheduler(num_batches,
scheduler=args.scheduler,
lr=args.lr,
min_lr=args.min_lr,
warmup_epochs=args.warmup_epochs,
warmup_factor=args.warmup_factor,
decay_epochs=args.decay_epochs,
decay_rate=args.decay_rate,
milestones=args.multi_step_decay_milestones,
num_epochs=args.epoch_size)

# resume training if ckpt_path is given
if args.ckpt_path != '' and args.resume_opt:
opt_ckpt_path = os.path.join(args.ckpt_save_dir, f'optim_{args.model}.ckpt')
else:
opt_ckpt_path = ''

# create optimizer
optimizer = create_optimizer(network.trainable_params(),
opt=args.opt,
lr=lr_scheduler,
weight_decay=args.weight_decay,
momentum=args.momentum,
nesterov=args.use_nesterov,
filter_bias_and_bn=args.filter_bias_and_bn,
loss_scale=args.loss_scale,
checkpoint_path=opt_ckpt_path)
from mindspore.amp import DynamicLossScaler, StaticLossScaler

# set loss scale for mixed precision training
if args.amp_level != 'O0':
if args.dynamic_loss_scale:
loss_scaler = DynamicLossScaler(args.loss_scale, 2, 1000)
else:
loss_scaler = StaticLossScaler(args.loss_scale)
else:
loss_scaler = NoLossScaler()

# resume
begin_step = 0
begin_epoch = 0
if args.ckpt_path != '':
begin_step = optimizer.global_step.asnumpy()[0]
begin_epoch = args.ckpt_path.split('/')[-1].split('_')[0].split('-')[-1]
begin_epoch = int(begin_epoch)

# log
if rank_id in [None, 0]:

logger.info(f"-" * 40)
logger.info(f"Num devices: {device_num if device_num is not None else 1} \n"
f"Distributed mode: {args.distribute} \n"
f"Num training samples: {train_count}")
if args.val_while_train:
logger.info(f"Num validation samples: {eval_count}")
logger.info(f"Num classes: {num_classes} \n"
f"Num batches: {num_batches} \n"
f"Batch size: {args.batch_size} \n"
f"Auto augment: {args.auto_augment} \n"
f"Model: {args.model} \n"
f"Model param: {num_params} \n"
f"Num epochs: {args.epoch_size} \n"
f"Optimizer: {args.opt} \n"
f"LR: {args.lr} \n"
f"LR Scheduler: {args.scheduler}")
logger.info(f"-" * 40)

if args.ckpt_path != '':
logger.info(f"Resume training from {args.ckpt_path}, last step: {begin_step}, last epoch: {begin_epoch}")
else:
logger.info('Start training')

if not os.path.exists(args.ckpt_save_dir):
os.makedirs(args.ckpt_save_dir)

log_path = os.path.join(args.ckpt_save_dir, 'result.log')
if not (os.path.exists(log_path) and args.ckpt_path != ''): # if not resume training
with open(log_path, 'w') as fp:
fp.write('Epoch\tTrainLoss\tValAcc\tTime\n')

best_acc = 0
summary_dir = f"./{args.ckpt_save_dir}/summary_01"


# Training
need_flush_from_cache = True
assert (args.ckpt_save_policy != 'top_k' or args.val_while_train), \
"when ckpt_save_policy is 'top_k', val_while_train must be True."
manager = CheckpointManager(ckpt_save_policy=args.ckpt_save_policy)
with SummaryRecord(summary_dir) as summary_record:
for t in range(begin_epoch, args.epoch_size):
epoch_start = time()

train_loss = train_epoch(network,
loader_train,
loss,
optimizer,
epoch=t,
n_epochs=args.epoch_size,
loss_scaler=loss_scaler,
reduce_fn=dist_sum,
summary_record=summary_record,
rank_id=rank_id,
log_interval=args.log_interval)

# val while train
test_acc = Tensor(-1.0)
if args.val_while_train:
if ((t + 1) % args.val_interval == 0) or (t + 1 == args.epoch_size):
if rank_id in [None, 0]:
logger.info('Validating...')
val_start = time()
test_acc = test_epoch(network, loader_eval, dist_sum, rank_id=rank_id)
test_acc = 100 * test_acc
if rank_id in [0, None]:
val_time = time() - val_start
logger.info(f"Val time: {val_time:.2f} \t Val acc: {test_acc:0.3f}")
if test_acc > best_acc:
best_acc = test_acc
save_best_path = os.path.join(args.ckpt_save_dir, f"{args.model}-best.ckpt")
ms.save_checkpoint(network, save_best_path, async_save=True)
logger.info(f"=> New best val acc: {test_acc:0.3f}")

# add to summary
current_step = (t + 1) * num_batches + begin_step
if not isinstance(test_acc, Tensor):
test_acc = Tensor(test_acc)
if summary_record is not None:
summary_record.add_value('scalar', 'test_dataset_accuracy', test_acc)
summary_record.record(int(current_step))

# Save checkpoint
if rank_id in [0, None]:
if ((t + 1) % args.ckpt_save_interval == 0) or (t + 1 == args.epoch_size):
if need_flush_from_cache:
need_flush_from_cache = flush_from_cache(network)

ms.save_checkpoint(optimizer, os.path.join(args.ckpt_save_dir, f'{args.model}_optim.ckpt'),
async_save=True)
save_path = os.path.join(args.ckpt_save_dir, f"{args.model}-{t + 1}_{num_batches}.ckpt")
ckpoint_filelist = manager.save_ckpoint(network, num_ckpt=args.keep_checkpoint_max,
metric=test_acc, save_path=save_path)
if args.ckpt_save_policy == 'top_k':
checkpoints_str = "Top K accuracy checkpoints: \n"
for ch in ckpoint_filelist:
checkpoints_str += '{}\n'.format(ch)
logger.info(checkpoints_str)
else:
logger.info(f"Saving model to {save_path}")

epoch_time = time() - epoch_start
logger.info(f'Epoch {t + 1} time:{epoch_time:.3f}s')
logger.info("-" * 80)
with open(log_path, 'a') as fp:
fp.write(f'{t+1}\t{train_loss.asnumpy():.7f}\t{test_acc.asnumpy():.3f}\t{epoch_time:.2f}\n')

logger.info("Done!")


def train_epoch(network, dataset, loss_fn, optimizer, epoch, n_epochs, loss_scaler, reduce_fn=None, summary_record=None, rank_id=None, log_interval=100):
"""Training an epoch network"""
# Define forward function
def forward_fn(data, label):
logits = network(data)
loss = loss_fn(logits, label)
loss = loss_scaler.scale(loss)
return loss, logits

# Get gradient function
grad_fn = ops.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)
if args.distribute:
mean = _get_gradients_mean()
degree = _get_device_num()
grad_reducer = nn.DistributedGradReducer(optimizer.parameters, mean, degree)
else:
grad_reducer = ops.functional.identity

# Define function of one-step training
@ms.ms_function
def train_step(data, label):
(loss, logits), grads = grad_fn(data, label)
grads = grad_reducer(grads)
status = ms.amp.all_finite(grads)
if status:
loss = loss_scaler.unscale(loss)
grads = loss_scaler.unscale(grads)
loss = ops.depend(loss, optimizer(grads))
loss = ops.depend(loss, loss_scaler.adjust(status))

return loss, logits

network.set_train()
n_batches = dataset.get_dataset_size()
n_steps = n_batches * n_epochs
epoch_width, batch_width, step_width = len(str(n_epochs)), len(str(n_batches)), len(str(n_steps))
total, correct = 0, 0
start = time()

num_batches = dataset.get_dataset_size()
for batch, (data, label) in enumerate(dataset.create_tuple_iterator()):
loss, logits = train_step(data, label)

if len(label.shape) == 1:
correct += (logits.argmax(1) == label).asnumpy().sum()
else:  # one-hot or soft label
correct += (logits.argmax(1) == label.argmax(1)).asnumpy().sum()
total += len(data)

if (batch + 1) % log_interval == 0 or (batch + 1) >= num_batches or batch == 0:
step = epoch * n_batches + batch
if optimizer.dynamic_lr:
cur_lr = optimizer.learning_rate(Tensor(step)).asnumpy()
else:
cur_lr = optimizer.learning_rate.asnumpy()
logger.info(f"Epoch:[{epoch+1:{epoch_width}d}/{n_epochs:{epoch_width}d}], "
f"batch:[{batch+1:{batch_width}d}/{n_batches:{batch_width}d}], "
f"loss:{loss.asnumpy():8.6f}, lr: {cur_lr:.7f}, time:{time() - start:.6f}s")
start = time()
if rank_id in [0, None]:
if not isinstance(loss, Tensor):
loss = Tensor(loss)
if summary_record is not None:
summary_record.add_value('scalar', 'loss', loss)
summary_record.record(step)
if args.distribute:
correct = reduce_fn(Tensor(correct, ms.float32))
total = reduce_fn(Tensor(total, ms.float32))
correct /= total
correct = correct.asnumpy()
else:
correct /= total

if rank_id in [0, None]:
logger.info(f"Training accuracy: {(100 * correct):0.3f}")
if not isinstance(correct, Tensor):
correct = Tensor(correct)
if summary_record is not None:
summary_record.add_value('scalar', 'train_dataset_accuracy', correct)
summary_record.record(step)
return loss


def test_epoch(network, dataset, reduce_fn=None, rank_id=None):
"""Test network accuracy and loss."""
network.set_train(False) # TODO: check freeze

correct, total = 0, 0
for data, label in tqdm(dataset.create_tuple_iterator()):
pred = network(data)
total += len(data)
if len(label.shape) == 1:
correct += (pred.argmax(1) == label).asnumpy().sum()
else:  # one-hot or soft label
correct += (pred.argmax(1) == label.argmax(1)).asnumpy().sum()

if rank_id is not None:
correct = reduce_fn(Tensor(correct, ms.float32))
total = reduce_fn(Tensor(total, ms.float32))
correct /= total
correct = correct.asnumpy()
else:
correct /= total

return correct

def flush_from_cache(network):
"""Flush cache data to host if tensor is cache enable."""
has_cache_params = False
params = network.get_parameters()
for param in params:
if param.cache_enable:
has_cache_params = True
Tensor(param).flush_from_cache()
if not has_cache_params:
need_flush_from_cache = False
else:
need_flush_from_cache = True
return need_flush_from_cache

if __name__ == '__main__':
args = parse_args()
# data sync for cloud platform if enabled
if args.enable_modelarts:
import moxing as mox
args.data_dir = f'/cache/{args.data_url}'
mox.file.copy_parallel(src_url=os.path.join(args.data_url, args.dataset), dst_url=args.data_dir)
train(args)

if args.enable_modelarts:
mox.file.copy_parallel(src_url=args.ckpt_save_dir, dst_url=args.train_url)
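
train_with_func.py re-implements the same training in PyNATIVE/functional style: ops.value_and_grad builds the gradient function, all_finite guards against fp16 overflow, and the loss scaler is applied and removed by hand. The pattern, reduced to a self-contained toy (single device, static scale; the Dense/SoftmaxCrossEntropyWithLogits setup is illustrative, not from this PR):

import numpy as np
import mindspore as ms
from mindspore import nn, ops, Tensor
from mindspore.amp import StaticLossScaler, all_finite

net = nn.Dense(10, 2)
loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
opt = nn.SGD(net.trainable_params(), learning_rate=0.01)
scaler = StaticLossScaler(1024)

def forward_fn(x, y):
    loss = loss_fn(net(x), y)
    return scaler.scale(loss)  # scale before backward, as in forward_fn above

grad_fn = ops.value_and_grad(forward_fn, None, opt.parameters)

def train_step(x, y):
    loss, grads = grad_fn(x, y)
    if all_finite(grads):  # skip the update when scaled fp16 gradients overflow
        opt(scaler.unscale(grads))
    return scaler.unscale(loss)

x = Tensor(np.random.randn(4, 10), ms.float32)
y = Tensor(np.array([0, 1, 0, 1]), ms.int32)
print(train_step(x, y))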

+ 153
- 133
tutorial.ipynb View File

@@ -16,7 +16,7 @@
"accelerator": "GPU",
"widgets": {
"application/vnd.jupyter.widget-state+json": {
"02ac0588602847eea00a0205f87bcce2": {
"811fd52fef65422c8267bafcde8a2c3d": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
@@ -28,15 +28,15 @@
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_c472ea49806447a68b5a9221a4ddae85",
"layout": "IPY_MODEL_8f41b90117224eef9133a9c3a103dbba",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_091fdf499bd44a80af7281d16da4aa93",
"IPY_MODEL_c79f69c959de4427ba102a87a9f46d80"
"IPY_MODEL_ca2fb37af6ed43d4a74cdc9f2ac5c4a5",
"IPY_MODEL_29419ae5ebb9403ea73f7e5a68037bdd"
]
}
},
"c472ea49806447a68b5a9221a4ddae85": {
"8f41b90117224eef9133a9c3a103dbba": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
@@ -87,12 +87,12 @@
"left": null
}
},
"091fdf499bd44a80af7281d16da4aa93": {
"ca2fb37af6ed43d4a74cdc9f2ac5c4a5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_c42ae5af74a0491187827d0a1fc259bb",
"style": "IPY_MODEL_6511b4dfb10b48d1bc98bcfb3987bfa0",
"_dom_classes": [],
"description": "100%",
"_model_name": "FloatProgressModel",
@@ -107,30 +107,30 @@
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_5a90f72d3a2d46cb9ad915daa3ead8b4"
"layout": "IPY_MODEL_64f0badf1a8f489885aa984dd62d37dc"
}
},
"c79f69c959de4427ba102a87a9f46d80": {
"29419ae5ebb9403ea73f7e5a68037bdd": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_2a7ed6611da34662b10e37fd4f4e4438",
"style": "IPY_MODEL_f569911c5cfc4d81bb1bdfa83447afc8",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 781M/781M [00:23&lt;00:00, 35.1MB/s]",
"value": " 781M/781M [00:23&lt;00:00, 34.2MB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_fead0160658445bf9e966daa4481cad0"
"layout": "IPY_MODEL_84943ade566440aaa2dcf3b3b27e7074"
}
},
"c42ae5af74a0491187827d0a1fc259bb": {
"6511b4dfb10b48d1bc98bcfb3987bfa0": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
@@ -145,7 +145,7 @@
"_model_module": "@jupyter-widgets/controls"
}
},
"5a90f72d3a2d46cb9ad915daa3ead8b4": {
"64f0badf1a8f489885aa984dd62d37dc": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
@@ -196,7 +196,7 @@
"left": null
}
},
"2a7ed6611da34662b10e37fd4f4e4438": {
"f569911c5cfc4d81bb1bdfa83447afc8": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
@@ -210,7 +210,7 @@
"_model_module": "@jupyter-widgets/controls"
}
},
"fead0160658445bf9e966daa4481cad0": {
"84943ade566440aaa2dcf3b3b27e7074": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
@@ -261,7 +261,7 @@
"left": null
}
},
"cf1ab9fde7444d3e874fcd407ba8f0f8": {
"8501ed1563e4452eac9df6b7a66e8f8c": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HBoxModel",
"state": {
@@ -273,15 +273,15 @@
"_view_count": null,
"_view_module_version": "1.5.0",
"box_style": "",
"layout": "IPY_MODEL_9ee03f9c85f34155b2645e89c9211547",
"layout": "IPY_MODEL_d2bb96801e1f46f4a58e02534f7026ff",
"_model_module": "@jupyter-widgets/controls",
"children": [
"IPY_MODEL_933ebc451c09490aadf71afbbb3dff2a",
"IPY_MODEL_8e7c55cbca624432a84fa7ad8f3a4016"
"IPY_MODEL_468a796ef06b4a24bcba6fbd4a0a8db5",
"IPY_MODEL_42ad5c1ea7be4835bffebf90642178f1"
]
}
},
"9ee03f9c85f34155b2645e89c9211547": {
"d2bb96801e1f46f4a58e02534f7026ff": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
@@ -332,50 +332,50 @@
"left": null
}
},
"933ebc451c09490aadf71afbbb3dff2a": {
"468a796ef06b4a24bcba6fbd4a0a8db5": {
"model_module": "@jupyter-widgets/controls",
"model_name": "FloatProgressModel",
"state": {
"_view_name": "ProgressView",
"style": "IPY_MODEL_dd62d83b35d04a178840772e82bd2f2e",
"style": "IPY_MODEL_c58b5536d98f4814831934e9c30c4d78",
"_dom_classes": [],
"description": "100%",
"_model_name": "FloatProgressModel",
"bar_style": "success",
"max": 22090455,
"max": 22091032,
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": 22090455,
"value": 22091032,
"_view_count": null,
"_view_module_version": "1.5.0",
"orientation": "horizontal",
"min": 0,
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_d5c4f3d1c8b046e3a163faaa6b3a51ab"
"layout": "IPY_MODEL_505597101151486ea29e9ab754544d27"
}
},
"8e7c55cbca624432a84fa7ad8f3a4016": {
"42ad5c1ea7be4835bffebf90642178f1": {
"model_module": "@jupyter-widgets/controls",
"model_name": "HTMLModel",
"state": {
"_view_name": "HTMLView",
"style": "IPY_MODEL_78d1da8efb504b03878ca9ce5b404006",
"style": "IPY_MODEL_de6e7b4b4a1c408c9f89d89b07a13bcd",
"_dom_classes": [],
"description": "",
"_model_name": "HTMLModel",
"placeholder": "​",
"_view_module": "@jupyter-widgets/controls",
"_model_module_version": "1.5.0",
"value": " 21.1M/21.1M [00:01&lt;00:00, 16.9MB/s]",
"value": " 21.1M/21.1M [00:01&lt;00:00, 18.2MB/s]",
"_view_count": null,
"_view_module_version": "1.5.0",
"description_tooltip": null,
"_model_module": "@jupyter-widgets/controls",
"layout": "IPY_MODEL_d28208ba1213436a93926a01d99d97ae"
"layout": "IPY_MODEL_f5cc9c7d4c274b2d81327ba3163c43fd"
}
},
"dd62d83b35d04a178840772e82bd2f2e": {
"c58b5536d98f4814831934e9c30c4d78": {
"model_module": "@jupyter-widgets/controls",
"model_name": "ProgressStyleModel",
"state": {
@@ -390,7 +390,7 @@
"_model_module": "@jupyter-widgets/controls"
}
},
"d5c4f3d1c8b046e3a163faaa6b3a51ab": {
"505597101151486ea29e9ab754544d27": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
@@ -441,7 +441,7 @@
"left": null
}
},
"78d1da8efb504b03878ca9ce5b404006": {
"de6e7b4b4a1c408c9f89d89b07a13bcd": {
"model_module": "@jupyter-widgets/controls",
"model_name": "DescriptionStyleModel",
"state": {
@@ -455,7 +455,7 @@
"_model_module": "@jupyter-widgets/controls"
}
},
"d28208ba1213436a93926a01d99d97ae": {
"f5cc9c7d4c274b2d81327ba3163c43fd": {
"model_module": "@jupyter-widgets/base",
"model_name": "LayoutModel",
"state": {
@@ -550,7 +550,7 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "888d5c41-00e9-47d8-d230-dded99325bea"
"outputId": "c6ad57c2-40b7-4764-b07d-19ee2ceaabaf"
},
"source": [
"!git clone https://github.com/ultralytics/yolov5 # clone repo\n",
@@ -604,15 +604,15 @@
{
"output_type": "stream",
"text": [
"Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.25, device='', img_size=640, iou_thres=0.45, save_conf=False, save_dir='runs/detect', save_txt=False, source='data/images/', update=False, view_img=False, weights=['yolov5s.pt'])\n",
"Using torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130MB)\n",
"Namespace(agnostic_nms=False, augment=False, classes=None, conf_thres=0.25, device='', exist_ok=False, img_size=640, iou_thres=0.45, name='exp', project='runs/detect', save_conf=False, save_txt=False, source='data/images/', update=False, view_img=False, weights=['yolov5s.pt'])\n",
"YOLOv5 v4.0-21-gb26a2f6 torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130.5MB)\n",
"\n",
"Fusing layers... \n",
"Model Summary: 232 layers, 7459581 parameters, 0 gradients\n",
"image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 buss, 1 skateboards, Done. (0.012s)\n",
"image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.012s)\n",
"Model Summary: 224 layers, 7266973 parameters, 0 gradients, 17.0 GFLOPS\n",
"image 1/2 /content/yolov5/data/images/bus.jpg: 640x480 4 persons, 1 buss, 1 skateboards, Done. (0.011s)\n",
"image 2/2 /content/yolov5/data/images/zidane.jpg: 384x640 2 persons, 2 ties, Done. (0.011s)\n",
"Results saved to runs/detect/exp\n",
"Done. (0.113s)\n"
"Done. (0.110s)\n"
],
"name": "stdout"
},
@@ -670,19 +670,19 @@
"id": "WQPtK1QYVaD_",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 66,
"height": 65,
"referenced_widgets": [
"02ac0588602847eea00a0205f87bcce2",
"c472ea49806447a68b5a9221a4ddae85",
"091fdf499bd44a80af7281d16da4aa93",
"c79f69c959de4427ba102a87a9f46d80",
"c42ae5af74a0491187827d0a1fc259bb",
"5a90f72d3a2d46cb9ad915daa3ead8b4",
"2a7ed6611da34662b10e37fd4f4e4438",
"fead0160658445bf9e966daa4481cad0"
"811fd52fef65422c8267bafcde8a2c3d",
"8f41b90117224eef9133a9c3a103dbba",
"ca2fb37af6ed43d4a74cdc9f2ac5c4a5",
"29419ae5ebb9403ea73f7e5a68037bdd",
"6511b4dfb10b48d1bc98bcfb3987bfa0",
"64f0badf1a8f489885aa984dd62d37dc",
"f569911c5cfc4d81bb1bdfa83447afc8",
"84943ade566440aaa2dcf3b3b27e7074"
]
},
"outputId": "780d8f5f-766e-4b99-e370-11f9b884c27a"
"outputId": "59a7a546-8492-492e-861d-70a2c85a6794"
},
"source": [
"# Download COCO val2017\n",
@@ -695,7 +695,7 @@
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "02ac0588602847eea00a0205f87bcce2",
"model_id": "811fd52fef65422c8267bafcde8a2c3d",
"version_minor": 0,
"version_major": 2
},
@@ -723,7 +723,7 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "013935a5-ba81-4810-b723-0cb01cf7bc79"
"outputId": "427c211e-e283-4e87-f7b3-7b8dfb11a4a5"
},
"source": [
"# Run YOLOv5x on COCO val2017\n",
@@ -734,45 +734,47 @@
{
"output_type": "stream",
"text": [
"Namespace(augment=False, batch_size=32, conf_thres=0.001, data='./data/coco.yaml', device='', exist_ok=False, img_size=640, iou_thres=0.65, name='exp', project='runs/test', save_conf=False, save_json=True, save_txt=False, single_cls=False, task='val', verbose=False, weights=['yolov5x.pt'])\n",
"Using torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130MB)\n",
"Namespace(augment=False, batch_size=32, conf_thres=0.001, data='./data/coco.yaml', device='', exist_ok=False, img_size=640, iou_thres=0.65, name='exp', project='runs/test', save_conf=False, save_hybrid=False, save_json=True, save_txt=False, single_cls=False, task='val', verbose=False, weights=['yolov5x.pt'])\n",
"YOLOv5 v4.0-21-gb26a2f6 torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130.5MB)\n",
"\n",
"Downloading https://github.com/ultralytics/yolov5/releases/download/v3.1/yolov5x.pt to yolov5x.pt...\n",
"100% 170M/170M [00:05<00:00, 32.6MB/s]\n",
"Downloading https://github.com/ultralytics/yolov5/releases/download/v4.0/yolov5x.pt to yolov5x.pt...\n",
"100% 168M/168M [00:05<00:00, 31.9MB/s]\n",
"\n",
"Fusing layers... \n",
"Model Summary: 484 layers, 88922205 parameters, 0 gradients\n",
"Scanning labels ../coco/labels/val2017.cache (4952 found, 0 missing, 48 empty, 0 duplicate, for 5000 images): 5000it [00:00, 14785.71it/s]\n",
" Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 157/157 [01:30<00:00, 1.74it/s]\n",
" all 5e+03 3.63e+04 0.409 0.754 0.672 0.484\n",
"Speed: 5.9/2.1/7.9 ms inference/NMS/total per 640x640 image at batch-size 32\n",
"Model Summary: 476 layers, 87730285 parameters, 0 gradients, 218.8 GFLOPS\n",
"\u001b[34m\u001b[1mval: \u001b[0mScanning '../coco/labels/val2017' for images and labels... 4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:01<00:00, 2791.81it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mNew cache created: ../coco/labels/val2017.cache\n",
"\u001b[34m\u001b[1mval: \u001b[0mScanning '../coco/labels/val2017.cache' for images and labels... 4952 found, 48 missing, 0 empty, 0 corrupted: 100% 5000/5000 [00:00<00:00, 13332180.55it/s]\n",
" Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 157/157 [01:30<00:00, 1.73it/s]\n",
" all 5e+03 3.63e+04 0.419 0.765 0.68 0.486\n",
"Speed: 5.2/2.0/7.2 ms inference/NMS/total per 640x640 image at batch-size 32\n",
"\n",
"Evaluating pycocotools mAP... saving runs/test/exp/yolov5x_predictions.json...\n",
"loading annotations into memory...\n",
"Done (t=0.43s)\n",
"Done (t=0.41s)\n",
"creating index...\n",
"index created!\n",
"Loading and preparing results...\n",
"DONE (t=4.67s)\n",
"DONE (t=5.26s)\n",
"creating index...\n",
"index created!\n",
"Running per image evaluation...\n",
"Evaluate annotation type *bbox*\n",
"DONE (t=92.11s).\n",
"DONE (t=93.97s).\n",
"Accumulating evaluation results...\n",
"DONE (t=13.24s).\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.492\n",
" Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.676\n",
" Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.534\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.318\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.541\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.633\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.376\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.617\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.670\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.493\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.723\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.812\n",
"DONE (t=15.06s).\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.501\n",
" Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.687\n",
" Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.544\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.338\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.548\n",
" Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.637\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.378\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.628\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.680\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.520\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.729\n",
" Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.826\n",
"Results saved to runs/test/exp\n"
],
"name": "stdout"
@@ -833,19 +835,19 @@
"id": "Knxi2ncxWffW",
"colab": {
"base_uri": "https://localhost:8080/",
"height": 66,
"height": 65,
"referenced_widgets": [
"cf1ab9fde7444d3e874fcd407ba8f0f8",
"9ee03f9c85f34155b2645e89c9211547",
"933ebc451c09490aadf71afbbb3dff2a",
"8e7c55cbca624432a84fa7ad8f3a4016",
"dd62d83b35d04a178840772e82bd2f2e",
"d5c4f3d1c8b046e3a163faaa6b3a51ab",
"78d1da8efb504b03878ca9ce5b404006",
"d28208ba1213436a93926a01d99d97ae"
"8501ed1563e4452eac9df6b7a66e8f8c",
"d2bb96801e1f46f4a58e02534f7026ff",
"468a796ef06b4a24bcba6fbd4a0a8db5",
"42ad5c1ea7be4835bffebf90642178f1",
"c58b5536d98f4814831934e9c30c4d78",
"505597101151486ea29e9ab754544d27",
"de6e7b4b4a1c408c9f89d89b07a13bcd",
"f5cc9c7d4c274b2d81327ba3163c43fd"
]
},
"outputId": "59f9a94b-21e1-4626-f36a-a8e1b1e5c8f6"
"outputId": "c68a3db4-1314-46b4-9e52-83532eb65749"
},
"source": [
"# Download COCO128\n",
@@ -858,12 +860,12 @@
"output_type": "display_data",
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "cf1ab9fde7444d3e874fcd407ba8f0f8",
"model_id": "8501ed1563e4452eac9df6b7a66e8f8c",
"version_minor": 0,
"version_major": 2
},
"text/plain": [
"HBox(children=(FloatProgress(value=0.0, max=22090455.0), HTML(value='')))"
"HBox(children=(FloatProgress(value=0.0, max=22091032.0), HTML(value='')))"
]
},
"metadata": {
@@ -923,7 +925,7 @@
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "138f2d1d-364c-405a-cf13-ea91a2aff915"
"outputId": "6af7116a-01ab-4b94-e5d7-b37c17dc95de"
},
"source": [
"# Train YOLOv5s on COCO128 for 3 epochs\n",
@@ -934,75 +936,79 @@
{
"output_type": "stream",
"text": [
"Using torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130MB)\n",
"\u001b[34m\u001b[1mgithub: \u001b[0mup to date with https://github.com/ultralytics/yolov5 ✅\n",
"YOLOv5 v4.0-21-gb26a2f6 torch 1.7.0+cu101 CUDA:0 (Tesla V100-SXM2-16GB, 16130.5MB)\n",
"\n",
"Namespace(adam=False, batch_size=16, bucket='', cache_images=True, cfg='', data='./data/coco128.yaml', device='', epochs=3, evolve=False, exist_ok=False, global_rank=-1, hyp='data/hyp.scratch.yaml', image_weights=False, img_size=[640, 640], local_rank=-1, log_imgs=16, multi_scale=False, name='exp', noautoanchor=False, nosave=True, notest=False, project='runs/train', rect=False, resume=False, save_dir='runs/train/exp', single_cls=False, sync_bn=False, total_batch_size=16, weights='yolov5s.pt', workers=8, world_size=1)\n",
"Namespace(adam=False, batch_size=16, bucket='', cache_images=True, cfg='', data='./data/coco128.yaml', device='', epochs=3, evolve=False, exist_ok=False, global_rank=-1, hyp='data/hyp.scratch.yaml', image_weights=False, img_size=[640, 640], local_rank=-1, log_artifacts=False, log_imgs=16, multi_scale=False, name='exp', noautoanchor=False, nosave=True, notest=False, project='runs/train', quad=False, rect=False, resume=False, save_dir='runs/train/exp', single_cls=False, sync_bn=False, total_batch_size=16, weights='yolov5s.pt', workers=8, world_size=1)\n",
"\u001b[34m\u001b[1mwandb: \u001b[0mInstall Weights & Biases for YOLOv5 logging with 'pip install wandb' (recommended)\n",
"Start Tensorboard with \"tensorboard --logdir runs/train\", view at http://localhost:6006/\n",
"2020-11-20 11:45:17.042357: I tensorflow/stream_executor/platform/default/dso_loader.cc:48] Successfully opened dynamic library libcudart.so.10.1\n",
"Hyperparameters {'lr0': 0.01, 'lrf': 0.2, 'momentum': 0.937, 'weight_decay': 0.0005, 'warmup_epochs': 3.0, 'warmup_momentum': 0.8, 'warmup_bias_lr': 0.1, 'box': 0.05, 'cls': 0.5, 'cls_pw': 1.0, 'obj': 1.0, 'obj_pw': 1.0, 'iou_t': 0.2, 'anchor_t': 4.0, 'fl_gamma': 0.0, 'hsv_h': 0.015, 'hsv_s': 0.7, 'hsv_v': 0.4, 'degrees': 0.0, 'translate': 0.1, 'scale': 0.5, 'shear': 0.0, 'perspective': 0.0, 'flipud': 0.0, 'fliplr': 0.5, 'mosaic': 1.0, 'mixup': 0.0}\n",
"Downloading https://github.com/ultralytics/yolov5/releases/download/v3.1/yolov5s.pt to yolov5s.pt...\n",
"100% 14.5M/14.5M [00:01<00:00, 14.8MB/s]\n",
"2021-01-17 19:56:03.945851: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1\n",
"\u001b[34m\u001b[1mhyperparameters: \u001b[0mlr0=0.01, lrf=0.2, momentum=0.937, weight_decay=0.0005, warmup_epochs=3.0, warmup_momentum=0.8, warmup_bias_lr=0.1, box=0.05, cls=0.5, cls_pw=1.0, obj=1.0, obj_pw=1.0, iou_t=0.2, anchor_t=4.0, fl_gamma=0.0, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, degrees=0.0, translate=0.1, scale=0.5, shear=0.0, perspective=0.0, flipud=0.0, fliplr=0.5, mosaic=1.0, mixup=0.0\n",
"Downloading https://github.com/ultralytics/yolov5/releases/download/v4.0/yolov5s.pt to yolov5s.pt...\n",
"100% 14.1M/14.1M [00:00<00:00, 15.8MB/s]\n",
"\n",
"\n",
" from n params module arguments \n",
" 0 -1 1 3520 models.common.Focus [3, 32, 3] \n",
" 1 -1 1 18560 models.common.Conv [32, 64, 3, 2] \n",
" 2 -1 1 19904 models.common.BottleneckCSP [64, 64, 1] \n",
" 2 -1 1 18816 models.common.C3 [64, 64, 1] \n",
" 3 -1 1 73984 models.common.Conv [64, 128, 3, 2] \n",
" 4 -1 1 161152 models.common.BottleneckCSP [128, 128, 3] \n",
" 4 -1 1 156928 models.common.C3 [128, 128, 3] \n",
" 5 -1 1 295424 models.common.Conv [128, 256, 3, 2] \n",
" 6 -1 1 641792 models.common.BottleneckCSP [256, 256, 3] \n",
" 6 -1 1 625152 models.common.C3 [256, 256, 3] \n",
" 7 -1 1 1180672 models.common.Conv [256, 512, 3, 2] \n",
" 8 -1 1 656896 models.common.SPP [512, 512, [5, 9, 13]] \n",
" 9 -1 1 1248768 models.common.BottleneckCSP [512, 512, 1, False] \n",
" 9 -1 1 1182720 models.common.C3 [512, 512, 1, False] \n",
" 10 -1 1 131584 models.common.Conv [512, 256, 1, 1] \n",
" 11 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
" 12 [-1, 6] 1 0 models.common.Concat [1] \n",
" 13 -1 1 378624 models.common.BottleneckCSP [512, 256, 1, False] \n",
" 13 -1 1 361984 models.common.C3 [512, 256, 1, False] \n",
" 14 -1 1 33024 models.common.Conv [256, 128, 1, 1] \n",
" 15 -1 1 0 torch.nn.modules.upsampling.Upsample [None, 2, 'nearest'] \n",
" 16 [-1, 4] 1 0 models.common.Concat [1] \n",
" 17 -1 1 95104 models.common.BottleneckCSP [256, 128, 1, False] \n",
" 17 -1 1 90880 models.common.C3 [256, 128, 1, False] \n",
" 18 -1 1 147712 models.common.Conv [128, 128, 3, 2] \n",
" 19 [-1, 14] 1 0 models.common.Concat [1] \n",
" 20 -1 1 313088 models.common.BottleneckCSP [256, 256, 1, False] \n",
" 20 -1 1 296448 models.common.C3 [256, 256, 1, False] \n",
" 21 -1 1 590336 models.common.Conv [256, 256, 3, 2] \n",
" 22 [-1, 10] 1 0 models.common.Concat [1] \n",
" 23 -1 1 1248768 models.common.BottleneckCSP [512, 512, 1, False] \n",
" 23 -1 1 1182720 models.common.C3 [512, 512, 1, False] \n",
" 24 [17, 20, 23] 1 229245 models.yolo.Detect [80, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], [128, 256, 512]]\n",
"Model Summary: 283 layers, 7468157 parameters, 7468157 gradients\n",
"Model Summary: 283 layers, 7276605 parameters, 7276605 gradients, 17.1 GFLOPS\n",
"\n",
"Transferred 370/370 items from yolov5s.pt\n",
"Optimizer groups: 62 .bias, 70 conv.weight, 59 other\n",
"Scanning images: 100% 128/128 [00:00<00:00, 5395.63it/s]\n",
"Scanning labels ../coco128/labels/train2017.cache (126 found, 0 missing, 2 empty, 0 duplicate, for 128 images): 128it [00:00, 13972.28it/s]\n",
"Caching images (0.1GB): 100% 128/128 [00:00<00:00, 173.55it/s]\n",
"Scanning labels ../coco128/labels/train2017.cache (126 found, 0 missing, 2 empty, 0 duplicate, for 128 images): 128it [00:00, 8693.98it/s]\n",
"Caching images (0.1GB): 100% 128/128 [00:00<00:00, 133.30it/s]\n",
"NumExpr defaulting to 2 threads.\n",
"Transferred 362/362 items from yolov5s.pt\n",
"Scaled weight_decay = 0.0005\n",
"Optimizer groups: 62 .bias, 62 conv.weight, 59 other\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mScanning '../coco128/labels/train2017' for images and labels... 128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 2647.74it/s]\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mNew cache created: ../coco128/labels/train2017.cache\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mScanning '../coco128/labels/train2017.cache' for images and labels... 128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 1503840.09it/s]\n",
"\u001b[34m\u001b[1mtrain: \u001b[0mCaching images (0.1GB): 100% 128/128 [00:00<00:00, 176.03it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mScanning '../coco128/labels/train2017.cache' for images and labels... 128 found, 0 missing, 2 empty, 0 corrupted: 100% 128/128 [00:00<00:00, 24200.82it/s]\n",
"\u001b[34m\u001b[1mval: \u001b[0mCaching images (0.1GB): 100% 128/128 [00:01<00:00, 123.25it/s]\n",
"Plotting labels... \n",
"\n",
"Analyzing anchors... anchors/target = 4.26, Best Possible Recall (BPR) = 0.9946\n",
"\u001b[34m\u001b[1mautoanchor: \u001b[0mAnalyzing anchors... anchors/target = 4.26, Best Possible Recall (BPR) = 0.9946\n",
"Image sizes 640 train, 640 test\n",
"Using 2 dataloader workers\n",
"Logging results to runs/train/exp\n",
"Starting training for 3 epochs...\n",
"\n",
" Epoch gpu_mem box obj cls total targets img_size\n",
" 0/2 5.24G 0.04202 0.06745 0.01503 0.1245 194 640: 100% 8/8 [00:03<00:00, 2.01it/s]\n",
" Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:03<00:00, 2.40it/s]\n",
" all 128 929 0.404 0.758 0.701 0.45\n",
" 0/2 3.27G 0.04357 0.06779 0.01869 0.1301 207 640: 100% 8/8 [00:04<00:00, 1.95it/s]\n",
" Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:05<00:00, 1.36it/s]\n",
" all 128 929 0.392 0.732 0.657 0.428\n",
"\n",
" Epoch gpu_mem box obj cls total targets img_size\n",
" 1/2 5.12G 0.04461 0.05874 0.0169 0.1202 142 640: 100% 8/8 [00:01<00:00, 4.14it/s]\n",
" Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:01<00:00, 5.75it/s]\n",
" all 128 929 0.403 0.772 0.703 0.453\n",
" 1/2 7.47G 0.04308 0.06636 0.02083 0.1303 227 640: 100% 8/8 [00:02<00:00, 3.88it/s]\n",
" Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:01<00:00, 5.07it/s]\n",
" all 128 929 0.387 0.737 0.657 0.432\n",
"\n",
" Epoch gpu_mem box obj cls total targets img_size\n",
" 2/2 5.12G 0.04445 0.06545 0.01667 0.1266 149 640: 100% 8/8 [00:01<00:00, 4.15it/s]\n",
" Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:06<00:00, 1.18it/s]\n",
" all 128 929 0.395 0.767 0.702 0.452\n",
"Optimizer stripped from runs/train/exp/weights/last.pt, 15.2MB\n",
"3 epochs completed in 0.006 hours.\n",
" 2/2 7.48G 0.04461 0.06864 0.01866 0.1319 191 640: 100% 8/8 [00:02<00:00, 3.57it/s]\n",
" Class Images Targets P R mAP@.5 mAP@.5:.95: 100% 8/8 [00:02<00:00, 2.82it/s]\n",
" all 128 929 0.385 0.742 0.658 0.431\n",
"Optimizer stripped from runs/train/exp/weights/last.pt, 14.8MB\n",
"3 epochs completed in 0.007 hours.\n",
"\n"
],
"name": "stdout"
@@ -1114,10 +1120,23 @@
"\n",
"YOLOv5 may be run in any of the following up-to-date verified environments (with all dependencies including [CUDA](https://developer.nvidia.com/cuda)/[CUDNN](https://developer.nvidia.com/cudnn), [Python](https://www.python.org/) and [PyTorch](https://pytorch.org/) preinstalled):\n",
"\n",
"- **Google Colab Notebook** with free GPU: <a href=\"https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a>\n",
"- **Kaggle Notebook** with free GPU: [https://www.kaggle.com/ultralytics/yolov5](https://www.kaggle.com/ultralytics/yolov5)\n",
"- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart) \n",
"- **Docker Image** https://hub.docker.com/r/ultralytics/yolov5. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) ![Docker Pulls](https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker)\n"
"- **Google Colab and Kaggle** notebooks with free GPU: <a href=\"https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"></a> <a href=\"https://www.kaggle.com/ultralytics/yolov5\"><img src=\"https://kaggle.com/static/images/open-in-kaggle.svg\" alt=\"Open In Kaggle\"></a>\n",
"- **Google Cloud** Deep Learning VM. See [GCP Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/GCP-Quickstart)\n",
"- **Amazon** Deep Learning AMI. See [AWS Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/AWS-Quickstart)\n",
"- **Docker Image**. See [Docker Quickstart Guide](https://github.com/ultralytics/yolov5/wiki/Docker-Quickstart) <a href=\"https://hub.docker.com/r/ultralytics/yolov5\"><img src=\"https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker\" alt=\"Docker Pulls\"></a>\n"
]
},
{
"cell_type": "markdown",
"metadata": {
"id": "6Qu7Iesl0p54"
},
"source": [
"# Status\n",
"\n",
"![CI CPU testing](https://github.com/ultralytics/yolov5/workflows/CI%20CPU%20testing/badge.svg)\n",
"\n",
"If this badge is green, all [YOLOv5 GitHub Actions](https://github.com/ultralytics/yolov5/actions) Continuous Integration (CI) tests are currently passing. CI tests verify correct operation of YOLOv5 training ([train.py](https://github.com/ultralytics/yolov5/blob/master/train.py)), testing ([test.py](https://github.com/ultralytics/yolov5/blob/master/test.py)), inference ([detect.py](https://github.com/ultralytics/yolov5/blob/master/detect.py)) and export ([export.py](https://github.com/ultralytics/yolov5/blob/master/models/export.py)) on MacOS, Windows, and Ubuntu every 24 hours and on every commit.\n"
]
},
{
@@ -1151,10 +1170,11 @@
"id": "mcKoSIK2WSzj"
},
"source": [
"# Test all\n",
"# Reproduce\n",
"%%shell\n",
"for x in s m l x; do\n",
" python test.py --weights yolov5$x.pt --data coco.yaml --img 640\n",
"for x in yolov5s yolov5m yolov5l yolov5x; do\n",
" python test.py --weights $x.pt --data coco.yaml --img 640 --conf 0.25 --iou 0.45 # speed\n",
" python test.py --weights $x.pt --data coco.yaml --img 640 --conf 0.001 --iou 0.65 # mAP\n",
"done"
],
"execution_count": null,


+ 11
- 1
utils/activations.py View File

@@ -5,7 +5,7 @@ import torch.nn as nn
import torch.nn.functional as F


# SiLU https://arxiv.org/pdf/1905.02244.pdf ----------------------------------------------------------------------------
# SiLU https://arxiv.org/pdf/1606.08415.pdf ----------------------------------------------------------------------------
class SiLU(nn.Module): # export-friendly version of nn.SiLU()
@staticmethod
def forward(x):
@@ -32,6 +32,16 @@ class MemoryEfficientSwish(nn.Module):
sx = torch.sigmoid(x)
return grad_output * (sx * (1 + x * (1 - sx)))


    def forward(self, x):
        return self.F.apply(x)


# FReLU https://arxiv.org/abs/2007.11824 ------------------------------------------------------------------------------
class FReLU(nn.Module):
    def __init__(self, c1, k=3):  # ch_in, kernel
        super().__init__()
        self.conv = nn.Conv2d(c1, c1, k, 1, 1, groups=c1, bias=False)
        self.bn = nn.BatchNorm2d(c1)

    def forward(self, x):
        return torch.max(x, self.bn(self.conv(x)))
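
A quick usage sketch of the added FReLU (funnel activation, https://arxiv.org/abs/2007.11824): it is channel-wise, so the input channel count must be supplied at construction (illustrative only, not part of the diff):

    import torch
    from utils.activations import FReLU

    act = FReLU(64)                       # c1 = 64 input channels
    y = act(torch.randn(1, 64, 32, 32))   # output shape matches input: (1, 64, 32, 32)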



+ 16
- 23
utils/autoanchor.py View File

@@ -6,21 +6,13 @@ import yaml
from scipy.cluster.vq import kmeans
from tqdm import tqdm


def check_anchor_order(m):
# Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
a = m.anchor_grid.prod(-1).view(-1) # anchor area
da = a[-1] - a[0] # delta a
ds = m.stride[-1] - m.stride[0] # delta s
    if da.sign() != ds.sign():  # anchor order and stride order disagree
print('Reversing anchor order')
m.anchors[:] = m.anchors.flip(0)
m.anchor_grid[:] = m.anchor_grid.flip(0)
from utils.general import colorstr


def check_anchors(dataset, model, thr=4.0, imgsz=640):
# Check anchor fit to data, recompute if necessary
print('\nAnalyzing anchors... ', end='')
prefix = colorstr('autoanchor: ')
print(f'\n{prefix}Analyzing anchors... ', end='')
m = model.module.model[-1] if hasattr(model, 'module') else model.model[-1] # Detect()
shapes = imgsz * dataset.shapes / dataset.shapes.max(1, keepdims=True)
scale = np.random.uniform(0.9, 1.1, size=(shapes.shape[0], 1)) # augment scale
@@ -35,7 +27,7 @@ def check_anchors(dataset, model, thr=4.0, imgsz=640):
return bpr, aat

bpr, aat = metric(m.anchor_grid.clone().cpu().view(-1, 2))
print('anchors/target = %.2f, Best Possible Recall (BPR) = %.4f' % (aat, bpr), end='')
print(f'anchors/target = {aat:.2f}, Best Possible Recall (BPR) = {bpr:.4f}', end='')
if bpr < 0.98: # threshold to recompute
print('. Attempting to improve anchors, please wait...')
na = m.anchor_grid.numel() // 2 # number of anchors
@@ -46,9 +38,9 @@ def check_anchors(dataset, model, thr=4.0, imgsz=640):
m.anchor_grid[:] = new_anchors.clone().view_as(m.anchor_grid) # for inference
m.anchors[:] = new_anchors.clone().view_as(m.anchors) / m.stride.to(m.anchors.device).view(-1, 1, 1) # loss
check_anchor_order(m)
print('New anchors saved to model. Update model *.yaml to use these anchors in the future.')
print(f'{prefix}New anchors saved to model. Update model *.yaml to use these anchors in the future.')
else:
print('Original anchors better than new anchors. Proceeding with original anchors.')
print(f'{prefix}Original anchors better than new anchors. Proceeding with original anchors.')
print('') # newline


@@ -70,6 +62,7 @@ def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=10
from utils.autoanchor import *; _ = kmean_anchors()
"""
thr = 1. / thr
prefix = colorstr('autoanchor: ')

def metric(k, wh): # compute metrics
r = wh[:, None] / k[None]
@@ -85,16 +78,16 @@ def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=10
k = k[np.argsort(k.prod(1))] # sort small to large
x, best = metric(k, wh0)
bpr, aat = (best > thr).float().mean(), (x > thr).float().mean() * n # best possible recall, anch > thr
print('thr=%.2f: %.4f best possible recall, %.2f anchors past thr' % (thr, bpr, aat))
print('n=%g, img_size=%s, metric_all=%.3f/%.3f-mean/best, past_thr=%.3f-mean: ' %
(n, img_size, x.mean(), best.mean(), x[x > thr].mean()), end='')
print(f'{prefix}thr={thr:.2f}: {bpr:.4f} best possible recall, {aat:.2f} anchors past thr')
print(f'{prefix}n={n}, img_size={img_size}, metric_all={x.mean():.3f}/{best.mean():.3f}-mean/best, '
f'past_thr={x[x > thr].mean():.3f}-mean: ', end='')
for i, x in enumerate(k):
print('%i,%i' % (round(x[0]), round(x[1])), end=', ' if i < len(k) - 1 else '\n') # use in *.cfg
return k

if isinstance(path, str): # *.yaml file
with open(path) as f:
data_dict = yaml.load(f, Loader=yaml.FullLoader) # model dict
data_dict = yaml.load(f, Loader=yaml.SafeLoader) # model dict
from utils.datasets import LoadImagesAndLabels
dataset = LoadImagesAndLabels(data_dict['train'], augment=True, rect=True)
else:
@@ -107,12 +100,12 @@ def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=10
# Filter
i = (wh0 < 3.0).any(1).sum()
if i:
print('WARNING: Extremely small objects found. '
'%g of %g labels are < 3 pixels in width or height.' % (i, len(wh0)))
print(f'{prefix}WARNING: Extremely small objects found. {i} of {len(wh0)} labels are < 3 pixels in size.')
wh = wh0[(wh0 >= 2.0).any(1)] # filter > 2 pixels
# wh = wh * (np.random.rand(wh.shape[0], 1) * 0.9 + 0.1) # multiply by random scale 0-1

# Kmeans calculation
print('Running kmeans for %g anchors on %g points...' % (n, len(wh)))
print(f'{prefix}Running kmeans for {n} anchors on {len(wh)} points...')
s = wh.std(0) # sigmas for whitening
k, dist = kmeans(wh / s, n, iter=30) # points, mean distance
k *= s
@@ -135,7 +128,7 @@ def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=10
# Evolve
npr = np.random
f, sh, mp, s = anchor_fitness(k), k.shape, 0.9, 0.1 # fitness, generations, mutation prob, sigma
pbar = tqdm(range(gen), desc='Evolving anchors with Genetic Algorithm') # progress bar
pbar = tqdm(range(gen), desc=f'{prefix}Evolving anchors with Genetic Algorithm:') # progress bar
for _ in pbar:
v = np.ones(sh)
while (v == 1).all(): # mutate until a change occurs (prevent duplicates)
@@ -144,7 +137,7 @@ def kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=10
fg = anchor_fitness(kg)
if fg > f:
f, k = fg, kg.copy()
pbar.desc = 'Evolving anchors with Genetic Algorithm: fitness = %.4f' % f
pbar.desc = f'{prefix}Evolving anchors with Genetic Algorithm: fitness = {f:.4f}'
if verbose:
print_results(k)
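
For reference, kmean_anchors() is normally invoked directly when evolving a fresh anchor set for a custom dataset; a minimal sketch against the signature above (the path and generation count are illustrative):

    from utils.autoanchor import kmean_anchors
    anchors = kmean_anchors(path='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen=1000)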



+ 161
- 65
utils/datasets.py View File

@@ -15,11 +15,12 @@ from threading import Thread
import cv2
import numpy as np
import torch
import torch.nn.functional as F
from PIL import Image, ExifTags
from torch.utils.data import Dataset
from tqdm import tqdm

from utils.general import xyxy2xywh, xywh2xyxy, clean_str
from utils.general import xyxy2xywh, xywh2xyxy, xywhn2xyxy, clean_str
from utils.torch_utils import torch_distributed_zero_first

# Parameters
@@ -55,7 +56,7 @@ def exif_size(img):


def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=False, cache=False, pad=0.0, rect=False,
rank=-1, world_size=1, workers=8, image_weights=False):
rank=-1, world_size=1, workers=8, image_weights=False, quad=False, prefix=''):
# Make sure only the first process in DDP process the dataset first, and the following others can use the cache
with torch_distributed_zero_first(rank):
dataset = LoadImagesAndLabels(path, imgsz, batch_size,
@@ -66,8 +67,8 @@ def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=Fa
single_cls=opt.single_cls,
stride=int(stride),
pad=pad,
rank=rank,
image_weights=image_weights)
image_weights=image_weights,
prefix=prefix)

batch_size = min(batch_size, len(dataset))
nw = min([os.cpu_count() // world_size, batch_size if batch_size > 1 else 0, workers]) # number of workers
@@ -79,7 +80,7 @@ def create_dataloader(path, imgsz, batch_size, stride, opt, hyp=None, augment=Fa
num_workers=nw,
sampler=sampler,
pin_memory=True,
collate_fn=LoadImagesAndLabels.collate_fn)
collate_fn=LoadImagesAndLabels.collate_fn4 if quad else LoadImagesAndLabels.collate_fn)
return dataloader, dataset


@@ -118,7 +119,7 @@ class _RepeatSampler(object):


class LoadImages: # for inference
def __init__(self, path, img_size=640):
def __init__(self, path, img_size=640, stride=32):
p = str(Path(path)) # os-agnostic
p = os.path.abspath(p) # absolute path
if '*' in p:
@@ -128,13 +129,14 @@ class LoadImages: # for inference
elif os.path.isfile(p):
files = [p] # files
else:
raise Exception('ERROR: %s does not exist' % p)
raise Exception(f'ERROR: {p} does not exist')

images = [x for x in files if x.split('.')[-1].lower() in img_formats]
videos = [x for x in files if x.split('.')[-1].lower() in vid_formats]
ni, nv = len(images), len(videos)

self.img_size = img_size
self.stride = stride
self.files = images + videos
self.nf = ni + nv # number of files
self.video_flag = [False] * ni + [True] * nv
@@ -143,8 +145,8 @@ class LoadImages: # for inference
self.new_video(videos[0]) # new video
else:
self.cap = None
assert self.nf > 0, 'No images or videos found in %s. Supported formats are:\nimages: %s\nvideos: %s' % \
(p, img_formats, vid_formats)
assert self.nf > 0, f'No images or videos found in {p}. ' \
f'Supported formats are:\nimages: {img_formats}\nvideos: {vid_formats}'

def __iter__(self):
self.count = 0
@@ -170,17 +172,17 @@ class LoadImages: # for inference
ret_val, img0 = self.cap.read()

self.frame += 1
print('video %g/%g (%g/%g) %s: ' % (self.count + 1, self.nf, self.frame, self.nframes, path), end='')
print(f'video {self.count + 1}/{self.nf} ({self.frame}/{self.nframes}) {path}: ', end='')

else:
# Read image
self.count += 1
img0 = cv2.imread(path) # BGR
assert img0 is not None, 'Image Not Found ' + path
print('image %g/%g %s: ' % (self.count, self.nf, path), end='')
print(f'image {self.count}/{self.nf} {path}: ', end='')

# Padded resize
img = letterbox(img0, new_shape=self.img_size)[0]
img = letterbox(img0, self.img_size, stride=self.stride)[0]

# Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
@@ -198,8 +200,9 @@ class LoadImages: # for inference


class LoadWebcam: # for inference
def __init__(self, pipe='0', img_size=640):
def __init__(self, pipe='0', img_size=640, stride=32):
self.img_size = img_size
self.stride = stride

if pipe.isnumeric():
pipe = eval(pipe) # local camera
@@ -237,12 +240,12 @@ class LoadWebcam: # for inference
break

# Print
assert ret_val, 'Camera Error %s' % self.pipe
assert ret_val, f'Camera Error {self.pipe}'
img_path = 'webcam.jpg'
print('webcam %g: ' % self.count, end='')
print(f'webcam {self.count}: ', end='')

# Padded resize
img = letterbox(img0, new_shape=self.img_size)[0]
img = letterbox(img0, self.img_size, stride=self.stride)[0]

# Convert
img = img[:, :, ::-1].transpose(2, 0, 1) # BGR to RGB, to 3x416x416
@@ -255,9 +258,10 @@ class LoadWebcam: # for inference


class LoadStreams: # multiple IP or RTSP cameras
def __init__(self, sources='streams.txt', img_size=640):
def __init__(self, sources='streams.txt', img_size=640, stride=32):
self.mode = 'stream'
self.img_size = img_size
self.stride = stride

if os.path.isfile(sources):
with open(sources, 'r') as f:
@@ -270,20 +274,20 @@ class LoadStreams: # multiple IP or RTSP cameras
self.sources = [clean_str(x) for x in sources] # clean source names for later
for i, s in enumerate(sources):
# Start the thread to read frames from the video stream
print('%g/%g: %s... ' % (i + 1, n, s), end='')
print(f'{i + 1}/{n}: {s}... ', end='')
cap = cv2.VideoCapture(eval(s) if s.isnumeric() else s)
assert cap.isOpened(), 'Failed to open %s' % s
assert cap.isOpened(), f'Failed to open {s}'
w = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
h = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
fps = cap.get(cv2.CAP_PROP_FPS) % 100
_, self.imgs[i] = cap.read() # guarantee first frame
thread = Thread(target=self.update, args=([i, cap]), daemon=True)
print(' success (%gx%g at %.2f FPS).' % (w, h, fps))
print(f' success ({w}x{h} at {fps:.2f} FPS).')
thread.start()
print('') # newline

# check for common shapes
s = np.stack([letterbox(x, new_shape=self.img_size)[0].shape for x in self.imgs], 0) # inference shapes
s = np.stack([letterbox(x, self.img_size, stride=self.stride)[0].shape for x in self.imgs], 0) # shapes
self.rect = np.unique(s, axis=0).shape[0] == 1 # rect inference if all shapes equal
if not self.rect:
print('WARNING: Different stream shapes detected. For optimal performance supply similarly-shaped streams.')
@@ -312,7 +316,7 @@ class LoadStreams: # multiple IP or RTSP cameras
raise StopIteration

# Letterbox
img = [letterbox(x, new_shape=self.img_size, auto=self.rect)[0] for x in img0]
img = [letterbox(x, self.img_size, auto=self.rect, stride=self.stride)[0] for x in img0]

# Stack
img = np.stack(img, 0)
@@ -335,7 +339,7 @@ def img2label_paths(img_paths):

class LoadImagesAndLabels(Dataset): # for training/testing
def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, rect=False, image_weights=False,
cache_images=False, single_cls=False, stride=32, pad=0.0, rank=-1):
cache_images=False, single_cls=False, stride=32, pad=0.0, prefix=''):
self.img_size = img_size
self.augment = augment
self.hyp = hyp
@@ -357,11 +361,11 @@ class LoadImagesAndLabels(Dataset): # for training/testing
parent = str(p.parent) + os.sep
f += [x.replace('./', parent) if x.startswith('./') else x for x in t] # local to global path
else:
raise Exception('%s does not exist' % p)
raise Exception(f'{prefix}{p} does not exist')
self.img_files = sorted([x.replace('/', os.sep) for x in f if x.split('.')[-1].lower() in img_formats])
assert self.img_files, 'No images found'
assert self.img_files, f'{prefix}No images found'
except Exception as e:
raise Exception('Error loading data from %s: %s\nSee %s' % (path, e, help_url))
raise Exception(f'{prefix}Error loading data from {path}: {e}\nSee {help_url}')

# Check cache
self.label_files = img2label_paths(self.img_files) # labels
@@ -369,15 +373,15 @@ class LoadImagesAndLabels(Dataset): # for training/testing
if cache_path.is_file():
cache = torch.load(cache_path) # load
if cache['hash'] != get_hash(self.label_files + self.img_files) or 'results' not in cache: # changed
cache = self.cache_labels(cache_path) # re-cache
cache = self.cache_labels(cache_path, prefix) # re-cache
else:
cache = self.cache_labels(cache_path) # cache
cache = self.cache_labels(cache_path, prefix) # cache

# Display cache
[nf, nm, ne, nc, n] = cache.pop('results') # found, missing, empty, corrupted, total
desc = f"Scanning '{cache_path}' for images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
tqdm(None, desc=desc, total=n, initial=n)
assert nf > 0 or not augment, f'No labels found in {cache_path}. Can not train without labels. See {help_url}'
tqdm(None, desc=prefix + desc, total=n, initial=n)
assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {help_url}'

# Read cache
cache.pop('hash') # remove hash
@@ -431,9 +435,9 @@ class LoadImagesAndLabels(Dataset): # for training/testing
for i, x in pbar:
self.imgs[i], self.img_hw0[i], self.img_hw[i] = x # img, hw_original, hw_resized = load_image(self, i)
gb += self.imgs[i].nbytes
pbar.desc = 'Caching images (%.1fGB)' % (gb / 1E9)
pbar.desc = f'{prefix}Caching images ({gb / 1E9:.1f}GB)'

def cache_labels(self, path=Path('./labels.cache')):
def cache_labels(self, path=Path('./labels.cache'), prefix=''):
# Cache dataset labels, check images and read shapes
x = {} # dict
nm, nf, ne, nc = 0, 0, 0, 0 # number missing, found, empty, duplicate
@@ -444,7 +448,8 @@ class LoadImagesAndLabels(Dataset): # for training/testing
im = Image.open(im_file)
im.verify() # PIL verify
shape = exif_size(im) # image size
assert (shape[0] > 9) & (shape[1] > 9), 'image size <10 pixels'
assert (shape[0] > 9) & (shape[1] > 9), f'image size {shape} <10 pixels'
assert im.format.lower() in img_formats, f'invalid image format {im.format}'

# verify labels
if os.path.isfile(lb_file):
@@ -465,18 +470,18 @@ class LoadImagesAndLabels(Dataset): # for training/testing
x[im_file] = [l, shape]
except Exception as e:
nc += 1
print('WARNING: Ignoring corrupted image and/or label %s: %s' % (im_file, e))
print(f'{prefix}WARNING: Ignoring corrupted image and/or label {im_file}: {e}')

pbar.desc = f"Scanning '{path.parent / path.stem}' for images and labels... " \
pbar.desc = f"{prefix}Scanning '{path.parent / path.stem}' for images and labels... " \
f"{nf} found, {nm} missing, {ne} empty, {nc} corrupted"

if nf == 0:
print(f'WARNING: No labels found in {path}. See {help_url}')
print(f'{prefix}WARNING: No labels found in {path}. See {help_url}')

x['hash'] = get_hash(self.label_files + self.img_files)
x['results'] = [nf, nm, ne, nc, i + 1]
torch.save(x, path) # save for next time
logging.info(f"New cache created: {path}")
logging.info(f'{prefix}New cache created: {path}')
return x

def __len__(self):
@@ -514,16 +519,9 @@ class LoadImagesAndLabels(Dataset): # for training/testing
img, ratio, pad = letterbox(img, shape, auto=False, scaleup=self.augment)
shapes = (h0, w0), ((h / h0, w / w0), pad) # for COCO mAP rescaling

# Load labels
labels = []
x = self.labels[index]
if x.size > 0:
# Normalized xywh to pixel xyxy format
labels = x.copy()
labels[:, 1] = ratio[0] * w * (x[:, 1] - x[:, 3] / 2) + pad[0] # pad width
labels[:, 2] = ratio[1] * h * (x[:, 2] - x[:, 4] / 2) + pad[1] # pad height
labels[:, 3] = ratio[0] * w * (x[:, 1] + x[:, 3] / 2) + pad[0]
labels[:, 4] = ratio[1] * h * (x[:, 2] + x[:, 4] / 2) + pad[1]
labels = self.labels[index].copy()
if labels.size: # normalized xywh to pixel xyxy format
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], ratio[0] * w, ratio[1] * h, padw=pad[0], padh=pad[1])

if self.augment:
# Augment imagespace
@@ -578,6 +576,32 @@ class LoadImagesAndLabels(Dataset): # for training/testing
l[:, 0] = i # add target image index for build_targets()
return torch.stack(img, 0), torch.cat(label, 0), path, shapes

@staticmethod
def collate_fn4(batch):
img, label, path, shapes = zip(*batch) # transposed
n = len(shapes) // 4
img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]

ho = torch.tensor([[0., 0, 0, 1, 0, 0]])
wo = torch.tensor([[0., 0, 1, 0, 0, 0]])
s = torch.tensor([[1, 1, .5, .5, .5, .5]]) # scale
for i in range(n): # zidane torch.zeros(16,3,720,1280) # BCHW
i *= 4
if random.random() < 0.5:
im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[
0].type(img[i].type())
l = label[i]
else:
im = torch.cat((torch.cat((img[i], img[i + 1]), 1), torch.cat((img[i + 2], img[i + 3]), 1)), 2)
l = torch.cat((label[i], label[i + 1] + ho, label[i + 2] + wo, label[i + 3] + ho + wo), 0) * s
img4.append(im)
label4.append(l)

for i, l in enumerate(label4):
l[:, 0] = i # add target image index for build_targets()

return torch.stack(img4, 0), torch.cat(label4, 0), path4, shapes4
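
# Note (usage sketch, not part of this diff): collate_fn4 backs the new --quad
# training mode. For every group of 4 images it either 2x-upsamples a single
# image or tiles all 4 into one 2x2 mosaic, rescaling labels by `s`, so a
# smaller --img size keeps large-image statistics. It is selected through the
# factory above, e.g.:
#   loader, dataset = create_dataloader(path, imgsz, batch_size, stride, opt,
#                                       hyp=hyp, augment=True, quad=True)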


# Ancillary functions --------------------------------------------------------------------------------------------------
def load_image(self, index):
@@ -610,14 +634,20 @@ def augment_hsv(img, hgain=0.5, sgain=0.5, vgain=0.5):
img_hsv = cv2.merge((cv2.LUT(hue, lut_hue), cv2.LUT(sat, lut_sat), cv2.LUT(val, lut_val))).astype(dtype)
cv2.cvtColor(img_hsv, cv2.COLOR_HSV2BGR, dst=img) # no return needed

# Histogram equalization
# if random.random() < 0.2:
# for i in range(3):
# img[:, :, i] = cv2.equalizeHist(img[:, :, i])

def hist_equalize(img, clahe=True, bgr=False):
# Equalize histogram on BGR image 'img' with img.shape(n,m,3) and range 0-255
yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV if bgr else cv2.COLOR_RGB2YUV)
if clahe:
c = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8))
yuv[:, :, 0] = c.apply(yuv[:, :, 0])
else:
yuv[:, :, 0] = cv2.equalizeHist(yuv[:, :, 0]) # equalize Y channel histogram
return cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR if bgr else cv2.COLOR_YUV2RGB) # convert YUV image to RGB


def load_mosaic(self, index):
# loads images in a mosaic
# loads images in a 4-mosaic

labels4 = []
s = self.img_size
@@ -647,13 +677,9 @@ def load_mosaic(self, index):
padh = y1a - y1b

# Labels
x = self.labels[index]
labels = x.copy()
if x.size > 0: # Normalized xywh to pixel xyxy format
labels[:, 1] = w * (x[:, 1] - x[:, 3] / 2) + padw
labels[:, 2] = h * (x[:, 2] - x[:, 4] / 2) + padh
labels[:, 3] = w * (x[:, 1] + x[:, 3] / 2) + padw
labels[:, 4] = h * (x[:, 2] + x[:, 4] / 2) + padh
labels = self.labels[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padw, padh) # normalized xywh to pixel xyxy format
labels4.append(labels)

# Concat/clip labels
@@ -674,6 +700,76 @@ def load_mosaic(self, index):
return img4, labels4


def load_mosaic9(self, index):
# loads images in a 9-mosaic

labels9 = []
s = self.img_size
indices = [index] + [self.indices[random.randint(0, self.n - 1)] for _ in range(8)] # 8 additional image indices
for i, index in enumerate(indices):
# Load image
img, _, (h, w) = load_image(self, index)

# place img in img9
if i == 0: # center
            img9 = np.full((s * 3, s * 3, img.shape[2]), 114, dtype=np.uint8)  # base image with 9 tiles
h0, w0 = h, w
c = s, s, s + w, s + h # xmin, ymin, xmax, ymax (base) coordinates
elif i == 1: # top
c = s, s - h, s + w, s
elif i == 2: # top right
c = s + wp, s - h, s + wp + w, s
elif i == 3: # right
c = s + w0, s, s + w0 + w, s + h
elif i == 4: # bottom right
c = s + w0, s + hp, s + w0 + w, s + hp + h
elif i == 5: # bottom
c = s + w0 - w, s + h0, s + w0, s + h0 + h
elif i == 6: # bottom left
c = s + w0 - wp - w, s + h0, s + w0 - wp, s + h0 + h
elif i == 7: # left
c = s - w, s + h0 - h, s, s + h0
elif i == 8: # top left
c = s - w, s + h0 - hp - h, s, s + h0 - hp

padx, pady = c[:2]
x1, y1, x2, y2 = [max(x, 0) for x in c] # allocate coords

# Labels
labels = self.labels[index].copy()
if labels.size:
labels[:, 1:] = xywhn2xyxy(labels[:, 1:], w, h, padx, pady) # normalized xywh to pixel xyxy format
labels9.append(labels)

# Image
img9[y1:y2, x1:x2] = img[y1 - pady:, x1 - padx:] # img9[ymin:ymax, xmin:xmax]
hp, wp = h, w # height, width previous

# Offset
yc, xc = [int(random.uniform(0, s)) for x in self.mosaic_border] # mosaic center x, y
img9 = img9[yc:yc + 2 * s, xc:xc + 2 * s]

# Concat/clip labels
if len(labels9):
labels9 = np.concatenate(labels9, 0)
labels9[:, [1, 3]] -= xc
labels9[:, [2, 4]] -= yc

np.clip(labels9[:, 1:], 0, 2 * s, out=labels9[:, 1:]) # use with random_perspective
# img9, labels9 = replicate(img9, labels9) # replicate

# Augment
img9, labels9 = random_perspective(img9, labels9,
degrees=self.hyp['degrees'],
translate=self.hyp['translate'],
scale=self.hyp['scale'],
shear=self.hyp['shear'],
perspective=self.hyp['perspective'],
border=self.mosaic_border) # border to remove

return img9, labels9


def replicate(img, labels):
# Replicate labels
h, w = img.shape[:2]
@@ -691,8 +787,8 @@ def replicate(img, labels):
return img, labels


def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True):
# Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
# Resize and pad image while meeting stride-multiple constraints
shape = img.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
@@ -707,7 +803,7 @@ def letterbox(img, new_shape=(640, 640), color=(114, 114, 114), auto=True, scale
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, 32), np.mod(dh, 32) # wh padding
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
@@ -811,12 +907,12 @@ def random_perspective(img, targets=(), degrees=10, translate=.1, scale=.1, shea
return img, targets


def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1): # box1(4,n), box2(4,n)
def box_candidates(box1, box2, wh_thr=2, ar_thr=20, area_thr=0.1, eps=1e-16): # box1(4,n), box2(4,n)
# Compute candidate boxes: box1 before augment, box2 after augment, wh_thr (pixels), aspect_ratio_thr, area_ratio
w1, h1 = box1[2] - box1[0], box1[3] - box1[1]
w2, h2 = box2[2] - box2[0], box2[3] - box2[1]
ar = np.maximum(w2 / (h2 + 1e-16), h2 / (w2 + 1e-16)) # aspect ratio
return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + 1e-16) > area_thr) & (ar < ar_thr) # candidates
ar = np.maximum(w2 / (h2 + eps), h2 / (w2 + eps)) # aspect ratio
return (w2 > wh_thr) & (h2 > wh_thr) & (w2 * h2 / (w1 * h1 + eps) > area_thr) & (ar < ar_thr) # candidates


def cutout(image, labels):


+ 87
- 12
utils/general.py View File

@@ -25,6 +25,7 @@ from utils.torch_utils import init_torch_seeds
torch.set_printoptions(linewidth=320, precision=5, profile='long')
np.set_printoptions(linewidth=320, formatter={'float_kind': '{:11.5g}'.format}) # format short g, %precision=5
cv2.setNumThreads(0) # prevent OpenCV from multithreading (incompatible with PyTorch DataLoader)
os.environ['NUMEXPR_MAX_THREADS'] = str(min(os.cpu_count(), 8)) # NumExpr max threads


def set_logging(rank=-1):
@@ -34,6 +35,7 @@ def set_logging(rank=-1):


def init_seeds(seed=0):
# Initialize random number generator (RNG) seeds
random.seed(seed)
np.random.seed(seed)
init_torch_seeds(seed)
@@ -45,12 +47,44 @@ def get_latest_run(search_dir='.'):
return max(last_list, key=os.path.getctime) if last_list else ''


def check_online():
# Check internet connectivity
import socket
try:
        socket.create_connection(("1.1.1.1", 53))  # check host accessibility
return True
except OSError:
return False


def check_git_status():
# Suggest 'git pull' if repo is out of date
if platform.system() in ['Linux', 'Darwin'] and not os.path.isfile('/.dockerenv'):
s = subprocess.check_output('if [ -d .git ]; then git fetch && git status -uno; fi', shell=True).decode('utf-8')
if 'Your branch is behind' in s:
print(s[s.find('Your branch is behind'):s.find('\n\n')] + '\n')
# Recommend 'git pull' if code is out of date
print(colorstr('github: '), end='')
try:
assert Path('.git').exists(), 'skipping check (not a git repository)'
assert not Path('/workspace').exists(), 'skipping check (Docker image)' # not Path('/.dockerenv').exists()
assert check_online(), 'skipping check (offline)'

cmd = 'git fetch && git config --get remote.origin.url'
url = subprocess.check_output(cmd, shell=True).decode().strip().rstrip('.git') # github repo url
branch = subprocess.check_output('git rev-parse --abbrev-ref HEAD', shell=True).decode().strip() # checked out
n = int(subprocess.check_output(f'git rev-list {branch}..origin/master --count', shell=True)) # commits behind
if n > 0:
s = f"⚠️ WARNING: code is out of date by {n} commit{'s' * (n > 1)}. " \
f"Use 'git pull' to update or 'git clone {url}' to download latest."
else:
s = f'up to date with {url} ✅'
print(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s)
except Exception as e:
print(e)


def check_requirements(file='requirements.txt', exclude=()):
# Check installed dependencies meet requirements
import pkg_resources
requirements = [f'{x.name}{x.specifier}' for x in pkg_resources.parse_requirements(Path(file).open())
if x.name not in exclude]
pkg_resources.require(requirements) # DistributionNotFound or VersionConflict exception if requirements not met


def check_img_size(img_size, s=32):
@@ -102,6 +136,36 @@ def clean_str(s):
return re.sub(pattern="[|@#!¡·$€%&()=?¿^*;:,¨´><+]", repl="_", string=s)


def one_cycle(y1=0.0, y2=1.0, steps=100):
# lambda function for sinusoidal ramp from y1 to y2
return lambda x: ((1 - math.cos(x * math.pi / steps)) / 2) * (y2 - y1) + y1
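
one_cycle() returns the epoch -> multiplier function consumed by a LambdaLR scheduler (as in train.py's LR setup); a minimal runnable sketch, with a throwaway SGD optimizer standing in for the real one:

    import torch
    from torch.optim.lr_scheduler import LambdaLR

    optimizer = torch.optim.SGD([torch.zeros(1, requires_grad=True)], lr=0.01)
    lf = one_cycle(1, 0.2, steps=300)  # cosine ramp from 1.0 down to 0.2 over 300 epochs
    scheduler = LambdaLR(optimizer, lr_lambda=lf)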


def colorstr(*input):
# Colors a string https://en.wikipedia.org/wiki/ANSI_escape_code, i.e. colorstr('blue', 'hello world')
*args, string = input if len(input) > 1 else ('blue', 'bold', input[0]) # color arguments, string
colors = {'black': '\033[30m', # basic colors
'red': '\033[31m',
'green': '\033[32m',
'yellow': '\033[33m',
'blue': '\033[34m',
'magenta': '\033[35m',
'cyan': '\033[36m',
'white': '\033[37m',
'bright_black': '\033[90m', # bright colors
'bright_red': '\033[91m',
'bright_green': '\033[92m',
'bright_yellow': '\033[93m',
'bright_blue': '\033[94m',
'bright_magenta': '\033[95m',
'bright_cyan': '\033[96m',
'bright_white': '\033[97m',
'end': '\033[0m', # misc
'bold': '\033[1m',
'underline': '\033[4m'}
return ''.join(colors[x] for x in args) + f'{string}' + colors['end']
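
Usage mirrors the docstring above: the final positional argument is the string, everything before it selects codes, and a single argument defaults to blue + bold:

    print(colorstr('bright_green', 'bold', 'all checks passed'))
    print(colorstr('results saved'))  # rendered blue and bold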


def labels_to_class_weights(labels, nc=80):
# Get class weights (inverse frequency) from training labels
if labels[0] is None: # no labels loaded
@@ -161,6 +225,16 @@ def xywh2xyxy(x):
return y


def xywhn2xyxy(x, w=640, h=640, padw=32, padh=32):
# Convert nx4 boxes from [x, y, w, h] normalized to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = w * (x[:, 0] - x[:, 2] / 2) + padw # top left x
y[:, 1] = h * (x[:, 1] - x[:, 3] / 2) + padh # top left y
y[:, 2] = w * (x[:, 0] + x[:, 2] / 2) + padw # bottom right x
y[:, 3] = h * (x[:, 1] + x[:, 3] / 2) + padh # bottom right y
return y
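
A quick numeric check of the conversion (illustration only): a normalized box centered at (0.5, 0.5) with width = height = 0.25 on a 640x640 image and zero padding maps to pixel corners (240, 240, 400, 400):

    import numpy as np
    boxes = np.array([[0.5, 0.5, 0.25, 0.25]])
    print(xywhn2xyxy(boxes, w=640, h=640, padw=0, padh=0))  # [[240. 240. 400. 400.]]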


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
# Rescale coords (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape
@@ -276,6 +350,7 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
# Settings
min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height
max_det = 300 # maximum number of detections per image
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
time_limit = 10.0 # seconds to quit after
redundant = True # require redundant detections
multi_label = nc > 1 # multiple labels per box (adds 0.5ms/img)
@@ -323,13 +398,12 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
# if not torch.isfinite(x).all():
# x = x[torch.isfinite(x).all(1)]

# If none remain process next image
# Check shape
n = x.shape[0] # number of boxes
if not n:
if not n: # no boxes
continue

# Sort by confidence
# x = x[x[:, 4].argsort(descending=True)]
elif n > max_nms: # excess boxes
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence

# Batched NMS
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
@@ -347,6 +421,7 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non

output[xi] = x[i]
if (time.time() - t) > time_limit:
print(f'WARNING: NMS time limit {time_limit}s exceeded')
break # time limit exceeded

return output
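
# The class-offset trick above ('Batched NMS') lets one torchvision.ops.nms call
# handle all classes at once: each box is shifted by class index * max_wh, so boxes
# of different classes can never overlap. A minimal sketch of the idea:
import torch
import torchvision

boxes = torch.tensor([[0., 0., 10., 10.], [0., 0., 10., 10.]])  # two identical boxes
scores = torch.tensor([0.9, 0.8])
classes = torch.tensor([[0.], [1.]])                            # but different classes
keep = torchvision.ops.nms(boxes + classes * 4096, scores, 0.45)
print(keep)  # tensor([0, 1]) - both kept, since the offset prevents cross-class suppression
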
@@ -355,8 +430,8 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non
def strip_optimizer(f='weights/best.pt', s=''): # from utils.general import *; strip_optimizer()
# Strip optimizer from 'f' to finalize training, optionally save as 's'
x = torch.load(f, map_location=torch.device('cpu'))
x['optimizer'] = None
x['training_results'] = None
for key in 'optimizer', 'training_results', 'wandb_id':
x[key] = None
x['epoch'] = -1
x['model'].half() # to FP16
for p in x['model'].parameters():
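
# Typical use after training, roughly halving checkpoint size by dropping
# optimizer state and casting to FP16 (path illustrative):
strip_optimizer('runs/train/exp/weights/best.pt')  # overwrites in place; pass s='...' to save a copy instead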


+ 54
- 54
utils/google_utils.py View File

@@ -6,84 +6,84 @@ import subprocess
import time
from pathlib import Path

import requests
import torch


def gsutil_getsize(url=''):
# gs://bucket/file size https://cloud.google.com/storage/docs/gsutil/commands/du
s = subprocess.check_output('gsutil du %s' % url, shell=True).decode('utf-8')
s = subprocess.check_output(f'gsutil du {url}', shell=True).decode('utf-8')
return eval(s.split(' ')[0]) if len(s) else 0 # bytes


def attempt_download(weights):
# Attempt to download pretrained weights if not found locally
weights = str(weights).strip().replace("'", '')
file = Path(weights).name.lower()

msg = weights + ' missing, try downloading from https://github.com/ultralytics/yolov5/releases/'
models = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt'] # available models
redundant = False # offer second download option

if file in models and not os.path.isfile(weights):
# Google Drive
# d = {'yolov5s.pt': '1R5T6rIyy3lLwgFXNms8whc-387H0tMQO',
# 'yolov5m.pt': '1vobuEExpWQVpXExsJ2w-Mbf3HJjWkQJr',
# 'yolov5l.pt': '1hrlqD1Wdei7UT4OgT785BEk1JwnSvNEV',
# 'yolov5x.pt': '1mM8aZJlWTxOg7BZJvNUMrTnA2AbeCVzS'}
# r = gdrive_download(id=d[file], name=weights) if file in d else 1
# if r == 0 and os.path.exists(weights) and os.path.getsize(weights) > 1E6: # check
# return

try: # GitHub
url = 'https://github.com/ultralytics/yolov5/releases/download/v3.1/' + file
print('Downloading %s to %s...' % (url, weights))
torch.hub.download_url_to_file(url, weights)
assert os.path.exists(weights) and os.path.getsize(weights) > 1E6 # check
except Exception as e: # GCP
print('Download error: %s' % e)
assert redundant, 'No secondary mirror'
url = 'https://storage.googleapis.com/ultralytics/yolov5/ckpt/' + file
print('Downloading %s to %s...' % (url, weights))
r = os.system('curl -L %s -o %s' % (url, weights)) # torch.hub.download_url_to_file(url, weights)
finally:
if not (os.path.exists(weights) and os.path.getsize(weights) > 1E6): # check
os.remove(weights) if os.path.exists(weights) else None # remove partial downloads
print('ERROR: Download failure: %s' % msg)
print('')
return


def gdrive_download(id='1uH2BylpFxHKEGXKL6wJJlsgMU2YEjxuc', name='tmp.zip'):
# Downloads a file from Google Drive. from utils.google_utils import *; gdrive_download()
def attempt_download(file, repo='ultralytics/yolov5'):
# Attempt file download if it does not exist locally
file = Path(str(file).strip().replace("'", '').lower())

if not file.exists():
try:
response = requests.get(f'https://api.github.com/repos/{repo}/releases/latest').json() # github api
assets = [x['name'] for x in response['assets']] # release assets, i.e. ['yolov5s.pt', 'yolov5m.pt', ...]
tag = response['tag_name'] # i.e. 'v1.0'
except: # fallback plan
assets = ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']
tag = subprocess.check_output('git tag', shell=True).decode().split()[-1]

name = file.name
if name in assets:
msg = f'{file} missing, try downloading from https://github.com/{repo}/releases/'
redundant = False # second download option
try: # GitHub
url = f'https://github.com/{repo}/releases/download/{tag}/{name}'
print(f'Downloading {url} to {file}...')
torch.hub.download_url_to_file(url, file)
assert file.exists() and file.stat().st_size > 1E6 # check
except Exception as e: # GCP
print(f'Download error: {e}')
assert redundant, 'No secondary mirror'
url = f'https://storage.googleapis.com/{repo}/ckpt/{name}'
print(f'Downloading {url} to {file}...')
os.system(f'curl -L {url} -o {file}') # alternative: torch.hub.download_url_to_file(url, file)
finally:
if not file.exists() or file.stat().st_size < 1E6: # check
file.unlink(missing_ok=True) # remove partial downloads
print(f'ERROR: Download failure: {msg}')
print('')
return
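
# The rewritten helper resolves the latest release tag via the GitHub API
# (with a `git tag` fallback) before downloading, so call sites shrink to:
attempt_download('yolov5s.pt')  # no-op if the file exists; otherwise fetches the matching release asset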


def gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip'):
# Downloads a file from Google Drive. from yolov5.utils.google_utils import *; gdrive_download()
t = time.time()

print('Downloading https://drive.google.com/uc?export=download&id=%s as %s... ' % (id, name), end='')
os.remove(name) if os.path.exists(name) else None # remove existing
os.remove('cookie') if os.path.exists('cookie') else None
file = Path(file)
cookie = Path('cookie') # gdrive cookie
print(f'Downloading https://drive.google.com/uc?export=download&id={id} as {file}... ', end='')
file.unlink(missing_ok=True) # remove existing file
cookie.unlink(missing_ok=True) # remove existing cookie

# Attempt file download
out = "NUL" if platform.system() == "Windows" else "/dev/null"
os.system('curl -c ./cookie -s -L "drive.google.com/uc?export=download&id=%s" > %s ' % (id, out))
os.system(f'curl -c ./cookie -s -L "drive.google.com/uc?export=download&id={id}" > {out}')
if os.path.exists('cookie'): # large file
s = 'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm=%s&id=%s" -o %s' % (get_token(), id, name)
s = f'curl -Lb ./cookie "drive.google.com/uc?export=download&confirm={get_token()}&id={id}" -o {file}'
else: # small file
s = 'curl -s -L -o %s "drive.google.com/uc?export=download&id=%s"' % (name, id)
s = f'curl -s -L -o {file} "drive.google.com/uc?export=download&id={id}"'
r = os.system(s) # execute, capture return
os.remove('cookie') if os.path.exists('cookie') else None
cookie.unlink(missing_ok=True) # remove existing cookie

# Error check
if r != 0:
os.remove(name) if os.path.exists(name) else None # remove partial
file.unlink(missing_ok=True) # remove partial
print('Download error ') # raise Exception('Download error')
return r

# Unzip if archive
if name.endswith('.zip'):
if file.suffix == '.zip':
print('unzipping... ', end='')
os.system('unzip -q %s' % name) # unzip
os.remove(name) # remove zip to free space
os.system(f'unzip -q {file}') # unzip
file.unlink() # remove zip to free space

print('Done (%.1fs)' % (time.time() - t))
print(f'Done ({time.time() - t:.1f}s)')
return r
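
# Example call with the defaults (requires curl on PATH, plus unzip for .zip archives):
r = gdrive_download(id='16TiPfZj7htmTyhntwcZyEEAejOUxuT6m', file='tmp.zip')  # returns curl's exit code, 0 on success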




+ 129
- 117
utils/loss.py View File

@@ -85,121 +85,133 @@ class QFocalLoss(nn.Module):
return loss


def compute_loss(p, targets, model): # predictions, targets, model
device = targets.device
lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
tcls, tbox, indices, anchors = build_targets(p, targets, model) # targets
h = model.hyp # hyperparameters

# Define criteria
BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device)) # weight=model.class_weights)
BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))

# Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
cp, cn = smooth_BCE(eps=0.0)

# Focal loss
g = h['fl_gamma'] # focal loss gamma
if g > 0:
BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

# Losses
nt = 0 # number of targets
no = len(p) # number of outputs
balance = [4.0, 1.0, 0.4] if no == 3 else [4.0, 1.0, 0.4, 0.1] # P3-5 or P3-6
for i, pi in enumerate(p): # layer index, layer predictions
b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
tobj = torch.zeros_like(pi[..., 0], device=device) # target obj

n = b.shape[0] # number of targets
if n:
nt += n # cumulative targets
ps = pi[b, a, gj, gi] # prediction subset corresponding to targets

# Regression
pxy = ps[:, :2].sigmoid() * 2. - 0.5
pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
pbox = torch.cat((pxy, pwh), 1) # predicted box
iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target)
lbox += (1.0 - iou).mean() # iou loss

# Objectness
tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio

# Classification
if model.nc > 1: # cls loss (only if multiple classes)
t = torch.full_like(ps[:, 5:], cn, device=device) # targets
t[range(n), tcls[i]] = cp
lcls += BCEcls(ps[:, 5:], t) # BCE

# Append targets to text file
# with open('targets.txt', 'a') as file:
# [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]

lobj += BCEobj(pi[..., 4], tobj) * balance[i] # obj loss

s = 3 / no # output count scaling
lbox *= h['box'] * s
lobj *= h['obj'] * s * (1.4 if no == 4 else 1.)
lcls *= h['cls'] * s
bs = tobj.shape[0] # batch size

loss = lbox + lobj + lcls
return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()


def build_targets(p, targets, model):
# Build targets for compute_loss(), input targets(image,class,x,y,w,h)
det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module
na, nt = det.na, targets.shape[0] # number of anchors, targets
tcls, tbox, indices, anch = [], [], [], []
gain = torch.ones(7, device=targets.device) # normalized to gridspace gain
ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices

g = 0.5 # bias
off = torch.tensor([[0, 0],
[1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m
# [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
], device=targets.device).float() * g # offsets

for i in range(det.nl):
anchors = det.anchors[i]
gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain

# Match targets to anchors
t = targets * gain
if nt:
# Matches
r = t[:, :, 4:6] / anchors[:, None] # wh ratio
j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t'] # compare
# j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
t = t[j] # filter

# Offsets
class ComputeLoss:
# Compute losses
def __init__(self, model, autobalance=False):
super(ComputeLoss, self).__init__()
device = next(model.parameters()).device # get model device
h = model.hyp # hyperparameters

# Define criteria
BCEcls = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['cls_pw']], device=device))
BCEobj = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([h['obj_pw']], device=device))

# Class label smoothing https://arxiv.org/pdf/1902.04103.pdf eqn 3
self.cp, self.cn = smooth_BCE(eps=0.0)

# Focal loss
g = h['fl_gamma'] # focal loss gamma
if g > 0:
BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)

det = model.module.model[-1] if is_parallel(model) else model.model[-1] # Detect() module
self.balance = {3: [3.67, 1.0, 0.43], 4: [3.78, 1.0, 0.39, 0.22], 5: [3.88, 1.0, 0.37, 0.17, 0.10]}[det.nl]
# self.balance = [1.0] * det.nl
self.ssi = (det.stride == 16).nonzero(as_tuple=False).item() # stride 16 index
self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, model.gr, h, autobalance
for k in 'na', 'nc', 'nl', 'anchors':
setattr(self, k, getattr(det, k))

def __call__(self, p, targets): # predictions, targets, model
device = targets.device
lcls, lbox, lobj = torch.zeros(1, device=device), torch.zeros(1, device=device), torch.zeros(1, device=device)
tcls, tbox, indices, anchors = self.build_targets(p, targets) # targets

# Losses
for i, pi in enumerate(p): # layer index, layer predictions
b, a, gj, gi = indices[i] # image, anchor, gridy, gridx
tobj = torch.zeros_like(pi[..., 0], device=device) # target obj

n = b.shape[0] # number of targets
if n:
ps = pi[b, a, gj, gi] # prediction subset corresponding to targets

# Regression
pxy = ps[:, :2].sigmoid() * 2. - 0.5
pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
pbox = torch.cat((pxy, pwh), 1) # predicted box
iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) # iou(prediction, target)
lbox += (1.0 - iou).mean() # iou loss

# Objectness
tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype) # iou ratio

# Classification
if self.nc > 1: # cls loss (only if multiple classes)
t = torch.full_like(ps[:, 5:], self.cn, device=device) # targets
t[range(n), tcls[i]] = self.cp
lcls += self.BCEcls(ps[:, 5:], t) # BCE

# Append targets to text file
# with open('targets.txt', 'a') as file:
# [file.write('%11.5g ' * 4 % tuple(x) + '\n') for x in torch.cat((txy[i], twh[i]), 1)]

obji = self.BCEobj(pi[..., 4], tobj)
lobj += obji * self.balance[i] # obj loss
if self.autobalance:
self.balance[i] = self.balance[i] * 0.9999 + 0.0001 / obji.detach().item()

if self.autobalance:
self.balance = [x / self.balance[self.ssi] for x in self.balance]
lbox *= self.hyp['box']
lobj *= self.hyp['obj']
lcls *= self.hyp['cls']
bs = tobj.shape[0] # batch size

loss = lbox + lobj + lcls
return loss * bs, torch.cat((lbox, lobj, lcls, loss)).detach()

def build_targets(self, p, targets):
# Build targets for compute_loss(), input targets(image,class,x,y,w,h)
na, nt = self.na, targets.shape[0] # number of anchors, targets
tcls, tbox, indices, anch = [], [], [], []
gain = torch.ones(7, device=targets.device) # normalized to gridspace gain
ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) # same as .repeat_interleave(nt)
targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) # append anchor indices

g = 0.5 # bias
off = torch.tensor([[0, 0],
[1, 0], [0, 1], [-1, 0], [0, -1], # j,k,l,m
# [1, 1], [1, -1], [-1, 1], [-1, -1], # jk,jm,lk,lm
], device=targets.device).float() * g # offsets

for i in range(self.nl):
anchors = self.anchors[i]
gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] # xyxy gain

# Match targets to anchors
t = targets * gain
if nt:
# Matches
r = t[:, :, 4:6] / anchors[:, None] # wh ratio
j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t'] # compare
# j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
t = t[j] # filter

# Offsets
gxy = t[:, 2:4] # grid xy
gxi = gain[[2, 3]] - gxy # inverse
j, k = ((gxy % 1. < g) & (gxy > 1.)).T
l, m = ((gxi % 1. < g) & (gxi > 1.)).T
j = torch.stack((torch.ones_like(j), j, k, l, m))
t = t.repeat((5, 1, 1))[j]
offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
else:
t = targets[0]
offsets = 0

# Define
b, c = t[:, :2].long().T # image, class
gxy = t[:, 2:4] # grid xy
gxi = gain[[2, 3]] - gxy # inverse
j, k = ((gxy % 1. < g) & (gxy > 1.)).T
l, m = ((gxi % 1. < g) & (gxi > 1.)).T
j = torch.stack((torch.ones_like(j), j, k, l, m))
t = t.repeat((5, 1, 1))[j]
offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
else:
t = targets[0]
offsets = 0

# Define
b, c = t[:, :2].long().T # image, class
gxy = t[:, 2:4] # grid xy
gwh = t[:, 4:6] # grid wh
gij = (gxy - offsets).long()
gi, gj = gij.T # grid xy indices

# Append
a = t[:, 6].long() # anchor indices
indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices
tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
anch.append(anchors[a]) # anchors
tcls.append(c) # class

return tcls, tbox, indices, anch
gwh = t[:, 4:6] # grid wh
gij = (gxy - offsets).long()
gi, gj = gij.T # grid xy indices

# Append
a = t[:, 6].long() # anchor indices
indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) # image, anchor, grid indices
tbox.append(torch.cat((gxy - gij, gwh), 1)) # box
anch.append(anchors[a]) # anchors
tcls.append(c) # class

return tcls, tbox, indices, anch
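
# The refactor turns the free compute_loss(p, targets, model) function into a class
# constructed once and called per batch. A minimal sketch of the new call site,
# assuming a built YOLOv5 model with hyp attached (pred/targets/device are placeholders):
compute_loss = ComputeLoss(model)                          # init once; reads hyp/anchors from the Detect() head
loss, loss_items = compute_loss(pred, targets.to(device))  # per batch; loss is already scaled by batch size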

+ 38
- 15
utils/metrics.py View File

@@ -15,7 +15,7 @@ def fitness(x):
return (x[:, :4] * w).sum(1)


def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision-recall_curve.png', names=[]):
def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='.', names=()):
""" Compute the average precision, given the recall and precision curves.
Source: https://github.com/rafaelpadilla/Object-Detection-Metrics.
# Arguments
@@ -35,12 +35,11 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision

# Find unique classes
unique_classes = np.unique(target_cls)
nc = unique_classes.shape[0] # number of classes

# Create Precision-Recall curve and compute AP for each class
px, py = np.linspace(0, 1, 1000), [] # for plotting
pr_score = 0.1 # score to evaluate P and R https://github.com/ultralytics/yolov3/issues/898
s = [unique_classes.shape[0], tp.shape[1]] # number class, number iou thresholds (i.e. 10 for mAP0.5...0.95)
ap, p, r = np.zeros(s), np.zeros(s), np.zeros(s)
ap, p, r = np.zeros((nc, tp.shape[1])), np.zeros((nc, 1000)), np.zeros((nc, 1000))
for ci, c in enumerate(unique_classes):
i = pred_cls == c
n_l = (target_cls == c).sum() # number of labels
@@ -55,25 +54,28 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir='precision

# Recall
recall = tpc / (n_l + 1e-16) # recall curve
r[ci] = np.interp(-pr_score, -conf[i], recall[:, 0]) # r at pr_score, negative x, xp because xp decreases
r[ci] = np.interp(-px, -conf[i], recall[:, 0], left=0) # negative x, xp because xp decreases

# Precision
precision = tpc / (tpc + fpc) # precision curve
p[ci] = np.interp(-pr_score, -conf[i], precision[:, 0]) # p at pr_score
p[ci] = np.interp(-px, -conf[i], precision[:, 0], left=1) # precision curve; negative x, xp because xp decreases

# AP from recall-precision curve
for j in range(tp.shape[1]):
ap[ci, j], mpre, mrec = compute_ap(recall[:, j], precision[:, j])
if plot and (j == 0):
if plot and j == 0:
py.append(np.interp(px, mrec, mpre)) # precision at mAP@0.5

# Compute F1 score (harmonic mean of precision and recall)
# Compute F1 (harmonic mean of precision and recall)
f1 = 2 * p * r / (p + r + 1e-16)

if plot:
plot_pr_curve(px, py, ap, save_dir, names)
plot_pr_curve(px, py, ap, Path(save_dir) / 'PR_curve.png', names)
plot_mc_curve(px, f1, Path(save_dir) / 'F1_curve.png', names, ylabel='F1')
plot_mc_curve(px, p, Path(save_dir) / 'P_curve.png', names, ylabel='Precision')
plot_mc_curve(px, r, Path(save_dir) / 'R_curve.png', names, ylabel='Recall')

return p, r, ap, f1, unique_classes.astype('int32')
i = f1.mean(0).argmax() # max F1 index
return p[:, i], r[:, i], ap, f1[:, i], unique_classes.astype('int32')
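
# The negated arguments to np.interp are needed because confidence decreases along
# the curve while np.interp requires an increasing xp; a small demonstration:
import numpy as np
px = np.linspace(0, 1, 5)
conf = np.array([0.9, 0.6, 0.3])    # sorted detections: confidence descends
recall = np.array([0.2, 0.5, 0.8])  # recall grows as the threshold drops
print(np.interp(-px, -conf, recall, left=0))  # negate both so xp increases; left=0 zeroes recall above max conf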


def compute_ap(recall, precision):
@@ -181,13 +183,14 @@ class ConfusionMatrix:

# Plots ----------------------------------------------------------------------------------------------------------------

def plot_pr_curve(px, py, ap, save_dir='.', names=()):
def plot_pr_curve(px, py, ap, save_dir='pr_curve.png', names=()):
# Precision-recall curve
fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)
py = np.stack(py, axis=1)

if 0 < len(names) < 21: # show mAP in legend if < 10 classes
if 0 < len(names) < 21: # display per-class legend if < 21 classes
for i, y in enumerate(py.T):
ax.plot(px, y, linewidth=1, label=f'{names[i]} %.3f' % ap[i, 0]) # plot(recall, precision)
ax.plot(px, y, linewidth=1, label=f'{names[i]} {ap[i, 0]:.3f}') # plot(recall, precision)
else:
ax.plot(px, py, linewidth=1, color='grey') # plot(recall, precision)

@@ -197,4 +200,24 @@ def plot_pr_curve(px, py, ap, save_dir='.', names=()):
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
fig.savefig(Path(save_dir) / 'precision_recall_curve.png', dpi=250)
fig.savefig(Path(save_dir), dpi=250)


def plot_mc_curve(px, py, save_dir='mc_curve.png', names=(), xlabel='Confidence', ylabel='Metric'):
# Metric-confidence curve
fig, ax = plt.subplots(1, 1, figsize=(9, 6), tight_layout=True)

if 0 < len(names) < 21: # display per-class legend if < 21 classes
for i, y in enumerate(py):
ax.plot(px, y, linewidth=1, label=f'{names[i]}') # plot(confidence, metric)
else:
ax.plot(px, py.T, linewidth=1, color='grey') # plot(confidence, metric)

y = py.mean(0)
ax.plot(px, y, linewidth=3, color='blue', label=f'all classes {y.max():.2f} at {px[y.argmax()]:.3f}')
ax.set_xlabel(xlabel)
ax.set_ylabel(ylabel)
ax.set_xlim(0, 1)
ax.set_ylim(0, 1)
plt.legend(bbox_to_anchor=(1.04, 1), loc="upper left")
fig.savefig(Path(save_dir), dpi=250)

+ 6
- 5
utils/plots.py View File

@@ -31,7 +31,7 @@ def color_list():
def hex2rgb(h):
return tuple(int(h[1 + i:1 + i + 2], 16) for i in (0, 2, 4))

return [hex2rgb(h) for h in plt.rcParams['axes.prop_cycle'].by_key()['color']]
return [hex2rgb(h) for h in matplotlib.colors.TABLEAU_COLORS.values()] # or BASE_ (8), CSS4_ (148), XKCD_ (949)


def hist2d(x, y, n=100):
@@ -190,6 +190,7 @@ def plot_lr_scheduler(optimizer, scheduler, epochs=300, save_dir=''):
plt.xlim(0, epochs)
plt.ylim(0)
plt.savefig(Path(save_dir) / 'LR.png', dpi=200)
plt.close()


def plot_test_txt(): # from utils.plots import *; plot_test()
@@ -244,9 +245,9 @@ def plot_study_txt(path='', x=None): # from utils.plots import *; plot_study_tx
'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet')

ax2.grid()
ax2.set_yticks(np.arange(30, 60, 5))
ax2.set_xlim(0, 30)
ax2.set_ylim(28, 50)
ax2.set_yticks(np.arange(30, 55, 5))
ax2.set_ylim(29, 51)
ax2.set_xlabel('GPU Speed (ms/img)')
ax2.set_ylabel('COCO AP val')
ax2.legend(loc='lower right')
@@ -294,13 +295,13 @@ def plot_labels(labels, save_dir=Path(''), loggers=None):
# loggers
for k, v in loggers.items() or {}:
if k == 'wandb' and v:
v.log({"Labels": [v.Image(str(x), caption=x.name) for x in save_dir.glob('*labels*.jpg')]})
v.log({"Labels": [v.Image(str(x), caption=x.name) for x in save_dir.glob('*labels*.jpg')]}, commit=False)


def plot_evolution(yaml_file='data/hyp.finetune.yaml'): # from utils.plots import *; plot_evolution()
# Plot hyperparameter evolution results in evolve.txt
with open(yaml_file) as f:
hyp = yaml.load(f, Loader=yaml.FullLoader)
hyp = yaml.load(f, Loader=yaml.SafeLoader)
x = np.loadtxt('evolve.txt', ndmin=2)
f = fitness(x)
# weights = (f - f.min()) ** 2 # for weighted results


+ 32
- 24
utils/torch_utils.py View File

@@ -3,9 +3,11 @@
import logging
import math
import os
import subprocess
import time
from contextlib import contextmanager
from copy import deepcopy
from pathlib import Path

import torch
import torch.backends.cudnn as cudnn
@@ -36,42 +38,49 @@ def init_torch_seeds(seed=0):
# Speed-reproducibility tradeoff https://pytorch.org/docs/stable/notes/randomness.html
torch.manual_seed(seed)
if seed == 0: # slower, more reproducible
cudnn.deterministic = True
cudnn.benchmark = False
cudnn.benchmark, cudnn.deterministic = False, True
else: # faster, less reproducible
cudnn.deterministic = False
cudnn.benchmark = True
cudnn.benchmark, cudnn.deterministic = True, False
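
# The two cuDNN flags now flip together; e.g.:
init_torch_seeds(0)  # deterministic cuDNN: reproducible but slower
init_torch_seeds(3)  # benchmark mode: faster, less reproducible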


def git_describe():
# return human-readable git description, i.e. v5.0-5-g3e25f1e https://git-scm.com/docs/git-describe
if Path('.git').exists():
return subprocess.check_output('git describe --tags --long --always', shell=True).decode('utf-8')[:-1]
else:
return ''


def select_device(device='', batch_size=None):
# device = 'cpu' or '0' or '0,1,2,3'
cpu_request = device.lower() == 'cpu'
if device and not cpu_request: # if device requested other than 'cpu'
s = f'YOLOv5 {git_describe()} torch {torch.__version__} ' # string
cpu = device.lower() == 'cpu'
if cpu:
os.environ['CUDA_VISIBLE_DEVICES'] = '-1' # force torch.cuda.is_available() = False
elif device: # non-cpu device requested
os.environ['CUDA_VISIBLE_DEVICES'] = device # set environment variable
assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availablity
assert torch.cuda.is_available(), f'CUDA unavailable, invalid device {device} requested' # check availability

cuda = False if cpu_request else torch.cuda.is_available()
cuda = not cpu and torch.cuda.is_available()
if cuda:
c = 1024 ** 2 # bytes to MB
ng = torch.cuda.device_count()
if ng > 1 and batch_size: # check that batch_size is compatible with device_count
assert batch_size % ng == 0, f'batch-size {batch_size} not multiple of GPU count {ng}'
x = [torch.cuda.get_device_properties(i) for i in range(ng)]
s = f'Using torch {torch.__version__} '
for i, d in enumerate((device or '0').split(',')):
if i == 1:
s = ' ' * len(s)
logger.info(f"{s}CUDA:{d} ({x[i].name}, {x[i].total_memory / c}MB)")
n = torch.cuda.device_count()
if n > 1 and batch_size: # check that batch_size is compatible with device_count
assert batch_size % n == 0, f'batch-size {batch_size} not multiple of GPU count {n}'
space = ' ' * len(s)
for i, d in enumerate(device.split(',') if device else range(n)):
p = torch.cuda.get_device_properties(i)
s += f"{'' if i == 0 else space}CUDA:{d} ({p.name}, {p.total_memory / 1024 ** 2}MB)\n" # bytes to MB
else:
logger.info(f'Using torch {torch.__version__} CPU')
s += 'CPU\n'

logger.info('') # skip a line
logger.info(s) # log device summary
return torch.device('cuda:0' if cuda else 'cpu')
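
# Example calls; batch size is validated against the GPU count when several devices are requested:
device = select_device('')                    # best available: CUDA:0 if present, else CPU
device = select_device('0,1', batch_size=16)  # two GPUs; asserts 16 % 2 == 0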


def time_synchronized():
# pytorch-accurate time
torch.cuda.synchronize() if torch.cuda.is_available() else None
if torch.cuda.is_available():
torch.cuda.synchronize()
return time.time()


@@ -226,8 +235,8 @@ def load_classifier(name='resnet101', n=2):
return model


def scale_img(img, ratio=1.0, same_shape=False): # img(16,3,256,416), r=ratio
# scales img(bs,3,y,x) by ratio
def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416)
# scales img(bs,3,y,x) by ratio constrained to gs-multiple
if ratio == 1.0:
return img
else:
@@ -235,7 +244,6 @@ def scale_img(img, ratio=1.0, same_shape=False): # img(16,3,256,416), r=ratio
s = (int(h * ratio), int(w * ratio)) # new size
img = F.interpolate(img, size=s, mode='bilinear', align_corners=False) # resize
if not same_shape: # pad/crop img
gs = 32 # (pixels) grid size
h, w = [math.ceil(x * ratio / gs) * gs for x in (h, w)]
return F.pad(img, [0, w - s[1], 0, h - s[0]], value=0.447) # value = imagenet mean
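
# With the grid size now a parameter, a quick shape check (sizes illustrative):
import torch
img = torch.zeros(1, 3, 256, 416)
out = scale_img(img, ratio=0.5)  # resize, then pad up to the next gs-multiple
print(out.shape)                 # torch.Size([1, 3, 128, 224]) with the default gs=32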


