Skip to content

BasicAI 标注规范

Cover

AI 的三大基石是算法、算力和数据,BasicAI 作为一个 AI 数据标注平台,能够为 AI 模型训练提供丰富和多样的标注数据,这些数据大多是半结构化的 JSON,本文描述了 BasicAI 标注平台对外导入/导出时所涉及到的各类数据的格式规范,包括 Dataset、Data、Classification、Class、Annotation Object、Labelmap 等,以便算法工程师参考。

通用约定

  1. 时间使用 UTC 时区的 ISO 8601 格式,比如 2012-03-29T10:05:45Z;
  2. 需要 UUID 的地方统一使用 V4 版本,比如 6ec0bd7f-11c0-43da-975e-2a8ad9ebae0b;
  3. 所有类型常量使用大写+下划线方式,比如 3D_BOX;
  4. 重要数据带有数据版本 version,比如 Classification 和 Class,以便数据对比和防止修改时的并发冲突,在引用这些数据的地方需要保存引用这些数据时的版本;

数据结构

Data structure

一个数据集(Dataset)包含一到多个数据(Data)或连续帧(Scene),一个连续帧包含一到多个数据。每个数据中会标出零到多个对象(Object,比如人、车),多个数据中表示同一物体的多个对象组成一个追踪对象(Tracking Object)。

Classification 模板

Classification 用于对数据整体进行标注,比如整张图片处于什么环境(雪山、草地、沙漠),只有一个属性(Attribute),支持嵌套属性,也就是选项(Option)还可以再有属性。

classification template

json
{
  // ID
  "id": 1,
  // Version
  "version": 1,
  // Attribute
  "attribute": {
    // ID
    "id": "64a16626-153f-4136-b7b4-572c10db08c3",
    // Version
    "version": 1,
    // Name
    "name": "Brand",
    // Type, RADIO, DROPDOWN, TEXT, MULTI_SELECTION, RANKING etc.
    "type": "RADIO",
    // Is required
    "required": true,
    // Options
    "options": [
      {
        "id": "92f6b36c-0589-4310-bdc9-5b6918a9a2be",
        "name": "Benz",
        "attributes": [
          {
            "id": "24ce829b-9b60-4b88-99f7-379b144a3cb3",
            "version": 1,
            "name": "Level",
            "type": "RADIO",
            "required": false,
            "options": [
              {
                "id": "9dbdbe60-9a95-4f6f-9a4e-6bf84efb8382",
                "name": "Car"
              },
              {
                "id": "e4497a0f-5c66-40a3-bce7-d9abc59ebbb3",
                "name": "SUV"
              }
            ]
          },
          {
            "id": "0669d8b1-f175-4f46-a8d2-cea7bf9dc1e2",
            "version": 1,
            "name": "Power",
            "type": "MULTI_SELECTION",
            "required": false,
            "options": [
              {
                "id": "14c9b60d-b4cb-475d-9cad-6dc8be58d67b",
                "name": "Gasoline",
                "attributes": [
                  {
                    "id": "9c0d296f-7141-4fd9-9cbe-c3d96a93d411",
                    "version": 1,
                    "name": "Displacement",
                    "type": "RADIO",
                    "required": false,
                    "options": [
                      {
                        "id": "bd290f13-5c04-4828-bf2c-2ccfbca31ef3",
                        "name": "1.0L"
                      },
                      {
                        "id": "4858b7e1-c86c-40a9-96cf-7d1c2aa6ec77",
                        "name": "1.5L"
                      },
                      {
                        "id": "973100cc-ae0a-4379-995f-a98fcf0c4944",
                        "name": "2.0L"
                      }
                    ]
                  }
                ]
              },
              {
                "id": "98b033e7-6c1c-4ade-95d5-684a5e6a26e0",
                "name": "Battery",
                "attributes": [
                  {
                    "id": "8e790d10-9931-43f6-82bb-60619f1a25ea",
                    "version": 1,
                    "name": "Capacity",
                    "type": "RADIO",
                    "required": false,
                    "options": [
                      {
                        "id": "ea9080e5-ce3b-4df8-a55e-b4f53a1f1213",
                        "name": "<20kWh"
                      },
                      {
                        "id": "fec71b3d-a7db-4d6c-a68a-d9c66ec49c6e",
                        "name": "20~50kWh"
                      },
                      {
                        "id": "d995f663-766b-403c-bfef-ee6aa1ae0379",
                        "name": "50~100kWh"
                      },
                      {
                        "id": "7f90dbcd-f81e-4638-b342-4dad454bf728",
                        "name": ">=100kWh"
                      }
                    ]
                  }
                ]
              }
            ]
          }
        ]
      },
      {
        "id": "f7192a9a-ecee-4eb3-825c-5007c47c1c86",
        "name": "BMW"
      }
    ]
  }
}

Class 模板

Class 用于标注数据中出现的对象,比如车的颜色,与 Classification 类似,但可以包含多个属性。

Class template

json
{
  // ID
  "id": 1,
  // Version
  "version": 1,
  // Name
  "name": "Car",
  // Color
  "color": "#7dfaf2",
  // Tool type, BOUNDING_BOX, CUBOID, POLYLINE etc.
  "toolType": "CUBOID",
  // Tool options
  "toolOptions": {},
  // Attributes
  "attributes": [
    // ... more details can reference classification template
    {
      "id": "e0c006ee-9808-4fb4-bda9-c047b2c0534f",
      // Version
      "version": 1,
      "name": "Car condition",
      "type": "RANKING",
      "required": true,
      "options": [
        {
          "id": "0e4a932e-9f44-412b-8178-012464ab397f",
          "name": "Good"
        },
        {
          "id": "8303aee3-e816-49bd-938a-99903b5cd44c",
          "name": "Generally"
        },
        {
          "id": "d872f65e-0d2c-4626-8161-3e3b295ab2d2",
          "name": "Poor"
        }
      ]
    }
  ]
}

Data 标注结果

标注对象

json
{
  // Data ID, ignored when importing
  "dataId": 1,
  // Result source ID, ignored when importing
  "sourceId": 1,
  // Result source Type, DATA_FLOW, TASK, MODEL, EXTERNAL_GROUND_TRUTH, EXTERNAL_MODEL etc.
  "sourceType": "TASK",
  // Result source Name
  "sourceName": "20221009000000",
  // Data validity, VALID, INVALID, or UNKNOWN
  "validity": "VALID",
  // Classifications
  "classifications": [
    {
      // ID, ignored when importing
      "id": "6633b193-516d-4f91-acac-c6e224c6aada",
      // Classification template ID, the template must exist in dataset when importing
      "classificationId": 1,
      // Classification value version, optional, default to 1
      "classificationVersion": 1,
      // Classification attribute values, may be nested, flattened into an array
      "values": [
        {
          // Attribute ID
          "id": "64a16626-153f-4136-b7b4-572c10db08c3",
          // Attribute value version, optional, default to 1
          "attributeVersion": 1,
          // Parent attribute ID, null for root attribute
          "pid": null,
          // Parent attribute value, only needed when parent selected multiple values
          "pvalue": null,
          "name": "Brand",
          "type": "RADIO",
          "value": "Benz",
          "alias": "Trademark",
          // Is the lowest attribute value
          "isLeaf": false
        },
        {
          "id": "24ce829b-9b60-4b88-99f7-379b144a3cb3",
          "attributeVersion": 1,
          "pid": "64a16626-153f-4136-b7b4-572c10db08c3",
          "pvalue": null,
          "name": "Level",
          "type": "RADIO",
          "value": "Car",
          "alias": null,
          "isLeaf": true
        },
        {
          "id": "0669d8b1-f175-4f46-a8d2-cea7bf9dc1e2",
          "attributeVersion": 1,
          "pid": "64a16626-153f-4136-b7b4-572c10db08c3",
          "pvalue": null,
          "name": "Power",
          "type": "MULTI_SELECTION",
          "value": [
            "Gasoline",
            "Battery"
          ],
          "alias": null,
          "isLeaf": false
        },
        {
          "id": "9c0d296f-7141-4fd9-9cbe-c3d96a93d411",
          "attributeVersion": 1,
          "pid": "0669d8b1-f175-4f46-a8d2-cea7bf9dc1e2",
          "pvalue": "Gasoline",
          "name": "Displacement",
          "type": "RADIO",
          "value": "2.0L",
          "alias": null,
          "isLeaf": true
        },
        {
          "id": "8e790d10-9931-43f6-82bb-60619f1a25ea",
          "attributeVersion": 1,
          "pid": "0669d8b1-f175-4f46-a8d2-cea7bf9dc1e2",
          "pvalue": "Battery",
          "name": "Capacity",
          "type": "RADIO",
          "value": "20~50kWh",
          "alias": null,
          "isLeaf": true
        }
      ]
    }
  ],
  // Geometry annotation objects
  "instances": [
    {
      // ID, ignored when importing
      "id": "6ec0bd7f-11c0-43da-975e-2a8ad9ebae0b",
      // Type, 3D_BOX, 2D_BOX, CHAT_THUMB, CLIP etc.
      "type": "3D_BOX",
      // Version, optional, default to 1
      "version": 1,
      // Creator ID, ignored when importing
      "createdBy": 1,
      // Create time, ignored when importing
      "createdAt": "2012-03-29T10:05:45Z",
      // Tracking object ID
      "trackId": "J0lkBP7r",
      // Tracking object name
      "trackName": "Car 1",
      // Class template ID, the template must exist in dataset when importing
      "classId": 1,
      // Class value version, optional, default to 1
      "classVersion": 1,
      // Class name, ignored when importing
      "className": "Car",
      // Class values, may be nested, flattened into an array
      "classValues": [
        // ... for more details, refer to the classification annotation objects
        {
          "id": "e0c006ee-9808-4fb4-bda9-c047b2c0534f",
          "attributeVersion": 1,
          "pid": null,
          "pvalue": null,
          "name": "Car condition",
          "type": "RANKING",
          // Arranged from the most likely to the least likely
          "value": [
              "Generally",
              "Poor"
          ],
          "alias": null,
          "isLeaf": true
        }
      ],
      // Model recognition confidence, only when recognized by model
      "modelConfidence": 0.83232,
      // Model recognition class, only when recognized by model
      "modelClass": "Person",
      // The closest distance to the origin on the XY plane, only for point cloud annotation objects, automatically supplemented when importing
      "minDistance": 10,
      // The farthest distance to the origin on the XY plane, only for point cloud annotation objects, automatically supplemented when importing
      "maxDistance": 30,
      // The minimum height on the Z axis, only for point cloud annotation objects, automatically supplemented when importing
      "minHeight": 0,
      // The maximum height on the Z axis, only for point cloud annotation objects, automatically supplemented when importing
      "maxHeight": 5,
      // Device name, used to distinguish different data files in multimodal data
      "deviceName": "lidar_point_cloud_0",
      // DICOM information, only for medical image annotation objects
      "dicom": {
          "patient": "Inv234_Exp_56_Group78",
          "study": "1.3.6.1.4.1.14519.5.2.1.7009.2403.334240657131972136850343327463",
          "series": "1.3.6.1.4.1.14519.5.2.1.7009.2403.226151125820845824875394858561",
          "instance": "1.3.6.1.4.1.14519.5.2.1.7009.2403.199347090945987353639771262550",
          // Frame, fixed to 1 for single frame images
          "frame": 1
      },
      // Contour information, according to the specific type
      "contour": {
        // Number of points included, only for point cloud, automatically supplemented when importing
        "pointN": 3057,
        // Area, only for image, automatically supplemented when importing
        "area": 100,
        "points": [],
        "size3D": {
          "x": 2.2125,
          "y": 4.993,
          "z": 1.38
        },
        "center3D": {
          "x": 3.9220652257399284,
          "y": -6.013346632547687,
          "z": 0.39
        },
        "rotation3D": {
          "x": 0,
          "y": 0,
          "z": -1.6622375116055483
        },
        "interior": [
            {
                "points": []
            }
        ]
      }
    },
    {
      "id": "62383691-8077-44c2-ad8a-a6a7f54390c5",
      "type": "2D_BOX",
      // ...
      "contour": {
        "points": [
          {
            "x": 976.6271156786534,
            "y": 509.30442784738
          },
          {
            "x": 976.9704479066531,
            "y": 518.6737077997769
          }
        ],
        "viewIndex": 3
      }
    },
    {
      "id": "0d2c92c4-e96d-4672-abc7-a3480f412753",
      "type": "CHAT_THUMB",
      // ...
      // Thumb direction, up or down
      "direction": "up",
      // Message ID
      "messageId": "8EuFV13C"
    },
    {
      "id": "e1b566f7-ae75-4836-a8a8-7cb9721d3ca3",
      "type": "CLIP",
      // ...
      // Valid or not
      "isValid": true,
      // Note
      "note": "Weather in Paris.",
      // Start time, unit milliseconds
      "start": 340,
      // End time, unit milliseconds
      "end": 2365
    }
  ],
  // Segmentation annotation objects
  "segments": [
    {
      "id": "0d2c92c4-e96d-4672-abc7-a3480f412753",
      // Type, SEGMENTATION, MASK etc.
      "type": "MASK",
      // ...
      // Contour information
      "contour": {
        // Number of points included, only for point cloud, automatically supplemented when importing
        "pointN": 3057,
        // Area, only for image, automatically supplemented when importing
        "area": 100
      },
      // Number, begin from 1
      "no": 1
    }
  ],
  // Labelmaps, more details can reference "Labelmap" section
  "segmentations": [
    {
      // Labelmap file ID
      "resultFileId": 1,
      // Device name, used to distinguish different data files in multimodal data
      "deviceName": "lidar_point_cloud_0",
      // Segmentation annotation object number, optional, used when each segmentation annotation object has its own labelmap file
      "segmentNo": 1
    }
  ]
}

标签地图

点云分割

每个点云对应一个分割结果文件,格式为 PCD 文件格式,其中只包含一个字段 seg,其值为当前点位所属分割对象的编号,没有归属则为 0。

txt
# PCD v0.7
FIELDS seg
SIZE 1
TYPE U
COUNT 1
WIDTH 169171
HEIGHT 1
VIEWPOINT 0 0 0 1 0 0 0
POINTS 169171
DATA binary
............

图片分割

每个图片对应一个分割结果文件,格式为 RGBA 32 位模式的 PNG,像素值 (R, G, B) 存放的是当前点位所属分割对象的编号,编号 = R + G×256 + B×256^2,没有归属则为 0。

Dataset 导入/导出

因为涉及到大量文件,因此无论导入还是导出都采用压缩包,压缩包里的目录结构需遵循规范。

  1. 导入结果时需要同时导入数据,不支持将结果关联到已有数据;
  2. 对于大数据集,建议只导出数据信息,不导出数据文件,否则压缩包会非常大,需要时可依据数据信息文件里的地址去下载;
  3. 数据信息文件和结果文件与对应的数据文件同级,为了避免不同类型的文件重名,增加了 data 和 result 类型子目录;
  4. 这里只定义了最下面几层的目录结构,其上可添加任意路径,导入时会记住每个数据的原始路径,导出时可选择按原始路径导出;

点云数据集

lidar_point_cloud_0 必有。

txt
.
├── camera_config // Camera config, more details can reference "Point Cloud Camera Config" section
│   ├── data1.json
│   └── data2.json
├── camera_image_0 // Camera image 0
│   ├── data1.jpg
│   └── data2.jpg
├── camera_image_1 // Camera image 1
│   ├── data1.jpg
│   └── data2.jpg
├── camera_image_2 // Camera image 2
│   ├── data1.jpg
│   └── data2.jpg
├── lidar_config // Lidar config
│   ├── data1.json
│   └── data2.json
├── lidar_point_cloud_0 // Lidar point cloud 0
│   ├── data1.pcd
│   └── data2.pcd
├── lidar_point_cloud_1 // Lidar point cloud 1
│   ├── data1.pcd
│   └── data2.pcd
├── radar_config // Radar config
│   ├── data1.json
│   └── data2.json
├── radar_point_cloud_0 // Radar point cloud 0
│   ├── data1.pcd
│   └── data2.pcd
├── radar_point_cloud_1 // Radar point cloud 1
│   ├── data1.pcd
│   └── data2.pcd
├── gps // GPS position of collection car
│   ├── data1.json
│   └── data2.json
├── data // Data info, only for exporting, more details can reference "Data Info" section
│   ├── data1.json
│   └── data2.json
├── result // Annotation result, more details can reference "Data Annotation Result" section
│   ├── data1.json
│   ├── data1_lidar_point_cloud_0_segmentation.pcd
│   ├── data2.json
│   └── data2_lidar_point_cloud_0_segmentation.pcd
├── scene_1 // Scene, the structure is similar to the root directory
│   ├── camera_config
│   ├── ...
│   ├── data
│   └── result
└── batch1 // Batch, the structure is similar to the root directory
    ├── camera_config
    ├── ...
    ├── data
    ├── result
    └── scene_1

图片数据集

对于只有 image_0 的单模态数据,导入/导出时可以省略 image_0 目录。

txt
.
├── image_0 // Image 0
│   ├── data1.jpg
│   └── data2.jpg
├── data // Data info, only for exporting, more details can reference "Data Info" section
│   ├── data1.json
│   └── data2.json
├── result // Annotation result, more details can reference "Data Annotation Result" section
│   ├── data1.json
│   ├── data1_image_0_segmentation.png 
│   ├── data2.json
│   └── data2_image_0_segmentation.png
├── scene_1 // Scene, the structure is similar to the root directory
│   ├── image_0
│   ├── ...
│   ├── data
│   └── result
└── batch1 // Batch, the structure is similar to the root directory
    ├── image_0
    ├── ...
    ├── data
    ├── result
    └── scene_1

音视频数据集

av_0 必有,暂不支持多模态数据。如果上传压缩包里的文件缺少 av_0 目录层级,处理程序会自动为其加上。

txt
.
├── av_0 // Audio & video 0
│   ├── data1.mp4
│   └── data2.mp4
├── data // Data info, only for exporting, more details can reference "Data Info" section
│   ├── data1.json
│   └── data2.json
├── result // Annotation result, more details can reference "Data Annotation Result" section
│   ├── data1.json
│   └── data2.json
└── batch1 // Batch, the structure is similar to the root directory
    ├── av_0
    ├── data
    └── result

医学图像数据集

一个 Study 包含一到多个 Series,一个 Series 包含一到多个 Instance,一个 Instance 存放在一个 DICOM 文件中。每个 Instance (也就是 Image)包含一到多个 Frame,分为单帧图片(Single-Frame Image)和多帧图片(Multi-Frame Image),多帧图片比较少见,并且支持的系统很少,暂不考虑。
这里的 Study、Series、Instance 目录或文件的命名仅供参考,只要保证层级结构正确即可,处理压缩包时会从最底层的 DICOM 文件开始向上寻找用户指定的 Data 层数(因为上层可能还有更多附加目录,因此无法自动检测出用户想要的 Data 层数)。最终会以找到的最上层的目录作为 Data 划分依据,因此一个 Data 可能是一个 Study、Series 或 Instance,甚至包含任意的一组 DICOM 文件。实际使用的 Study、Series、Instance 信息来自于从 DICOM 文件里提取得到的元数据,而不是这里的目录或文件名,这样更准确,同时也保持了目录结构的灵活性。

txt
.
├── study_0 // Study 0
│   ├── series_0 // Series 0
│   │   ├── instance_0.dcm // Instance 0
│   │   └── instance_1.dcm // Instance 1
│   └── series_1 // Series 1
├── study_1 // Study 1
├── data // Data info, only for exporting, more details can reference "Data Info" section
│   ├── study_0.json
│   └── study_1.json
├── result // Annotation result, more details can reference "Data Annotation Result" section
│   ├── study_0.json
│   └── study_1.json
└── batch1 // Batch, the structure is similar to the root directory
    ├── study_0
    ├── ...
    ├── data
    └── result

Data 信息

由于单个数据集下的数据量可能会非常大,所以无法直接导出数据文件,只能导出数据信息,里面包含各种数据文件的下载地址。不同类型的数据包含的数据文件不一样,具体可参考“Dataset 导入/导出”一节中各类数据集的目录结构。

json
{
  // Data ID, ignored when importing
  "dataId": 1,
  // Type, LIDAR_BASIC, LIDAR_FUSION, IMAGE, AV etc.
  "type": "LIDAR_FUSION",
  // Name
  "name": "01",
  // Camera config file
  "cameraConfig": {
    // Filename
    "filename": "01.json",
    // Download url
    "url": "",
    // Path in zip package, only exists when uploading by zip package, ignored when importing
    "zipPath": ""
  },
  // Camera image files
  "cameraImages": [
    {
      "filename": "01.jpg",
      "url": "",
      "zipPath": "",
      "width": 1920,
      "height": 1080
    }
  ],
  // Lidar config file
  "lidarConfig": {
    "filename": "01.json",
    "url": "",
    "zipPath": ""
  },
  // Lidar point cloud files
  "lidarPointClouds": [
    {
      "filename": "01.pcd",
      "url": "",
      "binaryUrl": "",
      "zipPath": ""
    }
  ],
  // Radar config file
  "radarConfig": {
    "filename": "01.json",
    "url": "",
    "zipPath": ""
  },
  // Radar point cloud files
  "radarPointClouds": [
    {
      "filename": "01.pcd",
      "url": "",
      // Binary file converted from the original file, only exists when the original file is in ASCII format
      "binaryUrl": "",
      "zipPath": ""
    }
  ],
  // Image files
  "images": [
    {
      "filename": "01.jpg",
      "url": "",
      "zipPath": "",
      "width": 1920,
      "height": 1080
    }
  ],
  // Text files
  "texts": [
    {
      "filename": "01.txt",
      "url": "",
      "zipPath": ""
    }
  ],
  // Audio & video files
  "avs": [
    {
      "filename": "01.wav",
      "url": "",
      "zipPath": ""
    }
  ],
  // DICOM files
  "dicoms": [
    {
      "filename": "instance_0.dcm",
      "url": "",
      "zipPath": ""
    }
  ]
}

点云相机参数

json
[
  {
    // Internal params
    "camera_internal": {
      "fx": 382.06535583,
      "cx": 326.66902661,
      "fy": 421.05123478,
      "cy": 254.70249315
    },
    // External params
    "camera_external": [
      0.76866726,
      0.04361939,
      0.63815985,
      -1.59,
      -0.63870827,
      -0.00174367,
      0.76944701,
      0.91,
      0.03467555,
      -0.9990467,
      0.02651976,
      0.96,
      0,
      0,
      0,
      1
    ],
    // Whether the external params are in row-major order, default false
    "rowMajor": true,
    // Distortion K params, up to 8, optional
    "distortionK": [
      -0.30912646651268,
      0.0669714063405991
    ],
    // Distortion P params, up to 2, optional
    "distortionP": [
      0.00262697599828243,
      0.00106896553188562
    ],
    // Distortion InvP params, optional
    "distortionInvP": [
      800.836,
      515.212,
      -36.9548,
      39.5822,
      85.4095,
      -23.9415,
      -40.625,
      32.0152,
      37.9534,
      9.22325
    ],
    // Image width, optional
    "width": 1920,
    // Image height, optional
    "height": 1280
  }
]

畸变算法请参考 Camera Calibration Theory

蜀ICP备2021032756号