# 生成 nnUNet 所需的 dataset.json 文件。
# 需要 Python 运行环境,最好在 nnUNet 的 conda 环境中运行。
# 首先激活自己的环境,并安装依赖包:
#     pip install --upgrade batchgenerators
# 然后在 PyCharm 等 IDE 或命令行环境中运行本脚本。
# 本脚本用于生成 nnUNet 的 dataset.json 文件。
import os
from batchgenerators.utilities.file_and_folder_operations import save_json, subfiles
from typing import Tuple
import numpy as np
def get_identifiers_from_splitted_files(folder: str):
    """
    Collect the unique case identifiers of the ``.nii.gz`` files in ``folder``.

    :param folder: directory that is searched (via ``subfiles``) for files ending in ``.nii.gz``
    :return: numpy array with the sorted, de-duplicated file names, each with the
        trailing ``.nii.gz`` removed

    NOTE(review): only the ``.nii.gz`` extension is stripped, so a modality suffix such as
    ``_0000`` stays part of the identifier -- confirm this matches how the files are named.
    """
    extension = '.nii.gz'
    file_names = subfiles(folder, suffix=extension, join=False)
    # Every name ends with the extension, so cutting len(extension) characters leaves the stem.
    stems = [name[:-len(extension)] for name in file_names]
    return np.unique(stems)
def generate_dataset_json(output_file: str, imagesTr_dir: str, imagesTs_dir: str, modalities: Tuple,
                          labels: dict, dataset_name: str, license: str = "Hebut AI", dataset_description: str = "",
                          dataset_reference="oai-zib", dataset_release='11/2021'):
    """
    Build the dataset description dictionary and write it to disk with ``save_json``.

    :param output_file: full path of the dataset.json to write, e.g. 'DATASET_PATH/dataset.json', where
        DATASET_PATH is the folder containing the imagesTr and labelsTr subfolders. For convenience a path
        that does not end in '.json' is treated as that folder and 'dataset.json' is appended to it.
    :param imagesTr_dir: path to the imagesTr folder of that dataset
    :param imagesTs_dir: path to the imagesTs folder of that dataset. Can be None, in which case the
        'test' list is empty
    :param modalities: tuple of strings with modality names, in image order. Example: ('T1', 'T2', 'FLAIR').
        A single modality may also be given as a plain string.
    :param labels: dict mapping label IDs to label names; keys are converted to strings in the output.
        Example: {0: 'background', 1: 'edema', 2: 'enhancing tumor'}
    :param dataset_name: name of the dataset, stored under 'name'
    :param license: stored under the 'licence' key
    :param dataset_description: free-text description, stored under 'description'
    :param dataset_reference: website of the dataset, if available
    :param dataset_release: release identifier, stored under 'release'
    :return: None; the JSON file is written as a side effect
    """
    train_identifiers = get_identifiers_from_splitted_files(imagesTr_dir)
    if imagesTs_dir is not None:
        test_identifiers = get_identifiers_from_splitted_files(imagesTs_dir)
    else:
        test_identifiers = []

    # A bare string would otherwise be enumerated character by character below.
    if isinstance(modalities, str):
        modalities = (modalities,)

    json_dict = {}
    json_dict['name'] = dataset_name
    json_dict['description'] = dataset_description
    json_dict['tensorImageSize'] = "3D"
    json_dict['reference'] = dataset_reference
    json_dict['licence'] = license
    json_dict['release'] = dataset_release
    json_dict['modality'] = {str(index): name for index, name in enumerate(modalities)}
    json_dict['labels'] = {str(label_id): label_name for label_id, label_name in labels.items()}
    json_dict['numTraining'] = len(train_identifiers)
    json_dict['numTest'] = len(test_identifiers)
    json_dict['training'] = [
        {'image': "./imagesTr/%s.nii.gz" % i, "label": "./labelsTr/%s.nii.gz" % i}
        for i in train_identifiers
    ]
    json_dict['test'] = ["./imagesTs/%s.nii.gz" % i for i in test_identifiers]

    # Join with a real path separator instead of gluing "dataset.json" onto the last path component,
    # and leave an explicit *.json path untouched.
    if not output_file.endswith(".json"):
        output_file = os.path.join(output_file, "dataset.json")
    if os.path.basename(output_file) != "dataset.json":
        print("WARNING: output file name is not dataset.json! This may be intentional or not. You decide. "
              "Proceeding anyways...")
    save_json(json_dict, output_file)


if __name__ == "__main__":
    # Adjust the values below to the location and description of your own dataset.
    dataset_dir = r'H:/Project/nnUNet-1-master/DATASET/nnUNet_raw/nnUNet_raw_data/Task666_Knee'
    output_file = os.path.join(dataset_dir, 'dataset.json')
    imagesTr_dir = os.path.join(dataset_dir, 'imagesTr')
    imagesTs_dir = os.path.join(dataset_dir, 'imagesTs')
    # No labelsTr path is needed: label entries are derived from the training image identifiers.
    modalities = ('MRI',)
    labels = {
        "0": "background",
        "1": "FB",
        "2": "FC",
        "3": "TB",
        "4": "TC",
    }
    # Keyword arguments keep the name and description from being bound to the wrong parameter.
    generate_dataset_json(output_file,
                          imagesTr_dir,
                          imagesTs_dir,
                          modalities,
                          labels,
                          dataset_name="Knee",
                          dataset_description="Knee Segmentation")



