劃分資料集

2021-10-08 04:11:12 字數 3519 閱讀 1714

import glob
import os
import shutil

# Text files that receive the sample IDs, one ID per line.
total_file = 'total_file.txt'  # every sample
train = 'train.txt'            # training split
val = 'val.txt'                # validation split

# Glob pattern matching every point-cloud file of the full dataset
# (for your own data use: <your point-cloud folder>/*.bin).
bin_path = 'data_l/v/*.bin'

# Training-set point-cloud folder, e.g. data_dir/training/velodyne
# (the original tutorial creates this folder by hand).
bin_train_dir = 'train/v'

# Validation-set point-cloud folder, e.g. data_dir/validation/velodyne
# (the original tutorial creates this folder by hand).
bin_val_dir = 'val/v'

# glob.glob() returns paths in arbitrary, OS-dependent order; sorting makes
# the train/validation split reproducible from run to run.
total_bin = sorted(glob.glob(bin_path))
print(total_bin)

# Fraction of the samples that goes to the training split; the rest is
# used for validation.
split_rate = 0.5
total_bin_num = len(total_bin)

# Write the sample-ID list files: all samples, the training split and the
# validation split.

# Index of the first validation sample; everything before it is training.
train_count = int(total_bin_num * split_rate)

# A sample ID is the file name without directory and extension, e.g. "000",
# "001".  os.path is used instead of split('/') so that paths containing
# Windows separators are handled as well.
sample_ids = [
    os.path.splitext(os.path.basename(bin_file))[0] for bin_file in total_bin
]

for list_path, ids in (
    (total_file, sample_ids),
    (train, sample_ids[:train_count]),
    (val, sample_ids[train_count:]),
):
    with open(list_path, 'w') as f:
        f.writelines(sample_id + "\n" for sample_id in ids)

# Split the point clouds.

# Copy each point-cloud file into the folder of the split it belongs to.

# shutil.copy() treats a missing destination folder as a *file name* and
# would overwrite that single file on every copy, so make sure both
# folders exist first.
os.makedirs(bin_train_dir, exist_ok=True)
os.makedirs(bin_val_dir, exist_ok=True)

# Index of the first validation sample; everything before it is training.
train_count = int(total_bin_num * split_rate)

for bin_file in total_bin[:train_count]:
    shutil.copy(bin_file, bin_train_dir)

for bin_file in total_bin[train_count:]:
    shutil.copy(bin_file, bin_val_dir)

# Split the labels.

# Folder holding the labels of all samples before the split.
label_dir = 'data_l/l'

# Training-set label folder, e.g. data_dir/training/label_2.
label_train_dir = 'train/label'

# Validation-set label folder, e.g. data_dir/validation/label_2.
label_val_dir = 'val/label'

for list_path, target_dir in ((train, label_train_dir), (val, label_val_dir)):
    # shutil.copy() would write to a file named like the folder if the
    # folder were missing, so create it up front.
    os.makedirs(target_dir, exist_ok=True)
    with open(list_path, 'r') as f:
        label_files = f.readlines()
    for files in label_files:
        files = files.strip('\n')
        if not files:
            # A blank line carries no sample ID; nothing to copy.
            continue
        files_path = os.path.join(label_dir, files + '.txt')
        shutil.copy(files_path, target_dir)

# Split the images.

# Folder holding the images of all samples before the split.  In the
# original text this assignment was fused into the preceding comment, which
# left image_dir undefined and made the script fail with NameError.
image_dir = 'data_l/image'

# Training-set image folder, e.g. data_dir/training/image_2.
image_train_dir = 'train/image'

# Validation-set image folder, e.g. data_dir/validation/image_2.
image_val_dir = 'val/image'

for list_path, target_dir in ((train, image_train_dir), (val, image_val_dir)):
    # shutil.copy() would write to a file named like the folder if the
    # folder were missing, so create it up front.
    os.makedirs(target_dir, exist_ok=True)
    with open(list_path, 'r') as f:
        label_files = f.readlines()
    for files in label_files:
        files = files.strip('\n')
        if not files:
            # A blank line carries no sample ID; nothing to copy.
            continue
        files_path = os.path.join(image_dir, files + '.png')
        shutil.copy(files_path, target_dir)

劃分資料集

如果要在一個二維資料散點圖中間畫一條線將資料集分開,是按照x劃分還是y劃分呢 from math import log defcreatedataset dataset 1,1,yes 1,1,yes 1,0,no 0,1,no 0,1,no labels no su cing flippers r...

sklearn劃分資料集

train_test_split 是用得最多的資料集劃分包,它的引數有五個:arrays:要切分的資料集,通過傳入兩個,x 資料集和目標 y;test_size:測試集樣本大小;random_state:隨機種子數;shuffle:是否要對資料集隨機打亂;stratify:可以理解為分層抽樣的設定值,通過...

資料集按類劃分 資料集劃分方法

留出法:直接將資料集 D 劃分為兩個互斥的集合,一個為訓練集 S,一個為測試集 T,即 $D = S \cup T$,$S \cap T = \varnothing$。在 S 上進行模型學習,然後用 T 來評估其測試誤差,作為對泛化誤差的估計。單次使用留出法得到的估計結果往往不夠穩定可靠,在使用留出法時,一般要採用若干次隨機劃分、重複進行模型評估後取平均值作為留出法的評估結果...