重做五種乳腺癌亞型的json檔案

2021-10-11 11:47:27 字數 3409 閱讀 3414

# 1. Store the five subtypes in a list
import pandas as pd

# The CSV is read without a header row, so its first line is treated as data.
df = pd.read_csv(
    "c:/users/administrator/desktop/重做json/first1.csv",
    header=None,
    low_memory=False,
)
data = df.values
# Transpose rows into columns. Using the array's own transpose avoids calling
# the built-in `list`, which a later section of this script rebinds.
data = data.T.tolist()
# Second column of the CSV; presumably one subtype label per sample — confirm
# against the CSV layout.
sublist = data[1]
print(len(sublist))
print(sublist)

# 2. Collect the names of the 606 .svs files into a list
import os

# NOTE: this name shadows the built-in `list`. It is kept because the matching
# step further down the script indexes this variable under that name.
list = []
path1 = "g:/brca-clinical-svs-all/brca-606"  # location of the files to be copied
filename_list = os.listdir(path1)
print(filename_list)
print(len(filename_list))
qwb = 0  # not referenced anywhere in the visible part of the script
for filename in filename_list:
    portion = os.path.splitext(filename)
    # portion[0] is the entry name with any extension stripped; it is used as
    # the name of the sub-directory to scan.
    test = path1 + '/' + portion[0]
    test_list = os.listdir(test)
    for testname in test_list:  # walk every file name in the sub-directory
        if testname.endswith('.svs'):
            list.append(testname)
print(list)
print(len(list))
# Everything above collects the names of the 606 .svs files into `list`.

# 3. Read the JSON metadata, overwrite tumor_grade for matched slides, write it back
import json

# Read step. The encoding is given explicitly so the result does not depend on
# the platform's default locale encoding.
jsonfile = "c:/users/administrator/desktop/test/metadata.cart.2019-05-22.json"
with open(jsonfile, encoding="utf-8") as fid:
    jdata = json.load(fid)
# Field of interest: ['cases'][0]['diagnoses'][0]['tumor_grade']
print(len(jdata))
print(jdata[0]['cases'][0]['diagnoses'][0]['tumor_grade'])
# print('data type of the loaded file contents:', type(jdata))
print(jdata[0]["file_name"])
# e.g. tcga-ew-a1pg-01a-01-tsa.2e6d10d1-ae2a-4ca4-9f75-e9fe64e32b70.svs

# Matching step.
yeslist = []  # tumor_grade values before modification
houlist = []  # tumor_grade values after modification
a = 0  # number of (record, slide) matches
for record in jdata:
    # `list` is the module-level list of .svs names built earlier in the
    # script; `sublist[i]` is used as the replacement value for `list[i]`.
    # NOTE(review): this assumes both sequences share the same order — confirm.
    for i, svs_name in enumerate(list):
        if svs_name in record["file_name"]:
            diagnosis = record['cases'][0]['diagnoses'][0]
            temp = diagnosis['tumor_grade']
            yeslist.append(temp)
            diagnosis['tumor_grade'] = sublist[i]
            houlist.append(diagnosis['tumor_grade'])
            a = a + 1
# print(a)  # printed 1038 — the original note (truncated) attributes this to
#           # the metadata holding more entries than the 606 samples
print("看看有多少匹配成功的:", len(yeslist))
print(yeslist)
print(len(houlist))
print(houlist)  # values of this field after modification

# Write the modified records back over the same file. A separate handle name
# keeps `jsonfile` bound to the path string.
with open(jsonfile, "w", encoding="utf-8") as out_file:
    json.dump(jdata, out_file, ensure_ascii=False)

# ####### 改寫json中的內容
# tmp = jdata[0]['cases'][0]['diagnoses'][0]['tumor_grade']
# jdata[0]['cases'][0]['diagnoses'][0]['tumor_grade'] = "la"
#
# with open("replayscript.json", "w") as jsonfile:
#     json.dump(jdata, jsonfile, ensure_ascii=False)
#
# print(jdata[0]['cases'][0]['diagnoses'][0]['tumor_grade'])

ROC PR案例 乳腺癌資料集

import warnings warnings.filterwarnings ignore import matplotlib.pyplot as plt plt.rcparams font.sans serif simhei 顯示中文 plt.rcparams axes.unicode minu...

邏輯回歸模型 乳腺癌資料集

匯入資料集 from sklearn import datasets import warnings warnings.filterwarnings ignore df datasets.load breast cancer x df.data y df.target x.shape 檢視屬性維度 ...

sklearn之Knn實戰乳腺癌資料案例

from sklearn.datasets import load breast cancer from sklearn.neighbors import kneighborsclassifier from sklearn.model selection import train test spli...