I split my data into five folds for cross-validation: three folds for training, one for validation, and one for testing. I want to apply the Kaplan-Meier method to this data.
I have computed 10 evaluation time points, taken as percentiles of the unique observed times in the dataset:
def compute_eval_time(args):
    """Return the evaluation time grid derived from all clients' data.

    For every client the unique values of the ``"time"`` column of its
    training and validation frames are collected; the union of these
    values across clients is then summarized by percentiles.

    Args:
        args: Object providing ``n_parties``, ``dataset``, ``partition``,
            ``run_id`` and ``pseudo_perc_interval``.

    Returns:
        numpy.ndarray: Percentiles of the globally unique times, taken at
        ``pseudo_perc_interval, 2 * pseudo_perc_interval, ...`` strictly
        below 99. The number of points therefore depends on
        ``pseudo_perc_interval``.
    """
    local_unique_times = []
    for client_id in range(args.n_parties):
        # The last two returned items are not needed here (presumably the
        # test split -- confirm against get_local_dataset).
        train_df, val_df, _, _ = get_local_dataset(
            args.dataset, args.partition, client_id, args.run_id
        )
        # Only the time column matters for the grid; the event indicator
        # is deliberately not read.
        local_unique_times.append(
            np.unique(np.concatenate([train_df["time"], val_df["time"]]))
        )

    glob_unique_time = np.unique(np.concatenate(local_unique_times))
    percentiles = np.arange(
        args.pseudo_perc_interval, 99, args.pseudo_perc_interval
    )
    return np.percentile(glob_unique_time, percentiles)
Now I want to fit the Kaplan-Meier estimator on my training and validation data and then evaluate the fitted KM estimator on my test data. For this, I am using KaplanMeierFitter from the lifelines Python package.
def train_net_all(net_id, train_time_all, train_status_all, val_time_all, val_status_all, test_time_all, test_status_all, evaltime, args, device):
    """Fit a Kaplan-Meier baseline on train+validation data and score it on test data.

    Args:
        net_id: Identifier used only in the log/print message.
        train_time_all, val_time_all: Durations used to fit the estimator.
        train_status_all, val_status_all: Event indicators matching the
            durations above.
        test_time_all: Test durations.
        test_status_all: Test event indicators; a value of 1 is treated as
            an observed event.
        evaltime: Time points at which the integrated Brier score is
            evaluated.
        args: Unused; kept for interface compatibility.
        device: Unused; kept for interface compatibility.

    Returns:
        tuple: ``(cindex, brier)`` where ``cindex`` is always 0.5 and
        ``brier`` is the integrated Brier score, or 0.25 if its
        computation raised.
    """
    # Local import so the block does not depend on a file-level pandas import.
    import pandas as pd

    logger.info('Training network %s' % str(net_id))
    print('Training network %s' % str(net_id))

    train_val_time = np.concatenate((train_time_all, val_time_all))
    train_val_status = np.concatenate((train_status_all, val_status_all))

    kmf = KaplanMeierFitter(label="waltons_data")
    kmf.fit(train_val_time, train_val_status)

    # EvalSurv requires plain NumPy arrays for durations and events.
    test_time = np.asarray(test_time_all)
    test_event = np.asarray(test_status_all) == 1

    # EvalSurv expects a DataFrame whose index is a sorted time grid and
    # whose columns are individuals. Evaluating the KM curve at each
    # subject's own time (a 1-D Series) is not a valid input. KM has no
    # covariates, so every test subject gets the same curve. The grid only
    # needs the evaluation times (plus 0) for the Brier score to be exact.
    time_grid = np.unique(
        np.concatenate(([0.0], np.asarray(evaltime, dtype=float).ravel()))
    )
    km_on_grid = kmf.survival_function_at_times(time_grid).to_numpy()
    test_survival_probabilities = pd.DataFrame(
        np.repeat(km_on_grid[:, np.newaxis], len(test_time), axis=1),
        index=time_grid,
    )

    ev1 = EvalSurv(test_survival_probabilities, test_time, test_event, censor_surv='km')

    # All subjects share one survival curve, so every comparable pair is
    # tied and the concordance index is 0.5 by construction.
    cindex = 0.5

    # Integrated Brier score
    try:
        brier = float(ev1.integrated_brier_score(evaltime))
    except Exception as e:
        # Best-effort fallback kept from the original: report and use the
        # score of an uninformative 0.5 prediction.
        print(f"[Brier ERROR] {type(e).__name__}: {e}")
        brier = 0.25

    return cindex, brier
What do you think? Are there any logical bugs in my code? Please guide me. Any suggestions or feedback would be highly appreciated.