syntax fix

parent 610bf392
......@@ -1321,8 +1321,8 @@ class TranslationDataset(CachedDataset2):
self._num_seqs = len(self._seq_order)
return True
def get_seq_difficulty(self, curriculum_learing):
mode = curriculum_learing['difficulty']
def get_seq_difficulty(self, curriculum_learning):
mode = curriculum_learning['difficulty']
if mode == 'seq_len_only_source':
cur_seq_difficulty = [len(seq) for seq in self._data[self._main_data_key]]
elif mode == 'seq_len_only_target':
......@@ -1356,16 +1356,16 @@ class TranslationDataset(CachedDataset2):
elif mode == 'neg_log_likelihood':
# neg log likelihood should be 0 <= x < infinity; the bigger x the easier the sentence -> reverse order
# TODO check if this works
with open(curriculum_learing['neg_log_likelihood_file'], "r") as f:
with open(curriculum_learning['neg_log_likelihood_file'], "r") as f:
f1 = f.readlines()
f2 = [float(x) for x in f1]
cur_seq_difficulty = f2
if 'reverse' in curriculum_learing and curriculum_learing['reverse']:
curriculum_learing['reverse'] = False
if 'reverse' in curriculum_learning and curriculum_learning['reverse']:
curriculum_learning['reverse'] = False
else:
curriculum_learing['reverse'] = True
curriculum_learning['reverse'] = True
elif mode == 'kmeans_only_source':
kmeans = KMeans(n_clusters=curriculum_learing['number_of_clusters'], random_state=0).fit(self._data[self._main_data_key])
kmeans = KMeans(n_clusters=curriculum_learning['number_of_clusters'], random_state=0).fit(self._data[self._main_data_key])
print("kmeans")
print(kmeans)
cur_seq_difficulty = kmeans.labels
......@@ -1374,13 +1374,13 @@ class TranslationDataset(CachedDataset2):
else:
raise NotImplementedError("This difficulty mode is not implemented.")
# norm it between 0 and 1
if not 'norm' in curriculum_learing:
curriculum_learing['norm'] = 'equally_distant'
if curriculum_learing['norm'] == 'longest_sentence':
if not 'norm' in curriculum_learning:
curriculum_learning['norm'] = 'equally_distant'
if curriculum_learning['norm'] == 'longest_sentence':
# TODO if norm works fine
max_seq = numpy.amax(cur_seq_difficulty)
cur_seq_difficulty = cur_seq_difficulty / max_seq
elif curriculum_learing['norm'] == 'equally_distant':
elif curriculum_learning['norm'] == 'equally_distant':
print('cur_seq_difficulty_at_the_start')
print(cur_seq_difficulty)
idx_sorted = numpy.argsort(cur_seq_difficulty)
......@@ -1392,16 +1392,16 @@ class TranslationDataset(CachedDataset2):
cur_seq_difficulty = idx_sorted / max_seq
print('cur_seq_difficulty_at_the_end')
print(cur_seq_difficulty)
elif curriculum_learing['norm'] == 'no_norm':
elif curriculum_learning['norm'] == 'no_norm':
pass
else:
raise NotImplementedError("This norm mode is not implemented.")
# TODO check if reverse works fine
if 'reverse' in curriculum_learing and curriculum_learing['reverse']:
if 'reverse' in curriculum_learning and curriculum_learning['reverse']:
cur_seq_difficulty = [1 - difficulty for difficulty in cur_seq_difficulty]
self.seq_difficulty = cur_seq_difficulty
def get_model_competence(self, curriculum_learing):
def get_model_competence(self, curriculum_learning):
"""
:param epoch: t in formula
......@@ -1410,14 +1410,14 @@ class TranslationDataset(CachedDataset2):
:param p: the pth root
:return:
"""
mode = curriculum_learing['competence']
T = curriculum_learing['total_number_of_iterations']
c0 = curriculum_learing['initial_competence']
mode = curriculum_learning['competence']
T = curriculum_learning['total_number_of_iterations']
c0 = curriculum_learning['initial_competence']
if mode == 'linear':
competence = min(1, self.epoch * ((1 - c0) / T) + c0)
elif mode == 'pth-root':
p = curriculum_learing['p']
p = curriculum_learning['p']
competence = min(1, (self.epoch * ((1 - c0 ** p) / T) + c0 ** p) ** (1. / p))
else:
raise NotImplementedError("This competence mode is not implemented.")
......@@ -1437,9 +1437,8 @@ class TranslationDataset(CachedDataset2):
if curriculum_learning['difficulty'] == 'kmeans_only_source':
self._seq_order = [i for i in range(len(self._data[self._main_data_key])) if
(self.seq_difficulty[i] == curriculum_learning['cur_cluster'])]
curriculum_learning['cur_cluster'] = curriculum_learning['cur_cluster'] + 1) % curriculum_learning['number_of_clusters']
else:
curriculum_learning['cur_cluster'] = (curriculum_learning['cur_cluster'] + 1) % curriculum_learning['number_of_clusters']
else:
self._seq_order = [i for i in range(len(self._data[self._main_data_key])) if (self.seq_difficulty[i] <= competence)]
self._num_seqs = len(self._seq_order)
# print("num_seqs should be less than 6 million")
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment