Here are examples of the Python API numpy.ediff1d taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
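For orientation, here is a minimal sketch of what np.ediff1d computes: the differences between consecutive elements of a flattened array, with optional values prepended (to_begin) or appended (to_end) to the result. The values below are chosen arbitrarily.

import numpy as np

a = np.array([1, 2, 4, 7, 0])
print(np.ediff1d(a))                                  # [ 1  2  3 -7]
# to_begin / to_end attach extra values to the differences, which is handy
# when the output should keep the same length as the input.
print(np.ediff1d(a, to_begin=-99, to_end=[88, 99]))   # [-99   1   2   3  -7  88  99]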
87 Examples
3
Source : test_arraysetops.py
with Apache License 2.0
from aws-samples
def test_ediff1d_scalar_handling(self,
ary,
prepend,
append,
expected):
# maintain backwards-compatibility
# of scalar prepend / append behavior
# in ediff1d following fix for gh-11490
actual = np.ediff1d(ary=ary,
to_end=append,
to_begin=prepend)
assert_equal(actual, expected)
def test_isin(self):
3
Source : metrics.py
with GNU Affero General Public License v3.0
from ContentWise
def __init__(self, URM_train):
super(Novelty, self).__init__()
URM_train = sps.csc_matrix(URM_train)
URM_train.eliminate_zeros()
self.item_popularity = np.ediff1d(URM_train.indptr)
self.novelty = 0.0
self.n_evaluated_users = 0
self.n_items = len(self.item_popularity)
self.n_interactions = self.item_popularity.sum()
def add_recommendations(self, recommended_items_ids):
3
Source : metrics.py
with GNU Affero General Public License v3.0
from ContentWise
def __init__(self, URM_train):
super(AveragePopularity, self).__init__()
URM_train = sps.csc_matrix(URM_train)
URM_train.eliminate_zeros()
item_popularity = np.ediff1d(URM_train.indptr)
self.cumulative_popularity = 0.0
self.n_evaluated_users = 0
self.n_items = URM_train.shape[0]
self.n_interactions = item_popularity.sum()
self.item_popularity_normalized = item_popularity/item_popularity.max()
def add_recommendations(self, recommended_items_ids):
3
Source : NonPersonalizedRecommender.py
with GNU Affero General Public License v3.0
from ContentWise
def fit(self):
# Use np.ediff1d and NOT a sum done over the rows as there might be values other than 0/1
self.item_pop = np.ediff1d(self.URM_train.tocsc().indptr)
self.n_items = self.URM_train.shape[1]
def _compute_item_score(self, user_id_array, items_to_compute = None):
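The comment in fit() points at an idiom that recurs throughout these recommender examples: for a sparse CSC (or CSR) matrix, np.ediff1d over indptr yields the number of stored entries per column (or row), regardless of the stored values. A minimal sketch with a hypothetical 3x3 rating matrix:

import numpy as np
import scipy.sparse as sps

# Hypothetical interaction matrix with explicit ratings (values other than 0/1).
URM = sps.csc_matrix(np.array([[5.0, 0.0, 2.0],
                               [0.0, 3.0, 1.0],
                               [4.0, 0.0, 0.0]]))
item_pop = np.ediff1d(URM.indptr)            # stored entries per column (item popularity)
print(item_pop)                              # [2 1 2]
# A plain column sum would weight items by rating instead of counting interactions:
print(np.asarray(URM.sum(axis=0)).ravel())   # [9. 3. 3.]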
3
Source : Utility.py
with GNU Affero General Public License v3.0
from ContentWise
def filter_urm(urm, user_min_number_ratings=1, item_min_number_ratings=1):
# keep only users with at least n ratings, same for the items
# NOTE: this operation re index both users and items (we get a more compact URM)
urm = sps.csr_matrix(urm)
urm.eliminate_zeros()
users_to_select_mask = np.ediff1d(urm.indptr) >= user_min_number_ratings
urm = urm[users_to_select_mask, :]
urm = sps.csc_matrix(urm)
items_to_select_mask = np.ediff1d(urm.indptr) >= item_min_number_ratings
urm = urm[:, items_to_select_mask]
return urm.tocsr()
def print_stat_urm(urm, title=''):
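A quick usage sketch of the filtering step above on a tiny hypothetical matrix; the Boolean mask built from np.ediff1d(indptr) selects the rows (users) with enough ratings, and slicing with it re-indexes the matrix so its shape shrinks:

import numpy as np
import scipy.sparse as sps

urm = sps.csr_matrix(np.array([[1, 0, 1],
                               [0, 0, 0],   # user with no ratings
                               [1, 1, 0]]))
users_mask = np.ediff1d(urm.indptr) >= 1    # per-user rating count >= threshold
print(users_mask)                           # [ True False  True]
urm = urm[users_mask, :]
print(urm.shape)                            # (2, 3): the empty user row is gone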
3
Source : spectrum1D.py
with BSD 3-Clause "New" or "Revised" License
from cylammarco
def add_wavelength(self, wave):
"""
Add the wavelength of each effective pixel.
Parameters
----------
wave: list or 1d-array
The wavelength values at each effective pixel.
"""
self.wave = wave
# Note that the native pixels have varying bin sizes.
self.wave_bin = np.nanmedian(np.array(np.ediff1d(wave)))
self.wave_start = np.min(wave)
self.wave_end = np.max(wave)
def remove_wavelength(self):
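A minimal sketch of the median-spacing estimate used in add_wavelength, with a made-up, slightly non-uniform wavelength grid:

import numpy as np

wave = np.array([4000.0, 4001.1, 4002.1, 4003.2, 4004.2, 4005.3])
wave_bin = np.nanmedian(np.ediff1d(wave))
print(wave_bin)                  # ~1.1: typical (median) pixel-to-pixel spacing
print(wave.min(), wave.max())    # wavelength coverage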
3
Source : spectrum1D.py
with BSD 3-Clause "New" or "Revised" License
from cylammarco
def add_wavelength_resampled(self, wave_resampled):
"""
Add the wavelength of the resampled spectrum which has an evenly
distributed wavelength spacing.
Parameters
----------
wave: list or 1d-array
The resampled wavelength values.
"""
# We assume that the resampled spectrum has fixed bin size
self.wave_bin = np.nanmedian(np.array(np.ediff1d(wave_resampled)))
self.wave_start = np.min(wave_resampled)
self.wave_end = np.max(wave_resampled)
self.wave_resampled = wave_resampled
def remove_wavelength_resampled(self):
3
Source : test_arraysetops.py
with Apache License 2.0
from dashanji
def test_ediff1d_scalar_handling(self,
ary,
prepend,
append,
expected):
# maintain backwards-compatibility
# of scalar prepend / append behavior
# in ediff1d following fix for gh-11490
actual = np.ediff1d(ary=ary,
to_end=append,
to_begin=prepend)
assert_equal(actual, expected)
assert actual.dtype == expected.dtype
def test_isin(self):
3
Source : functions.py
with MIT License
from eng-tools
def clean_out_non_changing(values):
"""
Takes an array removes all values that are the same as the previous value.
:param values: array of floats
:return: cleaned array, indices of clean values in original array
"""
# diff_values = np.diff(values)
# diff_values = np.insert(diff_values, 0, values[0])
diff_values = np.ediff1d(values, to_begin=values[0])
non_zero_indices = np.where(diff_values != 0)[0]
non_zero_indices = np.insert(non_zero_indices, 0, 0)
cleaned_values = np.take(values, non_zero_indices)
return cleaned_values, non_zero_indices
def get_peak_array_indices(values, ptype='all'):
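A minimal sketch of the change-detection idiom used in clean_out_non_changing: ediff1d with to_begin keeps the output the same length as the input, so nonzero entries mark every position whose value differs from its predecessor (the to_begin value below is chosen nonzero so the first element is always kept):

import numpy as np

values = np.array([3.0, 3.0, 3.0, 1.0, 1.0, 4.0])
change = np.ediff1d(values, to_begin=1.0) != 0
keep_idx = np.flatnonzero(change)
print(values[keep_idx])   # [3. 1. 4.]
print(keep_idx)           # [0 3 5]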
3
Source : misc.py
with MIT License
from fmonegaglia
def ediff1d0( x ):
'''
Compute the element-wise difference of an array, starting from 0
'''
if len( x ) == 0:
return np.ediff1d( x )
return np.ediff1d( x, to_begin=0 )
def NaNs( N ):
3
Source : mountainClimberCP.py
with Apache License 2.0
from gxiaolab
def crs(cov_array):
"""Calculate cumulative read sum """
vert_array = np.insert(np.ediff1d(cov_array), [0], 0)
vert_sum_array = np.cumsum(np.absolute(vert_array))
if max(vert_sum_array) == 0:
vert_sum_norm_array = ['NA']
else:
vert_sum_norm_array = vert_sum_array / max(vert_sum_array)
return vert_sum_norm_array, vert_array
def ks_test(vert_sum_array, make_plots, out_prefix):
3
Source : test_quantity_non_ufuncs.py
with BSD 3-Clause "New" or "Revised" License
from holzschu
def test_ediff1d(self):
# ediff1d works always as it calls the Quantity method.
self.check1(np.ediff1d)
x = np.arange(10.) * u.m
out = np.ediff1d(x, to_begin=-12.5*u.cm, to_end=1*u.km)
expected = np.ediff1d(x.value, to_begin=-0.125, to_end=1000.) * x.unit
assert_array_equal(out, expected)
class TestDatetimeFunctions(BasicTestSetup):
3
Source : test_heart.py
with GNU General Public License v3.0
from JanCBrammer
def compute_rmssd(peaks):
rr = np.ediff1d(peaks, to_begin=0)
rr[0] = np.mean(rr[1:])
rmssd = np.sqrt(np.mean(rr ** 2))
return rmssd
@pytest.fixture
3
Source : metrics.py
with GNU Affero General Public License v3.0
from MaurizioFD
def __init__(self, URM_test, ignore_items):
super(Items_In_GT, self).__init__()
URM_test.eliminate_zeros()
self.interaction_in_GT_counter = np.ediff1d(sps.csc_matrix(URM_test).indptr)
self.ignore_items = ignore_items.astype(np.int).copy()
def add_recommendations(self, recommended_items_ids):
3
Source : metrics.py
with GNU Affero General Public License v3.0
from MaurizioFD
def __init__(self, URM_test, ignore_users):
super(Users_In_GT, self).__init__()
URM_test.eliminate_zeros()
self.interaction_in_GT_counter = np.ediff1d(sps.csr_matrix(URM_test).indptr)
self.ignore_users = ignore_users.astype(np.int).copy()
def add_recommendations(self, recommended_items_ids):
3
Source : metrics.py
with GNU Affero General Public License v3.0
from MaurizioFD
def __init__(self, URM_train):
super(Novelty, self).__init__()
URM_train = sps.csc_matrix(URM_train)
URM_train.eliminate_zeros()
self.item_popularity = np.ediff1d(URM_train.indptr)
self.novelty = 0.0
self.n_evaluated_users = 0
self.n_items = len(self.item_popularity)
self.n_interactions = self.item_popularity.sum()
def add_recommendations(self, recommended_items_ids):
3
Source : run_IJCAI_17_DELF.py
with GNU Affero General Public License v3.0
from MaurizioFD
def get_cold_items(URM):
cold_items_flag = np.ediff1d(sps.csc_matrix(URM).indptr) == 0
return np.arange(0, URM.shape[1])[cold_items_flag]
def read_data_split_and_search(dataset_name,
3
Source : utils.py
with Apache License 2.0
from mcindoe
def simulate_wiener_process(time_steps: np.ndarray, n_paths: int) -> np.ndarray:
"""
Simulate `n_paths` paths of the Wiener process at time
steps as provided in the `time_steps` array
Returns:
A np.ndarray of shape (n, len(time_steps)) with each row
representing a realisation of the path
"""
time_differences = np.ediff1d(time_steps)
normal_noise = np.random.normal(size=(n_paths, len(time_differences))) * time_differences
wiener_paths = np.insert(np.cumsum(normal_noise, axis=1), 0, 0, axis=1)
return wiener_paths
def simulate_gbm_process(
3
Source : chunkify_raw.py
with Mozilla Public License 2.0
from nanoporetech
def replace_repeats_with_zero(arr):
"""Replace repeated elements in 1d array with 0"""
arr[np.ediff1d(arr, to_begin=1) == 0] = 0
return arr
def fill_zeros_with_prev(arr):
3
Source : visualize.py
with MIT License
from tmoer
def estimate_sd(steps,returns,steps_raw,returns_raw):
boundaries = np.append(np.append([0],steps[:-1] + np.ediff1d(steps)),[np.Inf])
sds = []
for i,mean in enumerate(returns):
index = elementwise_and(steps_raw > boundaries[i],steps_raw < boundaries[i+1])
sd = np.sqrt(np.sum(np.square(returns_raw[index]-mean)))
sds.append(sd)
return sds
def symmetric_remove(x,n):
3
Source : analyzers.py
with MIT License
from vs-uulm
def pinpointMinima(self):
"""
Pinpoint the exact positions of local minima within the scope of each smoothed local minimum.
The exact position is looked for in self.bitcongruences.
:return: One exact local minimum m in the interval ( center(m_n-1, m_n), center(m_n, m_n+1) )
for each n in (0, smoothed local minimum, -1)
"""
localminima = MessageAnalyzer.localMinima(self.values) # List[idx], List[min]
# localmaxima = MessageAnalyzer.localMaxima(self.values) # List[idx], List[max]
# for lminix in range(len(localminima)):
# localminima[lminix]
lminAO = [0] + localminima[0] + [len(self._message.data)]
lminMed = (numpy.round(numpy.ediff1d(lminAO) / 2) + lminAO[:-1]).astype(int)
bclmins = [medl + numpy.argmin(self.bitcongruences[medl:medr]) for medl, medr in zip(lminMed[:-1], lminMed[1:])]
return bclmins
class BitCongruenceDelta(BitCongruence):
3
Source : analyzers.py
with MIT License
from vs-uulm
def risingDeltas(self) -> List[Tuple[int, numpy.ndarray]]:
"""
the deltas in the original bcd (so: 2nd delta) between minima and maxima in smoothed bcd
:return: offset of and the bcd-delta values starting at this position in rising parts of the smoothed bcd.
Thus, offset is a minimum + 1 and the array covers the indices up to the following maximum, itself included.
"""
extrema = self.extrema()
risingdeltas = [ ( i[0] + 1, numpy.ediff1d(self.bcdeltas[i[0]:j[0]+1]) ) # include index of max
for i, j in zip(extrema[:-1], extrema[1:])
if i[1] == False and j[1] == True and j[0]+1 - i[0] > 1]
# risingdeltas[-1][0] >= len(self.bcdeltas)
return risingdeltas
def inflectionPoints(self) -> Tuple[List[int], List[float]]:
3
Source : analyzers.py
with MIT License
from vs-uulm
def analyze(self):
super().analyze()
self._values = numpy.ediff1d(self._values).tolist()
# self._values = numpy.divide(numpy.diff(self._values, n=8), 8).tolist()
class HorizonBitcongruence2ndDelta(HorizonBitcongruence):
3
Source : analyzers.py
with MIT License
from vs-uulm
def analyze(self):
super().analyze()
self._values = numpy.ediff1d(self._values).tolist()
# self._values = numpy.divide(numpy.diff(self._values, n=8), 8).tolist()
def messageSegmentation(self) -> List[MessageSegment]:
3
Source : segments.py
with MIT License
from vs-uulm
def tokenDelta(tokenlist, unitsize=U_BYTE):
"""
Relative differences between subsequent token values.
:return:
"""
if len(tokenlist) < 2:
raise ValueError("Needs at least two tokens to determine a delta. Message is {}".format(tokenlist))
if unitsize == MessageAnalyzer.U_NIBBLE:
tokens = MessageAnalyzer.nibblesFromBytes(tokenlist)
else:
tokens = tokenlist
return list(numpy.ediff1d(tokens))
@staticmethod
3
Source : vector.py
with GNU General Public License v3.0
from yupidevs
def delta(self):
"""Vector : Calculates the differnece between each item"""
if len(self.shape) > 1:
new_vec = []
for i in range(self.shape[1]):
new_vec.append(np.ediff1d(self[:, i]))
return Vector.create(new_vec).T
else:
return Vector.create(np.ediff1d(self))
@property
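A small sketch, independent of the Vector class above, of why the column loop is needed: np.ediff1d always flattens its input, so per-column differences of a 2-D array require either a loop or np.diff along axis 0.

import numpy as np

a = np.array([[0.0, 1.0],
              [2.0, 4.0],
              [5.0, 9.0]])
cols = [np.ediff1d(a[:, i]) for i in range(a.shape[1])]   # column-wise differences
delta = np.stack(cols, axis=1)
assert np.array_equal(delta, np.diff(a, axis=0))          # same result as np.diff on axis 0
print(delta)                                              # [[2. 3.] [3. 5.]]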
0
Source : DataReader_utils.py
with GNU Affero General Public License v3.0
from ContentWise
def remove_empty_rows_and_cols(URM, ICM = None):
URM = check_matrix(URM, "csr")
rows = URM.indptr
numRatings = np.ediff1d(rows)
user_mask = numRatings >= 1
URM = URM[user_mask,:]
cols = URM.tocsc().indptr
numRatings = np.ediff1d(cols)
item_mask = numRatings >= 1
URM = URM[:,item_mask]
removedUsers = np.arange(0, len(user_mask))[np.logical_not(user_mask)]
removedItems = np.arange(0, len(item_mask))[np.logical_not(item_mask)]
if ICM is not None:
ICM = ICM[item_mask,:]
return URM.tocsr(), ICM.tocsr(), removedUsers, removedItems
return URM.tocsr(), removedUsers, removedItems
from Data_manager.IncrementalSparseMatrix import IncrementalSparseMatrix
0
Source : DataReader_utils.py
with GNU Affero General Public License v3.0
from ContentWise
def remove_features(ICM, min_occurrence = 5, max_percentage_occurrence = 0.30, reconcile_mapper = None):
"""
The function eliminates the values associated to features occurring in fewer than the minimum number of items
or in more than the maximum percentage of items. The shape of the ICM is reduced by deleting features.
:param ICM:
:param minPercOccurrence:
:param max_percentage_occurrence:
:param reconcile_mapper: DICT mapper [token] -> index
:return: ICM
:return: deletedFeatures
:return: DICT mapper [token] -> index
"""
ICM = check_matrix(ICM, 'csc')
n_items = ICM.shape[0]
cols = ICM.indptr
numOccurrences = np.ediff1d(cols)
feature_mask = np.logical_and(numOccurrences >= min_occurrence, numOccurrences <= n_items * max_percentage_occurrence)
ICM = ICM[:,feature_mask]
deletedFeatures = np.arange(0, len(feature_mask))[np.logical_not(feature_mask)]
print("RemoveFeatures: removed {} features with less then {} occurrences, removed {} features with more than {} occurrencies".format(
sum(numOccurrences < min_occurrence), min_occurrence,
sum(numOccurrences > n_items * max_percentage_occurrence), int(n_items * max_percentage_occurrence)
))
if reconcile_mapper is not None:
reconcile_mapper = reconcile_mapper_with_removed_tokens(reconcile_mapper, deletedFeatures)
return ICM, deletedFeatures, reconcile_mapper
return ICM, deletedFeatures
def reconcile_mapper_with_removed_tokens(key_to_value_dict, values_to_remove):
0
Source : Dataset.py
with GNU Affero General Public License v3.0
from ContentWise
def print_statistics(self):
self._assert_is_initialized()
URM_all = self.get_URM_all()
n_users, n_items = URM_all.shape
n_interactions = URM_all.nnz
URM_all = sps.csr_matrix(URM_all)
user_profile_length = np.ediff1d(URM_all.indptr)
max_interactions_per_user = user_profile_length.max()
avg_interactions_per_user = n_interactions/n_users
min_interactions_per_user = user_profile_length.min()
URM_all = sps.csc_matrix(URM_all)
item_profile_length = np.ediff1d(URM_all.indptr)
max_interactions_per_item = item_profile_length.max()
avg_interactions_per_item = n_interactions/n_items
min_interactions_per_item = item_profile_length.min()
print("DataReader: current dataset is: {}\n"
"\tNumber of items: {}\n"
"\tNumber of users: {}\n"
"\tNumber of interactions in URM_all: {}\n"
"\tValue range in URM_all: {:.2f}-{:.2f}\n"
"\tInteraction density: {:.2E}\n"
"\tInteractions per user:\n"
"\t\t Min: {:.2E}\n"
"\t\t Avg: {:.2E}\n"
"\t\t Max: {:.2E}\n"
"\tInteractions per item:\n"
"\t\t Min: {:.2E}\n"
"\t\t Avg: {:.2E}\n"
"\t\t Max: {:.2E}\n"
"\tGini Index: {:.2f}\n".format(
self.__class__,
n_items,
n_users,
n_interactions,
np.min(URM_all.data), np.max(URM_all.data),
compute_density(URM_all),
min_interactions_per_user,
avg_interactions_per_user,
max_interactions_per_user,
min_interactions_per_item,
avg_interactions_per_item,
max_interactions_per_item,
gini_index(user_profile_length),
))
if self._HAS_ICM:
for ICM_name, ICM_object in self.AVAILABLE_ICM.items():
n_items, n_features = ICM_object.shape
min_value = np.min(ICM_object.data)
max_value = np.max(ICM_object.data)
format_string = "2E" if np.max([np.abs(min_value), np.abs(max_value)])>100 else "2f"
statistics_string = "\tICM name: {}, Value range: {:.{format_string}} / {:.{format_string}}, Num features: {}, feature occurrences: {}, density {:.2E}".format(
ICM_name,
min_value, max_value,
n_features,
ICM_object.nnz,
compute_density(ICM_object),
format_string = format_string
)
print(statistics_string)
print("\n")
#########################################################################################################
########## ##########
########## CLONE ##########
########## ##########
#########################################################################################################
def copy(self):
0
Source : MatrixFactorizationImpressions_Cython.py
with GNU Affero General Public License v3.0
from ContentWise
def _estimate_user_factors(self, ITEM_factors_Y):
profile_length = np.ediff1d(self.URM_train.indptr)
profile_length_sqrt = np.sqrt(profile_length)
# Estimating the USER_factors using ITEM_factors_Y
if self.verbose:
print("{}: Estimating user factors... ".format(self.algorithm_name))
USER_factors = self.URM_train.dot(ITEM_factors_Y)
#Divide every row for the sqrt of the profile length
for user_index in range(self.n_users):
if profile_length_sqrt[user_index] > 0:
USER_factors[user_index, :] /= profile_length_sqrt[user_index]
if self.verbose:
print("{}: Estimating user factors... done!".format(self.algorithm_name))
return USER_factors
def set_URM_train(self, URM_train_new, estimate_item_similarity_for_cold_users = False, **kwargs):
0
Source : IALSRecommender.py
with GNU Affero General Public License v3.0
from ContentWise
def fit(self, epochs = 300,
num_factors = 20,
confidence_scaling = "linear",
alpha = 1.0,
epsilon = 1.0,
reg = 1e-3,
init_mean=0.0,
init_std=0.1,
**earlystopping_kwargs):
"""
:param epochs:
:param num_factors:
:param confidence_scaling: supported scaling modes for the observed values: 'linear' or 'log'
:param alpha: Confidence weight, confidence c = 1 + alpha*r where r is the observed "rating".
:param reg: Regularization constant.
:param epsilon: epsilon used in log scaling only
:param init_mean: mean used to initialize the latent factors
:param init_std: standard deviation used to initialize the latent factors
:return:
"""
if confidence_scaling not in self.AVAILABLE_CONFIDENCE_SCALING:
raise ValueError("Value for 'confidence_scaling' not recognized. Acceptable values are {}, provided was '{}'".format(self.AVAILABLE_CONFIDENCE_SCALING, confidence_scaling))
self.num_factors = num_factors
self.alpha = alpha
self.epsilon = epsilon
self.reg = reg
self.USER_factors = self._init_factors(self.n_users, False) # don't need values, will compute them
self.ITEM_factors = self._init_factors(self.n_items)
self._build_confidence_matrix(confidence_scaling)
warm_user_mask = np.ediff1d(self.URM_train.indptr) > 0
warm_item_mask = np.ediff1d(self.URM_train.tocsc().indptr) > 0
self.warm_users = np.arange(0, self.n_users, dtype=np.int32)[warm_user_mask]
self.warm_items = np.arange(0, self.n_items, dtype=np.int32)[warm_item_mask]
self.regularization_diagonal = np.diag(self.reg * np.ones(self.num_factors))
self._update_best_model()
self._train_with_early_stopping(epochs,
algorithm_name = self.RECOMMENDER_NAME,
**earlystopping_kwargs)
self.USER_factors = self.USER_factors_best
self.ITEM_factors = self.ITEM_factors_best
def _build_confidence_matrix(self, confidence_scaling):
0
Source : run_results_gathering.py
with GNU Affero General Public License v3.0
from ContentWise
def print_results(urm_test_split: csr_matrix):
urm_test = urm_test_split.copy()
n_test_users = np.sum(np.ediff1d(urm_test.indptr) >= 1)
result_loader = ResultFolderLoader(EXPERIMENTS_FOLDER_PATH,
base_algorithm_list=None,
other_algorithm_list=None,
KNN_similarity_list=KNN_SIMILARITY_LIST,
ICM_names_list=None,
UCM_names_list=None)
article_metrics_latex_results_filename = os.path.join(RESULTS_EXPORT_FOLDER_PATH,
"article_metrics_latex_results.txt")
result_loader.generate_latex_results(article_metrics_latex_results_filename,
metrics_list=["RECALL", "MAP"],
cutoffs_list=METRICS_CUTOFF_TO_REPORT_LIST,
table_title=None,
highlight_best=True)
beyond_accuracy_metrics_latex_results_filename = os.path.join(RESULTS_EXPORT_FOLDER_PATH,
"beyond_accuracy_metrics_latex_results.txt")
result_loader.generate_latex_results(beyond_accuracy_metrics_latex_results_filename,
metrics_list=["DIVERSITY_MEAN_INTER_LIST",
"DIVERSITY_HERFINDAHL",
"COVERAGE_ITEM",
"DIVERSITY_GINI",
"SHANNON_ENTROPY"],
cutoffs_list=OTHERS_CUTOFF_TO_REPORT_LIST,
table_title=None,
highlight_best=True)
all_metrics_latex_results_filename = os.path.join(RESULTS_EXPORT_FOLDER_PATH,
"all_metrics_latex_results.txt")
result_loader.generate_latex_results(all_metrics_latex_results_filename,
metrics_list=["PRECISION",
"RECALL",
"MAP",
"MRR",
"NDCG",
"F1",
"HIT_RATE",
"ARHR",
"NOVELTY",
"DIVERSITY_MEAN_INTER_LIST",
"DIVERSITY_HERFINDAHL",
"COVERAGE_ITEM",
"DIVERSITY_GINI",
"SHANNON_ENTROPY"],
cutoffs_list=OTHERS_CUTOFF_TO_REPORT_LIST,
table_title=None,
highlight_best=True)
time_latex_results_filename = os.path.join(RESULTS_EXPORT_FOLDER_PATH,
"time_latex_results.txt")
result_loader.generate_latex_time_statistics(time_latex_results_filename,
n_evaluation_users=n_test_users,
table_title=None)
if __name__ == '__main__':
0
Source : print_negative_items_stats.py
with GNU Affero General Public License v3.0
from ContentWise
def print_negative_items_stats(URM_train, URM_validation, URM_test, URM_test_negative):
URM_train = URM_train.copy()
URM_validation = URM_validation.copy()
URM_test = URM_test.copy()
URM_test_negative = URM_test_negative.copy()
import traceback
URM_test_negative_csr = sps.csr_matrix(URM_test_negative)
user_negatives = np.ediff1d(URM_test_negative_csr.indptr)
print("Max num negatives is {}, min num negatives is {} (nonzero is {}), users with less than max are {} of {}".format(np.max(user_negatives),
np.min(user_negatives),
np.min(user_negatives[user_negatives!=0]),
np.sum(user_negatives!=np.max(user_negatives)),
URM_test_negative_csr.shape[0]))
from Utils.assertions_on_data_for_experiments import assert_disjoint_matrices
remove_overlapping_data_flag = False
print("Intersection between URM_test_negative and URM_train + URM_validation")
try:
assert_disjoint_matrices([URM_train + URM_validation, URM_test_negative])
except:
traceback.print_exc()
remove_overlapping_data_flag = True
print("Intersection between URM_test_negative and URM_test")
try:
assert_disjoint_matrices([URM_test, URM_test_negative])
except:
traceback.print_exc()
remove_overlapping_data_flag = True
if remove_overlapping_data_flag:
print("Removing overlapping data from URM_negative")
URM_positive = URM_train + URM_validation + URM_test
URM_positive.data = np.ones_like(URM_positive.data)
URM_test_negative.data = np.ones_like(URM_test_negative.data)
# Subtract from the URM_test_negative train items
# A - B = B - A*B
URM_test_negative_not_positive = URM_test_negative - URM_test_negative.multiply(URM_positive)
URM_test_negative_not_positive = sps.csr_matrix(URM_test_negative_not_positive)
user_negatives_not_positives = np.ediff1d(URM_test_negative_not_positive.indptr)
print("URM test negatives non overlapping with positives: Max num negatives is {}, min num negatives is {} (nonzero is {}), users with less than max are {} of {}".format(np.max(user_negatives_not_positives),
np.min(user_negatives_not_positives),
np.min(user_negatives_not_positives[user_negatives_not_positives!=0]),
np.sum(user_negatives_not_positives!=np.max(user_negatives_not_positives)),
URM_test_negative_csr.shape[0]))
URM_train_all = URM_train + URM_validation
URM_train_all = sps.csr_matrix(URM_train_all)
user_train_profile = np.ediff1d(URM_train_all.indptr)
user_test_profile = np.ediff1d(sps.csr_matrix(URM_test).indptr)
assert np.array_equal(logical_iff(np.array([False, False, True, True]),
np.array([False, True, False, True])),
np.array([True, False, False, True]))
print_iff_result("User presence in train data IFF presence in test", user_train_profile>0, user_test_profile>0)
print_iff_result("User presence in test data IFF presence in negative items test", user_test_profile>0, user_negatives>0)
print_iff_result("User presence in train data IFF presence in negative items test", user_train_profile>0, user_negatives>0)
0
Source : test_tokenization.py
with Apache License 2.0
from deepset-ai
def test_bert_custom_vocab(caplog):
caplog.set_level(logging.CRITICAL)
lang_model = "bert-base-cased"
tokenizer = Tokenizer.load(
pretrained_model_name_or_path=lang_model,
do_lower_case=False
)
#deprecated: tokenizer.add_custom_vocab("samples/tokenizer/custom_vocab.txt")
tokenizer.add_tokens(new_tokens=["neverseentokens"])
basic_text = "Some Text with neverseentokens plus !215?#. and a combined-token_with/chars"
# original tokenizer from transformer repo
tokenized = tokenizer.tokenize(basic_text)
assert tokenized == ['Some', 'Text', 'with', 'neverseentokens', 'plus', '!', '215', '?', '#', '.', 'and', 'a', 'combined', '-', 'token', '_', 'with', '/', 'ch', '##ars']
# ours with metadata
encoded = tokenizer.encode_plus(basic_text, add_special_tokens=False).encodings[0]
offsets = [x[0] for x in encoded.offsets]
start_of_word_single = [True] + list(np.ediff1d(encoded.words) > 0)
assert encoded.tokens == tokenized
assert offsets == [0, 5, 10, 15, 31, 36, 37, 40, 41, 42, 44, 48, 50, 58, 59, 64, 65, 69, 70, 72]
assert start_of_word_single == [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False]
def test_fast_bert_custom_vocab(caplog):
0
Source : test_tokenization.py
with Apache License 2.0
from deepset-ai
def test_fast_bert_custom_vocab(caplog):
caplog.set_level(logging.CRITICAL)
lang_model = "bert-base-cased"
tokenizer = Tokenizer.load(
pretrained_model_name_or_path=lang_model,
do_lower_case=False, use_fast=True
)
#deprecated: tokenizer.add_custom_vocab("samples/tokenizer/custom_vocab.txt")
tokenizer.add_tokens(new_tokens=["neverseentokens"])
basic_text = "Some Text with neverseentokens plus !215?#. and a combined-token_with/chars"
# original tokenizer from transformer repo
tokenized = tokenizer.tokenize(basic_text)
assert tokenized == ['Some', 'Text', 'with', 'neverseentokens', 'plus', '!', '215', '?', '#', '.', 'and', 'a', 'combined', '-', 'token', '_', 'with', '/', 'ch', '##ars']
# ours with metadata
encoded = tokenizer.encode_plus(basic_text, add_special_tokens=False).encodings[0]
offsets = [x[0] for x in encoded.offsets]
start_of_word_single = [True] + list(np.ediff1d(encoded.words) > 0)
assert encoded.tokens == tokenized
assert offsets == [0, 5, 10, 15, 31, 36, 37, 40, 41, 42, 44, 48, 50, 58, 59, 64, 65, 69, 70, 72]
assert start_of_word_single == [True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, True, False]
@pytest.mark.parametrize("model_name, tokenizer_type", [
0
Source : _helpers.py
with MIT License
from douglasdavis
def likely_uniform_bins(edges: np.ndarray) -> bool:
"""Test if bin edges describe a set of fixed width bins."""
diffs = np.ediff1d(edges)
return bool(np.all(np.isclose(diffs, diffs[0])))
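A quick usage sketch of the helper above (re-stated here so the snippet runs standalone), with two hypothetical edge arrays:

import numpy as np

def likely_uniform_bins(edges: np.ndarray) -> bool:
    """Test if bin edges describe a set of fixed width bins."""
    diffs = np.ediff1d(edges)
    return bool(np.all(np.isclose(diffs, diffs[0])))

print(likely_uniform_bins(np.linspace(0.0, 1.0, 11)))       # True: constant 0.1 spacing
print(likely_uniform_bins(np.array([0.0, 0.1, 0.3, 1.0])))  # False: spacing varies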
0
Source : functions.py
with MIT License
from eng-tools
def determine_indices_of_peaks_for_cleaned_array(values):
"""
Determines the position of values that form a local peak in a signal.
Warning: data must be cleaned so that adjacent points have the same value
Parameters
----------
values: array_like
Array of values that peaks will be found in
Returns
-------
peak_indices: array_like of int
Array of indices of peaks
"""
diff = np.ediff1d(values, to_begin=0)
# if negative then direction has switched
# direction_switch = np.insert(direction_switch, 0, 0)
peak_indices = np.where(diff[1:] * diff[:-1] < 0)[0]
peak_indices = np.insert(peak_indices, 0, 0) # Include first and last value
peak_indices = np.insert(peak_indices, len(peak_indices), len(values) - 1)
return peak_indices
def _determine_peak_only_series_4_cleaned_data(values):
0
Source : accesses.py
with Apache License 2.0
from fleetspace
def _find_accesses(sat, gs, start, end, ts):
"""finds a single timestamp from each access in the provided time
window.
"""
pair = gs.observe(sat)
def f(t, use_horizonmask=True):
"""function to maximize"""
def _get_horizon(az_deg):
return gs.horizon_mask[int(az_deg)]
get_horizon = vectorize(_get_horizon)
t = ts.tai(jd=t)
alt, az, distance = pair.at(t).altaz()
if use_horizonmask:
horizon = get_horizon(az.degrees)
return alt.degrees - horizon
return alt.degrees
def minusf(t):
"""function to minimize"""
return -f(t)
t0 = start.tai
orbit_period_per_minute = TAU / sat._vec.model.no
orbit_period = orbit_period_per_minute / 24.0 / 60.0
step = orbit_period / 6.0
t = arange(start.tai - step, end.tai + (2 * step), step)
deg_above_cutoff = f(t)
left_diff = ediff1d(deg_above_cutoff, to_begin=0.0)
right_diff = ediff1d(deg_above_cutoff, to_end=0.0)
maxima = (left_diff > 0.0) & (right_diff < 0.0)
def find_highest(t):
result = optimize.minimize_scalar(
minusf, bracket=[t + step, t, t - step], tol=JD_SEC / t
)
return result.x
t_highest = [find_highest(ti) for ti in t[maxima]]
dt_highest = ts.tai(jd=t_highest)
def find_rising(t):
"""Provide a moment of maximum altitude as `t`."""
rising = optimize.brentq(f, t - 2 * step, t)
return rising
def find_setting(t):
"""Provide a moment of maximum altitude as `t`."""
setting = optimize.brentq(f, t + 2 * step, t)
return setting
passes = [ti for ti in t_highest if f(ti) > 0.0]
dt_passes = ts.tai(jd=passes)
max_alts = [f(ti, use_horizonmask=False) for ti in passes]
t_rising = [find_rising(ti) for ti in passes]
dt_rising = ts.tai(jd=t_rising)
t_setting = [find_setting(ti) for ti in passes]
dt_setting = ts.tai(jd=t_setting)
zipped = zip(dt_rising, dt_setting, max_alts)
return sat, gs, zipped
class AccessCalculator(object):
0
Source : interpolation.py
with MIT License
from fmonegaglia
def CurvaturePCS( *args, **kwargs ):
'''
CurvaturePCS(*args, **kwargs)
======================
Compute the channel centerline's arc-length, inflection angle and curvature
from a PCS. Three methods are available:
1 - Finite differences
2 - Guneralp and Rhoads 2007 (requires spline derivatives)
3 - Schwenk et al. 2015
Arguments
---------
x_PCS interpolated x coordinate array (Method 1,2,3)
y_PCS interpolated y coordinate array (Method 1,2,3)
d1x_PCS 1st order spatial x derivative interpolated (Method 2)
d1y_PCS 1st order spatial y derivative interpolated (Method 2)
d2x_PCS 2st order spatial x derivative interpolated (Method 2)
d2y_PCS 2st order spatial y derivative interpolated (Method 2)
method method to be used (default 1)
return_diff return spatial differences for x, y and arc-length coordinates (default False)
apply_filter Apply a one-step window-averaged smoothing of the channel curvature (default False)
Returns
-------
s centerline's arc-length
theta inflection angle of the centerline
Cs intrinsic centerline curvature
dx spatial difference array for x coordinates (optional)
dy spatial difference array for y coordinates (optional)
ds spatial difference array for arc-length (optional)
'''
method = kwargs.pop( 'method', 1 )
return_diff = kwargs.pop( 'return_diff', False )
apply_filter = kwargs.pop( 'apply_filter', False )
x = args[0]
y = args[1]
dx = np.ediff1d( x, to_begin=0 )
dy = np.ediff1d( y, to_begin=0 )
ds = np.sqrt( dx**2 + dy**2 )
s = np.cumsum( ds )
theta = np.arctan2( dy, dx )
for i in xrange(1,theta.size):
if theta[i] - theta[i-1] > np.pi: theta[i] -= 2*np.pi
elif theta[i] - theta[i-1] < -np.pi: theta[i] += 2*np.pi
if method == 1:
Cs = -np.gradient( theta, np.gradient(s) )
elif method == 2:
d1x = args[2]
d1y = args[3]
d2x = args[4]
d2y = args[5]
Cs = - ( d1x*d2y - d1y*d2x ) / ( d1x**2 + d1y**2 )**(3/2)
elif method == 3:
ax = x[1:-1] - x[:-2]
bx = x[2:] - x[:-2]
cx = x[2:] - x[1:-1]
ay = y[1:-1] - y[:-2]
by = y[2:] - y[:-2]
cy = y[2:] - y[1:-1]
Cs = 2 * (ay*bx - ax*by) / \
np.sqrt( (ax**2+ay**2) * (bx**2+by**2) * (cx**2+cy**2) )
# Fix First and Last Point with finite difference
Cs = np.concatenate((np.array( -(theta[1]-theta[0])/(s[1]-s[0] ) ), Cs,
np.array( -(theta[-1]-theta[-2])/(s[-1]-s[-2]) )))
if apply_filter:
Cs[1:-1] = (Cs[:-2] + 2*Cs[1:-1] + Cs[2:]) / 4
if return_diff:
return s, theta, Cs, dx, dy, ds
else:
return s, theta, Cs
def WidthPCS( s, B, sPCS, kind='linear' ):
0
Source : logarithmic.py
with BSD 3-Clause "New" or "Revised" License
from holzschu
def ediff1d(self, to_end=None, to_begin=None):
return self._wrap_function(np.ediff1d, to_end, to_begin,
unit=self.unit._copy(dimensionless_unscaled))
_supported_functions = (FunctionQuantity._supported_functions |
0
Source : quantity.py
with BSD 3-Clause "New" or "Revised" License
from holzschu
def ediff1d(self, to_end=None, to_begin=None):
return self._wrap_function(np.ediff1d, to_end, to_begin)
def nansum(self, axis=None, out=None, keepdims=False):
0
Source : heart.py
with GNU General Public License v3.0
from JanCBrammer
def heart_stats(peaks, sfreq, nsamp):
"""Compute instantaneous cardiac features.
Compute heart period and -rate based on cardiac extrema (R-peaks or
systolic peaks). Cardiac period and -rate are calculated as horizontal
(temporal) peak-peak differences. I.e., to each peak assign the horizontal
difference to the preceding peak.
Parameters
----------
peaks : ndarray
Cardiac extrema (R-peaks or systolic peaks).
sfreq : int
Sampling frequency of the cardiac signal containing `peaks`.
nsamp : int
The length of the signal containing `peaks`. In samples.
Returns
-------
periodintp, rateintp : ndarray, ndarray
Vectors with `nsamp` elements, containing the instantaneous
heart period, and -rate.
"""
rr = np.ediff1d(peaks, to_begin=0) / sfreq
rr[0] = np.mean(rr[1:])
periodintp = interp_stats(peaks, rr, nsamp)
rateintp = 60 / periodintp
return periodintp, rateintp
def correct_peaks(peaks, sfreq, iterative=True):
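A minimal sketch of the peak-to-peak idiom used in heart_stats, with a made-up sampling rate and peak indices:

import numpy as np

sfreq = 4.0                                  # Hz, hypothetical
peaks = np.array([0, 3, 7, 10, 14])          # hypothetical R-peak sample indices
rr = np.ediff1d(peaks, to_begin=0) / sfreq   # peak-to-peak intervals in seconds
rr[0] = np.mean(rr[1:])                      # backfill the first interval, as in heart_stats
rate = 60.0 / rr                             # instantaneous heart rate in bpm
print(rr)                                    # [0.875 0.75  1.    0.75  1.   ]
print(rate)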
0
Source : base.py
with Apache License 2.0
from kakao
def _prepare_validation_data(self):
if hasattr(self, 'vali_data'):
return True
db = self.handle
num_users, num_items = db.attrs['num_users'], db.attrs['num_items']
row = db['vali']['row'][::]
col = db['vali']['col'][::]
val = db['vali']['val'][::]
_temp_mat = csr_matrix((val, (row, col)), (num_users, num_items))
indptr = _temp_mat.indptr[1:]
key = _temp_mat.indices
vali_rows = np.arange(len(indptr))[np.ediff1d(indptr, to_begin=indptr[0]) > 0]
vali_gt = {
u: set(key[indptr[u - 1]:indptr[u]]) if u != 0 else set(key[:indptr[0]])
for u in vali_rows}
validation_seen = {}
max_seen_size = 0
for rowid in vali_rows:
seen, *_ = self.get(rowid)
validation_seen[rowid] = set(seen)
max_seen_size = max(len(seen), max_seen_size)
validation_seen = validation_seen
validation_max_seen_size = max_seen_size
self.vali_data = {
"row": row,
"col": col,
"val": val,
"vali_rows": vali_rows,
"vali_gt": vali_gt,
"validation_seen": validation_seen,
"validation_max_seen_size": validation_max_seen_size
}
return True
def _sort_and_compressed_binarization(self, mm_path, num_lines, max_key, sort_key):
0
Source : cft.py
with MIT License
from mad-lab-fau
def onset(
self,
data: pd.DataFrame,
is_cft_interval: Optional[bool] = False,
compute_baseline: Optional[bool] = True,
hr_baseline: Optional[float] = None,
) -> Dict[str, Any]:
"""Compute **CFT onset**.
The CFT onset is defined as the time point after beginning of the CFT Interval where three consecutive
heart beats are lower than the Baseline heart rate (typically the Interval directly before the CFT).
This function computes the following CFT onset parameter:
* ``onset``: location of CFT onset. This value is the same datatype as the index of ``data``
(i.e., either a absolute datetime timestamp or a relative timestamp in time since recording).
* ``onset_latency``: CFT onset latency, i.e., the duration between beginning of the CFT Interval and
CFT onset in seconds.
* ``onset_idx``: location of CFT onset as array index
* ``onset_hr``: heart rate at CFT onset in bpm
* ``onset_hr_brady_percent``: bradycardia at CFT onset, i.e., relative change of CFT onset heart rate compared
to Baseline heart rate in percent.
* ``onset_slope``: Slope between Baseline heart rate and CFT onset heart rate, computed as:
`onset_slope = (onset_hr - baseline_hr) / onset_latency`
Parameters
----------
data : :class:`~pandas.DataFrame`
input data
is_cft_interval : bool, optional
``True`` if the heart rate data passed via ``data`` contains only the CFT Interval,
``False`` if it contains the data during the whole CFT procedure. Default: ``False``
compute_baseline : bool, optional
``True`` if Baseline Interval is included in data passed via ``data`` and Baseline heart rate
should be computed or ``False`` if Baseline heart rate is passed separately via ``hr_baseline``.
Default: ``True``
hr_baseline : float, optional
mean heart rate during Baseline Interval or ``None`` if Baseline interval is present in ``data`` and
Baseline heart rate is computed from there. Default: ``None``
Returns
-------
dict
dictionary with CFT onset parameter
"""
df_hr_cft, hr_baseline = self._sanitize_cft_input(data, is_cft_interval, compute_baseline, hr_baseline)
# bradycardia mask (True where heart rate is below baseline, False otherwise)
hr_brady = df_hr_cft < hr_baseline
# bradycardia borders (1 where we have a change between lower and higher heart rate)
brady_border = np.abs(np.ediff1d(hr_brady.astype(int), to_begin=0))
# filter out the phases where we have at least 3 heart rate values lower than baseline
brady_phases = hr_brady.groupby([np.cumsum(brady_border)]).filter(lambda df: df.sum() >= 3)
# CFT onset is the third beat
onset = brady_phases.index[2]
# TODO check index handling again...
onset_latency = (onset - df_hr_cft.index[0]).total_seconds()
onset_idx = df_hr_cft.index.get_loc(onset)
# heart rate at onset point
hr_onset = np.squeeze(df_hr_cft.loc[onset])
return {
"onset": onset,
"onset_latency": onset_latency,
"onset_idx": onset_idx,
"onset_hr": hr_onset,
"onset_hr_percent": (1 - hr_onset / hr_baseline) * 100,
"onset_slope": (hr_onset - hr_baseline) / onset_latency,
}
def peak_bradycardia(
0
Source : plotting.py
with MIT License
from mad-lab-fau
def hr_mean_plot( # pylint:disable=too-many-branches
data: MeanSeDataFrame,
**kwargs,
) -> Tuple[plt.Figure, plt.Axes]:
r"""Plot course of heart rate as mean ± standard error over phases (and subphases) of a psychological protocol.
The correct plot is automatically inferred from the provided data:
* only ``phase`` index level: plot phases over x axis
* ``phase`` and ``subphase`` index levels: plot subphases over x axis, highlight phases as vertical spans
* additionally: ``condition`` level: plot data of different conditions individually
(corresponds to ``hue`` parameter in :func:`~biopsykit.plotting.lineplot`)
Parameters
----------
data : :class:`~biopsykit.utils.datatype_helper.MeanSeDataFrame`
Heart rate data to plot. Must be provided as ``MeanSeDataFrame`` with columns ``mean`` and ``se``
computed over phases (and, if available, subphases)
**kwargs
additional parameters to be passed to the plot, such as:
* ``ax``: pre-existing axes for the plot. Otherwise, a new figure and axes object is created and returned.
* ``figsize``: tuple specifying figure dimensions
* ``palette``: color palette to plot data from different conditions. If ``palette`` is a str then it is
assumed to be the name of a ``fau_colors`` palette (``fau_colors.cmaps._fields``).
* ``is_relative``: boolean indicating whether heart rate data is relative (in % relative to baseline)
or absolute (in bpm). Default: ``False``
* ``order``: list specifying the order of categorical values (i.e., conditions) along the x axis.
* ``x_offset``: offset value to move different groups along the x axis for better visualization.
Default: 0.05
* ``xlabel``: label of x axis. Default: "Subphases" (if subphases are present)
or "Phases" (if only phases are present).
* ``ylabel``: label of y axis. Default: ":math:`\Delta HR [%]`"
* ``ylims``: list to manually specify y axis limits, float to specify y axis margin
(see :meth:`~matplotlib.axes.Axes.margins()` for further information), or ``None`` to automatically infer
y axis limits.
* ``marker``: string or list of strings to specify marker style.
If ``marker`` is a string, then marker of each line will have the same style.
If ``marker`` is a list, then marker of each line will have a different style.
* ``linestyle``: string or list of strings to specify line style.
If ``linestyle`` is a string, then each line will have the same style.
If ``linestyle`` is a list, then each line will have a different style.
Returns
-------
fig : :class:`~matplotlib.figure.Figure`
figure object
ax : :class:`~matplotlib.axes.Axes`
axes object
See Also
--------
:func:`~biopsykit.plotting.lineplot`
Plot data as lineplot with mean and standard error
"""
fig, ax = _plot_get_fig_ax(**kwargs)
kwargs.update({"ax": ax})
num_conditions = 1
if "condition" in data.index.names:
num_conditions = len(data.index.names)
# get all plot parameter
palette = kwargs.get("palette", cmaps.faculties)
palette = _get_palette(palette, num_conditions)
sns.set_palette(palette)
ylabel_default = _hr_mean_plot_params.get("ylabel")
if kwargs.get("is_relative", False):
ylabel_default = r"$\Delta$ HR [%]"
ylabel = kwargs.get("ylabel", ylabel_default)
ylims = kwargs.get("ylims", None)
phase_dict = _hr_mean_get_phases_subphases(data)
num_phases = len(phase_dict)
num_subphases = [len(arr) for arr in phase_dict.values()]
x_vals = _hr_mean_get_x_vals(num_phases, num_subphases)
# build x axis, axis limits and limits for phase spans
dist = np.mean(np.ediff1d(x_vals))
x_lims = np.append(x_vals, x_vals[-1] + dist)
x_lims = x_lims - 0.5 * np.ediff1d(x_lims, to_end=dist)
if "condition" in data.index.names:
data_grp = {key: df for key, df in data.groupby("condition")} # pylint:disable=unnecessary-comprehension
order = kwargs.get("order", list(data_grp.keys()))
data_grp = {key: data_grp[key] for key in order}
for i, (key, df) in enumerate(data_grp.items()):
_hr_mean_plot(df, x_vals, key, index=i, **kwargs)
else:
_hr_mean_plot(data, x_vals, "Data", index=0, **kwargs)
# add decorators to phases if subphases are present
if sum(num_subphases) > 0:
_hr_mean_plot_subphase_annotations(phase_dict, x_lims, **kwargs)
# customize x axis
ax.tick_params(axis="x", bottom=True)
ax.set_xticks(x_vals)
ax.set_xlim(np.min(x_lims), np.max(x_lims))
_hr_mean_style_x_axis(ax, phase_dict, num_subphases)
# customize y axis
ax.tick_params(axis="y", which="major", left=True)
ax.set_ylabel(ylabel)
_hr_mean_plot_set_axis_lims(ylims, ax)
# customize legend
if "condition" in data.index.names:
_hr_mean_add_legend(**kwargs)
fig.tight_layout()
return fig, ax
def _hr_mean_plot_set_axis_lims(ylims: Union[Sequence[float], float], ax: plt.Axes):
0
Source : ecg.py
with MIT License
from mad-lab-fau
def _edr_peak_peak_interval(
ecg: pd.DataFrame, peaks: np.array, troughs: np.array # pylint:disable=unused-argument
) -> np.array:
"""Estimate respiration signal from ECG based on `peak-peak-interval` method.
The `peak-peak-interval` method is based on computing RR intervals.
.. note::
To ensure the same length for the resulting array after computing successive differences
the first value will be replaced by the mean of all RR intervals in the array
Parameters
----------
ecg : :class:`~pandas.Series`
pandas series with ecg signal (unused but needed for consistent method signature)
peaks : :class:`~numpy.array`
array with peak indices
troughs : :class:`~numpy.array`
array with trough indices (unused but needed for consistent method signature)
Returns
-------
:class:`~numpy.array`
estimated raw respiration signal
"""
peak_interval = np.ediff1d(peaks, to_begin=0)
peak_interval[0] = peak_interval.mean()
return peak_interval
def _correct_outlier_correlation(rpeaks: pd.DataFrame, bool_mask: np.array, corr_thres: float, **kwargs) -> np.array:
0
Source : ecg.py
with MIT License
from mad-lab-fau
def _correct_outlier_correlation(rpeaks: pd.DataFrame, bool_mask: np.array, corr_thres: float, **kwargs) -> np.array:
"""Apply outlier correction method 'correlation'.
This function compute the cross-correlation coefficient between every single beat and the average of all detected
beats. It marks beats as outlier if the cross-correlation coefficient is below a certain threshold.
Parameters
----------
rpeaks : :class:`~pandas.DataFrame`
dataframe with detected R peaks. Output from :meth:`biopsykit.signals.ecg.EcgProcessor.ecg_process()`
bool_mask : :class:`numpy.array`
boolean array with beats marked as outlier.
Results of this outlier correction method will be combined with the array using a logical 'or'
corr_thres : float
threshold for cross-correlation coefficient. Beats below that threshold will be marked as outlier
**kwargs : additional parameters required for this outlier function, such as:
* ecg_signal :class:`~pandas.DataFrame`
dataframe with processed ECG signal. Output from :meth:`biopsykit.signals.ecg.EcgProcessor.ecg_process()`
* sampling_rate : float
sampling rate of recorded data in Hz
Returns
-------
:class:`numpy.array`
boolean array with beats marked as outlier. Logical 'or' combination of ``bool_mask`` and results from
this algorithm
"""
ecg_signal = kwargs.get("ecg_signal", None)
sampling_rate = kwargs.get("sampling_rate", None)
if any(v is None for v in [ecg_signal, sampling_rate]):
raise ValueError(
"Cannot apply outlier correction method 'correlation' because not all additionally required arguments "
"were provided! Make sure you pass the following arguments: 'ecg_signal', 'sampling_rate'."
)
# signal outlier
# segment individual heart beats
heartbeats = nk.ecg_segment(ecg_signal["ECG_Clean"], rpeaks["R_Peak_Idx"], int(sampling_rate))
heartbeats = nk.epochs_to_df(heartbeats)
heartbeats_pivoted = heartbeats.pivot(index="Time", columns="Label", values="Signal")
heartbeats = heartbeats.set_index("Index")
heartbeats = heartbeats.loc[heartbeats.index.intersection(rpeaks["R_Peak_Idx"])].sort_values(by="Label")
heartbeats = heartbeats[~heartbeats.index.duplicated()]
heartbeats_pivoted.columns = heartbeats.index
# compute the average over all heart beats and compute the correlation coefficient between all beats and
# the average
mean_beat = heartbeats_pivoted.mean(axis=1)
heartbeats_pivoted["mean"] = mean_beat
corr_coeff = heartbeats_pivoted.corr()["mean"].abs().sort_values(ascending=True)
corr_coeff = corr_coeff.drop("mean")
# compute RR intervals (in seconds) from R Peak Locations
rpeaks["RR_Interval"] = np.ediff1d(rpeaks["R_Peak_Idx"], to_end=0) / sampling_rate
# signal outlier: drop all beats that are below a correlation coefficient threshold
return np.logical_or(bool_mask, rpeaks["R_Peak_Idx"].isin(corr_coeff[corr_coeff < corr_thres].index))
def _correct_outlier_quality(
0
Source : ecg.py
with MIT License
from mad-lab-fau
def _correct_outlier_statistical_rr_diff(
rpeaks: pd.DataFrame, bool_mask: np.array, stat_thres: float, **kwargs # pylint:disable=unused-argument
) -> np.array:
"""Apply outlier correction method 'statistical_rr_diff'.
This function marks beats as outlier if their successive differences of RR intervals are within the xx % highest or
lowest values, i.e. if their z-score is above a threshold, e.g. ``1.96`` => 5% (2.5% highest, 2.5% lowest values);
``2.576`` => 1% (0.5% highest, 0.5% lowest values).
Parameters
----------
rpeaks : :class:`~pandas.DataFrame`
dataframe with detected R peaks. Output from :meth:`biopsykit.signals.ecg.EcgProcessor.ecg_process()`
bool_mask : :class:`numpy.array`
boolean array with beats marked as outlier.
Results of this outlier correction method will be combined with the array using a logical 'or'
stat_thres : float
threshold for z-score. Beats above that threshold will be marked as outlier
Returns
-------
:class:`numpy.array`
boolean array with beats marked as outlier. Logical 'or' combination of ``bool_mask`` and results from
this algorithm
"""
# statistical outlier: remove the x% highest and lowest successive differences of RR intervals
# (1.96 std = 5% outlier, 2.576 std = 1% outlier)
diff_rri = np.ediff1d(rpeaks["RR_Interval"], to_end=0)
z_score = (diff_rri - np.nanmean(diff_rri)) / np.nanstd(diff_rri, ddof=1)
return np.logical_or(bool_mask, np.abs(z_score) > stat_thres)
def _correct_outlier_artifact(
0
Source : plotting.py
with MIT License
from mad-lab-fau
def _get_rr_intervals(rpeaks: pd.DataFrame, sampling_rate: Optional[int] = 256) -> np.array:
rri = (np.ediff1d(rpeaks["R_Peak_Idx"], to_begin=0) / sampling_rate) * 1000
rri = rri[1:]
return rri
0
Source : rsp.py
with MIT License
from mad-lab-fau
def _rsp_rate(cls, extrema: np.array, sampling_rate: int, desired_length: int) -> np.array:
"""Compute continuous respiration rate from extrema values.
Parameters
----------
extrema: :class:`numpy.array`
List of respiration extrema (peaks or troughs)
sampling_rate : float
Sampling rate of recorded data
desired_length : int
Desired length of the output signal
Returns
-------
:class:`numpy.array`
Respiration rate array interpolated to desired length
"""
rsp_rate_raw = (sampling_rate * 60) / np.ediff1d(extrema)
# remove last sample
x_old = extrema[:-1]
x_new = np.linspace(x_old[0], x_old[-1], desired_length)
return nk.signal_interpolate(x_old, rsp_rate_raw, x_new, method="linear")