Here are examples of the Python API web.utils.standardize_string, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
1 Examples
3
Example 1
Project: word-embeddings-benchmarks Source File: test_embedding.py
def test_standardize():
    """Check that ``Embedding.standardize_words`` agrees with ``standardize_string``.

    The same word2vec file is loaded twice so that both call styles can be
    exercised: ``inplace=False`` (result returned as ``w2``, original ``w``
    kept for comparison) and ``inplace=True`` (``w3`` is modified directly).
    For every word of the original vocabulary whose standardized form is
    truthy, the vector stored under that standardized form must equal the
    vector of the original word.
    """
    url = "https://www.dropbox.com/s/rm756kjvckxa5ol/top100-sgns-googlenews-300.bin?dl=1"
    # NOTE(review): presumably downloads (and caches) the file - confirm
    # against _fetch_file; this test likely needs network access.
    file_name = _fetch_file(url, "test")

    w = Embedding.from_word2vec(file_name, binary=True)
    w2 = w.standardize_words(inplace=False, lower=False, clean_words=True)
    w3 = Embedding.from_word2vec(file_name, binary=True)

    # NOTE(review): 95 is the expected vocabulary size after standardization
    # for this particular fixture file - confirm if the fixture changes.
    assert len(w2.words) == 95
    for word in w.vocabulary.words:
        # Compute the standardized form once; it is both the guard and the key.
        standardized = standardize_string(word, lower=False, clean_words=True)
        if standardized:
            assert np.array_equal(w[word], w2[standardized])

    w3.standardize_words(inplace=True, clean_words=True, lower=False)
    assert len(w3.words) == 95
    for word in w.vocabulary.words:
        # Use exactly the same arguments for the guard and the lookup so a word
        # is only checked when the key being looked up is itself non-empty.
        standardized = standardize_string(word, lower=False, clean_words=True)
        if standardized:
            assert np.array_equal(w[word], w3[standardized])