Below are examples of the Python API `scrapi.processing.elasticsearch.ElasticsearchProcessor.manager.es`, taken from open-source projects. Vote for the examples you find most useful and appropriate.
4 Examples
0
Example 1
Project: scrapi Source File: test_migrations.py
@pytest.mark.django_db
@pytest.mark.cassandra
@pytest.mark.parametrize('processor_name', ['postgres', 'cassandra'])
def test_rename(processor_name, monkeypatch):
    """Migrating with ``rename`` re-labels stored documents under a new source name.

    Processes a raw/normalized pair, runs ``tasks.migrate(rename, ...)`` to
    move it from its original source to ``'wwe_news'``, and asserts the
    renamed document is retrievable with the new source label.
    """
    # Patch the module-level Elasticsearch client through monkeypatch so it is
    # restored automatically at teardown.  The original code saved
    # ElasticsearchProcessor.manager.es but mocked (and never restored)
    # scrapi.processing.elasticsearch.es — a mismatched save/restore pair that
    # leaked the MagicMock into subsequent tests.
    monkeypatch.setattr(scrapi.processing.elasticsearch, 'es', mock.MagicMock())
    monkeypatch.setattr('scrapi.settings.CANONICAL_PROCESSOR', processor_name)

    processor = get_processor(processor_name)
    processor.process_raw(RAW)
    processor.process_normalized(RAW, NORMALIZED)

    # Sanity-check the document landed under its original source label.
    queryset = processor.get(source=RAW['source'], docID=RAW['docID'])
    old_source = NORMALIZED['shareProperties']['source']
    assert queryset.normalized.attributes['shareProperties']['source'] == utils.RECORD['shareProperties']['source']
    assert queryset.normalized.attributes['shareProperties']['source'] == old_source

    # Register a harvester under the target name so the migration can resolve it.
    # (The original also deep-copied utils.RECORD into an unused local; removed.)
    test_harvester.short_name = 'wwe_news'
    registry['wwe_news'] = test_harvester
    try:
        tasks.migrate(rename, sources=[old_source], target='wwe_news', dry=False)
        queryset = processor.get(source='wwe_news', docID=RAW['docID'])
        assert queryset.normalized.attributes['shareProperties']['source'] == 'wwe_news'
    finally:
        # Undo the global registry/harvester mutations even if an assertion fails,
        # so later tests see the original state.
        test_harvester.short_name = RAW['source']
        registry['test'] = test_harvester
        del registry['wwe_news']
0
Example 2
Project: scrapi Source File: test_migrations.py
@pytest.mark.django_db
@pytest.mark.cassandra
@pytest.mark.parametrize('processor_name', ['postgres', 'cassandra'])
def test_delete(processor_name, monkeypatch):
    """Migrating with ``delete`` removes the document from the processor.

    Processes a raw/normalized pair, verifies it is retrievable, runs
    ``tasks.migrate(delete, ...)`` over its source, and asserts the lookup
    then comes back empty.
    """
    # Patch the module-level Elasticsearch client through monkeypatch so it is
    # restored automatically.  The original saved ElasticsearchProcessor.manager.es
    # but mocked (and never restored) scrapi.processing.elasticsearch.es —
    # a mismatched save/restore pair.
    monkeypatch.setattr(scrapi.processing.elasticsearch, 'es', mock.MagicMock())
    monkeypatch.setattr('scrapi.settings.CANONICAL_PROCESSOR', processor_name)
    print('Canonical Processor is {}'.format(scrapi.settings.CANONICAL_PROCESSOR))

    processor = get_processor(processor_name)
    processor.process_raw(RAW)
    processor.process_normalized(RAW, NORMALIZED)

    # Document exists before the delete migration...
    queryset = processor.get(docID=RAW['docID'], source=RAW['source'])
    assert queryset

    tasks.migrate(delete, sources=[RAW['source']], dry=False)

    # ...and is gone afterwards.
    queryset = processor.get(docID=RAW['docID'], source=RAW['source'])
    assert not queryset
0
Example 3
Project: scrapi Source File: test_migrations.py
@pytest.mark.django_db
@pytest.mark.cassandra
@pytest.mark.parametrize('processor_name', ['postgres', 'cassandra'])
def test_renormalize(processor_name, monkeypatch):
    """Migrating with ``renormalize`` normalizes raw-only documents.

    Creates a raw document with no normalized counterpart, runs
    ``tasks.migrate(renormalize, ...)``, and asserts the new document is
    then retrievable from the processor.
    """
    # Set up: patch the module-level ES client via monkeypatch so teardown is
    # automatic.  The original saved ElasticsearchProcessor.manager.es but
    # mocked scrapi.processing.elasticsearch.es, then "restored" the module
    # attribute with the manager's client — a mismatched save/restore pair.
    monkeypatch.setattr(scrapi.processing.elasticsearch, 'es', mock.MagicMock())
    monkeypatch.setattr('scrapi.settings.CANONICAL_PROCESSOR', processor_name)

    # Process raw and normalized with fake docs.
    processor = get_processor(processor_name)
    processor.process_raw(RAW)
    processor.process_normalized(RAW, NORMALIZED)

    # Check to see those docs were processed.
    queryset = processor.get(docID=RAW['docID'], source=RAW['source'])
    assert queryset

    # Create a new document to be renormalized.
    new_raw = copy.deepcopy(RAW)
    new_raw.attributes['docID'] = 'get_the_tables'
    new_raw.attributes['doc'] = new_raw.attributes['doc'].encode('utf-8')

    # This is basically like running the improved harvester, right?
    processor.create(new_raw.attributes)
    try:
        tasks.migrate(renormalize, sources=[RAW['source']], dry=False)
        queryset = processor.get(docID='get_the_tables', source=RAW['source'])
        assert queryset
    finally:
        # Always remove the extra document so other tests are unaffected.
        processor.delete(docID='get_the_tables', source=RAW['source'])
0
Example 4
Project: scrapi Source File: test_migrations.py
@pytest.mark.django_db
@pytest.mark.cassandra
@pytest.mark.elasticsearch
@pytest.mark.parametrize('canonical', ['postgres', 'cassandra'])
@pytest.mark.parametrize('destination', ['postgres', 'cassandra', 'elasticsearch'])
def test_cross_db(canonical, destination, monkeypatch, index='test'):
    """``cross_db`` migration copies documents from the canonical store to another backend.

    For every (canonical, destination) backend pair, seeds the canonical
    processor, migrates, then asserts the document appears in the destination
    while remaining in the canonical store.
    """
    if canonical == destination:
        # Nothing to migrate when source and target are the same backend.
        return
    monkeypatch.setattr('scrapi.settings.CANONICAL_PROCESSOR', canonical)
    if destination != 'elasticsearch':
        # Patch (and auto-restore) the module-level ES client.  The original
        # saved ElasticsearchProcessor.manager.es but mocked and restored
        # scrapi.processing.elasticsearch.es — a mismatched save/restore pair.
        monkeypatch.setattr(scrapi.processing.elasticsearch, 'es', mock.MagicMock())
    else:
        monkeypatch.setattr('scrapi.settings.ELASTIC_INDEX', 'test')

    # Get the test documents into the canonical processor.
    canonical_processor = get_processor(canonical)
    canonical_processor.process_raw(RAW)
    canonical_processor.process_normalized(RAW, NORMALIZED)
    destination_processor = get_processor(destination)

    # Check that the canonical copy is there, and the destination copy is not.
    canonical_doc = canonical_processor.get(docID=RAW['docID'], source=RAW['source'])
    assert canonical_doc
    if destination != 'elasticsearch':
        destination_doc = destination_processor.get(docID=RAW['docID'], source=RAW['source'])
    else:
        # Elasticsearch lookups additionally need the index name.
        destination_doc = destination_processor.get(docID=RAW['docID'], index=index, source=RAW['source'])
    assert not destination_doc

    # Migrate from the canonical store to the destination.
    tasks.migrate(cross_db, target_db=destination, dry=False, sources=['test'], index=index)

    # Check that the document made it to the destination and is still in the canonical store.
    if destination != 'elasticsearch':
        destination_doc = destination_processor.get(docID=RAW['docID'], source=RAW['source'])
        assert destination_doc
    else:
        destination_doc = destination_processor.get(docID=RAW['docID'], index=index, source=RAW['source'])
        assert destination_doc.normalized
    canonical_doc = canonical_processor.get(docID=RAW['docID'], source=RAW['source'])
    assert canonical_doc