Here are examples of the Python API scrapi.base.helpers.build_properties, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
10 Examples
3
Example 1
Project: scrapi Source File: neurovault.py
@property
def schema(self):
    """Return the field mapping used to normalize a record.

    Each value is a source path string, a tuple of source path(s) followed
    by a callable, or a nested dict of the same shapes.
    """

    def pair_object_uris(url, dataset_url):
        # Both URLs are returned as a pair; filter_none is composed on top.
        return [url, dataset_url]

    def pair_descriptor_uris(doi, paper_url):
        # A truthy DOI is expanded into a dx.doi.org link, otherwise None is used.
        doi_link = 'http://dx.doi.org/{}'.format(doi) if doi else None
        return [doi_link, paper_url]

    uri_fields = {
        'objectUris': ('/url', '/full_dataset_url', compose(filter_none, pair_object_uris)),
        'descriptorUris': ('/DOI', '/paper_url', compose(filter_none, pair_descriptor_uris)),
        'canonicalUri': '/url',
    }
    extra_properties = build_properties(
        ('owner_name', '/owner_name'),
    )
    return {
        'contributors': ('/authors', process_contributors),
        'uris': uri_fields,
        'title': '/name',
        'providerUpdatedDateTime': ('/modify_date', datetime_formatter),
        'description': '/description',
        'otherProperties': extra_properties,
    }
0
Example 2
Project: scrapi Source File: __init__.py
@property
def formatted_properties(self):
    """Return a dict whose 'otherProperties' entry is built from property_list.

    Every item of ``self.property_list`` is passed through
    ``self.format_property`` and the results are handed to
    ``build_properties`` as positional arguments.
    """
    format_one = self.format_property
    formatted = [format_one(item) for item in self.property_list]
    return {'otherProperties': build_properties(*formatted)}
0
Example 3
@property
def schema(self):
    """Return the field mapping used to normalize a record.

    Each value is a source path string, a tuple of source path(s) followed
    by a callable, or a nested dict of the same shapes.
    """

    def creator_names(authors):
        # Pull the 'creator' value out of every author entry.
        return [author['creator'] for author in authors]

    def open_access_start(open_access, publication_date):
        # Only records flagged with the string 'true' get a start date.
        if open_access == 'true':
            return publication_date
        return None

    def genre_as_list(genre):
        # A falsy genre yields an empty list instead of a one-item list.
        if genre:
            return [genre]
        return []

    contributors = ('/creators', compose(default_name_parser, creator_names))
    extra_properties = build_properties(
        ('url', '/url'),
        ('doi', '/doi'),
        ('isbn', '/isbn'),
        ('printIsbn', '/printIsbn'),
        ('electronicIsbn', '/electronicIsbn'),
        ('volume', '/volume'),
        ('number', '/number'),
        ('startingPage', '/startingPage'),
        ('copyright', '/copyright'),
        ('identifier', '/identifier'),
    )
    return {
        'contributors': contributors,
        'uris': ('/url', process_urls),
        'title': '/title',
        'providerUpdatedDateTime': ('/publicationDate', datetime_formatter),
        'description': '/abstract',
        'freeToRead': {
            'startDate': ('/openaccess', '/publicationDate', open_access_start),
        },
        'publisher': {
            'name': '/publisher',
        },
        'subjects': ('/genre', genre_as_list),
        'otherProperties': extra_properties,
    }
0
Example 4
@property
def schema(self):
    """Return the field mapping used to normalize a record.

    Each value is a source path string, a tuple of source path(s) followed
    by a callable, or a nested dict of the same shapes.
    """

    def join_date_parts(date_parts):
        # Only the first group of parts is used; each part is stringified
        # before joining so that non-string parts are accepted.
        return ' '.join([str(part) for part in date_parts[0]])

    def build_contributors(authors):
        # One contributor per author entry, from its given/family name and ORCID.
        return [
            process_contributor(
                '{} {}'.format(entry.get('given'), entry.get('family')),
                entry.get('ORCID')
            )
            for entry in authors
        ]

    def lowered_terms(subjects, container_titles):
        # Either source may be falsy, so each falls back to an empty list.
        return [tag.lower() for tag in (subjects or []) + (container_titles or [])]

    return {
        'title': ('/title', lambda x: x[0] if x else ''),
        'description': ('/subtitle', lambda x: x[0] if (isinstance(x, list) and x) else x or ''),
        'providerUpdatedDateTime': ('/issued/date-parts', compose(datetime_formatter, join_date_parts)),
        'uris': {
            'canonicalUri': '/URL'
        },
        # The second composed function replaces a falsy author value with [].
        'contributors': ('/author', compose(build_contributors, lambda x: x or [])),
        'sponsorships': ('/funder', lambda x: process_sponsorships(x) if x else []),
        # 'tags' and 'subjects' are derived from the same two sources.
        'tags': ('/subject', '/container-title', lowered_terms),
        'subjects': ('/subject', '/container-title', lowered_terms),
        # Each property name is declared exactly once; repeating a
        # declaration would only repeat the same lookup in the output.
        'otherProperties': build_properties(
            ('journalTitle', '/container-title'),
            ('volume', '/volume'),
            ('issue', '/issue'),
            ('publisher', '/publisher'),
            ('type', '/type'),
            ('ISSN', '/ISSN'),
            ('ISBN', '/ISBN'),
            ('member', '/member'),
            ('score', '/score'),
            ('issued', '/issued'),
            ('deposited', '/deposited'),
            ('indexed', '/indexed'),
            ('page', '/page'),
            ('referenceCount', '/reference-count'),
            ('updatePolicy', '/update-policy'),
            ('depositedTimestamp', '/deposited/timestamp')
        )
    }
0
Example 5
@property
def schema(self):
    """Return the field mapping used to normalize a record.

    Each value is a source path string, a tuple of source path(s) followed
    by a callable, or a nested dict of the same shapes.
    """

    def title_or_empty(title):
        return title or ''

    def canonical_uri(name):
        return construct_url(self.url, self.dataset_path, name)

    def lowered_tag_names(tags):
        # A falsy tag collection is treated as empty.
        return [tag['name'].lower() for tag in (tags or [])]

    def open_start_date(is_open, created):
        # The creation timestamp is only parsed when the record is flagged open.
        if is_open:
            return parse(created).date().isoformat()
        return None

    uri_fields = {
        'canonicalUri': ('/name', canonical_uri),  # Construct new urls directing to LWBIN
        'objectUris': ('/url', '/extras', process_object_uris),  # Default urls from the metadata directing to source pages
    }
    extra_properties = build_properties(
        ('maintainer', '/maintainer'),
        ('maintainerEmail', '/maintainer_email'),
        ('revisionTimestamp', ('/revision_timestamp', datetime_formatter)),
        ('id', '/id'),
        ('metadataCreated', ('/metadata_created', datetime_formatter)),
        ('state', '/state'),
        ('version', '/version'),
        ('creatorUserId', '/creator_user_id'),
        ('type', '/type'),
        ('numberOfResources', '/num_resources'),
        ('numberOfTags', '/num_tags'),
        ('name', '/name'),
        ('groups', '/groups'),
    )
    return {
        'title': ('/title', title_or_empty),
        'description': '/notes',
        'providerUpdatedDateTime': ('/metadata_modified', datetime_formatter),
        'uris': uri_fields,
        'contributors': ('/author', '/author_email', process_contributors),
        'licenses': ('/license_title', '/license_url', '/license_id', process_licenses),
        'tags': ('/tags', lowered_tag_names),
        'freeToRead': {
            'startDate': ('/isopen', '/metadata_created', open_start_date),
        },
        'otherProperties': extra_properties,
    }
0
Example 6
@property
def schema(self):
    """Return the field mapping used to normalize a record.

    Each value is an XPath string or a tuple of XPath string(s) followed by
    a callable; 'uris' is a nested dict of the same shapes.
    """
    return {
        "contributors": ('//PIS/PI/PI_NAME/node()', '//ORG_NAME', nih_name_parser),
        "uris": {
            "canonicalUri": ("//APPLICATION_ID/node()", compose(self.construct_project_url, single_result)),
            "descriptorUris": ("//APPLICATION_ID/node()", "//FOA_NUMBER/node()",
                               self.construct_descriptor_uris)
        },
        # NOTE(review): unlike every other lookup here this path has no
        # leading '//' - confirm the relative form is intended.
        "providerUpdatedDateTime": ("AWARD_NOTICE_DATE/node()", compose(datetime_formatter, single_result)),
        "title": ('//PROJECT_TITLE/node()', single_result),
        "tags": ('//PROJECT_TERMSX/TERM/node()'),
        "otherProperties": build_properties(
            ("applicationID", "//APPLICATION_ID/node()"),
            ('activity', '//ACTIVITY/node()'),
            ('administeringIC', '//ADMINISTERING_IC/node()'),
            ('arraFunded', '//ARRA_FUNDED/node()'),
            ('budgetStart', '//BUDGET_START/node()'),
            ('budgetEnd', '//BUDGET_END/node()'),
            ('FOANumber', '//FOA_NUMBER/node()'),
            ('fullProjectNumber', '//FULL_PROJECT_NUM/node()'),
            ('fundingICs', '//FUNDING_ICs/node()'),
            ('fiscalYear', '//FY/node()'),
            ('NIHSpendingCats', '//NIH_SPENDING_CATS/@xsi:nil'),
            ('organizationCity', '//ORG_CITY/node()'),
            # The NIH element is named ORG_COUNTRY; a misspelled element
            # name would never match anything in the record.
            ('organizationCountry', '//ORG_COUNTRY/node()'),
            ('organizationDistrict', '//ORG_DISTRICT/node()'),
            ('organizationDUNS', '//ORG_DUNS/node()'),
            ('organizationDept', '//ORG_DEPT/node()'),
            ('organizationFIPS', '//ORG_FIPS/node()'),
            ('organizationState', '//ORG_STATE/node()'),
            ('organizationZipcode', '//ORG_ZIPCODE/node()'),
            ('ICName', '//IC_NAME/node()'),
            ('organizationName', '//ORG_NAME/node()'),
            ('projectStart', '//PROJECT_START/node()'),
            ('projectEnd', '//PROJECT_END/node()'),
            ('PHR', '//PHR/node()'),
            ('serialNumber', '//SERIAL_NUMBER/node()'),
            ('studySection', '//STUDY_SECTION/node()'),
            ('studySectionName', '//STUDY_SECTION_NAME/node()'),
            ('supportYear', '//SUPPORT_YEAR/node()'),
            ('suffix', '//SUFFIX/node()'),
            ('subProjectID', '//SUBPROJECT_ID/@xsi:nil'),
            ('totalCost', '//TOTAL_COST/node()'),
            ('totalCostSubProject', '//TOTAL_COST_SUB_PROJECT/node()'),
            ('coreProjectNumber', '//CORE_PROJECT_NUM/node()'),
            ('CFDACode', '//CFDA_CODE/node()'),
            ('programOfficerName', '//PROGRAM_OFFICER_NAME/node()'),
            ('edInstType', '//ED_INST_TYPE/node()'),
            ('awardNoticeDate', '//AWARD_NOTICE_DATE/node()'),
            ('fundingMechanism', '//FUNDING_MECHANISM/node()')
        )
    }
0
Example 7
@property
def schema(self):
    """Return the field mapping used to normalize a record.

    Each value is a source path string, a tuple of a source path followed
    by a callable, or a nested dict of the same shapes.
    """

    def title_or_empty(title):
        # A falsy title is replaced by an empty string.
        if title:
            return title
        return ''

    uri_fields = {
        'canonicalUri': ('/url', url_from_guid),
        'providerUris': ('/url', compose(coerce_to_list, url_from_guid)),
    }
    extra_properties = build_properties(
        ('parent_title', '/parent_title'),
        ('category', '/category'),
        ('wiki_link', '/wiki_link'),
        ('is_component', '/is_component'),
        ('is_registration', '/is_registration'),
        ('parent_url', '/parent_url'),
        ('journal Id', '/journal Id'),
    )
    return {
        'contributors': ('/contributors', process_contributors),
        'title': ('/title', title_or_empty),
        'providerUpdatedDateTime': ('/date_registered', datetime_formatter),
        'description': '/description',
        'uris': uri_fields,
        'tags': '/tags',
        'otherProperties': extra_properties,
    }
0
Example 8
@property
def schema(self):
    """Return the field mapping used to normalize a record.

    Each value is a source path string, a tuple of source path(s) followed
    by a callable, or a nested dict of the same shapes.
    """

    def value_or_empty(value):
        # Falsy values are replaced by an empty string.
        return value or ''

    def wrap_in_list(uri):
        return [uri]

    def publisher_entry(name):
        # A truthy name is wrapped in a dict; anything else becomes ''.
        if name:
            return {'name': name}
        return ''

    uri_fields = {
        'canonicalUri': '/uri',
        'providerUris': ('/uri', wrap_in_list),
        'objectUris': ('/pmid', '/doi', process_object_uris),
    }
    extra_properties = build_properties(
        ('journalTitle', '/journalTitle'),
        ('abstract', ('/abstract', value_or_empty)),
        ('type', '/types'),
        ('ISSN', ('/issn', value_or_empty)),
        ('number', '/number'),
        ('ISBN', '/isbn'),
        ('startPage', '/startPage'),
        ('endPage', '/endPage'),
        ('volume', '/volume'),
    )
    return {
        'title': ('/title', value_or_empty),
        'providerUpdatedDateTime': ('/date', datetime_formatter),
        'uris': uri_fields,
        'contributors': '/authors',
        'subjects': '/subjects',
        'tags': '/keywords',
        'publisher': ('/publisher', publisher_entry),
        'otherProperties': extra_properties,
    }
0
Example 9
@property
def schema(self):
    """Return the field mapping used to normalize a record.

    Each value is a source path string, a tuple of a source path followed
    by a callable, or a nested dict of the same shapes.
    """

    def format_issued(date_parts):
        # Only the first group of parts is used. Parts may be integers, and
        # str.join() rejects non-string items, so stringify each one first.
        return datetime_formatter(' '.join(str(part) for part in date_parts[0]))

    def build_contributors(authors):
        # One contributor per author entry, from its given/family name and ORCID.
        return [
            process_contributor(
                '{} {}'.format(entry.get('given'), entry.get('family')),
                entry.get('ORCID')
            )
            for entry in authors
        ]

    return {
        'title': ('/title', lambda x: x[0] if x else ''),
        'description': ('/subtitle', lambda x: x[0] if (isinstance(x, list) and x) else x or ''),
        'providerUpdatedDateTime': ('/issued/date-parts', format_issued),
        'uris': {
            'canonicalUri': '/URL'
        },
        'contributors': ('/author', build_contributors),
        'otherProperties': build_properties(
            ('referenceCount', '/reference-count'),
            ('updatePolicy', '/update-policy'),
            ('depositedTimestamp', '/deposited/timestamp'),
            ('Empty', '/trash/not-here'),
            ('Empty2', '/')
        )
    }
0
Example 10
Project: scrapi Source File: test_transformer.py
def test_arg_kwargs(self):
    """Exercise schema entries built with pack() from positional paths,
    keyword paths, and a mix of both, then check the normalized values."""

    def process_title(title, title1="test"):
        head = title[0]
        # title1 is either a list (first item is used) or a plain value.
        if isinstance(title1, list):
            return head + title1[0]
        return head + title1

    def process_title2(title1="test"):
        if isinstance(title1, list):
            return title1[0]
        return title1

    positional = ("//dc:title/node()", )
    keyword = {"title1": "//dc:title/node()"}

    overrides = {
        'title': (pack(*positional, **keyword), process_title),
        'otherProperties': build_properties(
            ('title2', (pack(*positional), process_title)),
            ('title3', (pack(**keyword), process_title2)),
            ('title4', (pack('//dc:title/node()', title1='//dc:title/node()'), process_title)),
        ),
    }
    self.harvester.schema = updated_schema(TEST_SCHEMA, overrides)

    # Normalize every harvested record before asserting on any of them.
    normalized = []
    for record in self.harvester.harvest(days_back=1):
        normalized.append(self.harvester.normalize(record))

    for result in normalized:
        assert result['title'] == "TestTest"
        assert result['otherProperties'][0]['properties']['title2'] == 'Testtest'
        assert result['otherProperties'][1]['properties']['title3'] == 'Test'
        assert result['otherProperties'][2]['properties']['title4'] == "TestTest"