Here are examples of the Python API scrapi.base.helpers.compose, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.
8 Examples
3
Example 1
Project: scrapi Source File: __init__.py
def format_property(self, property):
    """Build the schema entry for one property name.

    Returns a pair ``(property, (dc_xpath, ns0_xpath, transform))`` where the
    two XPath strings look the property up under the ``dc`` and ``ns0``
    prefixes and ``transform`` is the callable handed the results.

    For every property except ``'date'`` the transform is simply
    ``self.resolve_property``. For ``'date'`` it is a ``compose`` of
    ``self.resolve_property``, ``coerce_to_list`` and a step that maps
    ``null_on_error(datetime_formatter)`` over each value.
    """
    if property != 'date':
        transform = self.resolve_property
    else:
        def format_each_date(values):
            # The wrapped formatter is created per call, at the point where
            # the values are actually processed.
            safe_formatter = null_on_error(datetime_formatter)
            return [safe_formatter(value) for value in values]

        transform = compose(format_each_date, coerce_to_list, self.resolve_property)

    xpaths = (
        '//dc:{}/node()'.format(property),
        '//ns0:{}/node()'.format(property),
    )
    return property, xpaths + (transform,)
3
Example 2
@property
def schema(self):
    """Return ``self._schema`` with description and URI overrides applied.

    The overrides are merged in through ``updated_schema``. Both URI entries
    read the ``dc:identifier`` nodes: ``objectUris`` passes them to
    ``oai_extract_dois`` and ``canonicalUri`` additionally involves
    ``single_result`` via ``compose``.
    """
    base_schema = self._schema
    description_rule = ("//dc:description/node()", get_second_description)
    uri_rules = {
        "canonicalUri": ('//dc:identifier/node()', compose(single_result, oai_extract_dois)),
        "objectUris": ('//dc:identifier/node()', oai_extract_dois),
    }
    overrides = {
        "description": description_rule,
        "uris": uri_rules,
    }
    return updated_schema(base_schema, overrides)
3
Example 3
@property
def schema(self):
    """Return the mapping from normalized field names to extraction rules.

    A rule is either a bare path string or a tuple of one or more paths
    followed by the callable that receives the extracted values.
    """
    def as_pair(url, dataset_url):
        # Collect both extracted values into one list.
        return [url, dataset_url]

    def doi_link_and_paper(doi, paper_url):
        # Only build a dx.doi.org link when a DOI was actually extracted.
        if doi:
            doi_link = 'http://dx.doi.org/{}'.format(doi)
        else:
            doi_link = None
        return [doi_link, paper_url]

    uri_rules = {
        'objectUris': ('/url', '/full_dataset_url', compose(filter_none, as_pair)),
        'descriptorUris': ('/DOI', '/paper_url', compose(filter_none, doi_link_and_paper)),
        'canonicalUri': '/url',
    }
    extra_properties = build_properties(
        ('owner_name', '/owner_name'),
    )
    return {
        'contributors': ('/authors', process_contributors),
        'uris': uri_rules,
        'title': '/name',
        'providerUpdatedDateTime': ('/modify_date', datetime_formatter),
        'description': '/description',
        'otherProperties': extra_properties,
    }
0
Example 4
Project: scrapi Source File: biomedcentral.py
@property
def schema(self):
    """Return the mapping from normalized field names to extraction rules.

    A rule is a bare path string, a nested dict of rules, or a tuple of one
    or more paths followed by the callable that receives the extracted values.
    """
    def creator_names(authors):
        # Each author record carries its name under the 'creator' key.
        return [record['creator'] for record in authors]

    def open_access_start(openaccess, publication_date):
        # The publication date only counts as a start date when the record
        # is flagged with the string 'true'.
        if openaccess == 'true':
            return publication_date
        return None

    def genre_as_list(genre):
        # A missing or empty genre yields no subjects at all.
        if genre:
            return [genre]
        return []

    extra_properties = build_properties(
        ('url', '/url'),
        ('doi', '/doi'),
        ('isbn', '/isbn'),
        ('printIsbn', '/printIsbn'),
        ('electronicIsbn', '/electronicIsbn'),
        ('volume', '/volume'),
        ('number', '/number'),
        ('startingPage', '/startingPage'),
        ('copyright', '/copyright'),
        ('identifier', '/identifier'),
    )
    return {
        'contributors': ('/creators', compose(default_name_parser, creator_names)),
        'uris': ('/url', process_urls),
        'title': '/title',
        'providerUpdatedDateTime': ('/publicationDate', datetime_formatter),
        'description': '/abstract',
        'freeToRead': {
            'startDate': ('/openaccess', '/publicationDate', open_access_start),
        },
        'publisher': {
            'name': '/publisher',
        },
        'subjects': ('/genre', genre_as_list),
        'otherProperties': extra_properties,
    }
0
Example 5
Project: scrapi Source File: crossref.py
@property
def schema(self):
    """Return the mapping from normalized field names to extraction rules.

    A rule is a bare path string, a nested dict of rules, or a tuple of one
    or more paths followed by the callable that receives the extracted values.
    """
    def full_name(entry):
        # Join only the name parts that are present. Formatting both parts
        # unconditionally would put the literal text "None" into the name
        # whenever 'given' or 'family' is missing from an author entry.
        parts = (entry.get('given'), entry.get('family'))
        return ' '.join(part for part in parts if part)

    def lowered_terms(subjects, container_titles):
        # Either extracted value may be missing; treat that as an empty list.
        return [tag.lower() for tag in (subjects or []) + (container_titles or [])]

    return {
        'title': ('/title', lambda x: x[0] if x else ''),
        'description': ('/subtitle', lambda x: x[0] if (isinstance(x, list) and x) else x or ''),
        'providerUpdatedDateTime': (
            '/issued/date-parts',
            # Only the first date-parts entry is used; its parts are joined
            # with spaces before being handed to datetime_formatter.
            compose(datetime_formatter, lambda x: ' '.join([str(part) for part in x[0]]))
        ),
        'uris': {
            'canonicalUri': '/URL'
        },
        'contributors': ('/author', compose(
            lambda x: [
                process_contributor(full_name(entry), entry.get('ORCID'))
                for entry in x
            ],
            lambda x: x or []
        )),
        'sponsorships': ('/funder', lambda x: process_sponsorships(x) if x else []),
        'tags': ('/subject', '/container-title', lowered_terms),
        'subjects': ('/subject', '/container-title', lowered_terms),
        # Each property is listed exactly once; 'issue' and 'volume' were
        # previously declared twice.
        'otherProperties': build_properties(
            ('journalTitle', '/container-title'),
            ('volume', '/volume'),
            ('issue', '/issue'),
            ('publisher', '/publisher'),
            ('type', '/type'),
            ('ISSN', '/ISSN'),
            ('ISBN', '/ISBN'),
            ('member', '/member'),
            ('score', '/score'),
            ('issued', '/issued'),
            ('deposited', '/deposited'),
            ('indexed', '/indexed'),
            ('page', '/page'),
            ('referenceCount', '/reference-count'),
            ('updatePolicy', '/update-policy'),
            ('depositedTimestamp', '/deposited/timestamp')
        )
    }
0
Example 6
Project: scrapi Source File: nih.py
@property
def schema(self):
    """Return the mapping from normalized field names to extraction rules.

    A rule is a bare XPath string, a nested dict of rules, or a tuple of one
    or more XPath expressions followed by the callable that receives the
    extracted values.
    """
    return {
        "contributors": ('//PIS/PI/PI_NAME/node()', '//ORG_NAME', nih_name_parser),
        "uris": {
            "canonicalUri": ("//APPLICATION_ID/node()", compose(self.construct_project_url, single_result)),
            "descriptorUris": ("//APPLICATION_ID/node()", "//FOA_NUMBER/node()",
                               self.construct_descriptor_uris)
        },
        # NOTE(review): unlike every other expression here this path has no
        # leading '//' -- confirm that the relative form is intentional.
        "providerUpdatedDateTime": ("AWARD_NOTICE_DATE/node()", compose(datetime_formatter, single_result)),
        "title": ('//PROJECT_TITLE/node()', single_result),
        # A bare string: the parentheses that used to wrap it did not make a tuple.
        "tags": '//PROJECT_TERMSX/TERM/node()',
        "otherProperties": build_properties(
            ("applicationID", "//APPLICATION_ID/node()"),
            ('activity', '//ACTIVITY/node()'),
            ('administeringIC', '//ADMINISTERING_IC/node()'),
            ('arraFunded', '//ARRA_FUNDED/node()'),
            ('budgetStart', '//BUDGET_START/node()'),
            ('budgetEnd', '//BUDGET_END/node()'),
            ('FOANumber', '//FOA_NUMBER/node()'),
            ('fullProjectNumber', '//FULL_PROJECT_NUM/node()'),
            ('fundingICs', '//FUNDING_ICs/node()'),
            ('fiscalYear', '//FY/node()'),
            ('NIHSpendingCats', '//NIH_SPENDING_CATS/@xsi:nil'),
            ('organizationCity', '//ORG_CITY/node()'),
            # Element name corrected from the misspelled 'ORG_CONTRY', which
            # could never match the ORG_COUNTRY element.
            ('organizationCountry', '//ORG_COUNTRY/node()'),
            ('organizationDistrict', '//ORG_DISTRICT/node()'),
            ('organizationDUNS', '//ORG_DUNS/node()'),
            ('organizationDept', '//ORG_DEPT/node()'),
            ('organizationFIPS', '//ORG_FIPS/node()'),
            ('organizationState', '//ORG_STATE/node()'),
            ('organizationZipcode', '//ORG_ZIPCODE/node()'),
            ('ICName', '//IC_NAME/node()'),
            ('organizationName', '//ORG_NAME/node()'),
            ('projectStart', '//PROJECT_START/node()'),
            ('projectEnd', '//PROJECT_END/node()'),
            ('PHR', '//PHR/node()'),
            ('serialNumber', '//SERIAL_NUMBER/node()'),
            ('studySection', '//STUDY_SECTION/node()'),
            ('studySectionName', '//STUDY_SECTION_NAME/node()'),
            ('supportYear', '//SUPPORT_YEAR/node()'),
            ('suffix', '//SUFFIX/node()'),
            ('subProjectID', '//SUBPROJECT_ID/@xsi:nil'),
            ('totalCost', '//TOTAL_COST/node()'),
            ('totalCostSubProject', '//TOTAL_COST_SUB_PROJECT/node()'),
            ('coreProjectNumber', '//CORE_PROJECT_NUM/node()'),
            ('CFDACode', '//CFDA_CODE/node()'),
            ('programOfficerName', '//PROGRAM_OFFICER_NAME/node()'),
            ('edInstType', '//ED_INST_TYPE/node()'),
            ('awardNoticeDate', '//AWARD_NOTICE_DATE/node()'),
            ('fundingMechanism', '//FUNDING_MECHANISM/node()')
        )
    }
0
Example 7
@property
def schema(self):
    """Return the mapping from normalized field names to extraction rules.

    Every rule is a tuple of an XPath expression and the callable that
    receives the matched nodes. Most expressions are built from two shared
    prefixes: the data-identification element and the citation nested in it.
    """
    identification = './gmd:identificationInfo/gmd:MD_DataIdentification/'
    citation = identification + 'gmd:citation/gmd:CI_Citation/'
    # Contributors and publisher are both read from the same party elements.
    responsible_party = citation + 'gmd:citedResponsibleParty/gmd:CI_ResponsibleParty'

    def to_canonical_uri(identifier):
        # The base URL is a format template filled with the file identifier.
        template = str(self.canonical_base_url)
        return template.format(identifier)

    def keywords_to_subjects(keyword_nodes):
        return filter_keywords(keyword_nodes)

    first_text = compose(xml_text_only, single_result)
    title_rule = (citation + 'gmd:title', first_text)
    description_rule = (identification + 'gmd:abstract', compose(xml_text_only, single_result))
    contributors_rule = (responsible_party, compose(parse_contributors, filter_to_contributors))
    canonical_rule = (
        './gmd:fileIdentifier',
        compose(to_canonical_uri, xml_text_only, single_result),
    )
    publisher_rule = (
        responsible_party,
        compose(extract_organization, single_result, filter_to_publishers),
    )
    updated_rule = ('./gmd:dateStamp/gco:DateTime/node()', compose(datetime_formatter, single_result))
    languages_rule = ('./gmd:language/gmd:LanguageCode', compose(language_codes, xml_text_only_list, coerce_to_list))
    subjects_rule = (identification + 'gmd:descriptiveKeywords/gmd:MD_Keywords', keywords_to_subjects)

    return {
        'title': title_rule,
        'description': description_rule,
        'contributors': contributors_rule,
        'uris': {
            'canonicalUri': canonical_rule,
        },
        'publisher': publisher_rule,
        'providerUpdatedDateTime': updated_rule,
        'languages': languages_rule,
        'subjects': subjects_rule,
    }
0
Example 8
Project: scrapi Source File: osf.py
@property
def schema(self):
    """Return the mapping from normalized field names to extraction rules.

    A rule is a bare path string, a nested dict of rules, or a tuple of a
    path followed by the callable that receives the extracted value.
    """
    def title_or_blank(title):
        # A missing or otherwise falsy title becomes the empty string.
        return title or ''

    uri_rules = {
        'canonicalUri': ('/url', url_from_guid),
        'providerUris': ('/url', compose(coerce_to_list, url_from_guid)),
    }
    extra_properties = build_properties(
        ('parent_title', '/parent_title'),
        ('category', '/category'),
        ('wiki_link', '/wiki_link'),
        ('is_component', '/is_component'),
        ('is_registration', '/is_registration'),
        ('parent_url', '/parent_url'),
        ('journal Id', '/journal Id'),
    )
    return {
        'contributors': ('/contributors', process_contributors),
        'title': ('/title', title_or_blank),
        'providerUpdatedDateTime': ('/date_registered', datetime_formatter),
        'description': '/description',
        'uris': uri_rules,
        'tags': '/tags',
        'otherProperties': extra_properties,
    }