scrapi.base.helpers.compose

Here are examples of the Python API scrapi.base.helpers.compose taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

8 Examples 7

Example 1

Project: scrapi Source File: __init__.py
    def format_property(self, property):
        """Build the schema entry for a single Dublin Core style property.

        Returns a ``(property, (dc_xpath, ns0_xpath, transform))`` tuple where
        the two XPath strings look the property up under the ``dc`` and ``ns0``
        prefixes. ``transform`` is ``self.resolve_property``; for the ``'date'``
        property it is additionally wrapped so that the resolved value is
        coerced to a list and every element is passed through
        ``null_on_error(datetime_formatter)``.

        The parameter name shadows the ``property`` builtin; it is kept as-is
        because it is part of the method's signature.
        """
        transform = self.resolve_property

        if property == 'date':
            def format_each(values):
                # The wrapper is created per call, as the original lambda did.
                safe_formatter = null_on_error(datetime_formatter)
                return [safe_formatter(value) for value in values]

            # NOTE(review): compose presumably applies right-to-left
            # (resolve -> coerce_to_list -> format_each) -- confirm in
            # scrapi.base.helpers.
            transform = compose(format_each, coerce_to_list, transform)

        dc_xpath = '//dc:{}/node()'.format(property)
        ns0_xpath = '//ns0:{}/node()'.format(property)
        return property, (dc_xpath, ns0_xpath, transform)

Example 2

Project: scrapi Source File: datacite.py
Function: schema
    @property
    def schema(self):
        """Return ``self._schema`` updated with this harvester's overrides.

        The overrides replace the ``description`` entry and the ``uris``
        sub-mapping; both URI fields read from the ``dc:identifier`` nodes.
        """
        identifier_xpath = '//dc:identifier/node()'

        # NOTE(review): compose presumably runs oai_extract_dois first and then
        # single_result on its output -- confirm in scrapi.base.helpers.
        canonical_uri = (identifier_xpath, compose(single_result, oai_extract_dois))
        object_uris = (identifier_xpath, oai_extract_dois)

        overrides = {
            "description": ("//dc:description/node()", get_second_description),
            "uris": {
                "canonicalUri": canonical_uri,
                "objectUris": object_uris,
            },
        }
        return updated_schema(self._schema, overrides)

Example 3

Project: scrapi Source File: neurovault.py
Function: schema
    @property
    def schema(self):
        """Return the field mapping for this harvester.

        Each value is either a source path string or a tuple of one or more
        source paths followed by a callable that receives the looked-up
        values.
        """
        def as_pair(x, y):
            # Collect both looked-up values into one list.
            return [x, y]

        def doi_and_paper_urls(x, y):
            # x is the '/DOI' value; it is turned into a dx.doi.org URL when
            # truthy, otherwise replaced by None.
            doi_url = 'http://dx.doi.org/{}'.format(x) if x else None
            return [doi_url, y]

        # NOTE(review): filter_none presumably drops the None entries produced
        # above -- confirm against its definition.
        uris = {
            'objectUris': ('/url', '/full_dataset_url', compose(filter_none, as_pair)),
            'descriptorUris': ('/DOI', '/paper_url', compose(filter_none, doi_and_paper_urls)),
            'canonicalUri': '/url',
        }

        return {
            'contributors': ('/authors', process_contributors),
            'uris': uris,
            'title': '/name',
            'providerUpdatedDateTime': ('/modify_date', datetime_formatter),
            'description': '/description',
            'otherProperties': build_properties(
                ('owner_name', '/owner_name'),
            ),
        }

Example 4

Project: scrapi Source File: biomedcentral.py
    @property
    def schema(self):
        """Return the field mapping for this harvester.

        Each value is a source path string, a nested mapping, or a tuple of
        source paths followed by a callable applied to the looked-up values.
        """
        def creator_names(authors):
            # Pull the 'creator' value out of every author record.
            return [author['creator'] for author in authors]

        def open_access_start(x, y):
            # x is the '/openaccess' value, y the '/publicationDate' value;
            # the date is only reported when the flag is the string 'true'.
            if x == 'true':
                return y
            return None

        def genre_as_list(x):
            # Wrap a truthy value in a list; anything falsy becomes [].
            if x:
                return [x]
            return []

        other_fields = (
            ('url', '/url'),
            ('doi', '/doi'),
            ('isbn', '/isbn'),
            ('printIsbn', '/printIsbn'),
            ('electronicIsbn', '/electronicIsbn'),
            ('volume', '/volume'),
            ('number', '/number'),
            ('startingPage', '/startingPage'),
            ('copyright', '/copyright'),
            ('identifier', '/identifier'),
        )

        return {
            'contributors': ('/creators', compose(default_name_parser, creator_names)),
            'uris': ('/url', process_urls),
            'title': '/title',
            'providerUpdatedDateTime': ('/publicationDate', datetime_formatter),
            'description': '/abstract',
            'freeToRead': {
                'startDate': ('/openaccess', '/publicationDate', open_access_start),
            },
            'publisher': {
                'name': '/publisher',
            },
            'subjects': ('/genre', genre_as_list),
            'otherProperties': build_properties(*other_fields),
        }

Example 5

Project: scrapi Source File: crossref.py
    @property
    def schema(self):
        """Return the field mapping for this harvester.

        Each value is a source path string, a nested mapping, or a tuple of
        source paths followed by a callable applied to the looked-up values.
        """
        def first_or_blank(x):
            # First element when x is truthy, otherwise ''.
            if x:
                return x[0]
            return ''

        def first_if_list(x):
            # A non-empty list yields its first element; anything else is
            # passed through, with falsy values becoming ''.
            if isinstance(x, list) and x:
                return x[0]
            return x or ''

        def join_date_parts(x):
            # Only the first entry of the date-parts value is used.
            return ' '.join(str(part) for part in x[0])

        def build_contributors(x):
            people = []
            for entry in x:
                # Missing name parts are rendered by format() as 'None'.
                full_name = '{} {}'.format(entry.get('given'), entry.get('family'))
                people.append(process_contributor(full_name, entry.get('ORCID')))
            return people

        def list_or_empty(x):
            return x or []

        def sponsorships_or_empty(x):
            if not x:
                return []
            return process_sponsorships(x)

        def lowercase_all(x, y):
            # Either argument may be falsy; treat it as an empty list.
            combined = (x or []) + (y or [])
            return [tag.lower() for tag in combined]

        # NOTE(review): 'issue' and 'volume' appear twice below; kept as in the
        # original because build_properties' handling of duplicates is not
        # visible here.
        other_fields = (
            ('journalTitle', '/container-title'),
            ('volume', '/volume'),
            ('issue', '/issue'),
            ('publisher', '/publisher'),
            ('type', '/type'),
            ('ISSN', '/ISSN'),
            ('ISBN', '/ISBN'),
            ('member', '/member'),
            ('score', '/score'),
            ('issued', '/issued'),
            ('deposited', '/deposited'),
            ('indexed', '/indexed'),
            ('page', '/page'),
            ('issue', '/issue'),
            ('volume', '/volume'),
            ('referenceCount', '/reference-count'),
            ('updatePolicy', '/update-policy'),
            ('depositedTimestamp', '/deposited/timestamp'),
        )

        return {
            'title': ('/title', first_or_blank),
            'description': ('/subtitle', first_if_list),
            'providerUpdatedDateTime': (
                '/issued/date-parts',
                compose(datetime_formatter, join_date_parts),
            ),
            'uris': {
                'canonicalUri': '/URL',
            },
            'contributors': ('/author', compose(build_contributors, list_or_empty)),
            'sponsorships': ('/funder', sponsorships_or_empty),
            'tags': ('/subject', '/container-title', lowercase_all),
            'subjects': ('/subject', '/container-title', lowercase_all),
            'otherProperties': build_properties(*other_fields),
        }

Example 6

Project: scrapi Source File: nih.py
    @property
    def schema(self):
        """Return the field mapping for this harvester.

        Each value is an XPath string, a nested mapping, or a tuple of XPath
        strings followed by a callable applied to the looked-up values.
        """
        application_id = "//APPLICATION_ID/node()"

        uris = {
            "canonicalUri": (application_id, compose(self.construct_project_url, single_result)),
            "descriptorUris": (
                application_id,
                "//FOA_NUMBER/node()",
                self.construct_descriptor_uris,
            ),
        }

        # NOTE(review): 'ORG_CONTRY' is reproduced exactly as in the original;
        # confirm whether the feed really spells the element that way.
        other_fields = (
            ("applicationID", "//APPLICATION_ID/node()"),
            ('activity', '//ACTIVITY/node()'),
            ('administeringIC', '//ADMINISTERING_IC/node()'),
            ('arraFunded', '//ARRA_FUNDED/node()'),
            ('budgetStart', '//BUDGET_START/node()'),
            ('budgetEnd', '//BUDGET_END/node()'),
            ('FOANumber', '//FOA_NUMBER/node()'),
            ('fullProjectNumber', '//FULL_PROJECT_NUM/node()'),
            ('fundingICs', '//FUNDING_ICs/node()'),
            ('fiscalYear', '//FY/node()'),
            ('NIHSpendingCats', '//NIH_SPENDING_CATS/@xsi:nil'),
            ('organizationCity', '//ORG_CITY/node()'),
            ('organizationCountry', '//ORG_CONTRY/node()'),
            ('organizationDistrict', '//ORG_DISTRICT/node()'),
            ('organizationDUNS', '//ORG_DUNS/node()'),
            ('organizationDept', '//ORG_DEPT/node()'),
            ('organizationFIPS', '//ORG_FIPS/node()'),
            ('organizationState', '//ORG_STATE/node()'),
            ('organizationZipcode', '//ORG_ZIPCODE/node()'),
            ('ICName', '//IC_NAME/node()'),
            ('organizationName', '//ORG_NAME/node()'),
            ('projectStart', '//PROJECT_START/node()'),
            ('projectEnd', '//PROJECT_END/node()'),
            ('PHR', '//PHR/node()'),
            ('serialNumber', '//SERIAL_NUMBER/node()'),
            ('studySection', '//STUDY_SECTION/node()'),
            ('studySectionName', '//STUDY_SECTION_NAME/node()'),
            ('supportYear', '//SUPPORT_YEAR/node()'),
            ('suffix', '//SUFFIX/node()'),
            ('subProjectID', '//SUBPROJECT_ID/@xsi:nil'),
            ('totalCost', '//TOTAL_COST/node()'),
            ('totalCostSubProject', '//TOTAL_COST_SUB_PROJECT/node()'),
            ('coreProjectNumber', '//CORE_PROJECT_NUM/node()'),
            ('CFDACode', '//CFDA_CODE/node()'),
            ('programOfficerName', '//PROGRAM_OFFICER_NAME/node()'),
            ('edInstType', '//ED_INST_TYPE/node()'),
            ('awardNoticeDate', '//AWARD_NOTICE_DATE/node()'),
            ('fundingMechanism', '//FUNDING_MECHANISM/node()'),
        )

        return {
            "contributors": ('//PIS/PI/PI_NAME/node()', '//ORG_NAME', nih_name_parser),
            "uris": uris,
            # NOTE(review): unlike the other paths this one has no leading
            # '//' -- kept byte-for-byte; confirm it is intentional.
            "providerUpdatedDateTime": (
                "AWARD_NOTICE_DATE/node()",
                compose(datetime_formatter, single_result),
            ),
            "title": ('//PROJECT_TITLE/node()', single_result),
            # A plain string: the original's parentheses did not make a tuple.
            "tags": '//PROJECT_TERMSX/TERM/node()',
            "otherProperties": build_properties(*other_fields),
        }

Example 7

Project: scrapi Source File: noaa_nodc.py
Function: schema
    @property
    def schema(self):
        """Return the field mapping for this harvester.

        Each value is a nested mapping or a tuple of an XPath string followed
        by a callable applied to the looked-up value. Most paths are built
        from the shared identification and citation prefixes defined below.
        """
        identification = './gmd:identificationInfo/gmd:MD_DataIdentification/'
        citation = identification + 'gmd:citation/gmd:CI_Citation/'
        responsible_party = citation + 'gmd:citedResponsibleParty/gmd:CI_ResponsibleParty'

        def to_canonical_url(x):
            # The base URL is read from self when the transform runs, and is
            # used as a format template for the identifier.
            return str(self.canonical_base_url).format(x)

        def keywords(x):
            # Thin wrapper kept so filter_keywords is looked up at call time.
            return filter_keywords(x)

        return {
            'title': (citation + 'gmd:title', compose(xml_text_only, single_result)),
            'description': (identification + 'gmd:abstract', compose(xml_text_only, single_result)),
            'contributors': (responsible_party, compose(parse_contributors, filter_to_contributors)),
            'uris': {
                'canonicalUri': (
                    './gmd:fileIdentifier',
                    compose(to_canonical_url, xml_text_only, single_result),
                ),
            },
            'publisher': (
                responsible_party,
                compose(extract_organization, single_result, filter_to_publishers),
            ),
            'providerUpdatedDateTime': (
                './gmd:dateStamp/gco:DateTime/node()',
                compose(datetime_formatter, single_result),
            ),
            'languages': (
                './gmd:language/gmd:LanguageCode',
                compose(language_codes, xml_text_only_list, coerce_to_list),
            ),
            'subjects': (identification + 'gmd:descriptiveKeywords/gmd:MD_Keywords', keywords),
        }

Example 8

Project: scrapi Source File: osf.py
    @property
    def schema(self):
        """Return the field mapping for this harvester.

        Each value is a source path string, a nested mapping, or a tuple of a
        source path followed by a callable applied to the looked-up value.
        """
        def title_or_blank(x):
            # Falsy titles are replaced by an empty string.
            return x or ''

        # NOTE(review): compose presumably runs url_from_guid first and then
        # coerce_to_list on its result -- confirm in scrapi.base.helpers.
        uris = {
            'canonicalUri': ('/url', url_from_guid),
            'providerUris': ('/url', compose(coerce_to_list, url_from_guid)),
        }

        other_fields = (
            ('parent_title', '/parent_title'),
            ('category', '/category'),
            ('wiki_link', '/wiki_link'),
            ('is_component', '/is_component'),
            ('is_registration', '/is_registration'),
            ('parent_url', '/parent_url'),
            ('journal Id', '/journal Id'),
        )

        return {
            'contributors': ('/contributors', process_contributors),
            'title': ('/title', title_or_blank),
            'providerUpdatedDateTime': ('/date_registered', datetime_formatter),
            'description': '/description',
            'uris': uris,
            'tags': '/tags',
            'otherProperties': build_properties(*other_fields),
        }