scrapi.base.helpers.build_properties

Here are examples of the Python API scrapi.base.helpers.build_properties, taken from open source projects. By voting up, you can indicate which examples are most useful and appropriate.

10 Examples 7

Example 1

Project: scrapi Source File: neurovault.py
    @property
    def schema(self):
        """Return the mapping from normalized field names to source paths.

        Each value is a source path string, a tuple of source paths ending
        in a callable, or a nested dict of the same. How the mapping is
        evaluated is decided by the consumer of this property (not visible
        in this block).
        """
        def pair_up(x, y):
            return [x, y]

        def doi_link_and_paper(x, y):
            # Only build a dx.doi.org link when a DOI value is present.
            doi_link = 'http://dx.doi.org/{}'.format(x) if x else None
            return [doi_link, y]

        uri_fields = {
            'objectUris': ('/url', '/full_dataset_url', compose(filter_none, pair_up)),
            'descriptorUris': ('/DOI', '/paper_url', compose(filter_none, doi_link_and_paper)),
            'canonicalUri': '/url',
        }
        extra_fields = build_properties(
            ('owner_name', '/owner_name'),
        )

        return {
            'contributors': ('/authors', process_contributors),
            'uris': uri_fields,
            'title': '/name',
            'providerUpdatedDateTime': ('/modify_date', datetime_formatter),
            'description': '/description',
            'otherProperties': extra_fields,
        }

Example 2

Project: scrapi Source File: __init__.py
    @property
    def formatted_properties(self):
        """Return a dict whose 'otherProperties' entry is built from ``self.property_list``.

        Every item of ``self.property_list`` is passed through
        ``self.format_property`` and the results are handed to
        ``build_properties`` as positional arguments.
        """
        format_one = self.format_property
        formatted = [format_one(item) for item in self.property_list]
        return {'otherProperties': build_properties(*formatted)}

Example 3

Project: scrapi Source File: biomedcentral.py
Function: schema
    @property
    def schema(self):
        """Return the mapping from normalized field names to source paths.

        Values are source path strings, tuples of source paths ending in a
        processing callable, or nested dicts of the same.
        """
        def creator_names(authors):
            return [entry['creator'] for entry in authors]

        def open_access_start(x, y):
            # The second value is only used when the first is the literal
            # string 'true'; anything else yields None.
            if x == 'true':
                return y
            return None

        def as_subject_list(x):
            return [x] if x else []

        extra_fields = build_properties(
            ('url', '/url'),
            ('doi', '/doi'),
            ('isbn', '/isbn'),
            ('printIsbn', '/printIsbn'),
            ('electronicIsbn', '/electronicIsbn'),
            ('volume', '/volume'),
            ('number', '/number'),
            ('startingPage', '/startingPage'),
            ('copyright', '/copyright'),
            ('identifier', '/identifier')
        )

        return {
            'contributors': ('/creators', compose(default_name_parser, creator_names)),
            'uris': ('/url', process_urls),
            'title': '/title',
            'providerUpdatedDateTime': ('/publicationDate', datetime_formatter),
            'description': '/abstract',
            'freeToRead': {
                'startDate': ('/openaccess', '/publicationDate', open_access_start),
            },
            'publisher': {
                'name': '/publisher',
            },
            'subjects': ('/genre', as_subject_list),
            'otherProperties': extra_fields,
        }

Example 4

Project: scrapi Source File: crossref.py
Function: schema
    @property
    def schema(self):
        """Return the mapping from normalized field names to source paths.

        Values are source path strings, tuples of source paths ending in a
        processing callable, or nested dicts of the same.

        The 'otherProperties' list names each property exactly once; the
        earlier version passed 'issue' and 'volume' to ``build_properties``
        twice.
        """
        def lowercase_terms(x, y):
            # Either input may be missing, so treat a falsy value as an
            # empty list before concatenating.
            return [tag.lower() for tag in (x or []) + (y or [])]

        return {
            'title': ('/title', lambda x: x[0] if x else ''),
            'description': ('/subtitle', lambda x: x[0] if (isinstance(x, list) and x) else x or ''),
            # Date parts are joined into a single space-separated string
            # before being handed to datetime_formatter.
            'providerUpdatedDateTime': ('/issued/date-parts',
                                        compose(datetime_formatter, lambda x: ' '.join([str(part) for part in x[0]]))),
            'uris': {
                'canonicalUri': '/URL'
            },
            # The second callable passed to compose substitutes an empty
            # list for a falsy author value.
            'contributors': ('/author', compose(lambda x: [
                process_contributor(*[
                    '{} {}'.format(entry.get('given'), entry.get('family')),
                    entry.get('ORCID')
                ]) for entry in x
            ], lambda x: x or [])),
            'sponsorships': ('/funder', lambda x: process_sponsorships(x) if x else []),
            # 'tags' and 'subjects' are derived identically from the same
            # two source paths.
            'tags': ('/subject', '/container-title', lowercase_terms),
            'subjects': ('/subject', '/container-title', lowercase_terms),
            'otherProperties': build_properties(
                ('journalTitle', '/container-title'),
                ('volume', '/volume'),
                ('issue', '/issue'),
                ('publisher', '/publisher'),
                ('type', '/type'),
                ('ISSN', '/ISSN'),
                ('ISBN', '/ISBN'),
                ('member', '/member'),
                ('score', '/score'),
                ('issued', '/issued'),
                ('deposited', '/deposited'),
                ('indexed', '/indexed'),
                ('page', '/page'),
                ('referenceCount', '/reference-count'),
                ('updatePolicy', '/update-policy'),
                ('depositedTimestamp', '/deposited/timestamp')
            )
        }

Example 5

Project: scrapi Source File: lwbin.py
Function: schema
    @property
    def schema(self):
        """Return the mapping from normalized field names to source paths.

        Values are source path strings, tuples of source paths ending in a
        processing callable, or nested dicts of the same. The canonical URI
        helper reads ``self.url`` and ``self.dataset_path``.
        """
        def title_or_blank(x):
            return x or ''

        def canonical_from_name(x):
            return construct_url(self.url, self.dataset_path, x)

        def lowercase_tag_names(x):
            return [tag['name'].lower() for tag in (x or [])]

        def open_since(x, y):
            # No start date unless the first value is truthy.
            if not x:
                return None
            return parse(y).date().isoformat()

        uri_fields = {
            # Construct new urls directing to LWBIN
            'canonicalUri': ('/name', canonical_from_name),
            # Default urls from the metadata directing to source pages
            'objectUris': ('/url', '/extras', process_object_uris),
        }
        extra_fields = build_properties(
            ('maintainer', '/maintainer'),
            ('maintainerEmail', '/maintainer_email'),
            ('revisionTimestamp', ('/revision_timestamp', datetime_formatter)),
            ('id', '/id'),
            ('metadataCreated', ('/metadata_created', datetime_formatter)),
            ('state', '/state'),
            ('version', '/version'),
            ('creatorUserId', '/creator_user_id'),
            ('type', '/type'),
            ('numberOfResources', '/num_resources'),
            ('numberOfTags', '/num_tags'),
            ('name', '/name'),
            ('groups', '/groups'),
        )

        return {
            'title': ('/title', title_or_blank),
            'description': '/notes',
            'providerUpdatedDateTime': ('/metadata_modified', datetime_formatter),
            'uris': uri_fields,
            'contributors': ('/author', '/author_email', process_contributors),
            'licenses': ('/license_title', '/license_url', '/license_id', process_licenses),
            'tags': ('/tags', lowercase_tag_names),
            'freeToRead': {
                'startDate': ('/isopen', '/metadata_created', open_since),
            },
            'otherProperties': extra_fields,
        }

Example 6

Project: scrapi Source File: nih.py
Function: schema
    @property
    def schema(self):
        """Return the mapping from normalized field names to XPath-style paths.

        Values are path strings or tuples of paths ending in a processing
        callable. The URI entries call ``self.construct_project_url`` and
        ``self.construct_descriptor_uris``.
        """
        # (property name, source path) pairs forwarded to build_properties.
        property_paths = (
            ('applicationID', '//APPLICATION_ID/node()'),
            ('activity', '//ACTIVITY/node()'),
            ('administeringIC', '//ADMINISTERING_IC/node()'),
            ('arraFunded', '//ARRA_FUNDED/node()'),
            ('budgetStart', '//BUDGET_START/node()'),
            ('budgetEnd', '//BUDGET_END/node()'),
            ('FOANumber', '//FOA_NUMBER/node()'),
            ('fullProjectNumber', '//FULL_PROJECT_NUM/node()'),
            ('fundingICs', '//FUNDING_ICs/node()'),
            ('fiscalYear', '//FY/node()'),
            ('NIHSpendingCats', '//NIH_SPENDING_CATS/@xsi:nil'),
            ('organizationCity', '//ORG_CITY/node()'),
            # NOTE(review): 'ORG_CONTRY' may be a misspelling of
            # 'ORG_COUNTRY' -- confirm against the source data before changing.
            ('organizationCountry', '//ORG_CONTRY/node()'),
            ('organizationDistrict', '//ORG_DISTRICT/node()'),
            ('organizationDUNS', '//ORG_DUNS/node()'),
            ('organizationDept', '//ORG_DEPT/node()'),
            ('organizationFIPS', '//ORG_FIPS/node()'),
            ('organizationState', '//ORG_STATE/node()'),
            ('organizationZipcode', '//ORG_ZIPCODE/node()'),
            ('ICName', '//IC_NAME/node()'),
            ('organizationName', '//ORG_NAME/node()'),
            ('projectStart', '//PROJECT_START/node()'),
            ('projectEnd', '//PROJECT_END/node()'),
            ('PHR', '//PHR/node()'),
            ('serialNumber', '//SERIAL_NUMBER/node()'),
            ('studySection', '//STUDY_SECTION/node()'),
            ('studySectionName', '//STUDY_SECTION_NAME/node()'),
            ('supportYear', '//SUPPORT_YEAR/node()'),
            ('suffix', '//SUFFIX/node()'),
            ('subProjectID', '//SUBPROJECT_ID/@xsi:nil'),
            ('totalCost', '//TOTAL_COST/node()'),
            ('totalCostSubProject', '//TOTAL_COST_SUB_PROJECT/node()'),
            ('coreProjectNumber', '//CORE_PROJECT_NUM/node()'),
            ('CFDACode', '//CFDA_CODE/node()'),
            ('programOfficerName', '//PROGRAM_OFFICER_NAME/node()'),
            ('edInstType', '//ED_INST_TYPE/node()'),
            ('awardNoticeDate', '//AWARD_NOTICE_DATE/node()'),
            ('fundingMechanism', '//FUNDING_MECHANISM/node()'),
        )

        application_id = '//APPLICATION_ID/node()'
        uri_fields = {
            'canonicalUri': (application_id, compose(self.construct_project_url, single_result)),
            'descriptorUris': (application_id, '//FOA_NUMBER/node()', self.construct_descriptor_uris),
        }

        return {
            'contributors': ('//PIS/PI/PI_NAME/node()', '//ORG_NAME', nih_name_parser),
            'uris': uri_fields,
            # NOTE(review): this path has no leading '//', unlike the
            # 'awardNoticeDate' property path -- confirm that is intended.
            'providerUpdatedDateTime': ('AWARD_NOTICE_DATE/node()', compose(datetime_formatter, single_result)),
            'title': ('//PROJECT_TITLE/node()', single_result),
            'tags': '//PROJECT_TERMSX/TERM/node()',
            'otherProperties': build_properties(*property_paths),
        }

Example 7

Project: scrapi Source File: osf.py
Function: schema
    @property
    def schema(self):
        """Return the mapping from normalized field names to source paths.

        Values are source path strings, tuples of a source path and a
        processing callable, or nested dicts of the same.
        """
        def title_or_blank(x):
            return x or ''

        uri_fields = {
            'canonicalUri': ('/url', url_from_guid),
            'providerUris': ('/url', compose(coerce_to_list, url_from_guid)),
        }
        extra_fields = build_properties(
            ('parent_title', '/parent_title'),
            ('category', '/category'),
            ('wiki_link', '/wiki_link'),
            ('is_component', '/is_component'),
            ('is_registration', '/is_registration'),
            ('parent_url', '/parent_url'),
            ('journal Id', '/journal Id')
        )

        return {
            'contributors': ('/contributors', process_contributors),
            'title': ('/title', title_or_blank),
            'providerUpdatedDateTime': ('/date_registered', datetime_formatter),
            'description': '/description',
            'uris': uri_fields,
            'tags': '/tags',
            'otherProperties': extra_fields,
        }

Example 8

Project: scrapi Source File: vivo.py
Function: schema
    @property
    def schema(self):
        """Return the mapping from normalized field names to source paths.

        Values are source path strings, tuples of source paths ending in a
        processing callable, or nested dicts of the same.
        """
        def or_blank(x):
            # Replace any falsy value with an empty string.
            return x or ''

        def single_item_list(x):
            return [x]

        def publisher_entry(x):
            if x:
                return {'name': x}
            return ''

        uri_fields = {
            'canonicalUri': '/uri',
            'providerUris': ('/uri', single_item_list),
            'objectUris': ('/pmid', '/doi', process_object_uris),
        }
        extra_fields = build_properties(
            ('journalTitle', '/journalTitle'),
            ('abstract', ('/abstract', or_blank)),
            ('type', '/types'),
            ('ISSN', ('/issn', or_blank)),
            ('number', '/number'),
            ('ISBN', '/isbn'),
            ('startPage', '/startPage'),
            ('endPage', '/endPage'),
            ('volume', '/volume'),
        )

        return {
            'title': ('/title', or_blank),
            'providerUpdatedDateTime': ('/date', datetime_formatter),
            'uris': uri_fields,
            'contributors': '/authors',
            'subjects': '/subjects',
            'tags': '/keywords',
            'publisher': ('/publisher', publisher_entry),
            'otherProperties': extra_fields,
        }

Example 9

Project: scrapi Source File: test_json_harvester.py
Function: schema
    @property
    def schema(self):
        """Return the mapping from normalized field names to source paths.

        Values are source path strings, tuples of a source path and a
        processing callable, or nested dicts of the same.
        """
        return {
            'title': ('/title', lambda x: x[0] if x else ''),
            'description': ('/subtitle', lambda x: x[0] if (isinstance(x, list) and x) else x or ''),
            # Each date part goes through str() so that non-string parts
            # (e.g. integers) can be joined instead of raising TypeError.
            'providerUpdatedDateTime': ('/issued/date-parts', lambda x: datetime_formatter(
                ' '.join(str(part) for part in x[0])
            )),
            'uris': {
                'canonicalUri': '/URL'
            },
            # One process_contributor call per author entry, given the
            # "given family" display name and the entry's ORCID (if any).
            'contributors': ('/author', lambda x: [
                process_contributor(*[
                    '{} {}'.format(entry.get('given'), entry.get('family')),
                    entry.get('ORCID')
                ]) for entry in x
            ]),
            'otherProperties': build_properties(
                ('referenceCount', '/reference-count'),
                ('updatePolicy', '/update-policy'),
                ('depositedTimestamp', '/deposited/timestamp'),
                ('Empty', '/trash/not-here'),
                ('Empty2', '/')
            )
        }

Example 10

Project: scrapi Source File: test_transformer.py
    def test_arg_kwargs(self):
        """Check schema entries whose paths are wrapped with ``pack``.

        Paths are supplied to ``pack`` positionally, by keyword, and both
        ways at once; the normalized results are then compared against the
        expected title strings.
        """
        # The parameter names 'title' and 'title1' must stay as they are:
        # 'title1' is also used as a keyword name in the pack() calls below.
        def process_title(title, title1="test"):
            head = title[0]
            if isinstance(title1, list):
                return head + title1[0]
            return head + title1

        def process_title2(title1="test"):
            if isinstance(title1, list):
                return title1[0]
            return title1

        title_path = "//dc:title/node()"
        args = (title_path, )
        kwargs = {"title1": title_path}

        schema_overrides = {
            'title': (pack(*args, **kwargs), process_title),
            'otherProperties': build_properties(
                ('title2', (pack(*args), process_title)),
                ('title3', (pack(**kwargs), process_title2)),
                ('title4', (pack(title_path, title1=title_path), process_title)),
            ),
        }
        self.harvester.schema = updated_schema(TEST_SCHEMA, schema_overrides)

        results = [self.harvester.normalize(record) for record in self.harvester.harvest(days_back=1)]

        # (property name, expected value), in the order the properties
        # were declared above.
        expected_extras = (
            ('title2', 'Testtest'),
            ('title3', 'Test'),
            ('title4', "TestTest"),
        )
        for result in results:
            assert result['title'] == "TestTest"
            extras = result['otherProperties']
            for position, (name, expected) in enumerate(expected_extras):
                assert extras[position]['properties'][name] == expected