json.dumps

Here are examples of the Python API json.dumps, taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.

200 Examples
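
Before the project excerpts, here is a minimal standalone sketch of the call they all build on: json.dumps serializes a Python object to a JSON string, and json.loads parses one back. The payload dict and formatting keywords shown are plain standard-library usage, not taken from any of the projects below.

    import json

    payload = {'server': {'name': 'example', 'metadata': {'testEntry': 'testValue'}}}

    # Serialize a Python object to a JSON string (the usual shape for an HTTP request body).
    body = json.dumps(payload)

    # Standard-library keyword arguments control formatting, e.g. stable key order and indentation.
    pretty = json.dumps(payload, sort_keys=True, indent=2)

    # json.loads is the inverse: round-tripping recovers the original structure.
    assert json.loads(body) == payload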

Example 1

Project: stacktester
Source File: test_servers.py
    def test_build_server(self):
        """Build and manipulate a server"""

        # Don't block for the server until later
        expected_server = {
            'name': 'stacktester1',
            'imageRef': self.image_ref,
            'flavorRef': self.flavor_ref,
            'metadata': {'testEntry': 'testValue'},
        }
        post_body = json.dumps({'server': expected_server})
        response, body = self.os.nova.request('POST',
                                              '/servers',
                                              body=post_body)

        # Ensure attributes were returned
        self.assertEqual(response.status, 202)
        _body = json.loads(body)
        self.assertEqual(_body.keys(), ['server'])
        created_server = _body['server']
        admin_pass = created_server.pop('adminPass')
        self._assert_server_entity(created_server)
        self.assertEqual(expected_server['name'], created_server['name'])
        self.assertEqual(created_server['accessIPv4'], '')
        self.assertEqual(created_server['accessIPv6'], '')
        self.assertEqual(expected_server['metadata'],
                         created_server['metadata'])
        server_id = created_server['id']

        # Get server again and ensure attributes stuck
        server = self.os.nova.get_server(server_id)
        self._assert_server_entity(server)
        self.assertEqual(server['name'], expected_server['name'])
        self.assertEqual(server['accessIPv4'], '')
        self.assertEqual(server['accessIPv6'], '')
        self.assertEqual(server['metadata'], created_server['metadata'])

        # Parse last-updated time
        update_time = utils.load_isotime(server['updated'])

        # Ensure server not returned with future changes-since
        future_time = utils.dump_isotime(update_time + datetime.timedelta(1))
        params = 'changes-since=%s' % future_time
        response, body = self.os.nova.request('GET', '/servers?%s' % params)
        servers = json.loads(body)['servers']
        self.assertTrue(len(servers) == 0)

        # Ensure server is returned with past changes-since
        past_time = utils.dump_isotime(update_time - datetime.timedelta(1))
        params = 'changes-since=%s' % past_time
        response, body = self.os.nova.request('GET', '/servers?%s' % params)
        servers = json.loads(body)['servers']
        server_ids = map(lambda x: x['id'], servers)
        self.assertTrue(server_id in server_ids)

        # Update name
        new_server = {'name': 'stacktester2'}
        put_body = json.dumps({'server': new_server})
        url = '/servers/%s' % server_id
        resp, body = self.os.nova.request('PUT', url, body=put_body)

        # Output from update should be a full server
        self.assertEqual(resp.status, 200)
        data = json.loads(body)
        self.assertEqual(data.keys(), ['server'])
        self._assert_server_entity(data['server'])
        self.assertEqual('stacktester2', data['server']['name'])

        # Check that name was changed
        updated_server = self.os.nova.get_server(server_id)
        self._assert_server_entity(updated_server)
        self.assertEqual('stacktester2', updated_server['name'])

        # Update accessIPv4
        new_server = {'accessIPv4': '192.168.0.200'}
        put_body = json.dumps({'server': new_server})
        url = '/servers/%s' % server_id
        resp, body = self.os.nova.request('PUT', url, body=put_body)

        # Output from update should be a full server
        self.assertEqual(resp.status, 200)
        data = json.loads(body)
        self.assertEqual(data.keys(), ['server'])
        self._assert_server_entity(data['server'])
        self.assertEqual('192.168.0.200', data['server']['accessIPv4'])

        # Check that accessIPv4 was changed
        updated_server = self.os.nova.get_server(server_id)
        self._assert_server_entity(updated_server)
        self.assertEqual('192.168.0.200', updated_server['accessIPv4'])

        # Update accessIPv6
        new_server = {'accessIPv6': 'feed::beef'}
        put_body = json.dumps({'server': new_server})
        url = '/servers/%s' % server_id
        resp, body = self.os.nova.request('PUT', url, body=put_body)

        # Output from update should be a full server
        self.assertEqual(resp.status, 200)
        data = json.loads(body)
        self.assertEqual(data.keys(), ['server'])
        self._assert_server_entity(data['server'])
        self.assertEqual('feed::beef', data['server']['accessIPv6'])

        # Check that accessIPv6 was changed
        updated_server = self.os.nova.get_server(server_id)
        self._assert_server_entity(updated_server)
        self.assertEqual('feed::beef', updated_server['accessIPv6'])

        # Check metadata subresource
        url = '/servers/%s/metadata' % server_id
        response, body = self.os.nova.request('GET', url)
        self.assertEqual(200, response.status)

        result = json.loads(body)
        expected = {'metadata': {'testEntry': 'testValue'}}
        self.assertEqual(expected, result)

        # Ensure metadata container can be modified
        expected = {
            'metadata': {
                'new_meta1': 'new_value1',
                'new_meta2': 'new_value2',
            },
        }
        post_body = json.dumps(expected)
        url = '/servers/%s/metadata' % server_id
        response, body = self.os.nova.request('POST', url, body=post_body)
        self.assertEqual(200, response.status)
        result = json.loads(body)
        expected['metadata']['testEntry'] = 'testValue'
        self.assertEqual(expected, result)

        # Ensure values stick
        url = '/servers/%s/metadata' % server_id
        response, body = self.os.nova.request('GET', url)
        self.assertEqual(200, response.status)
        result = json.loads(body)
        self.assertEqual(expected, result)

        # Ensure metadata container can be overwritten
        expected = {
            'metadata': {
                'new_meta3': 'new_value3',
                'new_meta4': 'new_value4',
            },
        }
        url = '/servers/%s/metadata' % server_id
        post_body = json.dumps(expected)
        response, body = self.os.nova.request('PUT', url, body=post_body)
        self.assertEqual(200, response.status)
        result = json.loads(body)
        self.assertEqual(expected, result)

        # Ensure values stick
        url = '/servers/%s/metadata' % server_id
        response, body = self.os.nova.request('GET', url)
        self.assertEqual(200, response.status)
        result = json.loads(body)
        self.assertEqual(expected, result)

        # Set specific key
        expected_meta = {'meta': {'new_meta5': 'new_value5'}}
        put_body = json.dumps(expected_meta)
        url = '/servers/%s/metadata/new_meta5' % server_id
        response, body = self.os.nova.request('PUT', url, body=put_body)
        self.assertEqual(200, response.status)
        result = json.loads(body)
        self.assertDictEqual(expected_meta, result)

        # Ensure value sticks
        expected_metadata = {
            'metadata': {
                'new_meta3': 'new_value3',
                'new_meta4': 'new_value4',
                'new_meta5': 'new_value5',
            },
        }
        url = '/servers/%s/metadata' % server_id
        response, body = self.os.nova.request('GET', url)
        result = json.loads(body)
        self.assertDictEqual(expected_metadata, result)

        # Update existing key
        expected_meta = {'meta': {'new_meta4': 'new_value6'}}
        put_body = json.dumps(expected_meta)
        url = '/servers/%s/metadata/new_meta4' % server_id
        response, body = self.os.nova.request('PUT', url, body=put_body)
        self.assertEqual(200, response.status)
        result = json.loads(body)
        self.assertEqual(expected_meta, result)

        # Ensure value sticks
        expected_metadata = {
            'metadata': {
                'new_meta3': 'new_value3',
                'new_meta4': 'new_value6',
                'new_meta5': 'new_value5',
            },
        }
        url = '/servers/%s/metadata' % server_id
        response, body = self.os.nova.request('GET', url)
        result = json.loads(body)
        self.assertDictEqual(expected_metadata, result)

        # Delete a key
        url = '/servers/%s/metadata/new_meta3' % server_id
        response, body = self.os.nova.request('DELETE', url)
        self.assertEqual(204, response.status)

        # Make sure the key is gone
        url = '/servers/%s/metadata/new_meta3' % server_id
        response, body = self.os.nova.request('GET', url)
        self.assertEqual(404, response.status)

        # Delete a nonexistent key
        url = '/servers/%s/metadata/new_meta3' % server_id
        response, body = self.os.nova.request('DELETE', url)
        self.assertEqual(404, response.status)

        # Wait for instance to boot
        server_id = created_server['id']
        self.os.nova.wait_for_server_status(server_id,
                                            'ACTIVE',
                                            timeout=self.build_timeout)

        # Look for 'addresses' attribute on server
        url = '/servers/%s' % server_id
        response, body = self.os.nova.request('GET', url)
        self.assertEqual(response.status, 200)
        body = json.loads(body)
        self.assertTrue('addresses' in body['server'].keys())
        server_addresses = body['server']['addresses']

        # Addresses should be available from subresource
        url = '/servers/%s/ips' % server_id
        response, body = self.os.nova.request('GET', url)
        self.assertEqual(response.status, 200)
        body = json.loads(body)
        self.assertEqual(body.keys(), ['addresses'])
        ips_addresses = body['addresses']

        # Ensure both resources return identical information
        self.assertEqual(server_addresses, ips_addresses)

        # Validate entities within network containers
        for (network, network_data) in ips_addresses.items():
            url = '/servers/%s/ips/%s' % (server_id, network)
            response, body = self.os.nova.request('GET', url)
            self.assertEqual(response.status, 200)
            body = json.loads(body)
            self.assertEqual(body.keys(), [network])
            self.assertEqual(body[network], network_data)

            # Check each IP entity
            for ip_data in network_data:
                self.assertEqual(set(ip_data.keys()), set(['addr', 'version']))

        # Find IP of server
        try:
            (_, network) = server_addresses.items()[0]
            ip = network[0]['addr']
        except (KeyError, IndexError):
            self.fail("Failed to retrieve IP address from server entity")

        # Assert password works
        client = ssh.Client(ip, 'root', admin_pass, self.ssh_timeout)
        self.assertTrue(client.test_connection_auth())

        # Delete server
        url = '/servers/%s' % server_id
        response, body = self.os.nova.request('DELETE', url)
        self.assertEqual(response.status, 204)

        # Poll server until deleted
        try:
            url = '/servers/%s' % server_id
            self.os.nova.poll_request_status('GET', url, 404)
        except exceptions.TimeoutException:
            self.fail("Server deletion timed out")
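
Every json.dumps call in Example 1 follows the same request/response pattern: wrap the payload dict under a top-level key, serialize it into the HTTP body, then json.loads the response body before asserting on it. Below is a minimal sketch of that pattern; the client object and helper name are illustrative stand-ins, not part of the stacktester API.

    import json

    def update_server_name(client, server_id, new_name):
        # Nova-style APIs expect the payload wrapped under a top-level 'server' key.
        put_body = json.dumps({'server': {'name': new_name}})
        response, body = client.request('PUT', '/servers/%s' % server_id, body=put_body)

        # Parse the JSON response body before inspecting it.
        data = json.loads(body)
        return response.status, data['server']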

Example 2

Project: treeio
Source File: tests.py
    def test_common_project(self):

        # create new project
        new_project = {'name': 'api test',
                       'details': '<p>test details</p>'}
        response = self.client.post(reverse('api_projects'), data=json.dumps(new_project),
                                    content_type=self.content_type, **self.authentication_headers)
        self.assertEquals(response.status_code, 200)

        # check data in response
        data = json.loads(response.content)
        self.assertEquals(data['name'], new_project['name'])
        self.assertEquals(data['details'], new_project['details'])
        project_id = data['id']

        # get info about new project
        response = self.client.get(path=reverse(
            'api_projects', kwargs={'object_ptr': project_id}), **self.authentication_headers)
        self.assertEquals(response.status_code, 200)

        # get statuses list
        response = self.client.get(
            path=reverse('api_projects_status'), **self.authentication_headers)
        self.assertEquals(response.status_code, 200)

        statuses = json.loads(response.content)
        fstatus = statuses[0]['id']

        # create new task status
        new_status = {'name': 'Open api test',
                      'active': True,
                      'hidden': False,
                      'details': '<p>test details</p>'}
        response = self.client.post(reverse('api_projects_status'), data=json.dumps(new_status),
                                    content_type=self.content_type, **self.authentication_headers)
        self.assertEquals(response.status_code, 200)

        data = json.loads(response.content)
        self.assertEquals(data['name'], new_status['name'])
        self.assertEquals(data['active'], new_status['active'])
        self.assertEquals(data['hidden'], new_status['hidden'])
        self.assertEquals(data['details'], new_status['details'])
        sstatus = data['id']

        # create new milestone
        new_milestone = {'name': 'api test milestone',
                         'status': fstatus,
                         'project': project_id,
                         'start_date': '2011-06-09 12:00:00',
                         'details': '<p>test details</p>'}
        response = self.client.post(reverse('api_projects_milestones'), data=json.dumps(new_milestone),
                                    content_type=self.content_type, **self.authentication_headers)
        self.assertEquals(response.status_code, 200)

        data = json.loads(response.content)
        self.assertEquals(data['name'], new_milestone['name'])
        self.assertEquals(data['status']['id'], new_milestone['status'])
        self.assertEquals(data['project']['id'], new_milestone['project'])
        self.assertEquals(data['details'], new_milestone['details'])
        milestone_id = data['id']

        #  create new task
        new_task = {'name': 'api test task',
                    'status': sstatus,
                    'project': project_id,
                    'milestone': milestone_id,
                    'priority': 5,
                    'start_date': '2011-06-02 12:00:00',
                    'estimated_time': 5000,
                    'details': '<p>test details</p>'
                    }
        response = self.client.post(reverse('api_projects_tasks'), data=json.dumps(new_task),
                                    content_type=self.content_type, **self.authentication_headers)
        self.assertEquals(response.status_code, 200)

        data = json.loads(response.content)
        self.assertEquals(data['name'], new_task['name'])
        self.assertEquals(data['priority'], new_task['priority'])
        self.assertEquals(data['status']['id'], new_task['status'])
        self.assertEquals(data['project']['id'], new_task['project'])
        self.assertEquals(data['milestone']['id'], new_task['milestone'])
        self.assertEquals(data['estimated_time'], new_task['estimated_time'])
        self.assertEquals(data['details'], new_task['details'])
        task_id = data['id']

        # create new subtask
        new_sub_task = {'name': 'api test task',
                        'status': sstatus,
                        'parent': task_id,
                        'project': project_id,
                        'priority': 5,
                        'start_date': '2011-06-02 13:00:00',
                        'estimated_time': 2500,
                        'details': '<p>test details</p>'
                        }

        response = self.client.post(reverse('api_projects_tasks'), data=json.dumps(new_sub_task),
                                    content_type=self.content_type, **self.authentication_headers)
        self.assertEquals(response.status_code, 200)

        data = json.loads(response.content)
        self.assertEquals(data['name'], new_sub_task['name'])
        self.assertEquals(data['priority'], new_sub_task['priority'])
        self.assertEquals(data['status']['id'], new_sub_task['status'])
        self.assertEquals(data['parent']['id'], new_sub_task['parent'])
        self.assertEquals(data['project']['id'], new_sub_task['project'])
        self.assertEquals(
            data['estimated_time'], new_sub_task['estimated_time'])
        self.assertEquals(data['details'], new_sub_task['details'])
        sub_task_id = data['id']

        # create task time
        new_tasktime = {'task': task_id,
                        'minutes': 400,
                        'details': '<p>test details</p>'
                        }

        response = self.client.post(reverse('api_projects_tasktimes'), data=json.dumps(new_tasktime),
                                    content_type=self.content_type, **self.authentication_headers)
        self.assertEquals(response.status_code, 200)

        data = json.loads(response.content)
        self.assertEquals(data['task']['id'], new_tasktime['task'])
        self.assertEquals(data['details'], new_tasktime['details'])
        tasktime_id = data['id']

        # start task time
        response = self.client.get(path=reverse('api_projects_tasktime_start', kwargs={
                                   'task_id': sub_task_id}), **self.authentication_headers)
        self.assertEquals(response.status_code, 200)

        data = json.loads(response.content)
        slot_id = data['id']

        sleep(60)

        # stop task time
        response = self.client.post(reverse('api_projects_tasktime_stop', kwargs={'slot_id': slot_id}), data=json.dumps({'details': '<p>test details</p>'}),
                                    content_type=self.content_type, **self.authentication_headers)
        self.assertEquals(response.status_code, 200)

        # delete task time
        response = self.client.delete(reverse('api_projects_tasktimes', kwargs={
                                      'object_ptr': tasktime_id}), **self.authentication_headers)
        self.assertEquals(response.status_code, 204)

        # delete task
        response = self.client.delete(reverse(
            'api_projects_tasks', kwargs={'object_ptr': task_id}), **self.authentication_headers)
        self.assertEquals(response.status_code, 204)

        # check subtask
        response = self.client.get(path=reverse('api_projects_tasks', kwargs={
                                   'object_ptr': sub_task_id}), **self.authentication_headers)
        self.assertEquals(response.status_code, 404)

        # delete milestone
        response = self.client.delete(reverse('api_projects_milestones', kwargs={
                                      'object_ptr': milestone_id}), **self.authentication_headers)
        self.assertEquals(response.status_code, 204)

        # delete status
        response = self.client.delete(reverse(
            'api_projects_status', kwargs={'object_ptr': sstatus}), **self.authentication_headers)
        self.assertEquals(response.status_code, 204)

        # delete project
        response = self.client.delete(reverse(
            'api_projects', kwargs={'object_ptr': project_id}), **self.authentication_headers)
        self.assertEquals(response.status_code, 204)
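
Each create step in Example 2 repeats the same Django test-client idiom: json.dumps the payload, POST it with an explicit JSON content type, then json.loads(response.content) to read back the created object. A hedged sketch of that idiom as a helper follows; the helper name, the django.urls import location, and the default content type are assumptions, not part of treeio.

    import json

    from django.urls import reverse  # older Django versions import reverse from django.core.urlresolvers

    def post_json(client, url_name, payload, content_type='application/json', **headers):
        # Serialize the payload and POST it as JSON, the way the treeio tests do.
        response = client.post(reverse(url_name), data=json.dumps(payload),
                               content_type=content_type, **headers)
        # Deserialize the response body so callers can inspect the created object.
        return response.status_code, json.loads(response.content)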

Example 3

Project: ochopod
Source File: marathon.py
    def boot(self, lifecycle, model=Reactive, tools=None, local=False):

        #
        # - quick check to make sure we get the right implementations
        #
        assert issubclass(model, Model), 'model must derive from ochopod.api.Model'
        assert issubclass(lifecycle, LifeCycle), 'lifecycle must derive from ochopod.api.LifeCycle'

        #
        # - instantiate our flask endpoint
        # - default to a json handler for all HTTP errors (including an unexpected 500)
        #
        def _handler(error):
            http = error.code if isinstance(error, HTTPException) else 500
            return '{}', http, {'Content-Type': 'application/json; charset=utf-8'}

        web = Flask(__name__)
        for code in default_exceptions.iterkeys():
            web.error_handler_spec[None][code] = _handler

        #
        # - default presets in case we run outside of marathon (local vm testing)
        # - any environment variable prefixed with "ochopod." is of interest for us (e.g this is what the user puts
        #   in the marathon application configuration for instance)
        # - the other settings come from marathon (namely the port bindings & application/task identifiers)
        # - the MESOS_TASK_ID is important to keep around to enable task deletion via the marathon REST API
        #
        env = \
            {
                'ochopod_application':  '',
                'ochopod_cluster':      'default',
                'ochopod_debug':        'true',
                'ochopod_local':        'false',
                'ochopod_namespace':    'marathon',
                'ochopod_port':         '8080',
                'ochopod_start':        'true',
                'ochopod_task':         '',
                'ochopod_zk':           '',
                'PORT_8080':            '8080'
            }

        env.update(os.environ)
        ochopod.enable_cli_log(debug=env['ochopod_debug'] == 'true')
        try:

            #
            # - grab our environment variables (which are set by the marathon executor)
            # - extract the mesos PORT_* bindings and construct a small remapping dict
            #
            ports = {}
            logger.debug('environment ->\n%s' % '\n'.join(['\t%s -> %s' % (k, v) for k, v in env.items()]))
            for key, val in env.items():
                if key.startswith('PORT_'):
                    ports[key[5:]] = int(val)

            #
            # - keep any "ochopod_" environment variable & trim its prefix
            # - default all our settings, especially the mandatory ones
            # - the ip and zookeeper are defaulted to localhost to enable easy testing
            #
            hints = {k[8:]: v for k, v in env.items() if k.startswith('ochopod_')}
            if local or hints['local'] == 'true':

                #
                # - we are running in local mode (e.g on a dev workstation)
                # - default everything to localhost
                #
                logger.info('running in local mode (make sure you run a standalone zookeeper)')
                hints.update(
                    {
                        'fwk':          'marathon (debug)',
                        'ip':           '127.0.0.1',
                        'node':         'local',
                        'ports':        ports,
                        'public':       '127.0.0.1',
                        'zk':           '127.0.0.1:2181'
                    })
            else:

                #
                # - extend our hints
                # - add the application + task
                #
                hints.update(
                    {
                        'application':  env['MARATHON_APP_ID'][1:],
                        'fwk':          'marathon',
                        'ip':           '',
                        'node':         '',
                        'ports':        ports,
                        'public':       '',
                        'task':         env['MESOS_TASK_ID'],
                        'zk':           ''
                    })

                #
                # - use whatever subclass is implementing us to infer 'ip', 'node' and 'public'
                #
                hints.update(self.get_node_details())

                #
                # - lookup for the zookeeper connection string from environment variable or on disk
                # - we have to look into different places depending on how mesos was installed
                #
                def _1():

                    #
                    # - most recent DCOS release
                    # - $MESOS_MASTER is located in /opt/mesosphere/etc/mesos-slave-common
                    # - the snippet in there is prefixed by MESOS_MASTER=zk://<ip:port>/mesos
                    #
                    logger.debug('checking /opt/mesosphere/etc/mesos-slave-common...')
                    _, lines = shell("grep MESOS_MASTER /opt/mesosphere/etc/mesos-slave-common")
                    return lines[0][13:]

                def _2():

                    #
                    # - same as above except for slightly older DCOS releases
                    # - $MESOS_MASTER is located in /opt/mesosphere/etc/mesos-slave
                    #
                    logger.debug('checking /opt/mesosphere/etc/mesos-slave...')
                    _, lines = shell("grep MESOS_MASTER /opt/mesosphere/etc/mesos-slave")
                    return lines[0][13:]

                def _3():

                    #
                    # - a regular package install will write the slave settings under /etc/mesos/zk (the snippet in
                    #   there looks like zk://10.0.0.56:2181/mesos)
                    #
                    logger.debug('checking /etc/mesos/zk...')
                    _, lines = shell("cat /etc/mesos/zk")
                    return lines[0]

                def _4():

                    #
                    # - look for ZK from environment variables
                    # - user can pass down ZK using $ochopod_zk
                    # - this last-resort situation is used mostly for debugging
                    #
                    logger.debug('checking $ochopod_zk environment variable...')
                    return env['ochopod_zk']

                #
                # - depending on how the slave has been installed we might have to look in various places
                #   to find out what our zookeeper connection string is
                # - use urlparse to keep the host:port part of the URL (possibly including a login+password)
                #
                for method in [_1, _2, _3, _4]:
                    try:
                        hints['zk'] = urlparse(method()).netloc
                        break

                    except Exception:
                        pass

            #
            # - the cluster must be fully qualified with a namespace (which is defaulted anyway)
            #
            assert hints['zk'], 'unable to determine where zookeeper is located (unsupported/bogus mesos setup ?)'
            assert hints['cluster'] and hints['namespace'], 'no cluster and/or namespace defined (user error ?)'

            #
            # - load the tools
            #
            if tools:
                tools = {tool.tag: tool for tool in [clz() for clz in tools if issubclass(clz, Tool)] if tool.tag}
                logger.info('supporting tools %s' % ', '.join(tools.keys()))

            #
            # - start the life-cycle actor which will pass our hints (as a json object) to its underlying sub-process
            # - start our coordinator which will connect to zookeeper and attempt to lead the cluster
            # - upon grabbing the lock the model actor will start and implement the configuration process
            # - the hints are a convenient bag for any data that may change at runtime and needs to be returned (via
            #   the HTTP POST /info request)
            # - what's being registered in zookeeper is immutable though and decorated with additional details by
            #   the coordinator (especially the pod index which is derived from zookeeper)
            #
            latch = ThreadingFuture()
            logger.info('starting %s.%s (marathon) @ %s' % (hints['namespace'], hints['cluster'], hints['node']))
            breadcrumbs = deepcopy(hints)
            hints['metrics'] = {}
            hints['dependencies'] = model.depends_on
            env.update({'ochopod': json.dumps(hints)})
            executor = lifecycle.start(env, latch, hints)
            coordinator = Coordinator.start(
                hints['zk'].split(','),
                hints['namespace'],
                hints['cluster'],
                int(hints['port']),
                breadcrumbs,
                model,
                hints)

            #
            # - external hook forcing a coordinator reset
            # - this will force a re-connection to zookeeper and pod registration
            # - please note this will not impact the pod lifecycle (e.g the underlying sub-process will be
            #   left running)
            #
            @web.route('/reset', methods=['POST'])
            def _reset():

                logger.debug('http in -> /reset')
                coordinator.tell({'request': 'reset'})
                return '{}', 200, {'Content-Type': 'application/json; charset=utf-8'}

            #
            # - external hook exposing information about our pod
            # - this is a subset of what's registered in zookeeper at boot-time
            # - the data is dynamic and updated from time to time by the model and executor actors
            # - from @pferro -> the pod's dependencies defined in the model are now added as well
            #
            @web.route('/info', methods=['POST'])
            def _info():

                logger.debug('http in -> /info')
                keys = \
                    [
                        'application',
                        'dependencies',
                        'ip',
                        'metrics',
                        'node',
                        'port',
                        'ports',
                        'process',
                        'public',
                        'state',
                        'status',
                        'task'
                    ]

                subset = dict(filter(lambda i: i[0] in keys, hints.iteritems()))
                return json.dumps(subset), 200, {'Content-Type': 'application/json; charset=utf-8'}

            #
            # - external hook exposing our circular log
            # - reverse and dump ochopod.log as a json array
            #
            @web.route('/log', methods=['POST'])
            def _log():

                logger.debug('http in -> /log')
                with open(ochopod.LOG, 'r+') as log:
                    lines = [line for line in log]
                    return json.dumps(lines), 200, {'Content-Type': 'application/json; charset=utf-8'}

            #
            # - RPC call to run a custom tool within the pod
            #
            @web.route('/exec', methods=['POST'])
            def _exec():

                logger.debug('http in -> /exec')

                #
                # - make sure the command (first token in the X-Shell header) maps to a tool
                # - if no match abort on a 404
                #
                line = request.headers['X-Shell']
                tokens = line.split(' ')
                cmd = tokens[0]
                if not tools or cmd not in tools:
                    return '{}', 404, {'Content-Type': 'application/json; charset=utf-8'}

                code = 1
                tool = tools[cmd]

                #
                # - make sure the parser does not sys.exit()
                #
                class _Parser(ArgumentParser):
                    def exit(self, status=0, message=None):
                        raise ValueError(message)

                #
                # - prep a temporary directory
                # - invoke define_cmdline_parsing()
                # - switch off parsing if NotImplementedError is raised
                #
                use_parser = 1
                parser = _Parser(prog=tool.tag)
                try:
                    tool.define_cmdline_parsing(parser)

                except NotImplementedError:
                    use_parser = 0

                tmp = tempfile.mkdtemp()
                try:

                    #
                    # - parse the command line
                    # - upload any attachment
                    #
                    args = parser.parse_args(tokens[1:]) if use_parser else ' '.join(tokens[1:])
                    for tag, upload in request.files.items():
                        where = path.join(tmp, tag)
                        logger.debug('uploading %s @ %s' % (tag, tmp))
                        upload.save(where)

                    #
                    # - run the tool method
                    # - pass the temporary directory as well
                    #
                    logger.info('invoking "%s"' % line)
                    code, lines = tool.body(args, tmp)

                except ValueError as failure:

                    lines = [parser.format_help() if failure.message is None else failure.message]

                except Exception as failure:

                    lines = ['unexpected failure -> %s' % failure]

                finally:

                    #
                    # - make sure to cleanup our temporary directory
                    #
                    shutil.rmtree(tmp)

                out = \
                    {
                        'code': code,
                        'stdout': lines
                    }

                return json.dumps(out), 200, {'Content-Type': 'application/json; charset=utf-8'}

            #
            # - web-hook used to receive requests from the leader or the CLI tools
            # - those requests are passed down to the executor actor
            # - any non HTTP 200 response is a failure
            # - failure to acknowledge within the specified timeout will result in a HTTP 408 (REQUEST TIMEOUT)
            # - attempting to send a control request to a dead pod will result in a HTTP 410 (GONE)
            #
            @web.route('/control/<task>', methods=['POST'])
            @web.route('/control/<task>/<timeout>', methods=['POST'])
            def _control(task, timeout='60'):

                logger.debug('http in -> /control/%s' % task)
                if task not in ['check', 'on', 'off', 'ok', 'kill', 'signal']:

                    #
                    # - fail on a HTTP 400 if the request is not supported
                    #
                    return '{}', 400, {'Content-Type': 'application/json; charset=utf-8'}

                try:

                    ts = time.time()
                    latch = ThreadingFuture()
                    executor.tell({'request': task, 'latch': latch, 'data': request.data})
                    js, code = latch.get(timeout=int(timeout))
                    ms = time.time() - ts
                    logger.debug('http out -> HTTP %s (%d ms)' % (code, ms))
                    return json.dumps(js), code, {'Content-Type': 'application/json; charset=utf-8'}

                except Timeout:

                    #
                    # - we failed to match the specified timeout
                    # - gracefully fail on a HTTP 408
                    #
                    return '{}', 408, {'Content-Type': 'application/json; charset=utf-8'}

                except ActorDeadError:

                    #
                    # - the executor has been shutdown (probably after a /control/kill)
                    # - gracefully fail on a HTTP 410
                    #
                    return '{}', 410, {'Content-Type': 'application/json; charset=utf-8'}

            #
            # - internal hook required to shutdown the web-server
            # - it's not possible to do it outside of a request handler
            # - make sure this call only comes from localhost (todo)
            #
            @web.route('/terminate', methods=['POST'])
            def _terminate():

                request.environ.get('werkzeug.server.shutdown')()
                return '{}', 200, {'Content-Type': 'application/json; charset=utf-8'}

            #
            # - run werkzeug from a separate thread to avoid blocking the main one
            # - we'll have to shut it down using a dedicated HTTP POST
            #
            class _Runner(threading.Thread):

                def run(self):
                    web.run(host='0.0.0.0', port=int(hints['port']), threaded=True)

            try:

                #
                # - block on the lifecycle actor until it goes down (usually after a /control/kill request)
                #
                _Runner().start()
                spin_lock(latch)
                logger.debug('pod is dead, idling')
                while 1:

                    #
                    # - simply idle forever (since the framework would restart any container that terminates)
                    # - /log and /hints HTTP requests will succeed (and show the pod as being killed)
                    # - any control request will now fail
                    #
                    time.sleep(60.0)

            finally:

                #
                # - when we exit the block, first shut down our executor (which may already be down)
                # - then shutdown the coordinator to un-register from zookeeper
                # - finally ask werkzeug to shutdown via a REST call
                #
                shutdown(executor)
                shutdown(coordinator)
                post('http://127.0.0.1:%s/terminate' % env['ochopod_port'])

        except KeyboardInterrupt:

            logger.fatal('CTRL-C pressed')

        except Exception as failure:

            logger.fatal('unexpected condition -> %s' % diagnostic(failure))
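
Each Flask handler in Example 3 returns json.dumps(...) directly as the response body together with an explicit Content-Type header, rather than going through flask.jsonify. A minimal sketch of that pattern is below; the hints dict is an illustrative stand-in for the pod state kept by ochopod.

    import json

    from flask import Flask

    web = Flask(__name__)
    hints = {'node': 'local', 'port': '8080'}  # illustrative stand-in for the real pod hints

    @web.route('/info', methods=['POST'])
    def _info():
        # Serialize by hand and set the content type explicitly, as the handlers above do.
        return json.dumps(hints), 200, {'Content-Type': 'application/json; charset=utf-8'}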

Example 4

Project: pureelk
Source File: purecollector.py
    def collect(self):
        utcnow = datetime.datetime.utcnow()

        date_str = utcnow.strftime('%Y-%m-%d')
        arrays_index = "pureelk-arrays-{}".format(date_str)
        vols_index = "pureelk-vols-{}".format(date_str)
        hosts_index = "pureelk-hosts-{}".format(date_str)
        hgroups_index = "pureelk-hgroup-{}".format(date_str)
        msgs_index = "pureelk-msgs-{}".format(date_str)
        audit_index = "pureelk-audit-{}".format(date_str)
        global_arrays_index = "pureelk-global-arrays"
        global_vols_index = "pureelk-global-vols"

        # ignore indices already exists error (code 409)
        self._es_client.indices.create(index=vols_index, body=volmap, ignore=[400, 409])
        self._es_client.indices.create(index=arrays_index, body=arraymap, ignore=[400, 409])
        self._es_client.indices.create(index=msgs_index, body=msgmap, ignore=[400, 409])
        self._es_client.indices.create(index=audit_index, body=auditmap, ignore=[400, 409])
        self._es_client.indices.create(index=hosts_index, body=hostmap, ignore=[400, 409])
        self._es_client.indices.create(index=hgroups_index, body=hgroupmap, ignore=[400, 409])


        # special non-time-series stash of array/vol documents
        self._es_client.indices.create(index=global_arrays_index, body=arraymap, ignore=[400, 409])
        self._es_client.indices.create(index=global_vols_index, body=volmap, ignore=[400, 409])

        # all metrics collected in the same cycle are posted to Elasticsearch with same timestamp
        timeofquery_str = utcnow.isoformat()

        # get the overall array info for performance
        ap = self._ps_client.get(action='monitor')
        ap[0]['array_name'] = self._array_name
        ap[0]['array_id'] = self._array_id
        # add an array name that elasticsearch can tokenize ( i.e. won't be present in mappings above )
        ap[0]['array_name_a'] = self._array_name

        # now get the information for space
        sp = self._ps_client.get(space=True)
        nd = sp.copy()

        # copy items into the new dictionary
        ap[0].update(nd)

        # add some pre-calculated fields so Kibana doesn't need scripted fields; relying on
        # scripted fields would make the install less of a one-button affair
        cap = long(ap[0]['capacity'])
        tot = long(ap[0]['total'])
        ap[0]['free'] = cap - tot
        ap[0]['percent_free'] = (float(cap) - float(tot)) / float(cap)

        ap[0][PureCollector._timeofquery_key] = timeofquery_str
        s = json.dumps(ap[0])
        self._es_client.index(index=arrays_index, doc_type='arrayperf', body=s, ttl=self._data_ttl)

        # non-timeseries array docs, uses id to bring es versioning into play
        self._es_client.index(index=global_arrays_index, doc_type='arrayperf', body=s, id=self._array_id, ttl=self._data_ttl)


        # index alert messages
        al = self._ps_client.list_messages(recent='true')
        for am in al:
            am['array_name'] = self._array_name
            am['array_id'] = self._array_id
            # add an array name  that elasticsearch can tokenize ( i.e. won't be present in mappings above )
            am['array_name_a'] = self._array_name
            am[PureCollector._timeofquery_key] = timeofquery_str
            s = json.dumps(am)
            self._es_client.index(index=msgs_index, doc_type='arraymsg', id=am['id'], body=s, ttl=self._data_ttl)

        # index audit log entries
        al = self._ps_client.list_messages(audit='true')
        for am in al:
            am['array_name'] = self._array_name
            am['array_id'] = self._array_id
            # add an array name  that elasticsearch can tokenize ( i.e. won't be present in mappings above )
            am['array_name_a'] = self._array_name
            am[PureCollector._timeofquery_key] = timeofquery_str
            s = json.dumps(am)
            self._es_client.index(index=audit_index, doc_type='auditmsg', id=am['id'], body=s, ttl=self._data_ttl)

        # get list of volumes
        vl = self._ps_client.list_volumes()

        for v in vl:
            # get real-time perf stats per volume
            vp = self._ps_client.get_volume(v['name'], action='monitor')
            vp[0]['array_name'] = self._array_name
            vp[0]['array_id'] = self._array_id
            vp[0]['vol_name'] = self._array_name + ':' + v['name']
            
            # add an array name and a volume name that elasticsearch can tokenize ( i.e. won't be present in mappings above )
            vp[0]['vol_name_a'] = v['name']
            vp[0]['array_name_a'] = self._array_name
            
            vp[0][PureCollector._timeofquery_key] = timeofquery_str
            
            # get space stats per volume and append 
            vs = self._ps_client.get_volume(v['name'], space=True)
            vp[0].update(vs)

            # get the host and host group connections per volume
            # create a large string that we are hoping elasticsearch
            # will tokenize and help us match
            hs =""
            hgs=""
            hp = self._ps_client.list_volume_private_connections(v['name'])
            for h in hp:
                if h['host']:
                    hs += h['host']
                    hs += ' '

            hp = self._ps_client.list_volume_shared_connections(v['name'])
            for hg in hp:
                if hg['host']:
                    hs += hg['host']
                    hs += ' '
                if hg['hgroup']:
                    hgs += hg['hgroup']
                    hgs += ' '

            vp[0]['host_name'] = hs
            vp[0]['hgroup_name'] = hgs

            # get the serial number for this volume to use as a unique global id
            vp1 = self._ps_client.get_volume(v['name'])
            vp[0]['serial'] = vp1['serial']
            
            # dump total document into json
            s = json.dumps(vp[0])
            self._es_client.index(index=vols_index, doc_type='volperf', body=s, ttl=self._data_ttl)

            # non-timeseries volume docs, uses id to bring es versioning into play, uses serial number as global ID
            self._es_client.index(index=global_vols_index, doc_type='volperf', body=s, id=vp1['serial'], ttl=self._data_ttl)

        # get list of hosts
        hl = self._ps_client.list_hosts()

        for h in hl:
            # get real-time perf stats per host
            hp = self._ps_client.get_host(h['name'], space=True)
            hp['array_name'] = self._array_name
            hp['array_id'] = self._array_id
            hp['host_name'] = h['name']
            hp['hgroup_name'] = h['hgroup']
            # add an array name and a volume name that elasticsearch can tokenize ( i.e. won't be present in mappings above )
            hp['host_name_a'] = h['name']
            hp['array_name_a'] = self._array_name
            hp[PureCollector._timeofquery_key] = timeofquery_str

            # dump total document into json
            s = json.dumps(hp)
            self._es_client.index(index=hosts_index, doc_type='hostdoc', body=s, ttl=self._data_ttl)

        # get list of host groups
        hl = self._ps_client.list_hgroups()

        for hg in hl:
            # get real-time perf stats per host group
            hgp = self._ps_client.get_hgroup(hg['name'], space=True)
            hgp['array_name'] = self._array_name
            hgp['array_id'] = self._array_id
            hgp['hgroup_name'] = hg['name']
            # add an array name and a volume name that elasticsearch can tokenize ( i.e. won't be present in mappings above )
            hgp['hgroup_name_a'] = hg['name']
            hgp['array_name_a'] = self._array_name
            hgp[PureCollector._timeofquery_key] = timeofquery_str

            # include a reference to all hosts in the group at the time of this call
            hgl = self._ps_client.get_hgroup(hg['name'])
            hls = ""
            for h in hgl['hosts']:
                hls += h
                hls += ' '
            hgp['host_name'] = hls

            # dump total document into json
            s = json.dumps(hgp)
            self._es_client.index(index=hgroups_index, doc_type='hgroupdoc', body=s, ttl=self._data_ttl)
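
The collector in Example 4 always ends the same way: decorate the stats dict with array metadata and the shared timestamp, json.dumps it, and hand the resulting string to the Elasticsearch client as the document body. A hedged sketch of that final step follows; the helper and key names are illustrative, and the doc_type/ttl arguments mirror the older elasticsearch-py client used above rather than current releases.

    import json

    def index_document(es_client, index, doc_type, doc, array_name, timestamp, ttl=None):
        # Decorate the document the way the collector does before serializing it.
        doc['array_name'] = array_name
        doc['timeofquery'] = timestamp  # key name is illustrative; the collector uses PureCollector._timeofquery_key

        # Serialize to a JSON string and pass it to Elasticsearch as the document body.
        body = json.dumps(doc)
        es_client.index(index=index, doc_type=doc_type, body=body, ttl=ttl)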

Example 5

Project: raspberry_pwn
Source File: connect.py
    @staticmethod
    def queryPage(value=None, place=None, content=False, getRatioValue=False, silent=False, method=None, timeBasedCompare=False, noteResponseTime=True, auxHeaders=None, response=False, raise404=None, removeReflection=True):
        """
        This method calls a function to get the target URL page content
        and returns its page MD5 hash or a boolean value in case of
        string match check ('--string' command line parameter)
        """

        if conf.direct:
            return direct(value, content)

        get = None
        post = None
        cookie = None
        ua = None
        referer = None
        host = None
        page = None
        pageLength = None
        uri = None
        code = None

        if not place:
            place = kb.injection.place or PLACE.GET

        if not auxHeaders:
            auxHeaders = {}

        raise404 = place != PLACE.URI if raise404 is None else raise404

        value = agent.adjustLateValues(value)
        payload = agent.extractPayload(value)
        threadData = getCurrentThreadData()

        if conf.httpHeaders:
            headers = OrderedDict(conf.httpHeaders)
            contentType = max(headers[_] if _.upper() == HTTP_HEADER.CONTENT_TYPE.upper() else None for _ in headers.keys())

            if (kb.postHint or conf.skipUrlEncode) and kb.postUrlEncode:
                kb.postUrlEncode = False
                conf.httpHeaders = [_ for _ in conf.httpHeaders if _[1] != contentType]
                contentType = POST_HINT_CONTENT_TYPES.get(kb.postHint, PLAIN_TEXT_CONTENT_TYPE)
                conf.httpHeaders.append((HTTP_HEADER.CONTENT_TYPE, contentType))

        if payload:
            if kb.tamperFunctions:
                for function in kb.tamperFunctions:
                    try:
                        payload = function(payload=payload, headers=auxHeaders)
                    except Exception, ex:
                        errMsg = "error occurred while running tamper "
                        errMsg += "function '%s' ('%s')" % (function.func_name, ex)
                        raise SqlmapGenericException(errMsg)

                    if not isinstance(payload, basestring):
                        errMsg = "tamper function '%s' returns " % function.func_name
                        errMsg += "invalid payload type ('%s')" % type(payload)
                        raise SqlmapValueException(errMsg)

                value = agent.replacePayload(value, payload)

            logger.log(CUSTOM_LOGGING.PAYLOAD, safecharencode(payload))

            if place == PLACE.CUSTOM_POST and kb.postHint:
                if kb.postHint in (POST_HINT.SOAP, POST_HINT.XML):
                    # payloads in SOAP/XML should have chars > and < replaced
                    # with their HTML encoded counterparts
                    payload = payload.replace('>', "&gt;").replace('<', "&lt;")
                elif kb.postHint == POST_HINT.JSON:
                    if payload.startswith('"') and payload.endswith('"'):
                        payload = json.dumps(payload[1:-1])
                    else:
                        payload = json.dumps(payload)[1:-1]
                elif kb.postHint == POST_HINT.JSON_LIKE:
                    payload = payload.replace("'", REPLACEMENT_MARKER).replace('"', "'").replace(REPLACEMENT_MARKER, '"')
                    if payload.startswith('"') and payload.endswith('"'):
                        payload = json.dumps(payload[1:-1])
                    else:
                        payload = json.dumps(payload)[1:-1]
                    payload = payload.replace("'", REPLACEMENT_MARKER).replace('"', "'").replace(REPLACEMENT_MARKER, '"')
                value = agent.replacePayload(value, payload)
            else:
                # GET, POST, URI and Cookie payload needs to be thoroughly URL encoded
                if place in (PLACE.GET, PLACE.URI, PLACE.COOKIE) and not conf.skipUrlEncode or place in (PLACE.POST, PLACE.CUSTOM_POST) and kb.postUrlEncode:
                    payload = urlencode(payload, '%', False, place != PLACE.URI)  # spaceplus is handled down below
                    value = agent.replacePayload(value, payload)

            if conf.hpp:
                if not any(conf.url.lower().endswith(_.lower()) for _ in (WEB_API.ASP, WEB_API.ASPX)):
                    warnMsg = "HTTP parameter pollution should work only against "
                    warnMsg += "ASP(.NET) targets"
                    singleTimeWarnMessage(warnMsg)
                if place in (PLACE.GET, PLACE.POST):
                    _ = re.escape(PAYLOAD_DELIMITER)
                    match = re.search("(?P<name>\w+)=%s(?P<value>.+?)%s" % (_, _), value)
                    if match:
                        payload = match.group("value")

                        for splitter in (urlencode(' '), ' '):
                            if splitter in payload:
                                prefix, suffix = ("*/", "/*") if splitter == ' ' else (urlencode(_) for _ in ("*/", "/*"))
                                parts = payload.split(splitter)
                                parts[0] = "%s%s" % (parts[0], suffix)
                                parts[-1] = "%s%s=%s%s" % (DEFAULT_GET_POST_DELIMITER, match.group("name"), prefix, parts[-1])
                                for i in xrange(1, len(parts) - 1):
                                    parts[i] = "%s%s=%s%s%s" % (DEFAULT_GET_POST_DELIMITER, match.group("name"), prefix, parts[i], suffix)
                                payload = "".join(parts)

                        for splitter in (urlencode(','), ','):
                            payload = payload.replace(splitter, "%s%s=" % (DEFAULT_GET_POST_DELIMITER, match.group("name")))

                        value = agent.replacePayload(value, payload)
                else:
                    warnMsg = "HTTP parameter pollution works only with regular "
                    warnMsg += "GET and POST parameters"
                    singleTimeWarnMessage(warnMsg)

        if place:
            value = agent.removePayloadDelimiters(value)

        if PLACE.GET in conf.parameters:
            get = conf.parameters[PLACE.GET] if place != PLACE.GET or not value else value

        if PLACE.POST in conf.parameters:
            post = conf.parameters[PLACE.POST] if place != PLACE.POST or not value else value

        if PLACE.CUSTOM_POST in conf.parameters:
            post = conf.parameters[PLACE.CUSTOM_POST].replace(CUSTOM_INJECTION_MARK_CHAR, "") if place != PLACE.CUSTOM_POST or not value else value
            post = post.replace(ASTERISK_MARKER, '*') if post else post

        if PLACE.COOKIE in conf.parameters:
            cookie = conf.parameters[PLACE.COOKIE] if place != PLACE.COOKIE or not value else value

        if PLACE.USER_AGENT in conf.parameters:
            ua = conf.parameters[PLACE.USER_AGENT] if place != PLACE.USER_AGENT or not value else value

        if PLACE.REFERER in conf.parameters:
            referer = conf.parameters[PLACE.REFERER] if place != PLACE.REFERER or not value else value

        if PLACE.HOST in conf.parameters:
            host = conf.parameters[PLACE.HOST] if place != PLACE.HOST or not value else value

        if PLACE.URI in conf.parameters:
            uri = conf.url if place != PLACE.URI or not value else value
        else:
            uri = conf.url

        if value and place == PLACE.CUSTOM_HEADER:
            auxHeaders[value.split(',')[0]] = value.split(',', 1)[1]

        if conf.csrfToken:
            def _adjustParameter(paramString, parameter, newValue):
                retVal = paramString
                match = re.search("%s=(?P<value>[^&]*)" % re.escape(parameter), paramString)
                if match:
                    origValue = match.group("value")
                    retVal = re.sub("%s=[^&]*" % re.escape(parameter), "%s=%s" % (parameter, newValue), paramString)
                return retVal

            page, headers, code = Connect.getPage(url=conf.csrfUrl or conf.url, cookie=conf.parameters.get(PLACE.COOKIE), direct=True, silent=True, ua=conf.parameters.get(PLACE.USER_AGENT), referer=conf.parameters.get(PLACE.REFERER), host=conf.parameters.get(PLACE.HOST))
            match = re.search(r"<input[^>]+name=[\"']?%s[\"']?\s[^>]*value=(\"([^\"]+)|'([^']+)|([^ >]+))" % re.escape(conf.csrfToken), page or "")
            token = (match.group(2) or match.group(3) or match.group(4)) if match else None

            if not token:
                if conf.csrfUrl != conf.url and code == httplib.OK:
                    if headers and "text/plain" in headers.get(HTTP_HEADER.CONTENT_TYPE, ""):
                        token = page

                if not token and any(_.name == conf.csrfToken for _ in conf.cj):
                    for _ in conf.cj:
                        if _.name == conf.csrfToken:
                            token = _.value
                            if not any (conf.csrfToken in _ for _ in (conf.paramDict.get(PLACE.GET, {}), conf.paramDict.get(PLACE.POST, {}))):
                                if post:
                                    post = "%s%s%s=%s" % (post, conf.paramDel or DEFAULT_GET_POST_DELIMITER, conf.csrfToken, token)
                                elif get:
                                    get = "%s%s%s=%s" % (get, conf.paramDel or DEFAULT_GET_POST_DELIMITER, conf.csrfToken, token)
                                else:
                                    get = "%s=%s" % (conf.csrfToken, token)
                            break

                if not token:
                    errMsg = "CSRF protection token '%s' can't be found at '%s'" % (conf.csrfToken, conf.csrfUrl or conf.url)
                    if not conf.csrfUrl:
                        errMsg += ". You can try to rerun by providing "
                        errMsg += "a valid value for option '--csrf-url'"
                    raise SqlmapTokenException, errMsg

            if token:
                for place in (PLACE.GET, PLACE.POST):
                    if place in conf.parameters:
                        if place == PLACE.GET and get:
                            get = _adjustParameter(get, conf.csrfToken, token)
                        elif place == PLACE.POST and post:
                            post = _adjustParameter(post, conf.csrfToken, token)

                for i in xrange(len(conf.httpHeaders)):
                    if conf.httpHeaders[i][0].lower() == conf.csrfToken.lower():
                        conf.httpHeaders[i] = (conf.httpHeaders[i][0], token)

        if conf.rParam:
            def _randomizeParameter(paramString, randomParameter):
                retVal = paramString
                match = re.search("%s=(?P<value>[^&;]+)" % re.escape(randomParameter), paramString)
                if match:
                    origValue = match.group("value")
                    retVal = re.sub("%s=[^&;]+" % re.escape(randomParameter), "%s=%s" % (randomParameter, randomizeParameterValue(origValue)), paramString)
                return retVal

            for randomParameter in conf.rParam:
                for item in (PLACE.GET, PLACE.POST, PLACE.COOKIE):
                    if item in conf.parameters:
                        if item == PLACE.GET and get:
                            get = _randomizeParameter(get, randomParameter)
                        elif item == PLACE.POST and post:
                            post = _randomizeParameter(post, randomParameter)
                        elif item == PLACE.COOKIE and cookie:
                            cookie = _randomizeParameter(cookie, randomParameter)

        if conf.evalCode:
            delimiter = conf.paramDel or DEFAULT_GET_POST_DELIMITER
            variables = {"uri": uri}
            originals = {}

            for item in filter(None, (get, post if not kb.postHint else None)):
                for part in item.split(delimiter):
                    if '=' in part:
                        name, value = part.split('=', 1)
                        value = urldecode(value, convall=True, plusspace=(item==post and kb.postSpaceToPlus))
                        evaluateCode("%s=%s" % (name.strip(), repr(value)), variables)

            if cookie:
                for part in cookie.split(conf.cookieDel or DEFAULT_COOKIE_DELIMITER):
                    if '=' in part:
                        name, value = part.split('=', 1)
                        value = urldecode(value, convall=True)
                        evaluateCode("%s=%s" % (name.strip(), repr(value)), variables)

            originals.update(variables)
            evaluateCode(conf.evalCode, variables)
            uri = variables["uri"]

            for name, value in variables.items():
                if name != "__builtins__" and originals.get(name, "") != value:
                    if isinstance(value, (basestring, int)):
                        found = False
                        value = unicode(value)

                        regex = r"((\A|%s)%s=).+?(%s|\Z)" % (re.escape(delimiter), re.escape(name), re.escape(delimiter))
                        if re.search(regex, (get or "")):
                            found = True
                            get = re.sub(regex, "\g<1>%s\g<3>" % value, get)

                        if re.search(regex, (post or "")):
                            found = True
                            post = re.sub(regex, "\g<1>%s\g<3>" % value, post)

                        regex = r"((\A|%s)%s=).+?(%s|\Z)" % (re.escape(conf.cookieDel or DEFAULT_COOKIE_DELIMITER), name, re.escape(conf.cookieDel or DEFAULT_COOKIE_DELIMITER))
                        if re.search(regex, (cookie or "")):
                            found = True
                            cookie = re.sub(regex, "\g<1>%s\g<3>" % value, cookie)

                        if not found:
                            if post is not None:
                                post += "%s%s=%s" % (delimiter, name, value)
                            elif get is not None:
                                get += "%s%s=%s" % (delimiter, name, value)
                            elif cookie is not None:
                                cookie += "%s%s=%s" % (conf.cookieDel or DEFAULT_COOKIE_DELIMITER, name, value)

        if not conf.skipUrlEncode:
            get = urlencode(get, limit=True)

        if post is not None:
            if place not in (PLACE.POST, PLACE.CUSTOM_POST) and hasattr(post, UNENCODED_ORIGINAL_VALUE):
                post = getattr(post, UNENCODED_ORIGINAL_VALUE)
            elif kb.postUrlEncode:
                post = urlencode(post, spaceplus=kb.postSpaceToPlus)

        if timeBasedCompare:
            if len(kb.responseTimes) < MIN_TIME_RESPONSES:
                clearConsoleLine()

                if conf.tor:
                    warnMsg = "it's highly recommended to avoid usage of switch '--tor' for "
                    warnMsg += "time-based injections because of its high latency time"
                    singleTimeWarnMessage(warnMsg)

                warnMsg = "[%s] [WARNING] time-based comparison requires " % time.strftime("%X")
                warnMsg += "larger statistical model, please wait"
                dataToStdout(warnMsg)

                while len(kb.responseTimes) < MIN_TIME_RESPONSES:
                    Connect.queryPage(content=True)
                    dataToStdout('.')

                dataToStdout("\n")

            elif not kb.testMode:
                warnMsg = "it is very important not to stress the network adapter "
                warnMsg += "during usage of time-based payloads to prevent potential "
                warnMsg += "errors "
                singleTimeWarnMessage(warnMsg)

            if not kb.laggingChecked:
                kb.laggingChecked = True

                deviation = stdev(kb.responseTimes)

                if deviation > WARN_TIME_STDEV:
                    kb.adjustTimeDelay = ADJUST_TIME_DELAY.DISABLE

                    warnMsg = "considerable lagging has been detected "
                    warnMsg += "in connection response(s). Please use as high "
                    warnMsg += "value for option '--time-sec' as possible (e.g. "
                    warnMsg += "10 or more)"
                    logger.critical(warnMsg)

        if conf.safUrl and conf.saFreq > 0:
            kb.queryCounter += 1
            if kb.queryCounter % conf.saFreq == 0:
                Connect.getPage(url=conf.safUrl, cookie=cookie, direct=True, silent=True, ua=ua, referer=referer, host=host)

        start = time.time()

        if kb.nullConnection and not content and not response and not timeBasedCompare:
            noteResponseTime = False

            pushValue(kb.pageCompress)
            kb.pageCompress = False

            if kb.nullConnection == NULLCONNECTION.HEAD:
                method = HTTPMETHOD.HEAD
            elif kb.nullConnection == NULLCONNECTION.RANGE:
                auxHeaders[HTTP_HEADER.RANGE] = "bytes=-1"

            _, headers, code = Connect.getPage(url=uri, get=get, post=post, cookie=cookie, ua=ua, referer=referer, host=host, silent=silent, method=method, auxHeaders=auxHeaders, raise404=raise404, skipRead=(kb.nullConnection == NULLCONNECTION.SKIP_READ))

            if headers:
                if kb.nullConnection in (NULLCONNECTION.HEAD, NULLCONNECTION.SKIP_READ) and HTTP_HEADER.CONTENT_LENGTH in headers:
                    pageLength = int(headers[HTTP_HEADER.CONTENT_LENGTH])
                elif kb.nullConnection == NULLCONNECTION.RANGE and HTTP_HEADER.CONTENT_RANGE in headers:
                    pageLength = int(headers[HTTP_HEADER.CONTENT_RANGE][headers[HTTP_HEADER.CONTENT_RANGE].find('/') + 1:])

            kb.pageCompress = popValue()

        if not pageLength:
            try:
                page, headers, code = Connect.getPage(url=uri, get=get, post=post, cookie=cookie, ua=ua, referer=referer, host=host, silent=silent, method=method, auxHeaders=auxHeaders, response=response, raise404=raise404, ignoreTimeout=timeBasedCompare)
            except MemoryError:
                page, headers, code = None, None, None
                warnMsg = "site returned insanely large response"
                if kb.testMode:
                    warnMsg += " in testing phase. This is a common "
                    warnMsg += "behavior in custom WAF/IDS/IPS solutions"
                singleTimeWarnMessage(warnMsg)

        if conf.secondOrder:
            page, headers, code = Connect.getPage(url=conf.secondOrder, cookie=cookie, ua=ua, silent=silent, auxHeaders=auxHeaders, response=response, raise404=False, ignoreTimeout=timeBasedCompare, refreshing=True)

        threadData.lastQueryDuration = calculateDeltaSeconds(start)

        kb.originalCode = kb.originalCode or code

        if kb.testMode:
            kb.testQueryCount += 1

        if timeBasedCompare:
            return wasLastResponseDelayed()
        elif noteResponseTime:
            kb.responseTimes.append(threadData.lastQueryDuration)

        if not response and removeReflection:
            page = removeReflectiveValues(page, payload)

        kb.maxConnectionsFlag = re.search(MAX_CONNECTIONS_REGEX, page or "", re.I) is not None
        kb.permissionFlag = re.search(PERMISSION_DENIED_REGEX, page or "", re.I) is not None

        if content or response:
            return page, headers

        if getRatioValue:
            return comparison(page, headers, code, getRatioValue=False, pageLength=pageLength), comparison(page, headers, code, getRatioValue=True, pageLength=pageLength)
        else:
            return comparison(page, headers, code, getRatioValue, pageLength)
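
The _adjustParameter and _randomizeParameter helpers above both rewrite a single name=value pair inside a request string with the same regex technique. A minimal standalone sketch of that idea, with an illustrative function name and the same bare-name matching limitation as the originals:

import re

def set_param(param_string, name, new_value, delimiter='&'):
    # Replace the value of 'name' if it is present, otherwise append the pair.
    pattern = "%s=[^%s]*" % (re.escape(name), re.escape(delimiter))
    if re.search(pattern, param_string):
        return re.sub(pattern, "%s=%s" % (name, new_value), param_string)
    if param_string:
        return "%s%s%s=%s" % (param_string, delimiter, name, new_value)
    return "%s=%s" % (name, new_value)

print(set_param("id=1&csrf=old", "csrf", "abc123"))  # id=1&csrf=abc123
print(set_param("id=1", "token", "xyz"))             # id=1&token=xyz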

Example 6

View license
def parse(config, verbose=False):
    """

    :param config: An instance of :class:`~awsdbrparser.config.Config` class,
        used to parameterize the parsing.
    :param verbose: When True, progress and diagnostic messages are echoed to
        the console.

    :rtype: Summary
    """
    echo = utils.ClickEchoWrapper(quiet=(not verbose))

    index_name = '{}-{:d}-{:02d}'.format(
        config.es_index,
        config.es_year,
        config.es_month)

    echo('Opening input file: {}'.format(config.input_filename))
    file_in = open(config.input_filename, 'r')

    if config.output_to_file:
        echo('Opening output file: {}'.format(config.output_filename))
        file_out = open(config.output_filename, 'w')

    elif config.output_to_elasticsearch:
        echo('Sending DBR to Elasticsearch host: {}:{}'.format(config.es_host, config.es_port))
        awsauth = None
        if config.awsauth:
            session = boto3.Session()
            credentials = session.get_credentials()
            if credentials:
                region = session.region_name
                awsauth = AWS4Auth(credentials.access_key, credentials.secret_key, region, 'es',
                                   session_token=credentials.token)

        es = Elasticsearch([{'host': config.es_host, 'port': config.es_port}], timeout=config.es_timeout,
                           http_auth=awsauth, connection_class=RequestsHttpConnection)
        if config.delete_index:
            echo('Deleting current index: {}'.format(index_name))
            es.indices.delete(index_name, ignore=404)
        es.indices.create(index_name, ignore=400)
        es.indices.put_mapping(index=index_name, doc_type=config.es_doctype, body=config.mapping)

    if verbose:
        progressbar = click.progressbar

        # calculate number of rows in input file in preparation to display a progress bar
        record_count = sum(1 for _ in file_in) - 1
        file_in.seek(0)  # reset file descriptor

        echo("Input file has {} record(s)".format(record_count))

        if config.process_mode == PROCESS_BY_BULK:
            echo('Processing in BULK MODE, size: {}'.format(config.bulk_size))
        else:
            echo('Processing in LINE MODE')
    else:
        # uses a 100% bug-free progressbar, guaranteed :-)
        progressbar = utils.null_progressbar
        record_count = 0

    # If BI is enabled, create a thread and start running
    analytics_start = time.time()
    if config.analytics:
        echo('Starting the BI Analytics Thread')
        thread = threading.Thread(target=analytics, args=(config, echo,))
        thread.start()

    added = skipped = updated = control = 0

    if config.process_mode == PROCESS_BY_BULK:
        with progressbar(length=record_count) as pbar:
            # If you wish to sort the records by UsageStartDate before sending
            # them to ES, uncomment the two lines below and comment out the third line
            # reader = csv.DictReader(file_in, delimiter=config.csv_delimiter)
            # csv_file = sorted(reader, key=lambda line: line["UsageStartDate"]+line["UsageEndDate"])
            csv_file = csv.DictReader(file_in, delimiter=config.csv_delimiter)

            def documents():
                for json_row in csv_file:
                    if not is_control_message(json_row, config):
                        if config.debug:
                            print(json.dumps(  # do not use 'echo()' here
                                utils.pre_process(json_row)))
                        yield json.dumps(utils.pre_process(json_row))
                        pbar.update(1)

            for recno, (success, result) in enumerate(helpers.streaming_bulk(es, documents(),
                                                                             index=index_name,
                                                                             doc_type=config.es_doctype,
                                                                             chunk_size=config.bulk_size)):
                # <recno> integer, the record number (0-based)
                # <success> bool
                # <result> a dictionary like this one:
                #
                #   {
                #       'create': {
                #           'status': 201,
                #           '_type': 'billing',
                #           '_shards': {
                #               'successful': 1,
                #               'failed': 0,
                #               'total': 2
                #           },
                #           '_index': 'billing-2015-12',
                #           '_version': 1,
                #           '_id': u'AVOmiEdSF_o3S6_4Qeur'
                #       }
                #   }
                #
                if not success:
                    message = 'Failed to index record {:d} with result: {!r}'.format(recno, result)
                    if config.fail_fast:
                        raise ParserError(message)
                    else:
                        echo(message, err=True)
                else:
                    added += 1

    elif config.process_mode == PROCESS_BY_LINE:
        with progressbar(length=record_count) as pbar:
            csv_file = csv.DictReader(file_in, delimiter=config.csv_delimiter)
            for recno, json_row in enumerate(csv_file):
                if is_control_message(json_row, config):
                    control += 1
                else:
                    if config.debug:
                        print(json.dumps(  # do not use 'echo()' here
                            utils.pre_process(json_row),
                            ensure_ascii=False, encoding=config.encoding))

                    if config.output_to_file:
                        file_out.write(
                            json.dumps(utils.pre_process(json_row), ensure_ascii=False, encoding=config.encoding))
                        file_out.write('\n')
                        added += 1

                    elif config.output_to_elasticsearch:
                        if config.check:
                            # FIXME: the way it was, `search_exists` will not suffice, since we'll need the document _id for the update operation; # noqa
                            # FIXME: use `es.search` with the following sample body: `{'query': {'match': {'RecordId': '43347302922535274380046564'}}}`; # noqa
                            # SEE: https://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.search; # noqa
                            response = es.search_exists(index=index_name, doc_type=config.es_doctype,
                                                        q='RecordId:{}'.format(json_row['RecordId']))
                            if response:
                                if config.update:
                                    # TODO: requires _id from the existing document
                                    # FIXME: requires use of `es.search` method instead of `es.search_exists`
                                    # SEE: https://elasticsearch-py.readthedocs.org/en/master/api.html#elasticsearch.Elasticsearch.update; # noqa
                                    skipped += 1
                                else:
                                    skipped += 1
                            else:
                                response = es.index(index=index_name, doc_type=config.es_doctype,
                                                    body=body_dump(json_row, config))
                                if not es_index_successful(response):
                                    message = 'Failed to index record {:d} with result {!r}'.format(recno, response)
                                    if config.fail_fast:
                                        raise ParserError(message)
                                    else:
                                        echo(message, err=True)
                                else:
                                    added += 1
                        else:
                            response = es.index(index=index_name, doc_type=config.es_doctype,
                                                body=body_dump(json_row, config))
                            if not es_index_successful(response):
                                message = 'Failed to index record {:d} with result {!r}'.format(recno, response)
                                if config.fail_fast:
                                    raise ParserError(message)
                                else:
                                    echo(message, err=True)
                            else:
                                added += 1

                pbar.update(1)
    elif config.process_mode == PROCESS_BI_ONLY and config.analytics:
        echo('Processing Analytics Only')
        while thread.is_alive():
            # Wait for a timeout
            analytics_now = time.time()
            if analytics_start - analytics_now > config.analytics_timeout * 60:
                echo('Analytics processing timeout. exiting')
                break
            time.sleep(5)

    else:
        echo('Nothing to do!')

    file_in.close()

    if config.output_to_file:
        file_out.close()

    echo('Finished processing!')
    echo('')

    # the first line is the header and is skipped by the count below
    echo('Summary of documents processed...')
    echo('           Added: {}'.format(added))
    echo('         Skipped: {}'.format(skipped))
    echo('         Updated: {}'.format(updated))
    echo('Control messages: {}'.format(control))
    echo('')

    return Summary(added, skipped, updated, control)
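
The json.dumps usage in this example boils down to turning each CSV row (a dict from csv.DictReader) into one JSON document, whether the document is written to a file or yielded to Elasticsearch's streaming_bulk helper. A minimal sketch of the file-output variant, leaving out the project's pre_process step and the Python 2 only encoding argument; the function name and paths are illustrative:

import csv
import json

def csv_to_json_lines(input_path, output_path, delimiter=','):
    # csv.DictReader yields one dict per record, keyed by the CSV header,
    # so json.dumps(row) turns each record into a JSON document.
    with open(input_path, 'r') as file_in, open(output_path, 'w') as file_out:
        for row in csv.DictReader(file_in, delimiter=delimiter):
            file_out.write(json.dumps(row))
            file_out.write('\n')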

Example 7

Project: rapidpro
Source File: models.py
View license
    @classmethod
    def trigger_flow_event(cls, webhook_url, flow, run, node_uuid, contact, event, action='POST', resthook=None):
        org = flow.org
        api_user = get_api_user()
        json_time = datetime_to_str(timezone.now())

        # get the results for this contact
        results = flow.get_results(contact)
        values = []

        if results and results[0]:
            values = results[0]['values']
            for value in values:
                value['time'] = datetime_to_str(value['time'])
                value['value'] = unicode(value['value'])

        # if the action is on the first node
        # we might not have an sms (or channel) yet
        channel = None
        text = None
        contact_urn = contact.get_urn()

        if event:
            text = event.text
            channel = event.channel
            contact_urn = event.contact_urn

        if channel:
            channel_id = channel.pk
        else:
            channel_id = -1

        steps = []
        for step in run.steps.prefetch_related('messages', 'broadcasts').order_by('arrived_on'):
            steps.append(dict(type=step.step_type,
                              node=step.step_uuid,
                              arrived_on=datetime_to_str(step.arrived_on),
                              left_on=datetime_to_str(step.left_on),
                              text=step.get_text(),
                              value=step.rule_value))

        data = dict(channel=channel_id,
                    relayer=channel_id,
                    flow=flow.id,
                    flow_name=flow.name,
                    flow_base_language=flow.base_language,
                    run=run.id,
                    text=text,
                    step=unicode(node_uuid),
                    phone=contact.get_urn_display(org=org, scheme=TEL_SCHEME, formatted=False),
                    contact=contact.uuid,
                    urn=unicode(contact_urn),
                    values=json.dumps(values),
                    steps=json.dumps(steps),
                    time=json_time)

        if not action:
            action = 'POST'

        webhook_event = WebHookEvent.objects.create(org=org,
                                                    event=FLOW,
                                                    channel=channel,
                                                    data=json.dumps(data),
                                                    try_count=1,
                                                    action=action,
                                                    resthook=resthook,
                                                    created_by=api_user,
                                                    modified_by=api_user)

        status_code = -1
        message = "None"
        body = None

        # webhook events fire immediately since we need the results back
        try:
            # only send webhooks when we are configured to, otherwise fail
            if not settings.SEND_WEBHOOKS:
                raise Exception("!! Skipping WebHook send, SEND_WEBHOOKS set to False")

            # no url, bail!
            if not webhook_url:
                raise Exception("No webhook_url specified, skipping send")

            # some hosts deny generic user agents, use Temba as our user agent
            if action == 'GET':
                response = requests.get(webhook_url, headers=TEMBA_HEADERS, timeout=10)
            else:
                response = requests.post(webhook_url, data=data, headers=TEMBA_HEADERS, timeout=10)

            response_text = response.text
            body = response.text
            status_code = response.status_code

            if response.status_code == 200 or response.status_code == 201:
                try:
                    response_json = json.loads(response_text)

                    # only update if we got a valid JSON dictionary or list
                    if not isinstance(response_json, dict) and not isinstance(response_json, list):
                        raise ValueError("Response must be a JSON dictionary or list, ignoring response.")

                    run.update_fields(response_json)
                    message = "Webhook called successfully."
                except ValueError as e:
                    message = "Response must be a JSON dictionary, ignoring response."

                webhook_event.status = COMPLETE
            else:
                webhook_event.status = FAILED
                message = "Got non 200 response (%d) from webhook." % response.status_code
                raise Exception("Got non 200 response (%d) from webhook." % response.status_code)

        except Exception as e:
            import traceback
            traceback.print_exc()

            webhook_event.status = FAILED
            message = "Error calling webhook: %s" % unicode(e)

        finally:
            webhook_event.save()

            # make sure our message isn't too long
            if message:
                message = message[:255]

            result = WebHookResult.objects.create(event=webhook_event,
                                                  url=webhook_url,
                                                  status_code=status_code,
                                                  body=body,
                                                  message=message,
                                                  data=urlencode(data, doseq=True),
                                                  created_by=api_user,
                                                  modified_by=api_user)

            # if this is a test contact, add an entry to our action log
            if run.contact.is_test:
                from temba.flows.models import ActionLog
                log_txt = "Triggered <a href='%s' target='_log'>webhook event</a> - %d" % (reverse('api.log_read', args=[webhook_event.pk]), status_code)
                ActionLog.create(run, log_txt, safe=True)

        return result
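
Note how values and steps are lists of dicts that are serialized with json.dumps and sent as plain string fields inside an otherwise form-encoded POST. A minimal sketch of that pattern; the URL and field names are placeholders, not rapidpro's actual payload:

import json
import requests

steps = [{'node': '1234', 'arrived_on': '2016-01-01T00:00:00Z', 'value': '42'}]
payload = {
    'flow': 7,
    'contact': 'abc-def',
    # nested structures travel as JSON strings inside the form-encoded body
    'steps': json.dumps(steps),
}
# requests form-encodes the dict; the receiver is expected to json.loads() 'steps'
response = requests.post('https://example.com/webhook', data=payload, timeout=10)
print(response.status_code)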

Example 8

Project: reviewboard
Source File: test_github.py
View license
    def test_get_change(self):
        """Testing GitHub get_change implementation"""
        commit_sha = '1c44b461cebe5874a857c51a4a13a849a4d1e52d'
        parent_sha = '44568f7d33647d286691517e6325fea5c7a21d5e'
        tree_sha = '56e25e58380daf9b4dfe35677ae6043fe1743922'

        commits_api_response = json.dumps([
            {
                'commit': {
                    'author': {'name': 'David Trowbridge'},
                    'committer': {'date': '2013-06-25T23:31:22Z'},
                    'message': 'Move .clearfix to defs.less',
                },
                'sha': commit_sha,
                'parents': [{'sha': parent_sha}],
            },
        ])

        compare_api_response = json.dumps({
            'base_commit': {
                'commit': {
                    'tree': {'sha': tree_sha},
                },
            },
            'files': [
                {
                    'sha': '4344b3ad41b171ea606e88e9665c34cca602affb',
                    'filename': 'reviewboard/static/rb/css/defs.less',
                    'status': 'modified',
                    'patch': dedent("""\
                        @@ -182,4 +182,23 @@
                         }


                        +/* Add a rule for clearing floats, */
                        +.clearfix {
                        +  display: inline-block;
                        +
                        +  &:after {
                        +    clear: both;
                        +    content: \".\";
                        +    display: block;
                        +    height: 0;
                        +    visibility: hidden;
                        +  }
                        +}
                        +
                        +/* Hides from IE-mac \\*/
                        +* html .clearfix {height: 1%;}
                        +.clearfix {display: block;}
                        +/* End hide from IE-mac */
                        +
                        +
                         // vim: set et ts=2 sw=2:"""),
                },
                {
                    'sha': '8e3129277b018b169cb8d13771433fbcd165a17c',
                    'filename': 'reviewboard/static/rb/css/reviews.less',
                    'status': 'modified',
                    'patch': dedent("""\
                        @@ -1311,24 +1311,6 @@
                           .border-radius(8px);
                         }

                        -/* Add a rule for clearing floats, */
                        -.clearfix {
                        -  display: inline-block;
                        -
                        -  &:after {
                        -    clear: both;
                        -    content: \".\";
                        -    display: block;
                        -    height: 0;
                        -    visibility: hidden;
                        -  }
                        -}
                        -
                        -/* Hides from IE-mac \\*/
                        -* html .clearfix {height: 1%;}
                        -.clearfix {display: block;}
                        -/* End hide from IE-mac */
                        -

                         /****************************************************
                          * Issue Summary"""),
                },
            ]
        })

        trees_api_response = json.dumps({
            'tree': [
                {
                    'path': 'reviewboard/static/rb/css/defs.less',
                    'sha': '830a40c3197223c6a0abb3355ea48891a1857bfd',
                },
                {
                    'path': 'reviewboard/static/rb/css/reviews.less',
                    'sha': '535cd2c4211038d1bb8ab6beaed504e0db9d7e62',
                },
            ],
        })

        # This has to be a list to avoid python's hinky treatment of scope of
        # variables assigned within a closure.
        step = [1]

        def _http_get(service, url, *args, **kwargs):
            parsed = urlparse(url)
            if parsed.path == '/repos/myuser/myrepo/commits':
                self.assertEqual(step[0], 1)
                step[0] += 1

                query = parsed.query.split('&')
                self.assertIn(('sha=%s' % commit_sha), query)

                return commits_api_response, None
            elif parsed.path.startswith('/repos/myuser/myrepo/compare/'):
                self.assertEqual(step[0], 2)
                step[0] += 1

                revs = parsed.path.split('/')[-1].split('...')
                self.assertEqual(revs[0], parent_sha)
                self.assertEqual(revs[1], commit_sha)

                return compare_api_response, None
            elif parsed.path.startswith('/repos/myuser/myrepo/git/trees/'):
                self.assertEqual(step[0], 3)
                step[0] += 1

                self.assertEqual(parsed.path.split('/')[-1], tree_sha)

                return trees_api_response, None
            else:
                print(parsed)
                self.fail('Got an unexpected GET request')

        account = self._get_hosting_account()
        account.data['authorization'] = {'token': 'abc123'}

        service = account.service
        self.spy_on(service.client.http_get, call_fake=_http_get)

        repository = Repository(hosting_account=account)
        repository.extra_data = {
            'repository_plan': 'public',
            'github_public_repo_name': 'myrepo',
        }

        change = service.get_change(repository, commit_sha)

        self.assertTrue(service.client.http_get.called)

        self.assertEqual(change.message, 'Move .clearfix to defs.less')
        self.assertEqual(md5(change.diff.encode('utf-8')).hexdigest(),
                         '0dd1bde0a60c0a7bb92c27b50f51fcb6')
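
Here json.dumps builds canned GitHub API responses that a faked http_get hands back to the code under test. A minimal sketch of the same testing pattern with illustrative names, independent of Review Board's hosting service classes:

import json

canned_commits = json.dumps([
    {'sha': 'abc123', 'commit': {'message': 'Initial commit'}},
])

def fake_http_get(url, *args, **kwargs):
    # stand-in for the real HTTP layer: return (body, headers) like the spied client above
    return canned_commits, None

def latest_commit_message(http_get, repo):
    body, _headers = http_get('/repos/%s/commits' % repo)
    return json.loads(body)[0]['commit']['message']

assert latest_commit_message(fake_http_get, 'myuser/myrepo') == 'Initial commit'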

Example 9

Project: pybossa
Source File: test_category_api.py
View license
    @with_context
    def test_category_post(self):
        """Test API Category creation and auth"""
        admin = UserFactory.create()
        user = UserFactory.create()
        name = u'Category'
        category = dict(
            name=name,
            short_name='category',
            description=u'description')
        data = json.dumps(category)
        # no api-key
        url = '/api/category'
        res = self.app.post(url, data=data)
        err = json.loads(res.data)
        err_msg = 'Should not be allowed to create'
        assert res.status_code == 401, err_msg
        assert err['action'] == 'POST', err_msg
        assert err['exception_cls'] == 'Unauthorized', err_msg

        # now a real user but not admin
        res = self.app.post(url + '?api_key=' + user.api_key, data=data)
        err = json.loads(res.data)
        err_msg = 'Should not be allowed to create'
        assert res.status_code == 403, err_msg
        assert err['action'] == 'POST', err_msg
        assert err['exception_cls'] == 'Forbidden', err_msg

        # now as an admin
        res = self.app.post(url + '?api_key=' + admin.api_key,
                            data=data)
        err = json.loads(res.data)
        err_msg = 'Admin should be able to create a Category'
        assert res.status_code == 200, err_msg
        cat = project_repo.get_category_by(short_name=category['short_name'])
        assert err['id'] == cat.id, err_msg
        assert err['name'] == category['name'], err_msg
        assert err['short_name'] == category['short_name'], err_msg
        assert err['description'] == category['description'], err_msg

        # test re-create should fail
        res = self.app.post(url + '?api_key=' + admin.api_key,
                            data=data)
        err = json.loads(res.data)
        assert res.status_code == 415, err
        assert err['status'] == 'failed', err
        assert err['action'] == 'POST', err
        assert err['exception_cls'] == "DBIntegrityError", err

        # test create with non-allowed fields should fail
        data = dict(name='fail', short_name='fail', wrong=15)
        res = self.app.post(url + '?api_key=' + admin.api_key,
                            data=data)
        err = json.loads(res.data)
        err_msg = "ValueError exception should be raised"
        assert res.status_code == 415, err
        assert err['action'] == 'POST', err
        assert err['status'] == 'failed', err
        assert err['exception_cls'] == "ValueError", err_msg
        # Now with a JSON object but not valid
        data = json.dumps(data)
        res = self.app.post(url + '?api_key=' + user.api_key,
                            data=data)
        err = json.loads(res.data)
        err_msg = "TypeError exception should be raised"
        assert err['action'] == 'POST', err_msg
        assert err['status'] == 'failed', err_msg
        assert err['exception_cls'] == "TypeError", err_msg
        assert res.status_code == 415, err_msg

        # test update
        data = {'name': 'My New Title'}
        datajson = json.dumps(data)
        ## anonymous
        res = self.app.put(url + '/%s' % cat.id,
                           data=data)
        error_msg = 'Anonymous should not be allowed to update'
        assert_equal(res.status, '401 UNAUTHORIZED', error_msg)
        error = json.loads(res.data)
        assert error['status'] == 'failed', error
        assert error['action'] == 'PUT', error
        assert error['exception_cls'] == 'Unauthorized', error

        ### real user but not allowed as not admin!
        url = '/api/category/%s?api_key=%s' % (cat.id, user.api_key)
        res = self.app.put(url, data=datajson)
        error_msg = 'Should not be able to update projects of others'
        assert_equal(res.status, '403 FORBIDDEN', error_msg)
        error = json.loads(res.data)
        assert error['status'] == 'failed', error
        assert error['action'] == 'PUT', error
        assert error['exception_cls'] == 'Forbidden', error

        # Now as an admin
        res = self.app.put('/api/category/%s?api_key=%s' % (cat.id, admin.api_key),
                           data=datajson)
        assert_equal(res.status, '200 OK', res.data)
        out2 = project_repo.get_category(cat.id)
        assert_equal(out2.name, data['name'])
        out = json.loads(res.data)
        assert out.get('status') is None, error
        assert out.get('id') == cat.id, error

        # With fake data
        data['algo'] = 13
        datajson = json.dumps(data)
        res = self.app.put('/api/category/%s?api_key=%s' % (cat.id, admin.api_key),
                           data=datajson)
        err = json.loads(res.data)
        assert res.status_code == 415, err
        assert err['status'] == 'failed', err
        assert err['action'] == 'PUT', err
        assert err['exception_cls'] == 'TypeError', err

        # With not JSON data
        datajson = data
        res = self.app.put('/api/category/%s?api_key=%s' % (cat.id, admin.api_key),
                           data=datajson)
        err = json.loads(res.data)
        assert res.status_code == 415, err
        assert err['status'] == 'failed', err
        assert err['action'] == 'PUT', err
        assert err['exception_cls'] == 'ValueError', err

        # With wrong args in the URL
        data = dict(
            name='Category3',
            short_name='category3',
            description=u'description3')

        datajson = json.dumps(data)
        res = self.app.put('/api/category/%s?api_key=%s&search=select1' % (cat.id, admin.api_key),
                           data=datajson)
        err = json.loads(res.data)
        assert res.status_code == 415, err
        assert err['status'] == 'failed', err
        assert err['action'] == 'PUT', err
        assert err['exception_cls'] == 'AttributeError', err

        # test delete
        ## anonymous
        res = self.app.delete(url + '/%s' % cat.id, data=data)
        error_msg = 'Anonymous should not be allowed to delete'
        assert_equal(res.status, '401 UNAUTHORIZED', error_msg)
        error = json.loads(res.data)
        assert error['status'] == 'failed', error
        assert error['action'] == 'DELETE', error
        assert error['target'] == 'category', error
        ### real user but not admin
        url = '/api/category/%s?api_key=%s' % (cat.id, user.api_key)
        res = self.app.delete(url, data=datajson)
        error_msg = 'Should not be able to delete apps of others'
        assert_equal(res.status, '403 FORBIDDEN', error_msg)
        error = json.loads(res.data)
        assert error['status'] == 'failed', error
        assert error['action'] == 'DELETE', error
        assert error['target'] == 'category', error

        # As admin
        url = '/api/category/%s?api_key=%s' % (cat.id, admin.api_key)
        res = self.app.delete(url, data=datajson)

        assert_equal(res.status, '204 NO CONTENT', res.data)

        # delete a category that does not exist
        url = '/api/category/5000?api_key=%s' % admin.api_key
        res = self.app.delete(url, data=datajson)
        error = json.loads(res.data)
        assert res.status_code == 404, error
        assert error['status'] == 'failed', error
        assert error['action'] == 'DELETE', error
        assert error['target'] == 'category', error
        assert error['exception_cls'] == 'NotFound', error

        # delete a category that does not exist
        url = '/api/category/?api_key=%s' % admin.api_key
        res = self.app.delete(url, data=datajson)
        assert res.status_code == 404, error
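
The recurring pattern in this test is json.dumps on the request side of the test client and json.loads on the response side. A minimal self-contained sketch against a toy Flask endpoint; the endpoint and its behaviour are illustrative stand-ins, not PyBossa's API:

import json
from flask import Flask, request

app = Flask(__name__)

@app.route('/api/category', methods=['POST'])
def create_category():
    # toy endpoint: echo the posted JSON back as the "created" object
    data = json.loads(request.data)
    data['id'] = 1
    return json.dumps(data), 200, {'Content-Type': 'application/json'}

client = app.test_client()
res = client.post('/api/category?api_key=fake',
                  data=json.dumps({'name': 'Category', 'short_name': 'category'}))
assert res.status_code == 200
assert json.loads(res.data)['short_name'] == 'category'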

Example 10

Project: AutoO_with_django
Source File: views.py
View license
def admin(request, module="", action=""):
  def logRecord(r_action='', r_table='', r_data=''):
    record_name = request.session['user_name']
    record_time = time.strftime('%Y-%m-%d %H:%M',time.localtime())
    data_str = ''
    for temp in sorted(r_data):
      data_str += str(temp)+'='+r_data[temp]+' '
    log_op = Logrecord(user=record_name, time=record_time, action=r_action, table=r_table, data=data_str)
    log_op.save()
  
  if 'loginToken' in request.session and request.session['user_admin']:
    if module == 'project':
      if action != '':
        if action == "add" and request.session['user_sys']:
          proj_alias = request.POST['alias_name']
          proj_name = request.POST['name']
          proj_remark = request.POST['remark']
          obj = Project(alias=proj_alias, name=proj_name, remark=proj_remark)
          obj.save()
          logRecord(action, 'project', request.POST)
          result = {}
          result['code'] = 1
          result['message'] = "Added successfully"
        elif action == "del":
          pid = request.POST['id']
          if request.session['user_sys'] or pid == request.session['user_proj']:
            Project.objects.get(id=pid).delete()
            logRecord(action, 'project', request.POST)
            result = {}
            result['code'] = 1
            result['message'] = "Deleted successfully"
          else:
            result = {}
            result['code'] = 1
            result['message'] = "Not permitted to delete"
        elif action.isdigit():
          return HttpResponse(action)
        else:         
          result = {}
          result['code'] = 0
          result['message'] = "Operation failed"
        return HttpResponse(json.dumps(result), content_type="application/json")
      else:
        projects = Project.objects.all().order_by('alias')
        rsp = render(request, 'admin_project.html', locals())
        return HttpResponse(rsp)
    elif module == 'servers':
      if action != '':
        if action == "getinfo":
          if 'ipaddr' in request.POST and request.POST['ipaddr'] != "":
            ipaddr = request.POST['ipaddr']
            data_trans = request.POST['data_trans']
            snmpsession = netsnmp.Session(Version = 2, DestHost = ipaddr, Timeout=50000, ErrorStr='Cannot connect')
            oid_name = netsnmp.Varbind('.1.3.6.1.2.1.1.5.0')  # hostname OID
            bind_name = netsnmp.VarList(oid_name)

            oid_cpu = netsnmp.Varbind('.1.3.6.1.2.1.25.3.3.1.2')  # CPU load OID
            bind_cpu = netsnmp.VarList(oid_cpu)

            oid_mem = netsnmp.Varbind('.1.3.6.1.2.1.25.2.2.0')  # total memory OID
            bind_mem = netsnmp.VarList(oid_mem)

            oid_ip = netsnmp.Varbind('.1.3.6.1.2.1.4.20.1.1')  # IP address OID
            bind_ip = netsnmp.VarList(oid_ip)

            snmp_name = snmpsession.get(bind_name)
            snmp_cpu = snmpsession.walk(bind_cpu)
            snmp_mem = snmpsession.get(bind_mem)
            snmp_ip = snmpsession.walk(bind_ip)

            result_name = snmp_name[0]

            i = 0
            for data in snmp_cpu:
              if data != '':
                i += 1

            result_cpu = i

            result_mem = int(snmp_mem[0])/1024
            if data_trans == "1":
              if result_mem <= 512:
                result_mem = 512
              else:
                result_mem = (result_mem/1024 + 1) * 1024

            result_ip = []
            i = 0
            for data in snmp_ip:
              if data != '127.0.0.1':
                oid = '.1.3.6.1.2.1.4.20.1.2.' + str(data)
                oid_ip_index = netsnmp.Varbind(oid)
                bind_ip_index = netsnmp.VarList(oid_ip_index)
                snmp_ip_index = snmpsession.get(bind_ip_index)
                ip_index = snmp_ip_index[0]
                oid_ip_name = netsnmp.Varbind('.1.3.6.1.2.1.2.2.1.2.' + str(ip_index))
                bind_ip_name = netsnmp.VarList(oid_ip_name)
                snmp_ip_name = snmpsession.get(bind_ip_name)
                ip_name = snmp_ip_name[0]
                result_ip.append(data)

            result = {}
            result['code'] = 0
            result['host'] = result_name
            result['cpu'] = result_cpu
            result['mem'] = result_mem
            result['ip'] = "|".join(result_ip)
            result['ip_disp'] = result['ip'].replace("|","\r\n")
            return HttpResponse(json.dumps(result), content_type="application/json")
          else:
            projects = Project.objects.all().order_by('alias')
            rsp = render(request, 'user_index.html', locals())
            return HttpResponse(rsp)
        elif action == "add":
          asset_pid = request.POST['pid']
          asset_ip = request.POST['ip']
          asset_hostname = request.POST['hostname']
          asset_cpu = request.POST['cpu']
          asset_mem = request.POST['mem']
          asset_disk = request.POST['disk']
          if request.POST['type'] == '1':
            asset_type = "Physical machine"
          elif request.POST['type'] == '2':
            asset_type = "Virtual machine"
          else:
            asset_type = "Other"
          asset_srv = request.POST['srv']
          asset_desc = request.POST['desc']
          pid = Project.objects.get(id=asset_pid)
          if request.session['user_sys'] or asset_pid == request.session['user_proj']:
            obj = Server(pid=pid,
                         ip=asset_ip,
                         hostname=asset_hostname,
                         cpu=asset_cpu,
                         mem=asset_mem,
                         disk=asset_disk,
                         type=asset_type,
                         srv=asset_srv,
                         desc=asset_desc,
                         status='1')
            obj.save()
            logRecord(action, 'asset', request.POST)
            result = {}
            result['code'] = 1
            result['message'] = "Added successfully"
          else:
            result = {}
            result['code'] = 0
            result['message'] = "Unauthorized operation"
        elif action == "del":
          id = request.POST['id']
          del_data = Server.objects.filter(id=id)
          del_id = str(del_data[0].pid.id)
          if request.session['user_sys'] or del_id == request.session['user_proj']:
            try:
              Server.objects.get(id=id).delete()
              logRecord(action, 'asset', request.POST)
              result = {}
              result['code'] = 1
              result['message'] = "Deleted successfully"
            except:
              result = {}
              result['code'] = 0
              result['message'] = "Error while deleting"
          else:
              result = {}
              result['code'] = 0
              result['message'] = "Unauthorized operation"
        elif action.isdigit():
          if 'update' in request.GET:
            asset_pid = request.POST['pid']
            asset_ip = request.POST['ip']
            asset_hostname = request.POST['hostname']
            asset_cpu = request.POST['cpu']
            asset_mem = request.POST['mem']
            asset_disk = request.POST['disk']
            if request.POST['type'] == '1':
              asset_type = "Physical machine"
            elif request.POST['type'] == '2':
              asset_type = "Virtual machine"
            else:
              asset_type = "Other"
            asset_srv = request.POST['srv']
            asset_desc = request.POST['desc']
            asset_status = request.POST['status']
            asset_cacti = request.POST['cacti']
            asset_nagios = request.POST['nagios']
            if request.session['user_sys'] or asset_pid == request.session['user_proj']:
              try:
                Server.objects.filter(id=action).update(ip=asset_ip,
                  hostname=asset_hostname,
                  cpu=asset_cpu,
                  mem=asset_mem,
                  disk=asset_disk,
                  type=asset_type,
                  srv=asset_srv,
                  desc=asset_desc,
                  status = asset_status,
                  cacti = asset_cacti,
                  nagios = asset_nagios
                )
                logRecord('update', 'asset', request.POST)
                result = {}
                result['code'] = 1
                result['message'] = "Asset updated successfully"
              except:
                result = {}
                result['code'] = 0
                result['message'] = "Asset update not submitted"
            else:
                result = {}
                result['code'] = 0
                result['message'] = "Unauthorized operation"
            return HttpResponse(json.dumps(result), content_type="application/json")
          else:
            try:
              queryset = Server.objects.select_related().get(id=action)
            except:
              return HttpResponse('Invalid ID')
            rsp = render(request, 'admin_display_server.html', locals())
            return HttpResponse(rsp)
        else:
          result = {}
          result['code'] = 0
          result['message'] = "Operation failed"
        return HttpResponse(json.dumps(result), content_type="application/json")
      else:
        if 'page' in request.GET and request.GET['page'].isdigit():
          page_get = int(float(request.GET['page']))
        else:
          page_get = 1
        if 'query' in request.GET:
          if request.session['query'].has_key('op'):
            request.session['query'] = {'op':'True'}
            if 'pid' in request.POST:
              if request.POST['pid'] == "":
                try:
                  request.session['query_data'].pop('pid')
                except:
                  a = 1
              else:
                request.session['query_data']['pid'] = request.POST['pid']
            if 'ip' in request.POST:
              if request.POST['ip'] == "":
                try:
                  request.session['query_data'].pop('ip__contains')
                except:
                  a = 1
              else:
                request.session['query_data']['ip__contains'] = request.POST['ip']
            if 'srv' in request.POST:
              if request.POST['srv'] == "":
                try:
                  request.session['query_data'].pop('srv')
                except:
                  a = 1
              else:
                request.session['query_data']['srv'] = request.POST['srv']
            if 'status' in request.POST:
              if request.POST['status'] == "":
                try:
                  request.session['query_data'].pop('status')
                except:
                  a = 1
              else:
                request.session['query_data']['status'] = request.POST['status']
            exper = request.session['query_data']
          else:
            request.session['query'] = {'op':'true'}          
            if 'pid' in request.POST and request.POST['pid'] != "":
              request.session['query_data']['pid'] = request.POST['pid']
            if 'ip' in request.POST and request.POST['ip'] != "":
              request.session['query_data']['ip__contains'] = request.POST['ip']
            if 'srv' in request.POST and request.POST['srv'] != "":
              request.session['query_data']['srv'] = request.POST['srv']
            if 'status' in request.POST and request.POST['status'] != "":
              request.session['query_data']['status'] = request.POST['status']
            exper = request.session['query_data']
          
          servers = Server.objects.filter(**exper)
        else:
          request.session['query'] = {}
          request.session['query_data'] = {}
          if request.session['user_sys']:
            servers = Server.objects.select_related().all()
          else:
            servers = Server.objects.select_related().filter(pid=request.session['user_proj'])
        pagin = Paginator(servers,20)
        page_max = pagin.num_pages
        if page_get > page_max:
          page = page_max
        else:
          page = page_get
        data_list = pagin.page(page)
        if 'query' in request.GET:
          url_fp = "?query&page=1"
          if page <= 1:
            url_pp = "?query&page=1"
          else:
            url_pp = "?query&page=" + str((page - 1))
          if page >= page_max:
            url_np = "?query&page=" + str(page_max)
          else:
            url_np = "?query&page=" + str((page + 1))
          url_lp = "?query&page=" + str(page_max)
        else:
          url_fp = "?page=1"
          if page <= 1:
            url_pp = "?page=1"
          else:
            url_pp = "?page=" + str((page - 1))
          if page >= page_max:
            url_np = "?page=" + str(page_max)
          else:
            url_np = "?page=" + str((page + 1))
          url_lp = "?page=" + str(page_max)
        if request.session['user_sys']:
          projects = Project.objects.all().order_by('alias')
        else:
          projects = Project.objects.filter(id=request.session['user_proj']).order_by('alias')
        rsp = render(request, 'admin_servers.html', locals())
        return HttpResponse(rsp)
    else:
      rsp = render(request, 'admin_base.html', locals())
      return HttpResponse(rsp)
  else:
    return HttpResponseRedirect('/')
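
Every branch of this view ends the same way: build a small result dict and return HttpResponse(json.dumps(result), content_type="application/json"). A minimal sketch of that pattern in isolation, with an illustrative view name; on Django 1.7+ JsonResponse(result) wraps the same json.dumps call:

import json

from django.http import HttpResponse

def add_project(request):
    result = {}
    if request.method == 'POST' and request.POST.get('name'):
        result['code'] = 1
        result['message'] = "Added successfully"
    else:
        result['code'] = 0
        result['message'] = "Operation failed"
    return HttpResponse(json.dumps(result), content_type="application/json")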

Example 11

Project: tilequeue
Source File: wof.py
View license
def create_neighbourhood_from_json(json_data, neighbourhood_meta):

    def failure(reason):
        return NeighbourhoodFailure(
            neighbourhood_meta.wof_id, reason, json.dumps(json_data))

    if not isinstance(json_data, dict):
        return failure('Unexpected json')

    props = json_data.get('properties')
    if props is None or not isinstance(props, dict):
        return failure('Missing properties')

    superseded_by = props.get('wof:superseded_by')
    # these often show up as empty lists, so we do a truthy test
    # instead of explicitly checking for None
    if superseded_by:
        return NeighbourhoodFailure(
            neighbourhood_meta.wof_id,
            'superseded_by: %s' % superseded_by,
            json.dumps(json_data), superseded=True)

    geometry = json_data.get('geometry')
    if geometry is None:
        return failure('Missing geometry')

    try:
        shape_lnglat = shapely.geometry.shape(geometry)
    except:
        return failure('Unexpected geometry')

    shape_mercator = shapely.ops.transform(
        reproject_lnglat_to_mercator, shape_lnglat)

    # ignore any features that are marked as funky
    is_funky = props.get('mz:is_funky')
    if is_funky is not None:
        try:
            is_funky = int(is_funky)
        except ValueError:
            return failure('Unexpected mz:is_funky value %s' % is_funky)
        if is_funky != 0:
            return NeighbourhoodFailure(
                neighbourhood_meta.wof_id,
                'mz:is_funky value is not 0: %s' % is_funky,
                json.dumps(json_data), funky=True)

    wof_id = props.get('wof:id')
    if wof_id is None:
        return failure('Missing wof:id')
    try:
        wof_id = int(wof_id)
    except ValueError:
        return failure('wof_id is not an int: %s' % wof_id)

    name = props.get('wof:name')
    if name is None:
        return failure('Missing name')

    n_photos = props.get('misc:photo_sum')
    if n_photos is not None:
        try:
            n_photos = int(n_photos)
        except ValueError:
            return failure('misc:photo_sum is not an int: %s' % n_photos)

    label_lat = props.get('lbl:latitude')
    label_lng = props.get('lbl:longitude')
    if label_lat is None or label_lng is None:
        # first, try to fall back to geom:* when lbl:* is missing. we'd prefer
        # to have lbl:*, but it's better to have _something_ than nothing.
        label_lat = props.get('geom:latitude')
        label_lng = props.get('geom:longitude')

        if label_lat is None or label_lng is None:
            return failure('Missing lbl:latitude or lbl:longitude and ' +
                           'geom:latitude or geom:longitude')

    try:
        label_lat = float(label_lat)
        label_lng = float(label_lng)
    except ValueError:
        return failure('lbl:latitude or lbl:longitude not float')

    label_merc_x, label_merc_y = reproject_lnglat_to_mercator(
        label_lng, label_lat)
    label_position = shapely.geometry.Point(label_merc_x, label_merc_y)

    placetype = props.get('wof:placetype')
    if placetype is None:
        return failure('Missing wof:placetype')

    default_min_zoom = 15
    default_max_zoom = 16

    min_zoom = props.get('mz:min_zoom')
    if min_zoom is None:
        min_zoom = default_min_zoom
    else:
        try:
            min_zoom = float(min_zoom)
        except ValueError:
            return failure('mz:min_zoom not float: %s' % min_zoom)
    max_zoom = props.get('mz:max_zoom')
    if max_zoom is None:
        max_zoom = default_max_zoom
    else:
        try:
            max_zoom = float(max_zoom)
        except ValueError:
            return failure('mz:max_zoom not float: %s' % max_zoom)

    is_landuse_aoi = props.get('mz:is_landuse_aoi')
    if is_landuse_aoi is not None:
        try:
            is_landuse_aoi = int(is_landuse_aoi)
        except ValueError:
            return failure('is_landuse_aoi not int: %s' % is_landuse_aoi)
        is_landuse_aoi = is_landuse_aoi != 0

    if shape_mercator.type in ('Polygon', 'MultiPolygon'):
        area = int(shape_mercator.area)
    else:
        area = None

    # for the purposes of display, we only care about the times when something
    # should first start to be shown, and the time when it should stop
    # showing.
    edtf_inception = _normalize_edtf(props.get('edtf:inception'))
    edtf_cessation = _normalize_edtf(props.get('edtf:cessation'))
    edtf_deprecated = _normalize_edtf(props.get('edtf:deprecated'))

    # the 'edtf:inception' property gives us approximately the former and we
    # take the earliest date it could mean. the 'edtf:cessation' and
    # 'edtf:deprecated' would both stop the item showing, so we take the
    # earliest of each's latest possible date.
    inception = edtf_inception.date_earliest()
    cessation = min(edtf_cessation.date_latest(),
                    edtf_deprecated.date_latest())

    # grab any names in other languages
    lang_suffix_size = len('_preferred')
    l10n_names = {}
    for k, v in props.iteritems():
        if not v:
            continue
        if not k.startswith('name:') or not k.endswith('_preferred'):
            continue
        if isinstance(v, list):
            v = v[0]
        lang = k[:-lang_suffix_size]
        l10n_names[lang] = v
    if not l10n_names:
        l10n_names = None

    neighbourhood = Neighbourhood(
        wof_id, placetype, name, neighbourhood_meta.hash, label_position,
        shape_mercator, n_photos, area, min_zoom, max_zoom, is_landuse_aoi,
        inception, cessation, l10n_names)
    return neighbourhood
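
In this example, json.dumps appears in the error path: when a property fails validation, the raw WOF properties are serialized and attached to the failure record so the offending feature can be inspected later. A minimal, self-contained sketch of that pattern (make_failure and its fields are illustrative stand-ins, not the project's actual failure() helper):

import json

def make_failure(wof_id, reason, props):
    # Keep the original properties as a JSON string so the failed record can be
    # inspected or replayed later; sort_keys makes the dump stable for diffing.
    return {
        'wof_id': wof_id,
        'reason': reason,
        'raw_properties': json.dumps(props, sort_keys=True),
    }

print(make_failure(85887467, 'Missing wof:name', {'wof:id': 85887467}))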

Example 12

Project: django-easyextjs4
Source File: __init__.py
View license
    @staticmethod
    def Request(pRequest, pRootProject = None, pRootUrl = None, pIndex = 'index.html', pAlias = None):
        lRet = HttpResponse(status = 400, content = '<h1>HTTP 400 - Bad Request</h1>The request cannot be fulfilled due to bad syntax.')

        # Remove http://<host name>:<port>/ from pRootUrl
        pRootUrl = urlparse(pRootUrl).path

        # Validate the URL.
        lPath = urlparse(pRequest.path).path
        lMatch = re.match('^/[0-9a-zA-Z\.\/\-\_]*$', lPath) 
    
        if lMatch is None:
            raise ExtJSError('You have some invalid characters on the Url: "%s"' % lPath)
    
        if pRootUrl is not None:
            # If a URL root is specified, check that the URL begins with it
            if lPath.find(pRootUrl) != 0:
                raise ExtJSError('Invalid root for the Url: "%s"' % pRootUrl)
            # Remove url root from the path
            lPath = lPath[len(pRootUrl):]
        else:
            # If no URL root is specified, skip this validation
            pRootUrl = ''
    
        # Detect whether the URL is a request for the javascript wrapper
        lUrlApis = re.search('^(\w*\.js)$', lPath)
        
        if lUrlApis is not None:
            lUrlApi = lUrlApis.group(1)
            
            if lUrlApi in Ext.__URLSAPI:
                # URL found => Generate javascript wrapper
                lRemoteAPI = dict()
                for lClass in Ext.__URLSAPI[lUrlApi]:
                    lExt = lClass.__ExtJS
                    
                    if lExt.Url not in lRemoteAPI:
                        # Collect all class with the same Url
                        lRemoteAPI[lExt.Url] = dict()
                        lCurrent = lRemoteAPI[lExt.Url]
                        if 'format' in pRequest.REQUEST and pRequest.REQUEST['format'] == 'json':
                            # 'descriptor' is needed for Sencha Architect to recognize your API
                            lCurrent['descriptor'] = lClass.__name__ + '.REMOTING_API'
                            if lExt.NameSpace is not None:
                                lCurrent['descriptor'] = lExt.NameSpace + '.' + lCurrent['descriptor']
                        lCurrent['url'] = lExt.Url
                        lCurrent['type'] = 'remoting'
                        if lExt.Id is not None:
                            lCurrent['id'] = lExt.Id
                        if lExt.NameSpace is not None:
                            lCurrent['namespace'] = lExt.NameSpace
                        lCurrent['actions'] = dict()
                        lAction = lCurrent['actions']
                    
                    if len(lExt.StaticMethods) > 0:
                        # Define a class as an Action with a list of functions
                        lRemoteMethods = list()
                        for lMethod in lExt.StaticMethods:
                            lMethodInfo = lExt.StaticMethods[lMethod]
                            if not lMethodInfo.NameParams:
                                lMethodExt = dict(name = lMethod, len = len(lMethodInfo.Args))
                            else:
                                # Typed parameters are not supported on Python 2.7 or lower.
                                if sys.version_info < (3, 0):
                                    lMethodExt = dict(name = lMethod, params = lMethodInfo.Args)
                                else:
                                    if not lMethodInfo.TypeParams:
                                        lMethodExt = dict(name = lMethod, params = lMethodInfo.Args)
                                    else:
                                        # TODO: support this feature for python 3.x
                                        # Must return something like this :
                                        #    "params": [{
                                        #    "name": "path",
                                        #    "type": "string",
                                        #    "pos": 0
                                        #    },
                                        raise ExtJSError('Type for parameters not supported yet')
                            lRemoteMethods.append(lMethodExt)
                        # Each class is defined as an 'Action'
                        lAction[lClass.__name__] = lRemoteMethods
                    for lKey in lExt.StaticEvents:
                        # Each event is defined as a provider for ExtJS, even if it shares the same namespace.
                        lEvent = lExt.StaticEvents[lKey]
                        lRemote = dict()
                        lRemote['url'] = lEvent.Url
                        lRemote['type'] = 'polling'
                        if lEvent.Id is not None:
                            lRemote['id'] = lEvent.Id
                        if lEvent.NameSpace is not None:
                            lRemote['namespace'] = lEvent.NameSpace
                        if lEvent.Params is not None:
                            lRemote['baseParams'] = lEvent.Params
                        if lEvent.Interval is not None:
                            lRemote['interval'] = lEvent.Interval
                        lRemoteAPI[lEvent.Url] = lRemote

                if len(lRemoteAPI) > 0:    
                    lJsonRemoteAPI = json.dumps(lRemoteAPI.values(),default=ExtJsonHandler)
                    
                    lNameSpace = lClass.__name__
                    if lExt.NameSpace is not None:
                        lNameSpace = lExt.NameSpace + '.' + lNameSpace
                    
                    if 'format' in pRequest.REQUEST and pRequest.REQUEST['format'] == 'json':
                        # Define JSON format for Sencha Architect
                        lContent = 'Ext.require(\'Ext.direct.*\');Ext.namespace(\''+ lNameSpace +'\');'+ lNameSpace + '.REMOTING_API = ' + lJsonRemoteAPI[1:len(lJsonRemoteAPI)-1] + ';'
                    else:
                        # Otherwise return a javascript file. Each javascript file must be included from index.html like this:
                        # <script type="text/javascript" src="api.js"></script>
                        # Your API is then declared to ExtJS automatically and made available in your app.js.
                        lContent = 'Ext.require(\'Ext.direct.*\');Ext.namespace(\''+ lNameSpace +'\');Ext.onReady( function() { Ext.direct.Manager.addProvider(' + lJsonRemoteAPI[1:len(lJsonRemoteAPI)-1] + ');});'
                    lRet = HttpResponse(content = lContent, mimetype='application/javascript')
        else:
            # Detect whether the URL is an RPC or a poll request
            lUrlRPCsorPolls = re.search('^(\w*)$', lPath)
        
            if lUrlRPCsorPolls is not None:
                lUrl = lUrlRPCsorPolls.group(1)
                
                if lUrl in Ext.__URLSRPC:
                    
                    # URL recognized as an RPC
                    
                    # Extract data from the raw POST body; pRequest.POST cannot be trusted here
                    lReceiveRPCs = json.loads(pRequest.body)
                    
                    # Force to be a list of dict
                    if type(lReceiveRPCs) == dict:
                        lReceiveRPCs = [lReceiveRPCs]
                    
                    # Extract URL 
                    lClassesForUrl = Ext.__URLSRPC[lUrl]

                    # Initialize content
                    lContent = list()

                    for lReceiveRPC in lReceiveRPCs:
                        # Execute each RPC request
                        
                        lRcvClass = lReceiveRPC['action']
                        lRcvMethod = lReceiveRPC['method']

                        # Build the API name
                        lMethodName = lRcvClass + '.' + lRcvMethod
                            
                        # Prepare answer
                        lAnswerRPC = dict(type = 'rpc', tid = lReceiveRPC['tid'], action = lRcvClass, method = lRcvMethod)
                        
                        # Prepare exception
                        lExceptionData = dict(Url = lUrl, Type = 'rpc', Tid = lReceiveRPC['tid'], Name = lMethodName )
                        lException = dict(type = 'exception', data = lExceptionData, message = None)
                        
                        if lRcvClass in lClassesForUrl:
                            
                            # RPC class found for this URL
                            lClass = lClassesForUrl[lRcvClass]
                            lExt = lClass.__ExtJS
                            
                            if lRcvMethod in lExt.StaticMethods:
                                
                                # Method found
                                lMethod = lExt.StaticMethods[lRcvMethod]
                                
                                # Name used for exception message
                                if lExt.NameSpace is not None:
                                    lMethodName = lExt.NameSpace + '.' + lMethodName 

                                # Add the Id if it is defined
                                if lExt.Id is not None:
                                    lExceptionData['Id'] = lExt.Id
                                
                                # Extract the arguments
                                lArgs = lReceiveRPC['data']
                                
                                # Validate the arguments and call the method
                                if lArgs is None:
                                    if len(lMethod.Args) != 0:
                                        lException['message'] = '%s numbers of parameters invalid' % lMethodName
                                    else:
                                        try:
                                            # Call method with no parameter
                                            if lMethod.Session is None:
                                                lRetMethod = lMethod.Call()
                                            else:
                                                lRetMethod = lMethod.Call(pSession = lMethod.Session(pRequest))
                                            if lRetMethod is not None:
                                                lAnswerRPC['result'] = lRetMethod
                                        except Exception as lErr:
                                            lException['message'] = '%s: %s' % (lMethodName, str(lErr)) 
                                elif type(lArgs) == list:
                                    if len(lArgs) > len(lMethod.Args):
                                        lException['message'] = '%s numbers of parameters invalid' % lMethodName
                                    else:
                                        try:
                                            # Call method with list of parameters  
                                            if lMethod.Session is None:
                                                lRetMethod = lMethod.Call(*lArgs)
                                            else:
                                                lArgs.insert(0,lMethod.Session(pRequest))
                                                lRetMethod = lMethod.Call(*lArgs)
                                            if lRetMethod is not None:
                                                lAnswerRPC['result'] = lRetMethod
                                        except Exception as lErr:
                                            lException['message'] = '%s: %s' % (lMethodName, str(lErr)) 
                                elif type(lArgs) == dict:
                                    if not lMethod.NameParams:
                                        lException['message'] = '%s does not support named parameters' % lMethodName
                                    else: 
                                        if len(lArgs.keys()) > len(lMethod.Args):
                                            lException['message'] = '%s numbers of parameters invalid' % lMethodName
                                        else:
                                            lInvalidParam = list()
                                            for lParam in lArgs:
                                                if lParam not in lMethod.Args:
                                                    lInvalidParam.append(lParam)
                                            if len(lInvalidParam) > 0:
                                                lException['message'] = '%s: Parameters unknown -> %s' % (lMethodName, ",".join(lInvalidParam))
                                            else:
                                                try:
                                                    # Call method with named parameters
                                                    if lMethod.Session is None:
                                                        lRetMethod = lMethod.Call(**lArgs)
                                                    else:
                                                        lArgs['pSession'] = lMethod.Session(pRequest)
                                                        lRetMethod = lMethod.Call(**lArgs)
                                                    if lRetMethod is not None:
                                                        lAnswerRPC['result'] = lRetMethod
                                                except Exception as lErr:
                                                    lException['message'] = '%s: %s' % (lMethodName, str(lErr))
                            else:
                                lException['message'] = '%s: API not found' % lMethodName
                                
                        else:
                            lException['message'] = '%s: API not found' % lMethodName
                                
                        if lException['message'] is not None:
                            lContent.append(lException)    
                        else:
                            lContent.append(lAnswerRPC)
                            
                    if len(lContent) > 0:
                        if len(lContent) == 1:
                            lRet = HttpResponse(content = json.dumps(lContent[0],default=ExtJsonHandler), mimetype='application/json')
                        else:
                            lRet = HttpResponse(content = json.dumps(lContent,default=ExtJsonHandler), mimetype='application/json')
                                
                elif lUrl in Ext.__URLSEVT:

                    # URL recognized as a poll request. A poll request is handled by an Ext.StaticEvent.
                    
                    lClass = Ext.__URLSEVT[lUrl]
                    lExt = lClass.__ExtJS
                    
                    lEvent = lExt.StaticEvents[lUrl]
                    
                    # Define the name of the event that will be fired on ExtJS
                    if lEvent.EventName is not None:
                        # Use the name specified with the @Ext.StaticEvent parameter pEventName
                        lEventName = lEvent.Name
                    else: 
                        # The name is built by concatenating the namespace, the class name and the event name
                        lEventName = lEvent.Name
                        
                        if len(lEvent.ClassName) != 0:
                            lEventName = lEvent.ClassName + '.' + lEvent.Name
                        
                        if len(lEvent.NameSpace) != 0:
                            lEventName = lEvent.NameSpace + '.' + lEventName
                        
                    # Prepare event answer
                    lAnswerEvent = dict(type = 'event', name = lEventName, data = None)
                    
                    # Prepare exception 
                    #  The exception data has the same structure as for a method, except that there is no Tid information; it is set to -1.
                    lExceptionData = dict(Url = lUrl, Type = 'event', Tid = -1, Name = lEventName )
                    lException = dict(type = 'exception', data = lExceptionData, message = None)
                    
                    # Add the Id if it is defined. With the id, your javascript code can do something like this:
                    # Ext.direct.Manager.on('exception', function(e) {
                    #     if (e.data.Type == 'event') {
                    #         lPoll = Ext.direct.Manager.getProvider(e.data.Id);
                    #         lPoll.disconnect();
                    #     }
                    # });
                    if lEvent.Id is not None:
                        lAnswerEvent['Id'] = lEvent.Id
                        lExceptionData['Id'] = lEvent.Id
                    
                    # Extract the parameters. For an event, the parameters are in the POST data.
                    # If a key has no value, it means we received a plain list of parameters directly under the key.
                    # If the key has a value, it means we have named parameters.
                    lArgs = None
                    for lKey in pRequest.POST:
                        if pRequest.POST[lKey] == '':
                            if lArgs is None:
                                lArgs = list()
                            lArgs.extend(lKey.split(','))
                        else:
                            if lArgs is None:
                                lArgs = dict()
                            lArgs[lKey] = pRequest.POST[lKey] 
                    
                    # Validate the arguments and call the event
                    if lArgs is None:
                        if len(lEvent.Args) != 0:
                            lException['message'] = '%s numbers of parameters invalid' % lEventName
                        else:
                            try:
                                # Call event with no parameter
                                if lEvent.Session is None:
                                    lRetEvt = lEvent.Call()
                                else:
                                    lRetEvt = lEvent.Call(pSession = lEvent.Session(pRequest))
                                if lRetEvt is not None:
                                    lAnswerEvent['data'] = lRetEvt
                            except Exception as lErr:
                                lException['message'] = '%s: %s' % (lEventName, str(lErr)) 
                    elif type(lArgs) == list:
                        if len(lArgs) > len(lEvent.Args):
                            lException['message'] = '%s numbers of parameters invalid' % lEventName
                        else:
                            try:
                                # Call event with list of parameters  
                                if lEvent.Session is None:
                                    lRetEvt = lEvent.Call(*lArgs)
                                else:
                                    lArgs.insert(0,lEvent.Session(pRequest))
                                    lRetEvt = lEvent.Call(*lArgs)
                                if lRetEvt is not None:
                                    lAnswerEvent['data'] = lRetEvt
                            except Exception as lErr:
                                lException['message'] = '%s: %s' % (lEventName, str(lErr)) 
                    elif type(lArgs) == dict:
                        if len(lArgs.keys()) > len(lEvent.Args):
                            lException['message'] = '%s numbers of parameters invalid' % lEventName
                        else:
                            lInvalidParam = list()
                            for lParam in lArgs:
                                if lParam not in lEvent.Args:
                                    lInvalidParam.append(lParam)
                            if len(lInvalidParam) > 0:
                                lException['message'] = '%s: Parameters unknown -> %s' % (lEventName, ",".join(lInvalidParam))
                            else:
                                try:
                                    # Call event with named parameters
                                    if lEvent.Session is None:
                                        lRetEvt = lEvent.Call(**lArgs)
                                    else:
                                        lArgs['pSession'] = lEvent.Session(pRequest)
                                        lRetEvt = lEvent.Call(**lArgs)
                                    if lRetEvt is not None:
                                        lAnswerEvent['data'] = lRetEvt
                                except Exception as lErr:
                                    lException['message'] = '%s: %s' % (lEventName, str(lErr)) 
                                
                    if lException['message'] is not None:
                        lContent = lException    
                    else:
                        lContent = lAnswerEvent
                    
                    lRet = HttpResponse(content = json.dumps(lContent,default=ExtJsonHandler), mimetype='application/json')
    
        if lRet.status_code != 200:
            # The URL is not a request for the API, an RPC or an event; it is just a request for a file
            if pRootProject is not None:
                if not os.path.exists(pRootProject):
                    raise ExtJSError('Invalid root for the project: "%s"' % pRootProject)
            else:
                # If the project root is not specified, use the current working directory
                pRootProject = os.getcwd()
        
            # If the path is empty, try to find and load index.html (or the file specified with pIndex)
            if len(lPath) == 0:
                lPath = pIndex
    
            # Rebuild the path to validate it
            lPath = os.path.normpath("/".join([pRootProject,lPath]))
            lFileName, lFileExt = os.path.splitext(lPath)
           
            # Check that the path exists and that the extension is valid
            if not os.path.exists(lPath):
                raise ExtJSError('File not found: "%s"' % lPath)
            else:
                if lFileExt not in ['.html','.css','.js','.png','.jpg','.gif','.json','.xml']:
                    raise ExtJSError('File extension is invalid: "%s"' % lFileExt)
                else:
                    try:
                        lMime = mimetypes.types_map[lFileExt]
                    except Exception as lException:
                        if isinstance(lException,KeyError) and lFileExt == '.json':
                            lMime = 'text/json'
                        else:
                            raise lException
                    # TODO: Manage a cache file
                    lFile = open(lPath)
                    lContent = lFile.read()
                    lFile.close()
                    lRet = HttpResponse(content = lContent, mimetype = lMime)
              
        return lRet
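
The example above serializes its responses with json.dumps(..., default=ExtJsonHandler), relying on the default hook to handle objects the json module cannot encode natively. A minimal sketch of how such a hook behaves (ext_json_handler is an illustrative stand-in, not the project's ExtJsonHandler):

import json
import datetime

def ext_json_handler(obj):
    # json.dumps calls this hook for any object it cannot serialize itself.
    if isinstance(obj, (datetime.date, datetime.datetime)):
        return obj.isoformat()
    raise TypeError('Object of type %s is not JSON serializable' % type(obj).__name__)

payload = {'type': 'rpc', 'tid': 1, 'result': {'created': datetime.datetime(2016, 5, 1)}}
print(json.dumps(payload, default=ext_json_handler))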

Example 13

Project: zenodo
Source File: test_api_editflow.py
View license
@patch('invenio_pidstore.providers.datacite.DataCiteMDSClient')
def test_edit_flow(datacite_mock, api_client, db, es, location,
                   json_auth_headers, deposit_url, get_json, auth_headers,
                   json_headers, license_record, communities, resolver):
    """Test simple flow using REST API."""
    headers = json_auth_headers
    client = api_client

    test_data = dict(
        metadata=dict(
            upload_type='presentation',
            title='Test title',
            creators=[
                dict(name='Doe, John', affiliation='Atlantis'),
                dict(name='Smith, Jane', affiliation='Atlantis')
            ],
            description='Test Description',
            publication_date='2013-05-08',
            access_right='open',
            license='CC0-1.0',
            communities=[{'identifier': 'c1'}, {'identifier': 'c3'}],
        )
    )

    # Create deposit
    response = client.post(
        deposit_url, data=json.dumps(test_data), headers=headers)
    data = get_json(response, code=201)

    # Get identifier and links
    current_search.flush_and_refresh(index='deposits')
    links = data['links']

    # Upload 3 files
    for i in range(3):
        f = 'test{0}.txt'.format(i)
        response = client.post(
            links['files'],
            data=dict(file=(BytesIO(b'ctx'), f), name=f),
            headers=auth_headers,
        )
        assert response.status_code == 201, i

    # Update metadata
    newdata = dict(metadata=data['metadata'])
    newdata['metadata']['title'] = 'Updated title'
    resdata = get_json(client.put(
        links['self'], data=json.dumps(newdata), headers=headers
    ), code=200)

    # Publish deposition
    response = client.post(links['publish'], headers=auth_headers)
    data = get_json(response, code=202)
    record_id = data['record_id']

    assert PersistentIdentifier.query.filter_by(pid_type='depid').count() == 1
    recid_pid = PersistentIdentifier.query.filter_by(pid_type='recid').one()
    doi_pid = PersistentIdentifier.get(
        pid_type='doi', pid_value='10.5072/zenodo.1')
    assert doi_pid.status == PIDStatus.RESERVED
    # This task (datacite_register) would normally be executed asynchronously
    datacite_register(recid_pid.pid_value, recid_pid.object_uuid)
    assert doi_pid.status == PIDStatus.REGISTERED

    # Make sure it was registered properly in datacite
    assert datacite_mock().metadata_post.call_count == 1
    datacite_mock().doi_post.assert_called_once_with(
         '10.5072/zenodo.1', 'https://zenodo.org/record/1')

    # Does the record exist?
    current_search.flush_and_refresh(index='records')

    preedit_data = get_json(client.get(
        url_for('invenio_records_rest.recid_item', pid_value=record_id),
        headers=json_headers,
    ), code=200)
    expected_doi = '10.5072/zenodo.{0}'.format(record_id)
    assert preedit_data['doi'] == expected_doi
    # - community c3 got auto-accepted (owned by deposit user)
    assert preedit_data['metadata']['communities'] == [{'identifier': 'c3'}]

    # Are files downloadable by everyone (open)?
    assert len(preedit_data['files']) == 3
    download_url = preedit_data['files'][0]['links']['download']
    assert client.get(download_url).status_code == 200

    # Edit record - can now be done immediately after.
    response = client.post(links['edit'], headers=auth_headers)
    assert response.status_code == 201

    # Edit - 2nd time is invalid.
    response = client.post(links['edit'], headers=auth_headers)
    assert response.status_code == 403  # FIXME 400

    # Get data
    data = get_json(client.get(links['self'], headers=auth_headers), code=200)

    # Not allowed to delete
    assert client.delete(
        links['self'], headers=auth_headers).status_code == 403

    # Update metadata
    data = dict(metadata=data['metadata'])
    data['metadata'].update(dict(
        title='New title',
        access_right='closed',
        creators=[
            dict(name="Smith, Jane", affiliation="Atlantis"),
            dict(name="Doe, John", affiliation="Atlantis"),
        ],
        communities=[
            {'identifier': 'c1'}
        ]
    ))

    resdata = get_json(client.put(
        links['self'], data=json.dumps(data), headers=headers
    ), code=200)
    assert resdata['title'] == 'New title'
    assert resdata['metadata']['title'] == 'New title'

    # Try to change DOI
    data['metadata']['doi'] = '10.1234/foo'
    data = get_json(client.put(
        links['self'], data=json.dumps(data), headers=headers
    ), code=400)

    # Approve community
    c = Community.get('c1')
    _, record = resolver.resolve(str(record_id))
    c.accept_record(record)
    record.commit()
    db.session.commit()

    # Get the record to confirm that both communities are now visible
    assert get_json(client.get(
        url_for('invenio_records_rest.recid_item', pid_value=record_id),
        headers=json_headers,
    ), code=200)['metadata']['communities'] == [
        {'identifier': 'c1'},
        {'identifier': 'c3'},
    ]

    # Publish
    response = client.post(links['publish'], headers=auth_headers)
    data = get_json(response, code=202)
    current_search.flush_and_refresh(index='records')

    # - is record still accessible?
    postedit_data = get_json(client.get(
        url_for('invenio_records_rest.recid_item', pid_value=record_id),
        headers=json_headers,
    ), code=200)
    # - sanity checks
    assert postedit_data['doi'] == expected_doi
    assert postedit_data['record_id'] == record_id

    # - files should no longer be downloadable (closed access)
    # - download_url worked before edit, so make sure it doesn't work now.
    assert 'files' not in postedit_data
    assert client.get(download_url).status_code == 404

    # - c3 was removed, so only c1 one should be visible now
    assert postedit_data['metadata']['communities'] == [
        {'identifier': 'c1'},
    ]

    # Edit
    data = get_json(client.post(links['edit'], headers=auth_headers), code=201)

    # Update
    data = dict(metadata=data['metadata'])
    data['metadata'].update(dict(title='Will be discarded'))
    resdata = get_json(client.put(
        links['self'], data=json.dumps(data), headers=headers
    ), code=200)

    # Discard
    data = get_json(
        client.post(links['discard'], headers=auth_headers),
        code=201)

    # Get and assert metadata
    data = get_json(client.get(links['self'], headers=auth_headers), code=200)
    assert data['title'] == postedit_data['title']
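
Throughout this test, request bodies are built by passing a dict through json.dumps before handing it to the test client, while responses are decoded back via the get_json fixture. A minimal sketch of the request side, assuming a Flask/Werkzeug-style test client (create_deposit is an illustrative helper, not part of the Zenodo test suite):

import json

def create_deposit(client, deposit_url, metadata, headers):
    # The test client sends the body verbatim, so it must already be a JSON
    # string; the headers are expected to include Content-Type: application/json.
    return client.post(
        deposit_url,
        data=json.dumps({'metadata': metadata}),
        headers=headers,
    )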

Example 14

Project: bitex
Source File: main.py
View license
    def on_message(self, raw_message):
        if self.honey_pot_connection:
            self.application.log('INFO', "HONEY_POT", raw_message )

        if self.trade_client is None or not self.trade_client.isConnected():
            return

        self.last_message_datetime.append(datetime.now())
        message_time_last_second = self.last_message_datetime[-1] - timedelta(seconds=1)
        for x in xrange(0, len(self.last_message_datetime)):
            if self.last_message_datetime[x] > message_time_last_second:
                self.last_message_datetime = self.last_message_datetime[x:]
                break
        if len(self.last_message_datetime) > 15:  # higher than 15 messages per second
            self.application.log("ERROR",
                                 "TOO_MANY_MESSAGES",
                                 "Exceed 15 messages per second. [ip=" + self.remote_ip + ",'" + raw_message + "']")
            self.write_message(
                '{"MsgType":"ERROR", "Description":"Too many messages per second", "Detail": "16 messages in the last second"}')
            self.application.unregister_connection(self)
            self.trade_client.close()
            self.close()
            return

        try:
            req_msg = JsonMessage(raw_message)
        except InvalidMessageException as e:
            self.write_message(
                '{"MsgType":"ERROR", "Description":"Invalid message", "Detail": "' +
                str(e) +
                '"}')
            self.application.unregister_connection(self)
            self.trade_client.close()
            self.close()
            return

        req_msg.set('RemoteIP' ,self.remote_ip)

        if req_msg.isUserRequest():
            if req_msg.has('Password'):
                raw_message = raw_message.replace(req_msg.get('Password'), '*')
            if req_msg.has('NewPassword'):
                raw_message = raw_message.replace(req_msg.get('NewPassword'), '*')
            self.application.log('IN', self.trade_client.connection_id ,raw_message )



        if req_msg.isTestRequest() or req_msg.isHeartbeat():
            dt = datetime.now()
            response_msg = {
                'MsgType'           : '0',
                'TestReqID'         : req_msg.get('TestReqID'),
                'ServerTimestamp'   : int(mktime(dt.timetuple()) + dt.microsecond/1000.0 )
            }

            sendTime = req_msg.get('SendTime')
            if sendTime:
                response_msg['SendTime'] = sendTime


            self.write_message(str(json.dumps(response_msg, cls=JsonEncoder)))
            return


        if req_msg.isTradeHistoryRequest():  # Trade History request
            self.on_trade_history_request(req_msg)
            return

        if req_msg.isMarketDataRequest():  # Market Data Request
            self.on_market_data_request(req_msg)

            if not self.trade_client.isConnected():
                self.application.log('DEBUG', self.trade_client.connection_id, 'not self.trade_client.isConnected()' )
                self.application.unregister_connection(self)
                self.trade_client.close()
                self.close()
            return

        if req_msg.isSecurityStatusRequest():
            self.on_security_status_request(req_msg)
            return

        if req_msg.isDepositRequest():
            if not req_msg.get('DepositMethodID') and not req_msg.get('DepositID'):

                currency = req_msg.get('Currency')

                secret = uuid.uuid4().hex
                callback_url = self.application.options.callback_url + secret

                hot_wallet  = self.get_broker_wallet('hot', currency)
                cold_wallet = self.get_broker_wallet('cold', currency)
                if not hot_wallet and not cold_wallet:
                    return

                if not hot_wallet and cold_wallet:
                    dest_wallet = cold_wallet
                elif hot_wallet and not cold_wallet:
                    dest_wallet = hot_wallet
                else:
                    # 62.5% of all deposits go to the cold wallet, and 37.5% go to the hot wallet
                    dest_wallet = hot_wallet
                    if secret[0] in ('0','1','2','3','4','5','6','7','8','9'):
                        dest_wallet = cold_wallet

                if not dest_wallet:
                    return

                parameters = urllib.urlencode({
                    'method': 'create',
                    'address': dest_wallet,
                    'callback': callback_url,
                    'currency': currency
                })

                try:
                    url_payment_processor = self.application.options.url_payment_processor + '?' + parameters
                    self.application.log('DEBUG', self.trade_client.connection_id, "invoking..."  + url_payment_processor )
                    response = urllib2.urlopen(url_payment_processor)
                    data = json.load(response)
                    self.application.log('DEBUG', self.trade_client.connection_id, str(data) )

                    req_msg.set('InputAddress', data['input_address'])
                    req_msg.set('Destination', data['destination'])
                    req_msg.set('Secret', secret)
                except urllib2.HTTPError as e:
                    out_message = json.dumps({
                      'MsgType': 'ERROR',
                      'ReqID': req_msg.get('DepositReqID'),
                      'Description': 'Blockchain.info is not available at this moment, please try again within few minutes',
                      'Detail': str(e)
                    })
                    self.write_message(out_message)
                    return
                except Exception as e:
                    out_message = json.dumps({
                      'MsgType': 'ERROR',
                      'ReqID': req_msg.get('DepositReqID'),
                      'Description': 'Error retrieving a new deposit address from Blockchain.info. Please, try again',
                      'Detail': str(e)
                    })
                    self.write_message(out_message)
                    return

        try:
            resp_message = self.trade_client.sendMessage(req_msg)
            if resp_message:
                self.write_message(resp_message.raw_message)

            if resp_message and resp_message.isUserResponse():
                self.user_response = resp_message
                if self.is_user_logged():
                    self.application.log('LOGIN_OK', self.trade_client.connection_id, raw_message )
                    #TODO: Request open order list 
                    #self.trade_client.

 
                else:
                    self.application.log('LOGIN_FAILED', self.trade_client.connection_id, raw_message )


            if not self.trade_client.isConnected():
                self.application.log('DEBUG', self.trade_client.connection_id, 'not self.trade_client.isConnected()' )
                self.application.unregister_connection(self)
                self.trade_client.close()
                self.close()
        except TradeClientException as e:
            exception_message = {
                'MsgType': 'ERROR',
                'Description': 'Invalid message',
                'Detail': str(e)
            }
            self.write_message(json.dumps(exception_message))
            self.application.unregister_connection(self)
            self.trade_client.close()
            self.close()
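
Here the heartbeat response is serialized with json.dumps(response_msg, cls=JsonEncoder), i.e. a custom encoder class rather than a default function. A minimal sketch of that approach (this JsonEncoder is an illustrative stand-in for the project's encoder, which may serialize datetimes differently):

import json
import datetime

class JsonEncoder(json.JSONEncoder):
    # json.dumps(..., cls=JsonEncoder) routes any type it cannot serialize
    # through default(); returning a value here lets encoding continue.
    def default(self, obj):
        if isinstance(obj, datetime.datetime):
            return obj.isoformat()
        return json.JSONEncoder.default(self, obj)

print(json.dumps({'MsgType': '0', 'TestReqID': 1,
                  'ServerTimestamp': datetime.datetime(2016, 5, 1, 12, 30)},
                 cls=JsonEncoder))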

Example 15

View license
def update_foreign_fields(old_id, node):
    dry_run = '--dry' in sys.argv
    logger.info('* Updating ForeignFields for node {}->{}'.format(old_id, node))

    bns_owner = list(database['boxnodesettings'].find({'owner': old_id}))
    if bns_owner:
        logger.info('** Updating {} BoxNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in bns_owner]))
        for doc in bns_owner:
            database['boxnodesettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'owner': node._id
                }}
            )

    bus_og = list(database['boxusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}}))
    if bus_og:
        logger.info('** Updating {} BoxUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in bus_og]))
        for doc in bus_og:
            og = doc['oauth_grants']
            og[node._id] = og.pop(old_id)
            database['boxusersettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'oauth_grants': og
                }}
            )
    advns_o = list(database['addondataversenodesettings'].find({'owner': old_id}))        
    if advns_o:
        logger.info('** Updating {} AddonDataverseNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in advns_o]))
        for doc in advns_o:
            database['addondataversenodesettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'owner': node._id
                }}
            )

    advus_og = list(database['addondataverseusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}}))
    if advus_og:
        logger.info('** Updating {} AddonDataverseUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in advus_og]))
        for doc in advus_og:
            og = doc['oauth_grants']
            og[node._id] = og.pop(old_id)
            database['addondataverseusersettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'oauth_grants': og
                }}
            )

    dbns_o = list(database['dropboxnodesettings'].find({'owner': old_id}))
    if dbns_o:
        logger.info('** Updating {} DropboxNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in dbns_o]))
        for doc in dbns_o:
            database['dropboxnodesettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'owner': node._id
                }}
            )

    dbus_og = list(database['dropboxusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}}))
    if dbus_og:
        logger.info('** Updating {} DropboxUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in dbus_og]))
        for doc in dbus_og:
            og = doc['oauth_grants']
            og[node._id] = og.pop(old_id)
            database['dropboxusersettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'oauth_grants': og
                }}
            )

    afsns_o = list(database['addonfigsharenodesettings'].find({'owner': old_id}))
    if afsns_o:
        logger.info('** Updating {} AddonFigShareNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in afsns_o]))
        for doc in afsns_o:
            database['addonfigsharenodesettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'owner': node._id
                }}
            )

    ## Figshare has no oauth_grants

    fwns_o = list(database['forwardnodesettings'].find({'owner': old_id}))
    if fwns_o:
        logger.info('** Updating {} ForwardNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in fwns_o]))
        for doc in fwns_o:
            database['forwardnodesettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'owner': node._id
                }}
            )

    ghns_o = list(database['githubnodesettings'].find({'owner': old_id}))
    if ghns_o:
        logger.info('** Updating {} GithubNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in ghns_o]))
        for doc in ghns_o:
            database['githubnodesettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'owner': node._id
                }}
            )

    ghus_og = list(database['githubusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}}))
    if ghus_og:
        logger.info('** Updating {} GithubUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in ghus_og]))
        for doc in ghus_og:
            og = doc['oauth_grants']
            og[node._id] = og.pop(old_id)
            database['githubusersettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'oauth_grants': og
                }}
            )

    gdns_o = list(database['googledrivenodesettings'].find({'owner': old_id}))
    if gdns_o:
        logger.info('** Updating {} GoogleDriveNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in gdns_o]))
        for doc in gdns_o:
            database['googledrivenodesettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'owner': node._id
                }}
            )

    gdus_og = list(database['googledriveusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}}))
    if gdus_og:
        logger.info('** Updating {} GoogleDriveUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in gdus_og]))
        for doc in gdus_og:
            og = doc['oauth_grants']
            og[node._id] = og.pop(old_id)
            database['googledriveusersettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'oauth_grants': og
                }}
            )

    mns_o = list(database['mendeleynodesettings'].find({'owner': old_id}))
    if mns_o:
        logger.info('** Updating {} MendeleyNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in mns_o]))
        for doc in mns_o:
            database['mendeleynodesettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'owner': node._id
                }}
            )

    mus_og = list(database['mendeleyusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}}))
    if mus_og:
        logger.info('** Updating {} MendeleyUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in mus_og]))
        for doc in mus_og:
            og = doc['oauth_grants']
            og[node._id] = og.pop(old_id)
            database['mendeleyusersettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'oauth_grants': og
                }}
            )

    osfsns_o = list(database['osfstoragenodesettings'].find({'owner': old_id}))
    if osfsns_o:
        logger.info('** Updating {} OsfStorageNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in osfsns_o]))
        for doc in osfsns_o:
            database['osfstoragenodesettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'owner': node._id
                }}
            )

    ocns_o = list(database['addonowncloudnodesettings'].find({'owner': old_id}))
    if ocns_o:
        logger.info('** Updating {} AddonOwnCloudNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in ocns_o]))
        for doc in ocns_o:
            database['addonowncloudnodesettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'owner': node._id
                }}
            )

    ocus_og = list(database['addonowncloudusersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}}))
    if ocus_og:
        logger.info('** Updating {} AddonOwnCloudUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in ocus_og]))
        for doc in ocus_og:
            og = doc['oauth_grants']
            og[node._id] = og.pop(old_id)
            database['addonowncloudusersettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'oauth_grants': og
                }}
            )

    s3ns_o = list(database['s3nodesettings'].find({'owner': old_id}))
    if s3ns_o:
        logger.info('** Updating {} s3NodeSettings (owner) {}'.format(old_id, [d['_id'] for d in s3ns_o]))
        for doc in s3ns_o:
            database['s3nodesettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'owner': node._id
                }}
            )

    s3us_og = list(database['s3usersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}}))
    if s3us_og:
        logger.info('** Updating {} S3UserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in s3us_og]))
        for doc in s3us_og:
            og = doc['oauth_grants']
            og[node._id] = og.pop(old_id)
            database['s3usersettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'oauth_grants': og
                }}
            )

    awns_o = list(database['addonwikinodesettings'].find({'owner': old_id}))
    if awns_o:
        logger.info('** Updating {} AddonWikiNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in awns_o]))
        for doc in awns_o:
            database['addonwikinodesettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'owner': node._id
                }}
            )

    nwp_n = list(database['nodewikipage'].find({'node': old_id}))
    if nwp_n:
        logger.info('** Updating {} NodeWikiPage (node) {}'.format(old_id, [d['_id'] for d in nwp_n]))
        for doc in nwp_n:
            database['nodewikipage'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'node': node._id
                }}
            )

    zns_o = list(database['zoteronodesettings'].find({'owner': old_id}))
    if zns_o:
        logger.info('** Updating {} ZoteroNodeSettings (owner) {}'.format(old_id, [d['_id'] for d in zns_o]))
        for doc in zns_o:
            database['zoteronodesettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'owner': node._id
                }}
            )

    zus_og = list(database['zoterousersettings'].find({'oauth_grants.{}'.format(old_id): {'$ne': None}}))
    if zus_og:
        logger.info('** Updating {} ZoteroUserSettings (oauth_grants) {}'.format(old_id, [d['_id'] for d in zus_og]))
        for doc in zus_og:
            og = doc['oauth_grants']
            og[node._id] = og.pop(old_id)
            database['zoterousersettings'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'oauth_grants': og
                }}
            )

    aj_sn = list(database['archivejob'].find({'src_node': old_id}))
    if aj_sn:
        logger.info('** Updating {} ArchiveJobs (src_node) {}'.format(old_id, [d['_id'] for d in aj_sn]))
        for doc in aj_sn:
            database['archivejob'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'src_node': node._id
                }}
            )

    tfn_n = list(database['trashedfilenode'].find({'node': old_id}))
    if tfn_n:
        logger.info('** Updating {} TrashedFileNodes (node) {}'.format(old_id, [d['_id'] for d in tfn_n]))
        for doc in tfn_n:
            del_on = doc.pop('deleted_on')  # Remove non-JSON-serializable datetime fields
            last_touch = doc.pop('last_touched')  
            hist_mods = [doc['history'][doc['history'].index(h)].pop('modified') for h in doc['history']]
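            # Round-trip the document through json.dumps / re.sub / json.loads so a
            # single regex substitution replaces every occurrence of the old id,
            # however deeply it is nested in the document.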
            replacement = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc)))
            for i, mod in enumerate(hist_mods):
                replacement['history'][i]['modified'] = mod
            database['trashedfilenode'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'node': replacement['node'],
                    'history': replacement['history']
                }}
            )

    sfn_n = list(database['storedfilenode'].find({'node': old_id}))
    if sfn_n:
        logger.info('** Updating {} StoredFileNodes (node) {}'.format(old_id, [d['_id'] for d in sfn_n]))
        for doc in sfn_n:
            doc.pop('last_touched')  # Remove non-JSON-serializable datetime fields
            hist_mods = [doc['history'][doc['history'].index(h)].pop('modified') for h in doc['history']]
            replacement = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc)))
            for i, mod in enumerate(hist_mods):
                replacement['history'][i]['modified'] = mod
            database['storedfilenode'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'node': replacement['node'],
                    'history': replacement['history']
                }}
            )

    com_n = list(database['comment'].find({'node': old_id}))
    if com_n:
        logger.info('** Updating {} Comments (node) {}'.format(old_id, [d['_id'] for d in com_n]))
        for doc in com_n:
            database['comment'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'node': node._id
                }}
            )

    com_t = list(database['comment'].find({'target': {'$in': [old_id]}}))
    if com_t:
        logger.info('** Updating {} Comments (target) {}'.format(old_id, [d['_id'] for d in com_t]))
        for doc in com_t:
            targ = doc['target']
            targ.insert(targ.index(old_id), node._id)
            targ.remove(old_id)
            database['comment'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'target': targ
                }}
            )

    com_t = list(database['comment'].find({'root_target': {'$in': [old_id]}}))
    if com_t:
        logger.info('** Updating {} Comments (root_target) {}'.format(old_id, [d['_id'] for d in com_t]))
        for doc in com_t:
            rtarg = doc['root_target']
            rtarg.insert(rtarg.index(old_id), node._id)
            rtarg.remove(old_id)
            database['comment'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'root_target': rtarg
                }}
            )

    nl_on = list(database['nodelog'].find({'original_node': old_id}))
    if nl_on:
        logger.info('** Updating {} NodeLogs (original_node) {}'.format(old_id, [d['_id'] for d in nl_on]))
        for doc in nl_on:
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'original_node': node._id
                }}
            )

    nl_n = list(database['nodelog'].find({'node': old_id}))
    if nl_n:
        logger.info('** Updating {} NodeLogs (node) {}'.format(old_id, [d['_id'] for d in nl_n]))
        for doc in nl_n:
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'node': node._id
                }}
            )

    nl_pac = list(database['nodelog'].find({'params.auth.callback_url': {'$regex': '/{}/'.format(old_id)}}))
    if nl_pac:
        logger.info('** Updating {} NodeLogs (params.auth.callback_url) {}'.format(old_id, [d['_id'] for d in nl_pac]))
        for doc in nl_pac:
            params = doc['params']
            params['auth']['callback_url'] = params['auth']['callback_url'].replace('{}/'.format(old_id), '{}/'.format(node._id))
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    nl_pn = list(database['nodelog'].find({'params.node': old_id}))
    if nl_pn:
        logger.info('** Updating {} NodeLogs (params.node) {}'.format(old_id, [d['_id'] for d in nl_pn]))
        for doc in nl_pn:
            params = doc['params']
            params['node'] = node._id
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    nl_ppar = list(database['nodelog'].find({'params.parent': old_id}))
    if nl_ppar:
        logger.info('** Updating {} NodeLogs (params.parent) {}'.format(old_id, [d['_id'] for d in nl_ppar]))
        for doc in nl_ppar:
            params = doc['params']
            params['parent'] = node._id
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    nl_ppro = list(database['nodelog'].find({'params.project': old_id}))
    if nl_ppro:
        logger.info('** Updating {} NodeLogs (params.project) {}'.format(old_id, [d['_id'] for d in nl_ppro]))
        for doc in nl_ppro:
            params = doc['params']
            params['project'] = node._id
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    nl_ppn = list(database['nodelog'].find({'params.parent_node': old_id}))
    if nl_ppn:
        logger.info('** Updating {} NodeLogs (params.parent_node) {}'.format(old_id, [d['_id'] for d in nl_ppn]))
        for doc in nl_ppn:
            params = doc['params']
            params['parent_node'] = node._id
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    nl_pdn = list(database['nodelog'].find({'params.destination.nid': old_id}))
    if nl_pdn:
        logger.info('** Updating {} NodeLogs (params.destination.nid) {}'.format(old_id, [d['_id'] for d in nl_pdn]))
        for doc in nl_pdn:
            params = doc['params']
            params['destination']['nid'] = node._id
            if params['destination'].get('url', None):
                params['destination']['url'] = params['destination']['url'].replace('{}/'.format(old_id), '{}/'.format(node._id))
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    nl_pdr = list(database['nodelog'].find({'params.destination.resource': old_id}))
    if nl_pdr:
        logger.info('** Updating {} NodeLogs (params.destination.resource) {}'.format(old_id, [d['_id'] for d in nl_pdr]))
        for doc in nl_pdr:
            params = doc['params']
            params['destination']['resource'] = node._id
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    nl_pdni = list(database['nodelog'].find({'params.destination.node._id': old_id}))
    if nl_pdni:
        logger.info('** Updating {} NodeLogs (params.destination.node._id) {}'.format(old_id, [d['_id'] for d in nl_pdni]))
        for doc in nl_pdni:
            params = doc['params']
            params['destination']['node']['_id'] = node._id
            if params['destination']['node'].get('url', None):
                params['destination']['node']['url'] = params['destination']['node']['url'].replace('{}/'.format(old_id), '{}/'.format(node._id))
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    nl_ppi = list(database['nodelog'].find({'params.pointer.id': old_id}))
    if nl_ppi:
        logger.info('** Updating {} NodeLogs (params.pointer.id) {}'.format(old_id, [d['_id'] for d in nl_ppi]))
        for doc in nl_ppi:
            params = doc['params']
            params['pointer']['id'] = node._id
            if params['pointer'].get('url', None):
                params['pointer']['url'] = params['pointer']['url'].replace('{}/'.format(old_id), '{}/'.format(node._id))
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    nl_psn = list(database['nodelog'].find({'params.source.nid': old_id}))
    if nl_psn:
        logger.info('** Updating {} NodeLogs (params.source.nid) {}'.format(old_id, [d['_id'] for d in nl_psn]))
        for doc in nl_psn:
            params = doc['params']
            params['source']['nid'] = node._id
            if params['source'].get('url', None):
                params['source']['url'] = params['source']['url'].replace('{}/'.format(old_id), '{}/'.format(node._id))
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    nl_psni = list(database['nodelog'].find({'params.source.node._id': old_id}))
    if nl_psni:
        logger.info('** Updating {} NodeLogs (params.source.node._id) {}'.format(old_id, [d['_id'] for d in nl_psni]))
        for doc in nl_psni:
            params = doc['params']
            params['source']['node']['_id'] = node._id
            if params['source']['node'].get('url', None):
                params['source']['node']['url'] = params['source']['node']['url'].replace('{}/'.format(old_id), '{}/'.format(node._id))
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    nl_psr = list(database['nodelog'].find({'params.source.resource': old_id}))
    if nl_psr:
        logger.info('** Updating {} NodeLogs (params.source.resource) {}'.format(old_id, [d['_id'] for d in nl_psr]))
        for doc in nl_psr:
            params = doc['params']
            params['source']['resource'] = node._id
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    nl_ptni = list(database['nodelog'].find({'params.template_node._id': old_id}))
    if nl_ptni:
        logger.info('** Updating {} NodeLogs (params.template_node._id) {}'.format(old_id, [d['_id'] for d in nl_ptni]))
        for doc in nl_ptni:
            params = doc['params']
            params['template_node']['_id'] = node._id
            if params['template_node'].get('url', None):
                params['template_node']['url'] = params['template_node']['url'].replace('{}/'.format(old_id), '{}/'.format(node._id))
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    nl_pud = list(database['nodelog'].find({'params.urls.download': {'$regex': '/{}/'.format(old_id)}}))
    if nl_pud:
        logger.info('** Updating {} NodeLogs (params.urls.download) {}'.format(old_id, [d['_id'] for d in nl_pud]))
        for doc in nl_pud:
            params = doc['params']
            params['urls']['download'] = params['urls']['download'].replace('{}/'.format(old_id), '{}/'.format(node._id))
            if params['urls'].get('view', None):
                params['urls']['view'] = params['urls']['view'].replace('{}/'.format(old_id), '{}/'.format(node._id))
            database['nodelog'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'params': params
                }}
            )

    ptr_n = list(database['pointer'].find({'node': old_id}))
    if ptr_n:
        logger.info('** Updating {} Pointers (node) {}'.format(old_id, [d['_id'] for d in ptr_n]))
        for doc in ptr_n:
            database['pointer'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'node': node._id
                }}
            )

    n_ff = list(database['node'].find({'forked_from': old_id}))
    if n_ff:
        logger.info('** Updating {} Nodes (forked_from) {}'.format(old_id, [d['_id'] for d in n_ff]))
        for doc in n_ff:
            database['node'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'forked_from': node._id
                }}
            )

    n_rf = list(database['node'].find({'registered_from': old_id}))
    if n_rf:
        logger.info('** Updating {} Nodes (registered_from) {}'.format(old_id, [d['_id'] for d in n_rf]))
        for doc in n_rf:
            database['node'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'registered_from': node._id
                }}
            )

    n_root = list(database['node'].find({'root': old_id}))
    if n_root:
        logger.info('** Updating {} Nodes (root) {}'.format(old_id, [d['_id'] for d in n_root]))
        for doc in n_root:
            database['node'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'root': node._id
                }}
            )

    n_par = list(database['node'].find({'parent': old_id}))
    if n_par:
        logger.info('** Updating {} Nodes (parent) {}'.format(old_id, [d['_id'] for d in n_par]))
        for doc in n_par:
            database['node'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'parent': node._id
                }}
            )

    n_cns = list(database['node'].find({'$where': 'if (this.child_node_subscriptions!==undefined){{var keys=Object.keys(this.child_node_subscriptions);for(var i=0;i<keys.length;i+=1){{if(this.child_node_subscriptions[keys[i]].indexOf("{}")!==-1){{return true}}}}}}return false;'.format(old_id)}))
    if n_cns:
        docs = list(n_cns)
        logger.info('** Updating {} Nodes (child_node_subscriptions) {}'.format(old_id, [d['_id'] for d in docs]))
        for doc in docs:
            if doc['_id'] in cns_dict_to_update:
                cns = cns_dict_to_update[doc['_id']]
            else:
                cns = doc['child_node_subscriptions']
            replacement = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(cns)))
            cns_dict_to_update[doc['_id']] = replacement
            database['node'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'child_node_subscriptions': replacement
                }}
            )

    nd_nl = list(database['notificationdigest'].find({'node_lineage': {'$in': [old_id]}}))
    if nd_nl:
        logger.info('** Updating {} NotificationDigest (node_lineage) {}'.format(old_id, [d['_id'] for d in nd_nl]))
        for doc in nd_nl:
            nl = doc['node_lineage']
            nl.insert(nl.index(old_id), node._id)
            nl.remove(old_id)
            if doc['message'].find('/{}/'.format(old_id)) != -1:  # avoid html regexes
                message = doc['message'].replace('/{}/'.format(old_id), '/{}/'.format(node._id))
                database['notificationdigest'].find_and_modify(
                    {'_id': doc['_id']},
                    {'$set':{
                        'message': message,
                        'node_lineage': nl
                    }}
                )
            else:
                database['notificationdigest'].find_and_modify(
                    {'_id': doc['_id']},
                    {'$set':{
                        'node_lineage': nl
                    }}
                )

    ns_i = list(database['notificationsubscription'].find({'_id': {'$regex': old_id}}))
    if ns_i:
        logger.info('** Updating {} NotificationSubscription (_id, owner) {}'.format(old_id, [d['_id'] for d in ns_i]))
        for doc in ns_i:
            replacement = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc)))
            new_id = replacement.pop('_id')
            database['notificationsubscription'].find_and_modify(
                {'_id': new_id},
                {'$set':replacement},
                upsert=True
            )
            database['notificationsubscription'].remove({'_id': doc['_id']})

    u_uc = list(database['user'].find({'unclaimed_records.{}'.format(old_id): {'$ne': None}}))
    if u_uc:
        logger.info('** Updating {} Users (unclaimed_records) {}'.format(old_id, [d['_id'] for d in u_uc]))
        for doc in u_uc:
            ucr = doc['unclaimed_records']
            ucr[node._id] = ucr.pop(old_id)
            database['user'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'unclaimed_records': ucr
                }}
            )

    u_caer = list(database['user'].find({'contributor_added_email_records.{}'.format(old_id): {'$ne': None}}))
    if u_caer:
        logger.info('** Updating {} Users (contributor_added_email_records) {}'.format(old_id, [d['_id'] for d in u_caer]))
        for doc in u_caer:
            caer = doc['contributor_added_email_records']
            caer[node._id] = caer.pop(old_id)
            database['user'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'contributor_added_email_records': caer
                }}
            )

    u_nc = list(database['user'].find({'notifications_configured.{}'.format(old_id): {'$ne': None}}))
    if u_nc:
        logger.info('** Updating {} Users (notifications_configured) {}'.format(old_id, [d['_id'] for d in u_nc]))
        for doc in u_nc:
            nc = doc['notifications_configured']
            nc[node._id] = nc.pop(old_id)
            database['user'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'notifications_configured': nc
                }}
            )

    u_cvt = list(database['user'].find({'comments_viewed_timestamp.{}'.format(old_id): {'$ne': None}}))
    if u_cvt:
        logger.info('** Updating {} Users (comments_viewed_timestamp) {}'.format(old_id, [d['_id'] for d in u_cvt]))
        for doc in u_cvt:
            nc = doc['comments_viewed_timestamp']
            nc[node._id] = nc.pop(old_id)
            database['user'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'comments_viewed_timestamp': nc
                }}
            )

    pc_i = list(database['pagecounters'].find({'_id': {'$regex': ':{}:'.format(old_id)}}))
    if pc_i:
        logger.info('** Updating {} PageCounters (_id) {}'.format(old_id, [d['_id'] for d in pc_i]))
        for doc in pc_i:
            replacement = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc)))
            new_id = replacement.pop('_id')
            database['pagecounters'].find_and_modify(
                {'_id': new_id},
                {'$set':replacement},
                upsert=True
            )
            database['pagecounters'].remove({'_id': doc['_id']})

    ss_dv = list(database['session'].find({'data.visited': {'$regex': ':{}:'.format(old_id)}}))
    if ss_dv:
        logger.info('** Updating {} Session (data) {}'.format(old_id, [d['_id'] for d in ss_dv]))
        for doc in ss_dv:
            repl_data = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc['data'])))
            database['session'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'data': repl_data
                }}
            )

    wc_n = list(database['watchconfig'].find({'node': old_id}))
    if wc_n:
        logger.info('** Updating {} WatchConfigs (node) {}'.format(old_id, [d['_id'] for d in wc_n]))
        for doc in wc_n:
            database['watchconfig'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'node': node._id
                }}
            )

    pl_n = list(database['privatelink'].find({'nodes': old_id}))
    if pl_n:
        logger.info('** Updating {} PrivateLinks (nodes) {}'.format(old_id, [d['_id'] for d in pl_n]))
        for d in pl_n:
            new_nodes = d['nodes']
            new_nodes.remove(old_id)
            new_nodes.append(node._id) 
            database['privatelink'].find_and_modify(
                {'_id': d['_id']},
                {'$set':{
                    'nodes': new_nodes
                }}
            )

    dr_bf = list(database['draftregistration'].find({'branched_from': old_id}))
    if dr_bf:
        logger.info('** Updating {} DraftRegistrations (branched_from) {}'.format(old_id, [d['_id'] for d in dr_bf]))
        for doc in dr_bf:
            database['draftregistration'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'branched_from': node._id
                }}
            )

    dr_rn = list(database['draftregistration'].find({'registered_node': old_id}))
    if dr_rn:
        logger.info('** Updating {} DraftRegistrations (registered_node) {}'.format(old_id, [d['_id'] for d in dr_rn]))
        for doc in dr_rn:
            database['draftregistration'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'registered_node': node._id
                }}
            )

    eta_er = list(database['embargoterminationapproval'].find({'embargoed_registration': old_id}))
    if eta_er:
        logger.info('** Updating {} EmbargoTerminationApprovals (embargoed_registration) {}'.format(old_id, [d['_id'] for d in eta_er]))
        for doc in eta_er:
            database['embargoterminationapproval'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'embargoed_registration': node._id
                }}
            )

    ra_su = list(database['registrationapproval'].find({'$where': 'var keys=Object.keys(this.stashed_urls);for(var i=0;i<keys.length;i+=1){{if(this.stashed_urls[keys[i]].view.indexOf("{}")!==-1){{return true}}if(this.stashed_urls[keys[i]].approve.indexOf("{}")!==-1){{return true}}if(this.stashed_urls[keys[i]].reject.indexOf("{}")!==-1){{return true}}}}return false;'.format(old_id, old_id, old_id)}))
    if ra_su:
        logger.info('** Updating {} RegistrationApprovals (stashed_urls) {}'.format(old_id, [d['_id'] for d in ra_su]))
        for doc in ra_su:
            updated_stash = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc['stashed_urls'])))
            database['registrationapproval'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'stashed_urls': updated_stash
                }}
            )

    idf_r = list(database['identifier'].find({'referent': old_id}))
    if idf_r:
        logger.info('** Updating {} Identifiers (referent) {}'.format(old_id, [d['_id'] for d in idf_r]))
        for doc in idf_r:
            ref = doc['referent']
            ref[1] = 'preprintservice'
            database['identifier'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'referent': ref
                }}
            )

    qm_dn = list(database['queuedmail'].find({'data.nid': old_id}))
    if qm_dn:
        logger.info('** Updating {} QueuedMails (data.nid) {}'.format(old_id, [d['_id'] for d in qm_dn]))
        for doc in qm_dn:
            repl_data = json.loads(re.sub(r'\b{}\b'.format(old_id), node._id, json.dumps(doc['data'])))
            database['queuedmail'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'data': repl_data
                }}
            )

    ps_n = list(database['preprintservice'].find({'node': old_id}))
    if ps_n:
        logger.info('** Updating {} PreprintServices (node) {}'.format(old_id, [d['_id'] for d in ps_n]))
        for doc in ps_n:
            database['preprintservice'].find_and_modify(
                {'_id': doc['_id']},
                {'$set':{
                    'node': node._id
                }}
            )
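
Every collection above is rewritten with the same trick: serialize the nested document with json.dumps, swap the old node id for the new one with a word-boundary regex, and parse the string back with json.loads. A minimal, standalone sketch of that round trip (the replace_id_everywhere helper and the sample params dict are illustrative, not part of the migration script):

import json
import re

def replace_id_everywhere(doc, old_id, new_id):
    """Swap old_id for new_id wherever it appears in a nested structure.

    json.dumps flattens the nesting into a single string, so one regex
    substitution reaches keys, values, and list items alike; the word-boundary
    anchors keep ids that merely contain old_id as a substring intact.
    """
    pattern = r'\b{}\b'.format(re.escape(old_id))
    return json.loads(re.sub(pattern, new_id, json.dumps(doc)))

# Hypothetical document shaped like the nodelog params handled above.
params = {
    'node': 'abc12',
    'urls': {'view': '/abc12/', 'download': '/abc12/osfstorage/'},
    'children': ['abc12', 'xyz99'],
}
print(replace_id_everywhere(params, 'abc12', 'def34'))
# {'node': 'def34', 'urls': {'view': '/def34/', 'download': '/def34/osfstorage/'}, 'children': ['def34', 'xyz99']}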

Example 16

Project: waterbutler
Source File: provider.py
View license
    async def _delete_folder(self, path, message=None, **kwargs):
        branch_data = await self._fetch_branch(path.branch_ref)

        old_commit_sha = branch_data['commit']['sha']
        old_commit_tree_sha = branch_data['commit']['commit']['tree']['sha']

        # e.g. 'level1', 'level2', or ''
        tree_paths = path.parts[1:]
        trees = [{
            'target': tree_paths[0].value,
            'tree': [
                {
                    'path': item['path'],
                    'mode': item['mode'],
                    'type': item['type'],
                    'sha': item['sha'],
                }
                for item in (await self._fetch_tree(old_commit_tree_sha))['tree']
            ]
        }]

        for idx, tree_path in enumerate(tree_paths[:-1]):
            try:
                tree_sha = next(x for x in trees[-1]['tree'] if x['path'] == tree_path.value)['sha']
            except StopIteration:
                raise exceptions.MetadataError(
                    'Could not delete folder \'{0}\''.format(path),
                    code=404,
                )
            trees.append({
                'target': tree_paths[idx + 1].value,
                'tree': [
                    {
                        'path': item['path'],
                        'mode': item['mode'],
                        'type': item['type'],
                        'sha': item['sha'],
                    }
                    for item in (await self._fetch_tree(tree_sha))['tree']
                ]
            })

        # The last tree's structure is rewritten without the target folder; all others
        # in the hierarchy are simply updated to reflect this change.
        tree = trees.pop()
        if tree['target'] == '':
            # Git Empty SHA
            tree_sha = GIT_EMPTY_SHA
        else:
            # Delete the folder from the tree (filter() returns an iterator, so cast it back to a list)
            current_tree = tree['tree']
            tree['tree'] = list(filter(lambda x: x['path'] != tree['target'], tree['tree']))
            if current_tree == tree['tree']:
                raise exceptions.NotFoundError(str(path))

            tree_data = await self._create_tree({'tree': tree['tree']})
            tree_sha = tree_data['sha']

            # Update parent tree(s)
            for tree in reversed(trees):
                for item in tree['tree']:
                    if item['path'] == tree['target']:
                        item['sha'] = tree_sha
                        break
                tree_data = await self._create_tree({'tree': tree['tree']})
                tree_sha = tree_data['sha']

        # Create a new commit which references our top most tree change.
        message = message or settings.DELETE_FOLDER_MESSAGE
        commit_resp = await self.make_request(
            'POST',
            self.build_repo_url('git', 'commits'),
            headers={'Content-Type': 'application/json'},
            data=json.dumps({
                'message': message,
                'committer': self.committer,
                'tree': tree_sha,
                'parents': [
                    old_commit_sha,
                ],
            }),
            expects=(201, ),
            throws=exceptions.DeleteError,
        )
        commit_data = await commit_resp.json()
        commit_sha = commit_data['sha']

        # Update repository reference, point to the newly created commit.
        # No need to store data, rely on expects to raise exceptions
        resp = await self.make_request(
            'PATCH',
            self.build_repo_url('git', 'refs', 'heads', path.branch_ref),
            headers={'Content-Type': 'application/json'},
            data=json.dumps({'sha': commit_sha}),
            expects=(200, ),
            throws=exceptions.DeleteError,
        )
        await resp.release()

    async def _delete_root_folder_contents(self, path, message=None, **kwargs):
        """Delete the contents of the root folder.

        :param GitHubPath path: GitHubPath path object for folder
        :param str message: Commit message
        """
        branch_data = await self._fetch_branch(path.branch_ref)
        old_commit_sha = branch_data['commit']['sha']
        tree_sha = GIT_EMPTY_SHA
        message = message or settings.DELETE_FOLDER_MESSAGE
        commit_resp = await self.make_request(
            'POST',
            self.build_repo_url('git', 'commits'),
            headers={'Content-Type': 'application/json'},
            data=json.dumps({
                'message': message,
                'committer': self.committer,
                'tree': tree_sha,
                'parents': [
                    old_commit_sha,
                ],
            }),
            expects=(201, ),
            throws=exceptions.DeleteError,
        )
        commit_data = await commit_resp.json()
        commit_sha = commit_data['sha']

        # Update repository reference, point to the newly created commit.
        # No need to store data, rely on expects to raise exceptions
        await self.make_request(
            'PATCH',
            self.build_repo_url('git', 'refs', 'heads', path.branch_ref),
            headers={'Content-Type': 'application/json'},
            data=json.dumps({'sha': commit_sha}),
            expects=(200, ),
            throws=exceptions.DeleteError,
        )

    async def _fetch_branch(self, branch):
        resp = await self.make_request(
            'GET',
            self.build_repo_url('branches', branch)
        )

        if resp.status == 404:
            await resp.release()
            raise exceptions.NotFoundError('. No such branch \'{}\''.format(branch))

        return (await resp.json())

    async def _fetch_contents(self, path, ref=None):
        url = furl.furl(self.build_repo_url('contents', path.path))
        if ref:
            url.args.update({'ref': ref})
        resp = await self.make_request(
            'GET',
            url.url,
            expects=(200, ),
            throws=exceptions.MetadataError
        )
        return (await resp.json())

    async def _fetch_repo(self):
        resp = await self.make_request(
            'GET',
            self.build_repo_url(),
            expects=(200, ),
            throws=exceptions.MetadataError
        )
        return (await resp.json())

    async def _fetch_tree(self, sha, recursive=False):
        url = furl.furl(self.build_repo_url('git', 'trees', sha))
        if recursive:
            url.args.update({'recursive': 1})
        resp = await self.make_request(
            'GET',
            url.url,
            expects=(200, ),
            throws=exceptions.MetadataError
        )
        tree = await resp.json()

        if tree['truncated']:
            raise GitHubUnsupportedRepoError

        return tree

    async def _search_tree_for_path(self, path, tree_sha, recursive=True):
        """Search through the given tree for an entity matching the name and type of `path`.
        """
        tree = await self._fetch_tree(tree_sha, recursive=True)

        if tree['truncated']:
            raise GitHubUnsupportedRepoError

        implicit_type = 'tree' if path.endswith('/') else 'blob'

        for entity in tree['tree']:
            if entity['path'] == path.strip('/') and entity['type'] == implicit_type:
                return entity

        raise exceptions.NotFoundError(str(path))

    async def _create_tree(self, tree):
        resp = await self.make_request(
            'POST',
            self.build_repo_url('git', 'trees'),
            headers={'Content-Type': 'application/json'},
            data=json.dumps(tree),
            expects=(201, ),
            throws=exceptions.ProviderError,
        )
        return (await resp.json())

    async def _create_commit(self, commit):
        resp = await self.make_request(
            'POST',
            self.build_repo_url('git', 'commits'),
            headers={'Content-Type': 'application/json'},
            data=json.dumps(commit),
            expects=(201, ),
            throws=exceptions.ProviderError,
        )
        return (await resp.json())

    async def _create_blob(self, stream):
        blob_stream = streams.JSONStream({
            'encoding': 'base64',
            'content': streams.Base64EncodeStream(stream),
        })

        resp = await self.make_request(
            'POST',
            self.build_repo_url('git', 'blobs'),
            data=blob_stream,
            headers={
                'Content-Type': 'application/json',
                'Content-Length': str(blob_stream.size),
            },
            expects=(201, ),
            throws=exceptions.UploadError,
        )
        return (await resp.json())

    def _is_sha(self, ref):
        # sha1 is always 40 characters in length
        try:
            if len(ref) != 40:
                return False
            # sha1 is always base 16 (hex)
            int(ref, 16)
        except (TypeError, ValueError, ):
            return False
        return True

    def _web_view(self, path):
        segments = (self.owner, self.repo, 'blob', path.branch_ref, path.path)
        return provider.build_url(settings.VIEW_URL, *segments)

    async def _metadata_folder(self, path, **kwargs):
        ref = path.branch_ref

        try:
            # it's cool to use the contents API here because we know path is a dir and won't hit
            # the 1mb size limit
            data = await self._fetch_contents(path, ref=ref)
        except exceptions.MetadataError as e:
            if e.data.get('message') == 'This repository is empty.':
                data = []
            else:
                raise

        if isinstance(data, dict):
            raise exceptions.MetadataError(
                'Could not retrieve folder "{0}"'.format(str(path)),
                code=404,
            )

        ret = []
        for item in data:
            if item['type'] == 'dir':
                ret.append(GitHubFolderContentMetadata(item, ref=ref))
            else:
                ret.append(GitHubFileContentMetadata(item, ref=ref, web_view=item['html_url']))

        return ret

    async def _metadata_file(self, path, revision=None, **kwargs):
        resp = await self.make_request(
            'GET',
            self.build_repo_url('commits', path=path.path, sha=revision or path.branch_ref),
            expects=(200, ),
            throws=exceptions.MetadataError,
        )

        commits = await resp.json()

        if not commits:
            raise exceptions.NotFoundError(str(path))

        latest = commits[0]
        tree = await self._fetch_tree(latest['commit']['tree']['sha'], recursive=True)

        try:
            data = next(
                x for x in tree['tree']
                if x['path'] == path.path
            )
        except StopIteration:
            raise exceptions.NotFoundError(str(path))

        if isinstance(data, list):
            raise exceptions.MetadataError(
                'Could not retrieve file "{0}"'.format(str(path)),
                code=404,
            )

        return GitHubFileTreeMetadata(
            data, commit=latest['commit'], web_view=self._web_view(path),
            ref=path.branch_ref
        )

    async def _get_latest_sha(self, ref='master'):
        resp = await self.make_request(
            'GET',
            self.build_repo_url('git', 'refs', 'heads', ref),
            expects=(200, ),
            throws=exceptions.ProviderError
        )
        data = await resp.json()
        return data['object']['sha']

    async def _update_ref(self, sha, ref='master'):
        resp = await self.make_request(
            'POST',
            self.build_repo_url('git', 'refs', 'heads', ref),
            data=json.dumps({
                'sha': sha,
            }),
            expects=(200, ),
            throws=exceptions.ProviderError
        )
        return (await resp.json())

    async def _do_intra_move_or_copy(self, src_path, dest_path, is_copy):

        # ON PATHS:
        #   WB and GH use slightly different default conventions for their paths, so we often
        #   have to munge our WB paths before comparison. Here is a quick overview:
        #     WB (dirs):  wb_dir.path == 'foo/bar/'     str(wb_dir) == '/foo/bar/'
        #     WB (file):  wb_file.path = 'foo/bar.txt'  str(wb_file) == '/foo/bar.txt'
        #     GH (dir):   'foo/bar'
        #     GH (file):  'foo/bar.txt'

        src_tree, src_head = await self._get_tree_and_head(src_path.branch_ref)

        # these are the blobs to copy/move
        blobs = [
            item
            for item in src_tree['tree']
            if src_path.is_dir and item['path'].startswith(src_path.path) or
            src_path.is_file and item['path'] == src_path.path
        ]

        if len(blobs) == 0:
            raise exceptions.NotFoundError(str(src_path))

        if src_path.is_file:
            assert len(blobs) == 1, 'Found multiple targets'

        commit_msg = settings.COPY_MESSAGE if is_copy else settings.MOVE_MESSAGE
        commit = None

        if src_path.branch_ref == dest_path.branch_ref:
            exists = self._path_exists_in_tree(src_tree['tree'], dest_path)

            # if we're overwriting an existing dir, we must remove its blobs from the tree
            if dest_path.is_dir:
                src_tree['tree'] = self._remove_path_from_tree(src_tree['tree'], dest_path)

            # if this is a copy, duplicate and append our source blobs. The originals will be updated
            # with the new destination path.
            if is_copy:
                src_tree['tree'].extend(copy.deepcopy(blobs))

            # see, I told you they'd be overwritten
            self._reparent_blobs(blobs, src_path, dest_path)

            src_tree['tree'] = self._prune_subtrees(src_tree['tree'])

            commit = await self._commit_tree_and_advance_branch(src_tree['tree'], {'sha': src_head},
                                                                commit_msg, src_path.branch_ref)

        else:
            dest_tree, dest_head = await self._get_tree_and_head(dest_path.branch_ref)

            exists = self._path_exists_in_tree(dest_tree['tree'], dest_path)

            dest_tree['tree'] = self._remove_path_from_tree(dest_tree['tree'], dest_path)

            new_blobs = copy.deepcopy(blobs)
            self._reparent_blobs(new_blobs, src_path, dest_path)
            dest_tree['tree'].extend(new_blobs)

            dest_tree['tree'] = self._prune_subtrees(dest_tree['tree'])

            commit = await self._commit_tree_and_advance_branch(dest_tree['tree'], {'sha': dest_head},
                                                                commit_msg, dest_path.branch_ref)

            if not is_copy:
                src_tree['tree'] = self._remove_path_from_tree(src_tree['tree'], src_path)
                src_tree['tree'] = self._prune_subtrees(src_tree['tree'])
                await self._commit_tree_and_advance_branch(src_tree['tree'], {'sha': src_head},
                                                           commit_msg, src_path.branch_ref)

            blobs = new_blobs  # for the metadata

        if dest_path.is_file:
            assert len(blobs) == 1, 'Destination file should have exactly one candidate'
            return GitHubFileTreeMetadata(
                blobs[0], commit=commit, ref=dest_path.branch_ref
            ), not exists

        folder = GitHubFolderTreeMetadata({
            'path': dest_path.path.strip('/')
        }, commit=commit, ref=dest_path.branch_ref)

        folder.children = []

        for item in blobs:
            if item['path'] == dest_path.path.rstrip('/'):
                continue
            if item['type'] == 'tree':
                folder.children.append(GitHubFolderTreeMetadata(item, ref=dest_path.branch_ref))
            else:
                folder.children.append(GitHubFileTreeMetadata(item, ref=dest_path.branch_ref))

        return folder, not exists

    async def _get_tree_and_head(self, branch):
        """Fetch the head commit and tree for the given branch.

        :param str branch: The branch to fetch
        :returns dict: A GitHub tree object. Contents are under the ``tree`` key.
        :returns dict: A GitHub commit object. The SHA is under the ``sha`` key.
        """
        branch_data = await self._fetch_branch(branch)
        head = branch_data['commit']['sha']

        tree_sha = branch_data['commit']['commit']['tree']['sha']
        tree = await self._fetch_tree(tree_sha, recursive=True)

        return tree, head

    def _path_exists_in_tree(self, tree, path):
        """Search through a tree and return true if the given path is found.

        :param list tree: A list of blobs in a git tree.
        :param GitHubPath path:  The path to search for.
        :returns bool: true if ``path`` is found in ``tree``
        """
        return any(x['path'] == path.path.rstrip('/') for x in tree)

    def _remove_path_from_tree(self, tree, path):
        """Search through a tree and remove any blobs or trees that match ``path`` or are a child of
        ``path``.

        :param list tree: A list of blobs in a git tree.
        :param GitHubPath path:  The path to exclude.
        :returns list: A new list containing the filtered tree contents.
        """
        return [
            item
            for item in tree
            if (path.is_file and not item['path'] == path.path) or  # file != path
            (path.is_dir and not
             (item['path'].startswith(path.path) or  # file/folder != child of path
              (item['type'] == 'tree' and item['path'] == path.path.rstrip('/'))))  # folder != path

        ]

    def _reparent_blobs(self, blobs, src_path, dest_path):
        """Take a list of blobs and replace the source path with the dest path.

        Two caveats:

        * This method operates on the list of blobs in place. This is intentional. Anything you pass
        as the ``blobs`` arg will be mutated back in the calling scope.

        * This method assumes that the list of blobs all begin with ``src_path``, since its purpose
        is to rewrite all the blobs found at or under ``src_path`` to be at or under ``dest_path``.
        If you pass it something that is not located under ``src_path``, a later part of the path
        may be updated.

        :param list blobs: A list of blobs whose paths should be updated.
        :param GitHubPath src_path:  The original path.
        :param GitHubPath dest_path:  The new path.
        :returns None: This method returns **nothing**. It operates on the blobs in-place.
        """
        for blob in blobs:
            if blob['path'] == src_path.path.rstrip('/') and blob['type'] == 'tree':
                # Renaming the parent folder is not necessary. Trees are pruned before uploading
                # to GH.  This is only here because at some point someone will use it without pruning
                # and wonder why on earth the parent folder isn't renamed.
                blob['path'] = dest_path.path.rstrip('/')
            else:
                blob['path'] = blob['path'].replace(src_path.path, dest_path.path, 1)
        return

    def _prune_subtrees(self, tree):
        """Takes in a list representing a git tree and remove all the entries that are also trees.
        Only blobs should remain. GitHub infers tree structure from blob paths.  Deleting a blob
        without removing its parent tree will result in the blob *NOT* being deleted. See:
        http://www.levibotelho.com/development/commit-a-file-with-the-github-api/

        :param list tree: A list representing a git tree. May contain trees, in addition to blobs.
        :returns list: A new list containing just the blobs.
        """
        return [item for item in tree if item['type'] != 'tree']

    async def _commit_tree_and_advance_branch(self, old_tree, old_head, commit_msg, branch_ref):
        """Utilty method to bundle several commands into one.  Takes a tree, head commit, a message,
        and a branch, creates a new commit pointing to tree, then advances branch to point to the
        new commit. Basically the same thing as ``git commit -am "foo message"`` on the command
        line.  Returns the new commit.

        :param list old_tree: A list of blobs representing the new file tree.
        :param dict old_head: The commit object that will be the parent of the new commit. Must have a 'sha' key.
        :param str commit_msg: The commit message for the new commit.
        :param str branch_ref: The branch that will be advanced to the new commit.
        :returns dict new_head: The commit object returned by GitHub.
        """
        new_tree = await self._create_tree({'tree': old_tree})

        # Create a new commit which references our top most tree change.
        new_head = await self._create_commit({
            'tree': new_tree['sha'],
            'parents': [old_head['sha']],
            'committer': self.committer,
            'message': commit_msg,
        })

        # Update repository reference, point to the newly created commit.
        # No need to store data, rely on expects to raise exceptions
        await self._update_ref(new_head['sha'], ref=branch_ref)

        return new_head
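
Stripped of the waterbutler plumbing, that last helper is three calls against GitHub's git data API, each carrying a json.dumps-encoded body: create a tree, create a commit that points at it, then move the branch ref to the new commit. A rough synchronous sketch of the same sequence using requests (the owner, repo, token, and branch values are placeholders, not anything from the provider above):

import json
import requests

API = 'https://api.github.com/repos/{owner}/{repo}'.format(owner='someone', repo='demo')
HEADERS = {
    'Authorization': 'token <personal-access-token>',  # placeholder credential
    'Content-Type': 'application/json',
}

def commit_tree_and_advance_branch(blobs, parent_sha, message, branch):
    # Create the tree object from a list of blob entries.
    tree = requests.post(API + '/git/trees', headers=HEADERS,
                         data=json.dumps({'tree': blobs})).json()
    # Wrap the tree in a commit whose parent is the current branch head.
    commit = requests.post(API + '/git/commits', headers=HEADERS,
                           data=json.dumps({
                               'message': message,
                               'tree': tree['sha'],
                               'parents': [parent_sha],
                           })).json()
    # Fast-forward the branch ref to the new commit.
    requests.patch(API + '/git/refs/heads/' + branch, headers=HEADERS,
                   data=json.dumps({'sha': commit['sha']}))
    return commit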

Example 17

Project: cti-toolkit
Source File: test_misp_submission.py
View license
@httpretty.activate
@mock.patch('certau.transform.misp.time.sleep')
def test_misp_publishing(_):
    """Test that the stixtrans module can submit to a MISP server."""
    # STIX file to test against. Place in a StringIO instance so we can
    # close the file.
    with open('tests/CA-TEST-STIX.xml', 'rb') as stix_f:
        stix_io = StringIO.StringIO(stix_f.read())

    # Create a transformer - select 'text' output format and flag MISP
    # publishing (with appropriate settings).
    package = stix.core.STIXPackage.from_xml(stix_io)
    misp_args = {
        'misp_url': 'http://misp.host.tld/',
        'misp_key': '111111111111111111111111111',
    }
    misp_event_args = {
        'distribution': 1,
        'threat_level': 4,
        'analysis': 0,
    }

    # Ensures that non-registered paths fail
    httpretty.HTTPretty.allow_net_connect = False

    # Mock the MISP version retrieval.
    httpretty.register_uri(
        httpretty.GET,
        'http://misp.host.tld/servers/getVersion',
        body=json.dumps({}),
        content_type='application/json',
    )

    # Mock the creation of an event
    httpretty.register_uri(
        httpretty.POST,
        'http://misp.host.tld/events',
        body=json.dumps({'Event': {
            'id': '0',
            'distribution': misp_event_args['distribution'],
        }}),
        content_type='application/json',
    )

    # Mock the adding of a tag to an event
    httpretty.register_uri(
        httpretty.POST,
        'http://misp.host.tld/events/addTag',
        body=json.dumps({'Event': {
            'id': '0',
            'tag': 4,
        }}),
        content_type='application/json',
    )

    # Mock editing of a created event.
    httpretty.register_uri(
        httpretty.POST,
        'http://misp.host.tld/events/0',
        body=json.dumps({}),
        content_type='application/json',
    )

    # Perform the processing and the misp publishing.
    misp = certau.transform.StixMispTransform.get_misp_object(
        **misp_args
    )
    transformer = certau.transform.StixMispTransform(
        package=package,
        misp=misp,
        **misp_event_args
    )
    transformer.publish()

    # Test the correct requests were made
    reqs = list(httpretty.httpretty.latest_requests)

    # The "get version" request includes the MISP key.
    r_get_version = reqs[0]
    assert r_get_version.path == '/servers/getVersion'
    assert r_get_version.headers.dict['authorization'] == misp_args['misp_key']

    # The event creation request includes basic information.
    r_create_event = reqs[1]
    assert r_create_event.path == '/events'
    assert json.loads(r_create_event.body) == {
        u'Event': {
            u'analysis': misp_event_args['analysis'],
            u'published': False,
            u'threat_level_id': misp_event_args['threat_level'],
            u'distribution': misp_event_args['distribution'],
            u'date': '2015-12-23',
            u'info': 'CA-TEST-STIX | Test STIX data'
        }
    }

    # The TLP tag is added to the event.
    r_add_tag = reqs[2]
    assert r_add_tag.path == '/events/addTag'
    assert json.loads(r_add_tag.body) == {
        u'request': {
            u'Event': {
                u'id': '0',
                u'tag': 4,
            }
        }
    }

    # The event is then updated with the observables, over multiple
    # requests. We're only interested in the 'Attribute' key here as that
    # contains the data extracted from the observable.
    obs_attributes = sorted([json.loads(request.body)['Event']['Attribute'][0]
                             for request
                             in reqs[3:]])

    assert obs_attributes == sorted([
        {
            u'category': u'Artifacts dropped',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'md5',
            u'value': u'11111111111111112977fa0588bd504a',
        },
        {
            u'category': u'Artifacts dropped',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'md5',
            u'value': u'ccccccccccccccc33574c79829dc1ccf',
        },
        {
            u'category': u'Artifacts dropped',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'md5',
            u'value': u'11111111111111133574c79829dc1ccf',
        },
        {
            u'category': u'Artifacts dropped',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'md5',
            u'value': u'11111111111111111f2601b4d21660fb',
        },
        {
            u'category': u'Artifacts dropped',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'md5',
            u'value': u'1111111111b42b57f518197d930471d9',
        },
        {
            u'category': u'Artifacts dropped',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'mutex',
            u'value': u'\\BaseNamedObjects\\MUTEX_0001',
        },
        {
            u'category': u'Artifacts dropped',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'mutex',
            u'value': u'\\BaseNamedObjects\\WIN_ABCDEF',
        },
        {
            u'category': u'Artifacts dropped',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'mutex',
            u'value': u'\\BaseNamedObjects\\iurlkjashdk',
        },
        {
            u'category': u'Artifacts dropped',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'regkey|value',
            u'value': u'HKEY_CURRENT_USER\\Software\\Microsoft\\Windows\\CurrentVersion\\Run|hotkey\\%APPDATA%\\malware.exe -st',
        },
        {
            u'category': u'Artifacts dropped',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'sha1',
            u'value': u'893fb19ac24eabf9b1fe1ddd1111111111111111',
        },
        {
            u'category': u'Artifacts dropped',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'sha256',
            u'value': u'11111111111111119f167683e164e795896be3be94de7f7103f67c6fde667bdf',
        },
        {
            u'category': u'Network activity',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'domain',
            u'value': u'bad.domain.org',
        },
        {
            u'category': u'Network activity',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'domain',
            u'value': u'dnsupdate.dyn.net',
        },
        {
            u'category': u'Network activity',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'domain',
            u'value': u'free.stuff.com',
        },
        {
            u'category': u'Network activity',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'ip-dst',
            u'value': u'183.82.180.95',
        },

        {
            u'category': u'Network activity',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'ip-dst',
            u'value': u'111.222.33.44',
        },
        {
            u'category': u'Network activity',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'ip-dst',
            u'value': u'158.164.39.51',
        },
        {
            u'category': u'Network activity',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'url',
            u'value': u'http://host.domain.tld/path/file',
        },
        {
            u'category': u'Network activity',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'user-agent',
            u'value': u'Mozilla/5.0 (Windows NT 5.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/35.0.2309.372 Safari/537.36',
        },
        {
            u'category': u'Payload delivery',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'email-src',
            u'value': u'[email protected]',
        },
        {
            u'category': u'Payload delivery',
            u'distribution': 1,
            u'to_ids': True,
            u'type': u'email-subject',
            u'value': u'Important project details',
        },
    ])
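
The pattern that makes this test work is worth isolating: httpretty registers a fake endpoint whose canned body comes from json.dumps, the code under test posts to it, and the assertions decode whatever the client actually sent. A minimal sketch of just that mocking loop (the endpoint URL and payloads are made up for illustration):

import json
import httpretty
import requests

@httpretty.activate
def test_minimal_json_mock():
    # Register a fake endpoint; the canned response body is produced by json.dumps.
    httpretty.register_uri(
        httpretty.POST,
        'http://misp.host.tld/events',
        body=json.dumps({'Event': {'id': '0'}}),
        content_type='application/json',
    )

    # The code under test would normally live elsewhere; here we post directly.
    resp = requests.post(
        'http://misp.host.tld/events',
        data=json.dumps({'Event': {'info': 'demo'}}),
        headers={'Content-Type': 'application/json'},
    )

    # Assert on both the mocked response and the request body the client sent.
    assert resp.json() == {'Event': {'id': '0'}}
    assert json.loads(httpretty.last_request().body) == {'Event': {'info': 'demo'}}

test_minimal_json_mock()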

Example 18

Project: Adlibre-DMS
Source File: views.py
View license
@login_required
@group_required(SEC_GROUP_NAMES['search'])
def search_results(request, step=None, template='mdtui/search.html'):
    """Search Step 3: Search Results

    @param request: is a Django request object
    @param step: is a current step name (for template rendering)
    @param template: is a name of template for this view"""
    document_keys = None
    docrule_ids = []
    document_names = []
    warnings = []
    mdts_list = []
    paginated_documents = []
    export = False
    cache_documents_for = 3600  # Seconds
    page = request.GET.get('page')
    force_clean_cache = request.session.get('cleanup_caches', False)
    # Sorting UI interactions
    sorting_field = request.POST.get('sorting_key', '') or ''
    order = request.POST.get('order', '') or ''
    if sorting_field and order:
        request.session["sorting_field"] = sorting_field
        request.session["order"] = order
    else:
        try:
            sorting_field = request.session["sorting_field"]
            order = request.session["order"]
        except KeyError:
            pass
    query_order = ''
    if order == "icon-chevron-up":
        query_order = "ascending"
    elif order == "icon-chevron-down":
        query_order = "descending"
    if not page:
        page = 1
    else:
        try:
            page = int(page)
        except ValueError:
            pass

    try:
        document_keys = request.session['document_search_dict']
    except KeyError:
        warnings.append(MDTUI_ERROR_STRINGS['NO_S_KEYS'])
    # Determine whether this is actually an export request rather than a normal search, and convert the flag into an internal variable
    if document_keys:
        if document_keys.__len__() == 1 and 'export_results' in document_keys:
            warnings.append(MDTUI_ERROR_STRINGS['NO_S_KEYS'])
        if 'export_results' in document_keys.iterkeys():
            if document_keys['export_results'] == 'export':
                export = True
            # Cleaning search dict afterwards
            del document_keys['export_results']
    # Getting docrules list for both search methods (Only one allowed)
    try:
        # Trying to get the list of ids (for MDT search)
        docrule_ids = request.session['search_docrule_ids']
    except KeyError:
        pass
    if not docrule_ids:
        try:
            # If it does not exist, build a list for the docrule search
            docrule_ids = [request.session['searching_docrule_id'], ]
        except KeyError:
            pass

    log.debug(
        'search_results call for : page: "%s", docrule_id: "%s", document_search_dict: "%s"'
        % (page, docrule_ids, document_keys)
    )
    # turning document_search dict into something useful for the couch request
    clean_keys = cleanup_document_keys(document_keys)
    ck = ranges_validator(clean_keys)
    cleaned_document_keys = recognise_dates_in_search(ck)
    if not cleaned_document_keys:
        warnings.append(MDTUI_ERROR_STRINGS['NO_S_KEYS'])

    cache = get_cache('mui_search_results')
    # Build the cache key from the document keys, docrule list, and sorting parameters
    search_data = json.dumps(document_keys)+json.dumps(docrule_ids)+json.dumps(sorting_field)+json.dumps(order)
    cache_key = hash(search_data)
    if not force_clean_cache:
        cached_documents = cache.get(cache_key, None)
    else:
        cached_documents = None
        del request.session['cleanup_caches']
    if cleaned_document_keys and not cached_documents:
        if cleaned_document_keys:
            # TODO: speed up sorting by reusing document_names from the cache instead of searching again.
            # Redefining proper sorting results request
            if sorting_field:
                if sorting_field == "Indexing Date":
                    sorting_field_query = "metadata_created_date"
                elif sorting_field == "Description":
                    sorting_field_query = "metadata_description"
                elif sorting_field == "Type":
                    sorting_field_query = "metadata_doc_type_rule_id"
                else:
                    sorting_field_query = sorting_field
            else:
                sorting_field_query = ''
            # Using DMS actual search method for this
            query = DMSSearchQuery({
                'document_keys': cleaned_document_keys,
                'docrules': docrule_ids,
                'only_names': True,
                'sorting_key': sorting_field_query,
                'sorting_order': query_order,
            })
            search_response = DMSSearchManager().search_dms(query)
            search_errors = search_response.get_errors()
            if search_errors:
                for error in search_errors:
                    warnings.append(error)
                document_names = []
            else:
                document_names = search_response.get_document_names()
        cache.set(cache_key, document_names, cache_documents_for)
        log.debug('search_results: Got search results with amount of results: %s' % document_names)
    else:
        if cleaned_document_keys:
            document_names = cached_documents
            log.debug('search_results: Getting results from cache. Num of results: %s' % document_names)

    # Produces a CSV file from search results
    if (document_names and step == 'export') or (document_names and export):
        log.debug('search_results exporting found documents to CSV')
        # Getting all the found documents for the search results
        documents = DMSSearchManager().get_found_documents(document_names)
        mdts_list = get_mdts_for_documents(documents)
        csv_response = export_to_csv(document_keys, mdts_list, documents)
        return csv_response

    # Paginate the list of documents and retrieve only the required ones
    if document_keys:
        paginator = Paginator(document_names, MUI_SEARCH_PAGINATE)
        try:
            paginated_documents = paginator.page(page)
        except PageNotAnInteger:
            # If page is not an integer, deliver first page.
            paginated_documents = paginator.page(1)
        except EmptyPage:
            # If page is out of range (e.g. 9999), deliver last page of results.
            paginated_documents = paginator.page(paginator.num_pages)
        paginated_documents_objects = DMSSearchManager().get_found_documents(paginated_documents.object_list)
        paginated_documents.object_list = paginated_documents_objects
        mdts_list = get_mdts_for_documents(paginated_documents_objects)

    context = {
        'step': step,
        'paginated_documents': paginated_documents,
        'page': page,
        'document_keys': cleaned_document_keys,
        'mdts': mdts_list,
        'warnings': warnings,
        'sorting_field': sorting_field,
        'order': order
    }
    return render_to_response(template, context, context_instance=RequestContext(request))
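
One detail worth noting in the view above is how the cache key is built: the search parameters are each run through json.dumps, concatenated, and hashed, so any change to the keys, docrules, or sort order produces a different key. Below is a small sketch of the same idea using sort_keys=True and a real digest, which stays stable across processes (Python's built-in hash() does not); the helper name and sample inputs are illustrative only:

import hashlib
import json

def search_cache_key(document_keys, docrule_ids, sorting_field, order):
    # Serialize the search parameters deterministically, then hash the result.
    payload = json.dumps(
        [document_keys, docrule_ids, sorting_field, order],
        sort_keys=True,
    )
    return hashlib.sha1(payload.encode('utf-8')).hexdigest()

# Hypothetical inputs mirroring the session values read in search_results().
print(search_cache_key({'Description': 'invoice'}, ['2'], 'Indexing Date', 'icon-chevron-up'))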

Example 19

Project: url-abuse
Source File: __init__.py
View license
def create_app(configfile=None):
    app = Flask(__name__)
    handler = RotatingFileHandler('urlabuse.log', maxBytes=10000, backupCount=5)
    handler.setFormatter(Formatter('%(asctime)s %(message)s'))
    app.wsgi_app = ReverseProxied(app.wsgi_app)
    app.logger.addHandler(handler)
    app.logger.setLevel(logging.INFO)
    Bootstrap(app)
    q = Queue(connection=conn)

    # Mail Config
    app.config['MAIL_SERVER'] = 'localhost'
    app.config['MAIL_PORT'] = 25
    mail = Mail(app)

    app.config['SECRET_KEY'] = 'devkey'
    app.config['BOOTSTRAP_SERVE_LOCAL'] = True
    app.config['configfile'] = config_path

    parser = configparser.SafeConfigParser()
    parser.read(app.config['configfile'])

    replacelist = make_dict(parser, 'replacelist')
    auth_users = prepare_auth()
    ignorelist = [i.strip()
                  for i in parser.get('abuse', 'ignore').split('\n')
                  if len(i.strip()) > 0]
    autosend_threshold = 5

    def _get_user_ip(request):
        ip = request.headers.get('X-Forwarded-For')
        if ip is None:
            ip = request.remote_addr
        return ip

    @app.route('/', methods=['GET', 'POST'])
    def index():
        form = URLForm()
        return render_template('index.html', form=form)

    @app.route('/urlreport', methods=['GET'])
    def url_report():
        return render_template('url-report.html')

    @app.errorhandler(404)
    def page_not_found(e):
        ip = request.headers.get('X-Forwarded-For')
        if ip is None:
            ip = request.remote_addr
        if request.path != '/_result/':
            app.logger.info('404 of {} on {}'.format(ip, request.path))
        return render_template('404.html'), 404

    def authenticate():
        """Sends a 401 response that enables basic auth"""
        return Response('Could not verify your access level for that URL.\n'
                        'You have to login with proper credentials', 401,
                        {'WWW-Authenticate': 'Basic realm="Login Required"'})

    def check_auth(username, password):
        """This function is called to check if a username /
        password combination is valid.
        """
        if auth_users is None:
            return False
        else:
            db_pass = auth_users.get(username)
            return db_pass == password

    @app.route('/login', methods=['GET', 'POST'])
    def login():
        auth = request.authorization
        if not auth or not check_auth(auth.username, auth.password):
            return authenticate()
        return redirect(url_for('index'))

    @app.route("/_result/<job_key>", methods=['GET'])
    def check_valid(job_key):
        if job_key is None:
            return json.dumps(None), 200
        job = Job.fetch(job_key, connection=conn)
        if job.is_finished:
            return json.dumps(job.result), 200
        else:
            return json.dumps("Nay!"), 202

    @app.route('/start', methods=['POST'])
    def run_query():
        data = json.loads(request.data)
        url = data["url"]
        ip = _get_user_ip(request)
        app.logger.info('{} {}'.format(ip, url))
        if get_submissions(url) >= autosend_threshold:
            send(url, '', True)
        is_valid = q.enqueue_call(func=is_valid_url, args=(url,), result_ttl=500)
        return is_valid.get_id()

    @app.route('/urls', methods=['POST'])
    def urls():
        data = json.loads(request.data)
        url = data["url"]
        u = q.enqueue_call(func=url_list, args=(url,), result_ttl=500)
        return u.get_id()

    @app.route('/resolve', methods=['POST'])
    def resolve():
        data = json.loads(request.data)
        url = data["url"]
        u = q.enqueue_call(func=dns_resolve, args=(url,), result_ttl=500)
        return u.get_id()

    @app.route('/phishtank', methods=['POST'])
    def phishtank():
        data = json.loads(request.data)
        if not os.path.exists('phishtank.key'):
            return None
        url = parser.get("PHISHTANK", "url")
        key = open('phishtank.key', 'r').readline().strip()
        query = data["query"]
        u = q.enqueue_call(func=phish_query, args=(url, key, query,), result_ttl=500)
        return u.get_id()

    @app.route('/virustotal_report', methods=['POST'])
    def vt():
        data = json.loads(request.data)
        if not os.path.exists('virustotal.key'):
            return None
        url = parser.get("VIRUSTOTAL", "url_report")
        url_up = parser.get("VIRUSTOTAL", "url_upload")
        key = open('virustotal.key', 'r').readline().strip()
        query = data["query"]
        u = q.enqueue_call(func=vt_query_url, args=(url, url_up, key, query,), result_ttl=500)
        return u.get_id()

    @app.route('/googlesafebrowsing', methods=['POST'])
    def gsb():
        data = json.loads(request.data)
        if not os.path.exists('googlesafebrowsing.key'):
            return None
        url = parser.get("GOOGLESAFEBROWSING", "url")
        key = open('googlesafebrowsing.key', 'r').readline().strip()
        url = url.format(key)
        query = data["query"]
        u = q.enqueue_call(func=gsb_query, args=(url, query,), result_ttl=500)
        return u.get_id()

    @app.route('/urlquery', methods=['POST'])
    def urlquery():
        data = json.loads(request.data)
        if not os.path.exists('urlquery.key'):
            return None
        url = parser.get("URLQUERY", "url")
        key = open('urlquery.key', 'r').readline().strip()
        query = data["query"]
        u = q.enqueue_call(func=urlquery_query, args=(url, key, query,), result_ttl=500)
        return u.get_id()

    @app.route('/ticket', methods=['POST'])
    def ticket():
        if not request.authorization:
            return ''
        data = json.loads(request.data)
        server = parser.get("SPHINX", "server")
        port = int(parser.get("SPHINX", "port"))
        url = parser.get("ITS", "url")
        query = data["query"]
        u = q.enqueue_call(func=sphinxsearch, args=(server, port, url, query,),
                           result_ttl=500)
        return u.get_id()

    @app.route('/whois', methods=['POST'])
    def whoismail():
        if not request.authorization:
            return ''
        server = parser.get("WHOIS", "server")
        port = parser.getint("WHOIS", "port")
        data = json.loads(request.data)
        query = data["query"]
        u = q.enqueue_call(func=whois, args=(server, port, query, ignorelist, replacelist),
                           result_ttl=500)
        return u.get_id()

    @app.route('/eupi', methods=['POST'])
    def eu():
        data = json.loads(request.data)
        if not os.path.exists('eupi.key'):
            return None
        url = parser.get("EUPI", "url")
        key = open('eupi.key', 'r').readline().strip()
        query = data["query"]
        u = q.enqueue_call(func=eupi, args=(url, key, query,), result_ttl=500)
        return u.get_id()

    @app.route('/pdnscircl', methods=['POST'])
    def dnscircl():
        url = parser.get("PDNS_CIRCL", "url")
        user, password = open('pdnscircl.key', 'r').readlines()
        data = json.loads(request.data)
        query = data["query"]
        u = q.enqueue_call(func=pdnscircl, args=(url, user.strip(), password.strip(),
                                                 query,), result_ttl=500)
        return u.get_id()

    @app.route('/bgpranking', methods=['POST'])
    def bgpr():
        data = json.loads(request.data)
        query = data["query"]
        u = q.enqueue_call(func=bgpranking, args=(query,), result_ttl=500)
        return u.get_id()

    @app.route('/psslcircl', methods=['POST'])
    def sslcircl():
        url = parser.get("PSSL_CIRCL", "url")
        user, password = open('psslcircl.key', 'r').readlines()
        data = json.loads(request.data)
        query = data["query"]
        u = q.enqueue_call(func=psslcircl, args=(url, user.strip(), password.strip(),
                                                 query,), result_ttl=500)
        return u.get_id()

    @app.route('/get_cache', methods=['POST'])
    def get_cache():
        data = json.loads(request.data)
        url = data["query"]
        data = cached(url)
        dumped = json.dumps(data, sort_keys=True, indent=4, separators=(',', ': '))
        return dumped

    def digest(data):
        to_return = ''
        all_mails = set()
        for entry in data:
            for url, info in list(entry.items()):
                to_return += '\n{}\n'.format(url)
                if info.get('whois'):
                    all_mails.update(info.get('whois'))
                    to_return += '\tContacts: {}\n'.format(', '.join(info.get('whois')))
                if info.get('vt') and len(info.get('vt')) == 4:
                    vtstuff = info.get('vt')
                    to_return += '\t{} out of {} positive detections in VT - {}\n'.format(
                        vtstuff[2], vtstuff[3], vtstuff[1])
                if info.get('gsb'):
                    to_return += '\tKnown as malicious on Google Safe Browsing: {}\n'.format(info.get('gsb'))
                if info.get('phishtank'):
                    to_return += '\tKnown as malicious on PhishTank\n'
                if info.get('dns'):
                    ipv4, ipv6 = info.get('dns')
                    if ipv4 is not None:
                        for ip in ipv4:
                            to_return += '\t' + ip + '\n'
                            data = info[ip]
                            if data.get('bgp'):
                                to_return += '\t\t(PTR: {}) is announced by {} ({}).\n'.format(*(data.get('bgp')[:3]))
                            if data.get('whois'):
                                all_mails.update(data.get('whois'))
                                to_return += '\t\tContacts: {}\n'.format(', '.join(data.get('whois')))
                    if ipv6 is not None:
                        for ip in ipv6:
                            to_return += '\t' + ip + '\n'
                            data = info[ip]
                            if data.get('whois'):
                                all_mails.update(data.get('whois'))
                                to_return += '\t\tContacts: {}\n'.format(', '.join(data.get('whois')))
            to_return += '\tAll contacts: {}\n'.format(', '.join(all_mails))
        return to_return

    def send(url, ip='', autosend=False):
        if not get_mail_sent(url):
            set_mail_sent(url)
            data = cached(url)
            if not autosend:
                subject = 'URL Abuse report from ' + ip
            else:
                subject = 'URL Abuse report sent automatically'
            msg = Message(subject, sender='[email protected]', recipients=["[email protected]"])
            msg.body = digest(data)
            msg.body += '\n\n'
            msg.body += json.dumps(data, sort_keys=True, indent=4, separators=(',', ': '))
            mail.send(msg)

    @app.route('/submit', methods=['POST'])
    def send_mail():
        data = json.loads(request.data)
        url = data["url"]
        if not get_mail_sent(url):
            ip = _get_user_ip(request)
            send(url, ip)
        return redirect(url_for('index'))

    return app
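
A note on the json.dumps usage above: get_cache and send pretty-print the cached data with sort_keys, indent and separators so that the mail body and the HTTP response come out stable and human-readable. A minimal sketch of what those keyword arguments change, using a made-up record:

import json

record = {"url": "http://example.com", "score": 3, "tags": ["reported"]}   # made-up sample data

# Default output: a single line, keys in dict iteration order.
print(json.dumps(record))

# As used above: sorted keys plus indentation, convenient for mail bodies and debugging.
print(json.dumps(record, sort_keys=True, indent=4, separators=(',', ': ')))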

Example 20

Project: docker-letsencrypt
Source File: acme_tiny.py
View license
def get_crt(account_key, csr, acme_dir):

    # helper function base64 encode for jose spec
    def _b64(b):
        return base64.urlsafe_b64encode(b).replace("=", "")

    # parse account key to get public key
    sys.stderr.write("Parsing account key...")
    proc = subprocess.Popen(["openssl", "rsa", "-in", account_key, "-noout", "-text"],
        stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise IOError("OpenSSL Error: {0}".format(err))
    pub_hex, pub_exp = re.search(
        r"modulus:\n\s+00:([a-f0-9\:\s]+?)\npublicExponent: ([0-9]+)",
        out, re.MULTILINE|re.DOTALL).groups()
    pub_mod = binascii.unhexlify(re.sub(r"(\s|:)", "", pub_hex))
    pub_mod64 = _b64(pub_mod)
    pub_exp = "{0:x}".format(int(pub_exp))
    pub_exp = "0{0}".format(pub_exp) if len(pub_exp) % 2 else pub_exp
    pub_exp64 = _b64(binascii.unhexlify(pub_exp))
    header = {
        "alg": "RS256",
        "jwk": {
            "e": pub_exp64,
            "kty": "RSA",
            "n": pub_mod64,
        },
    }
    accountkey_json = json.dumps(header['jwk'], sort_keys=True, separators=(',', ':'))
    thumbprint = _b64(hashlib.sha256(accountkey_json).digest())
    sys.stderr.write("parsed!\n")

    # helper function make signed requests
    def _send_signed_request(url, payload):
        nonce = urllib2.urlopen(CA + "/directory").headers['Replay-Nonce']
        payload64 = _b64(json.dumps(payload))
        protected = copy.deepcopy(header)
        protected.update({"nonce": nonce})
        protected64 = _b64(json.dumps(protected))
        proc = subprocess.Popen(["openssl", "dgst", "-sha256", "-sign", account_key],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = proc.communicate("{0}.{1}".format(protected64, payload64))
        if proc.returncode != 0:
            raise IOError("OpenSSL Error: {0}".format(err))
        data = json.dumps({
            "header": header,
            "protected": protected64,
            "payload": payload64,
            "signature": _b64(out),
        })
        try:
            resp = urllib2.urlopen(url, data)
            return resp.getcode(), resp.read()
        except urllib2.HTTPError as e:
            return e.code, e.read()

    # find domains
    sys.stderr.write("Parsing CSR...")
    proc = subprocess.Popen(["openssl", "req", "-in", csr, "-noout", "-text"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise IOError("Error loading {0}: {1}".format(csr, err))
    domains = set([])
    common_name = re.search(r"Subject:.*? CN=([^\s,;/]+)", out)
    if common_name is not None:
        domains.add(common_name.group(1))
    subject_alt_names = re.search(r"X509v3 Subject Alternative Name: \n +([^\n]+)\n", out, re.MULTILINE|re.DOTALL)
    if subject_alt_names is not None:
        for san in subject_alt_names.group(1).split(", "):
            if san.startswith("DNS:"):
                domains.add(san[4:])
    sys.stderr.write("parsed!\n")

    # register the account with the CA
    sys.stderr.write("Registering account...")
    code, result = _send_signed_request(CA + "/acme/new-reg", {
        "resource": "new-reg",
        "agreement": "https://letsencrypt.org/documents/LE-SA-v1.0.1-July-27-2015.pdf",
    })
    if code == 201:
        sys.stderr.write("registered!\n")
    elif code == 409:
        sys.stderr.write("already registered!\n")
    else:
        raise ValueError("Error registering: {0} {1}".format(code, result))

    # verify each domain
    for domain in domains:
        sys.stderr.write("Verifying {0}...".format(domain))

        # get new challenge
        code, result = _send_signed_request(CA + "/acme/new-authz", {
            "resource": "new-authz",
            "identifier": {
                "type": "dns",
                "value": domain,
            },
        })
        if code != 201:
            raise ValueError("Error registering: {0} {1}".format(code, result))

        # make the challenge file
        challenge = [c for c in json.loads(result)['challenges'] if c['type'] == "http-01"][0]
        challenge['token'] = re.sub(r"[^A-Za-z0-9_\-]", "_", challenge['token'])
        keyauthorization = "{0}.{1}".format(challenge['token'], thumbprint)
        wellknown_path = os.path.join(acme_dir, challenge['token'])
        wellknown_file = open(wellknown_path, "w")
        wellknown_file.write(keyauthorization)
        wellknown_file.close()

        # check that the file is in place
        wellknown_url = "http://{0}/.well-known/acme-challenge/{1}".format(
            domain, challenge['token'])
        try:
            resp = urllib2.urlopen(wellknown_url)
            assert resp.read().strip() == keyauthorization
        except (urllib2.HTTPError, urllib2.URLError, AssertionError):
            os.remove(wellknown_path)
            raise ValueError("Wrote file to {0}, but couldn't download {1}".format(
                wellknown_path, wellknown_url))

        # notify the CA that the challenge is met
        code, result = _send_signed_request(challenge['uri'], {
            "resource": "challenge",
            "keyAuthorization": keyauthorization,
        })
        if code != 202:
            raise ValueError("Error triggering challenge: {0} {1}".format(code, result))

        # wait for challenge to be verified
        while True:
            try:
                resp = urllib2.urlopen(challenge['uri'])
                challenge_status = json.loads(resp.read())
            except urllib2.HTTPError as e:
                raise ValueError("Error checking challenge: {0} {1}".format(
                    e.code, json.loads(e.read())))
            if challenge_status['status'] == "pending":
                time.sleep(2)
            elif challenge_status['status'] == "valid":
                sys.stderr.write("verified!\n")
                os.remove(wellknown_path)
                break
            else:
                raise ValueError("{0} challenge did not pass: {1}".format(
                    domain, challenge_status))

    # get the new certificate
    sys.stderr.write("Signing certificate...")
    proc = subprocess.Popen(["openssl", "req", "-in", csr, "-outform", "DER"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    csr_der, err = proc.communicate()
    code, result = _send_signed_request(CA + "/acme/new-cert", {
        "resource": "new-cert",
        "csr": _b64(csr_der),
    })
    if code != 201:
        raise ValueError("Error signing certificate: {0} {1}".format(code, result))

    # return signed certificate!
    sys.stderr.write("signed!\n")
    return """-----BEGIN CERTIFICATE-----\n{0}\n-----END CERTIFICATE-----\n""".format(
        "\n".join(textwrap.wrap(base64.b64encode(result), 64)))

Example 21

Project: landsat-util
Source File: landsat.py
View license
def main(args):
    """
    Main function - launches the program.

    :param args:
        The Parser arguments
    :type args:
        Parser object

    :returns:
        List

    :example:
        >>> ["The latitude and longitude values must be valid numbers", 1]
    """

    v = VerbosityMixin()

    if args:

        if 'clip' in args:
            bounds = convert_to_float_list(args.clip)
        else:
            bounds = None

        if args.subs == 'process':
            verbose = True if args.verbose else False
            force_unzip = True if args.force_unzip else False
            stored = process_image(args.path, args.bands, verbose, args.pansharpen, args.ndvi, force_unzip,
                                   args.ndvigrey, bounds)

            if args.upload:
                u = Uploader(args.key, args.secret, args.region)
                u.run(args.bucket, get_file(stored), stored)

            return ["The output is stored at %s" % stored]

        elif args.subs == 'search':

            try:
                if args.start:
                    args.start = reformat_date(parse(args.start))
                if args.end:
                    args.end = reformat_date(parse(args.end))
                if args.latest > 0:
                    args.limit = 25
                    end = datetime.now()
                    start = end - relativedelta(days=+365)
                    args.end = end.strftime("%Y-%m-%d")
                    args.start = start.strftime("%Y-%m-%d")
            except (TypeError, ValueError):
                return ["Your date format is incorrect. Please try again!", 1]

            s = Search()

            try:
                if args.lat is not None:
                    lat = float(args.lat)
                else:
                    lat = None

                if args.lon is not None:
                    lon = float(args.lon)
                else:
                    lon = None
            except ValueError:
                return ["The latitude and longitude values must be valid numbers", 1]

            address = args.address
            if address and (lat and lon):
                return ["Cannot specify both address and latitude-longitude"]

            result = s.search(paths_rows=args.pathrow,
                              lat=lat,
                              lon=lon,
                              address=address,
                              limit=args.limit,
                              start_date=args.start,
                              end_date=args.end,
                              cloud_max=args.cloud,
                              geojson=args.geojson)

            if 'status' in result:

                if result['status'] == 'SUCCESS':
                    if args.json:
                        return json.dumps(result)

                    if args.latest > 0:
                        datelist = []
                        for i in range(0, result['total_returned']):
                            datelist.append((result['results'][i]['date'], result['results'][i]))

                        datelist.sort(key=lambda tup: tup[0], reverse=True)
                        datelist = datelist[:args.latest]

                        result['results'] = []
                        for i in range(0, len(datelist)):
                            result['results'].append(datelist[i][1])
                            result['total_returned'] = len(datelist)

                    else:
                        v.output('%s items were found' % result['total'], normal=True, arrow=True)

                    if result['total'] > 100:
                        return ['Over 100 results. Please narrow your search', 1]
                    else:
                        v.output(json.dumps(result, sort_keys=True, indent=4), normal=True, color='green')
                    return ['Search completed!']

                elif result['status'] == 'error':
                    return [result['message'], 1]

            if args.geojson:
                return json.dumps(result)

        elif args.subs == 'download':
            d = Downloader(download_dir=args.dest, usgs_user=args.username, usgs_pass=args.password)
            try:
                bands = convert_to_integer_list(args.bands)

                if args.process:
                    if args.pansharpen:
                        bands.append(8)

                    if args.ndvi or args.ndvigrey:
                        bands = [4, 5]

                    if not args.bands:
                        bands = [4, 3, 2]

                files = d.download(args.scenes, bands)

                if args.process:
                    if not args.bands:
                        args.bands = '432'
                    force_unzip = True if args.force_unzip else False
                    for f in files:
                        stored = process_image(f, args.bands, False, args.pansharpen, args.ndvi, force_unzip,
                                               args.ndvigrey, bounds=bounds)

                        if args.upload:
                            try:
                                u = Uploader(args.key, args.secret, args.region)
                            except NoAuthHandlerFound:
                                return ["Could not authenticate with AWS", 1]
                            except URLError:
                                return ["Connection timeout. Probably the region parameter is incorrect", 1]
                            u.run(args.bucket, get_file(stored), stored)

                    return ['The output is stored at %s' % stored, 0]
                else:
                    return ['Download Completed', 0]
            except IncorrectSceneId:
                return ['The SceneID provided was incorrect', 1]
            except (RemoteFileDoesntExist, USGSInventoryAccessMissing) as e:
                return [e.message, 1]
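
On the json.dumps calls in the search branch above: the raw json.dumps(result) string is returned when --json or --geojson is set, while interactive runs get a sorted, indented dump. A hedged sketch of that dual-output pattern (render_result and the sample dict are illustrative, not part of landsat-util):

import json

def render_result(result, as_json=False):
    # Machine consumers get the compact JSON string unchanged.
    if as_json:
        return json.dumps(result)
    # Interactive users get a sorted, indented view instead.
    return json.dumps(result, sort_keys=True, indent=4)

sample = {"status": "SUCCESS", "total": 1, "results": [{"sceneID": "example-scene"}]}
print(render_result(sample))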

Example 22

Project: acme-tiny
Source File: acme_tiny.py
View license
def get_crt(account_key, csr, acme_dir, log=LOGGER, CA=DEFAULT_CA):
    # helper function base64 encode for jose spec
    def _b64(b):
        return base64.urlsafe_b64encode(b).decode('utf8').replace("=", "")

    # parse account key to get public key
    log.info("Parsing account key...")
    proc = subprocess.Popen(["openssl", "rsa", "-in", account_key, "-noout", "-text"],
        stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise IOError("OpenSSL Error: {0}".format(err))
    pub_hex, pub_exp = re.search(
        r"modulus:\n\s+00:([a-f0-9\:\s]+?)\npublicExponent: ([0-9]+)",
        out.decode('utf8'), re.MULTILINE|re.DOTALL).groups()
    pub_exp = "{0:x}".format(int(pub_exp))
    pub_exp = "0{0}".format(pub_exp) if len(pub_exp) % 2 else pub_exp
    header = {
        "alg": "RS256",
        "jwk": {
            "e": _b64(binascii.unhexlify(pub_exp.encode("utf-8"))),
            "kty": "RSA",
            "n": _b64(binascii.unhexlify(re.sub(r"(\s|:)", "", pub_hex).encode("utf-8"))),
        },
    }
    accountkey_json = json.dumps(header['jwk'], sort_keys=True, separators=(',', ':'))
    thumbprint = _b64(hashlib.sha256(accountkey_json.encode('utf8')).digest())

    # helper function make signed requests
    def _send_signed_request(url, payload):
        payload64 = _b64(json.dumps(payload).encode('utf8'))
        protected = copy.deepcopy(header)
        protected["nonce"] = urlopen(CA + "/directory").headers['Replay-Nonce']
        protected64 = _b64(json.dumps(protected).encode('utf8'))
        proc = subprocess.Popen(["openssl", "dgst", "-sha256", "-sign", account_key],
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        out, err = proc.communicate("{0}.{1}".format(protected64, payload64).encode('utf8'))
        if proc.returncode != 0:
            raise IOError("OpenSSL Error: {0}".format(err))
        data = json.dumps({
            "header": header, "protected": protected64,
            "payload": payload64, "signature": _b64(out),
        })
        try:
            resp = urlopen(url, data.encode('utf8'))
            return resp.getcode(), resp.read()
        except IOError as e:
            return getattr(e, "code", None), getattr(e, "read", e.__str__)()

    # find domains
    log.info("Parsing CSR...")
    proc = subprocess.Popen(["openssl", "req", "-in", csr, "-noout", "-text"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    out, err = proc.communicate()
    if proc.returncode != 0:
        raise IOError("Error loading {0}: {1}".format(csr, err))
    domains = set([])
    common_name = re.search(r"Subject:.*? CN=([^\s,;/]+)", out.decode('utf8'))
    if common_name is not None:
        domains.add(common_name.group(1))
    subject_alt_names = re.search(r"X509v3 Subject Alternative Name: \n +([^\n]+)\n", out.decode('utf8'), re.MULTILINE|re.DOTALL)
    if subject_alt_names is not None:
        for san in subject_alt_names.group(1).split(", "):
            if san.startswith("DNS:"):
                domains.add(san[4:])

    # register the account with the CA
    log.info("Registering account...")
    code, result = _send_signed_request(CA + "/acme/new-reg", {
        "resource": "new-reg",
        "agreement": "https://letsencrypt.org/documents/LE-SA-v1.1.1-August-1-2016.pdf",
    })
    if code == 201:
        log.info("Registered!")
    elif code == 409:
        log.info("Already registered!")
    else:
        raise ValueError("Error registering: {0} {1}".format(code, result))

    # verify each domain
    for domain in domains:
        log.info("Verifying {0}...".format(domain))

        # get new challenge
        code, result = _send_signed_request(CA + "/acme/new-authz", {
            "resource": "new-authz",
            "identifier": {"type": "dns", "value": domain},
        })
        if code != 201:
            raise ValueError("Error requesting challenges: {0} {1}".format(code, result))

        # make the challenge file
        challenge = [c for c in json.loads(result.decode('utf8'))['challenges'] if c['type'] == "http-01"][0]
        token = re.sub(r"[^A-Za-z0-9_\-]", "_", challenge['token'])
        keyauthorization = "{0}.{1}".format(token, thumbprint)
        wellknown_path = os.path.join(acme_dir, token)
        with open(wellknown_path, "w") as wellknown_file:
            wellknown_file.write(keyauthorization)

        # check that the file is in place
        wellknown_url = "http://{0}/.well-known/acme-challenge/{1}".format(domain, token)
        try:
            resp = urlopen(wellknown_url)
            resp_data = resp.read().decode('utf8').strip()
            assert resp_data == keyauthorization
        except (IOError, AssertionError):
            os.remove(wellknown_path)
            raise ValueError("Wrote file to {0}, but couldn't download {1}".format(
                wellknown_path, wellknown_url))

        # notify the CA that the challenge is met
        code, result = _send_signed_request(challenge['uri'], {
            "resource": "challenge",
            "keyAuthorization": keyauthorization,
        })
        if code != 202:
            raise ValueError("Error triggering challenge: {0} {1}".format(code, result))

        # wait for challenge to be verified
        while True:
            try:
                resp = urlopen(challenge['uri'])
                challenge_status = json.loads(resp.read().decode('utf8'))
            except IOError as e:
                raise ValueError("Error checking challenge: {0} {1}".format(
                    e.code, json.loads(e.read().decode('utf8'))))
            if challenge_status['status'] == "pending":
                time.sleep(2)
            elif challenge_status['status'] == "valid":
                log.info("{0} verified!".format(domain))
                os.remove(wellknown_path)
                break
            else:
                raise ValueError("{0} challenge did not pass: {1}".format(
                    domain, challenge_status))

    # get the new certificate
    log.info("Signing certificate...")
    proc = subprocess.Popen(["openssl", "req", "-in", csr, "-outform", "DER"],
        stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    csr_der, err = proc.communicate()
    code, result = _send_signed_request(CA + "/acme/new-cert", {
        "resource": "new-cert",
        "csr": _b64(csr_der),
    })
    if code != 201:
        raise ValueError("Error signing certificate: {0} {1}".format(code, result))

    # return signed certificate!
    log.info("Certificate signed!")
    return """-----BEGIN CERTIFICATE-----\n{0}\n-----END CERTIFICATE-----\n""".format(
        "\n".join(textwrap.wrap(base64.b64encode(result).decode('utf8'), 64)))

Example 23

Project: dx-toolkit
Source File: __init__.py
View license
def DXHTTPRequest(resource, data, method='POST', headers=None, auth=True,
                  timeout=DEFAULT_TIMEOUT,
                  use_compression=None, jsonify_data=True, want_full_response=False,
                  decode_response_body=True, prepend_srv=True, session_handler=None,
                  max_retries=DEFAULT_RETRIES, always_retry=False,
                  **kwargs):
    '''
    :param resource: API server route, e.g. "/record/new". If *prepend_srv* is False, a fully qualified URL is expected. If this argument is a callable, it will be called just before each request attempt, and expected to return a tuple (URL, headers). Headers returned by the callback are updated with *headers* (including headers set by this method).
    :type resource: string
    :param data: Content of the request body
    :type data: list or dict, if *jsonify_data* is True; or string or file-like object, otherwise
    :param headers: Names and values of HTTP headers to submit with the request (in addition to those needed for authentication, compression, or other options specified with the call).
    :type headers: dict
    :param auth:
        Controls the ``Authentication`` header or other means of authentication supplied with the request. If ``True``
        (default), a token is obtained from the ``DX_SECURITY_CONTEXT``. If the value evaluates to false, no action is
        taken to prepare authentication for the request. Otherwise, the value is assumed to be callable, and called with
        three arguments (method, url, headers) and expected to prepare the authentication headers by reference.
    :type auth: tuple, object, True (default), or None
    :param timeout: HTTP request timeout, in seconds
    :type timeout: float
    :param config: *config* value to pass through to :meth:`requests.request`
    :type config: dict
    :param use_compression: Deprecated
    :type use_compression: string or None
    :param jsonify_data: If True, *data* is converted from a Python list or dict to a JSON string
    :type jsonify_data: boolean
    :param want_full_response: If True, the full :class:`requests.Response` object is returned (otherwise, only the content of the response body is returned)
    :type want_full_response: boolean
    :param decode_response_body: If True (and *want_full_response* is False), the response body is decoded and, if it is a JSON string, deserialized. Otherwise, the response body is uncompressed if transport compression is on, and returned raw.
    :type decode_response_body: boolean
    :param prepend_srv: If True, prepends the API server location to the URL
    :type prepend_srv: boolean
    :param session_handler: Deprecated.
    :param max_retries: Maximum number of retries to perform for a request. A "failed" request is retried if any of the following is true:

                        - A response is received from the server, and the content length received does not match the "Content-Length" header.
                        - A response is received from the server, and the response has an HTTP status code in 5xx range.
                        - A response is received from the server, the "Content-Length" header is not set, and the response JSON cannot be parsed.
                        - No response is received from the server, and either *always_retry* is True or the request *method* is "GET".

    :type max_retries: int
    :param always_retry: If True, indicates that it is safe to retry a request on failure

                        - Note: It is not guaranteed that the request will *always* be retried on failure; rather, this is an indication to the function that it would be safe to do so.

    :type always_retry: boolean
    :returns: Response from API server in the format indicated by *want_full_response* and *decode_response_body*.
    :raises: :exc:`exceptions.DXAPIError` or a subclass if the server returned a non-200 status code; :exc:`requests.exceptions.HTTPError` if an invalid response was received from the server; or :exc:`requests.exceptions.ConnectionError` if a connection cannot be established.

    Wrapper around :meth:`requests.request()` that makes an HTTP
    request, inserting authentication headers and (by default)
    converting *data* to JSON.

    .. note:: Bindings methods that make API calls make the underlying
       HTTP request(s) using :func:`DXHTTPRequest`, and most of them
       will pass any unrecognized keyword arguments you have supplied
       through to :func:`DXHTTPRequest`.

    '''
    if headers is None:
        headers = {}

    global _UPGRADE_NOTIFY

    seq_num = _get_sequence_number()

    url = APISERVER + resource if prepend_srv else resource
    method = method.upper()  # Convert method name to uppercase, to ease string comparisons later

    if auth is True:
        auth = AUTH_HELPER

    if auth:
        auth(_RequestForAuth(method, url, headers))

    pool_args = {arg: kwargs.pop(arg, None) for arg in ("verify", "cert_file", "key_file")}
    test_retry = kwargs.pop("_test_retry_http_request", False)

    if _DEBUG >= 2:
        if isinstance(data, basestring) or isinstance(data, mmap.mmap):
            if len(data) == 0:
                formatted_data = '""'
            else:
                formatted_data = "<file data>"
        else:
            try:
                if _DEBUG >= 3:
                    formatted_data = json.dumps(data, indent=2)
                else:
                    formatted_data = json.dumps(data)
            except (UnicodeDecodeError, TypeError):
                formatted_data = "<binary data>"

    if jsonify_data:
        data = json.dumps(data)
        if 'Content-Type' not in headers and method == 'POST':
            headers['Content-Type'] = 'application/json'

    # If the input is a buffer, its data gets consumed by
    # requests.request (moving the read position). Record the initial
    # buffer position so that we can return to it if the request fails
    # and needs to be retried.
    rewind_input_buffer_offset = None
    if hasattr(data, 'seek') and hasattr(data, 'tell'):
        rewind_input_buffer_offset = data.tell()

    try_index = 0
    retried_responses = []
    while True:
        success, time_started = True, None
        response = None
        req_id = None
        try:
            time_started = time.time()
            _method, _url, _headers = _process_method_url_headers(method, url, headers)

            if _DEBUG >= 2:
                maybe_headers = ''
                if 'Range' in _headers:
                    maybe_headers = " " + json.dumps({"Range": _headers["Range"]})
                print("%s [%f] %s %s%s => %s\n" % (YELLOW(BOLD(">%d" % seq_num)),
                                                   time_started,
                                                   BLUE(method),
                                                   _url,
                                                   maybe_headers,
                                                   formatted_data),
                      file=sys.stderr,
                      end="")
            elif _DEBUG > 0:
                from repr import Repr
                print("%s [%f] %s %s => %s\n" % (YELLOW(BOLD(">%d" % seq_num)),
                                                 time_started,
                                                 BLUE(method),
                                                 _url,
                                                 Repr().repr(data)),
                      file=sys.stderr,
                      end="")

            body = _maybe_trucate_request(_url, try_index, data)

            # throws BadStatusLine if the server returns nothing
            try:
                pool_manager = _get_pool_manager(**pool_args)

                def unicode2str(s):
                    if isinstance(s, unicode):
                        return s.encode('ascii')
                    else:
                        return s

                _headers['User-Agent'] = USER_AGENT
                _headers['DNAnexus-API'] = API_VERSION

                # Convert Unicode headers to ASCII and raise an error if that is not possible
                _headers = {unicode2str(k): unicode2str(v) for k, v in _headers.items()}
                response = pool_manager.request(_method, _url, headers=_headers, body=body,
                                                timeout=timeout, retries=False, **kwargs)
            except urllib3.exceptions.ClosedPoolError:
                # If another thread closed the pool before the request was
                # started, will throw ClosedPoolError
                raise exceptions.UrllibInternalError("ClosedPoolError")

            _raise_error_for_testing(try_index, method)
            req_id = response.headers.get("x-request-id", "unavailable")

            if _UPGRADE_NOTIFY and response.headers.get('x-upgrade-info', '').startswith('A recommended update is available') and '_ARGCOMPLETE' not in os.environ:
                logger.info(response.headers['x-upgrade-info'])
                try:
                    with file(_UPGRADE_NOTIFY, 'a'):
                        os.utime(_UPGRADE_NOTIFY, None)
                except:
                    pass
                _UPGRADE_NOTIFY = False

            # If an HTTP code that is not in the 200 series is received and the content is JSON, parse it and throw the
            # appropriate error.  Otherwise, raise the usual exception.
            if response.status // 100 != 2:
                # response.headers key lookup is case-insensitive
                if response.headers.get('content-type', '').startswith('application/json'):
                    try:
                        content = response.data.decode('utf-8')
                    except AttributeError:
                        raise exceptions.UrllibInternalError("Content is none", response.status)
                    try:
                        content = json.loads(content)
                    except ValueError:
                        # The JSON is not parsable, but we should be able to retry.
                        raise exceptions.BadJSONInReply("Invalid JSON received from server", response.status)
                    try:
                        error_class = getattr(exceptions, content["error"]["type"], exceptions.DXAPIError)
                    except (KeyError, AttributeError, TypeError):
                        raise exceptions.HTTPError(response.status, content)
                    raise error_class(content, response.status, time_started, req_id)
                else:
                    try:
                        content = response.data.decode('utf-8')
                    except AttributeError:
                        raise exceptions.UrllibInternalError("Content is none", response.status)
                    raise exceptions.HTTPError("{} {} [Time={} RequestID={}]\n{}".format(response.status,
                                                                                         response.reason,
                                                                                         time_started,
                                                                                         req_id,
                                                                                         content))

            if want_full_response:
                return response
            else:
                if 'content-length' in response.headers:
                    if int(response.headers['content-length']) != len(response.data):
                        range_str = (' (%s)' % (headers['Range'],)) if 'Range' in headers else ''
                        raise exceptions.ContentLengthError(
                            ("Received response with content-length header set to %s but content length is %d%s. "
                             "[Time=%f RequestID=%s]") %
                            (response.headers['content-length'], len(response.data), range_str, time_started, req_id)
                        )

                content = response.data

                content_to_print = "(%d bytes)" % len(content) if len(content) > 0 else ''

                if decode_response_body:
                    content = content.decode('utf-8')
                    if response.headers.get('content-type', '').startswith('application/json'):
                        try:
                            content = json.loads(content)
                        except ValueError:
                            # The JSON is not parsable, but we should be able to retry.
                            raise exceptions.BadJSONInReply("Invalid JSON received from server", response.status)
                        if _DEBUG >= 3:
                            content_to_print = "\n  " + json.dumps(content, indent=2).replace("\n", "\n  ")
                        elif _DEBUG == 2:
                            content_to_print = json.dumps(content)
                        elif _DEBUG > 0:
                            content_to_print = Repr().repr(content)

                if _DEBUG > 0:
                    t = int((time.time() - time_started) * 1000)
                    req_id = response.headers.get('x-request-id') or "--"
                    code_format = GREEN if (200 <= response.status < 300) else RED
                    print("  " + YELLOW(BOLD("<%d" % seq_num)),
                          "[%f]" % time_started,
                          BLUE(method),
                          req_id,
                          _url,
                          "<=",
                          code_format(str(response.status)),
                          WHITE(BOLD("(%dms)" % t)),
                          content_to_print,
                          file=sys.stderr)

                if test_retry:
                    retried_responses.append(content)
                    if len(retried_responses) == 1:
                        continue
                    else:
                        _set_retry_response(retried_responses[0])
                        return retried_responses[1]

                return content
            raise AssertionError('Should never reach this line: expected a result to have been returned by now')
        except Exception as e:
            # Avoid reusing connections in the pool, since they may be
            # in an inconsistent state (observed as "ResponseNotReady"
            # errors).
            _get_pool_manager(**pool_args).clear()
            success = False
            exception_msg = _extract_msg_from_last_exception()
            if isinstance(e, _expected_exceptions):
                if response is not None and response.status == 503:
                    seconds_to_wait = _extract_retry_after_timeout(response)
                    logger.warn("%s %s: %s. Request Time=[%f] RequestID=[%s] Waiting %d seconds due to server unavailability...",
                                method, url, exception_msg, time_started, req_id, seconds_to_wait)
                    time.sleep(seconds_to_wait)
                    # Note, we escape the "except" block here without
                    # incrementing try_index because 503 responses with
                    # Retry-After should not count against the number of
                    # permitted retries.
                    continue

                # Total number of allowed tries is the initial try + up to
                # (max_retries) subsequent retries.
                total_allowed_tries = max_retries + 1
                ok_to_retry = False
                is_retryable = always_retry or (method == 'GET') or _is_retryable_exception(e)
                # Because try_index is not incremented until we escape this
                # iteration of the loop, try_index is equal to the number of
                # tries that have failed so far, minus one. Test whether we
                # have exhausted all retries.
                #
                # BadStatusLine ---  server did not return anything
                # BadJSONInReply --- server returned JSON that didn't parse properly
                if try_index + 1 < total_allowed_tries:
                    if response is None or \
                       isinstance(e, (exceptions.ContentLengthError, BadStatusLine, exceptions.BadJSONInReply, \
                                      urllib3.exceptions.ProtocolError, exceptions.UrllibInternalError)):
                        ok_to_retry = is_retryable
                    else:
                        ok_to_retry = 500 <= response.status < 600

                    # The server has closed the connection prematurely
                    if response is not None and \
                       response.status == 400 and is_retryable and method == 'PUT' and \
                       isinstance(e, requests.exceptions.HTTPError):
                        if '<Code>RequestTimeout</Code>' in exception_msg:
                            logger.info("Retrying 400 HTTP error, due to slow data transfer. " +
                                        "Request Time=[%f] RequestID=[%s]", time_started, req_id)
                        else:
                            logger.info("400 HTTP error, of unknown origin, exception_msg=[%s]. " +
                                        "Request Time=[%f] RequestID=[%s]", exception_msg, time_started, req_id)
                        ok_to_retry = True

                if ok_to_retry:
                    if rewind_input_buffer_offset is not None:
                        data.seek(rewind_input_buffer_offset)
                    delay = min(2 ** try_index, DEFAULT_TIMEOUT)
                    range_str = (' (range=%s)' % (headers['Range'],)) if 'Range' in headers else ''
                    logger.warn("[%s] %s %s: %s. Waiting %d seconds before retry %d of %d... %s",
                                time.ctime(), method, url, exception_msg, delay, try_index + 1, max_retries, range_str)
                    time.sleep(delay)
                    try_index += 1
                    continue

            # All retries have been exhausted OR the error is deemed not
            # retryable. Print the latest error and propagate it back to the caller.
            if not isinstance(e, exceptions.DXAPIError):
                logger.error("[%s] %s %s: %s.", time.ctime(), method, url, exception_msg)

            # Retries have been exhausted, and we are unable to get a full
            # buffer from the data source. Raise a special exception.
            if isinstance(e, urllib3.exceptions.ProtocolError) and \
               'Connection broken: IncompleteRead' in exception_msg:
                raise exceptions.DXIncompleteReadsError(exception_msg)
            raise
        finally:
            if success and try_index > 0:
                logger.info("[%s] %s %s: Recovered after %d retries", time.ctime(), method, url, try_index)

        raise AssertionError('Should never reach this line: should have attempted a retry or reraised by now')
    raise AssertionError('Should never reach this line: should never break out of loop')
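
The jsonify_data branch is where DXHTTPRequest touches json.dumps: the request body is serialized once and a Content-Type: application/json header is added for POSTs. A stripped-down, standard-library-only sketch of that pattern for Python 3 (post_json and its timeout default are illustrative, not part of dx-toolkit):

import json
from urllib.request import Request, urlopen

def post_json(url, data, headers=None, timeout=30):
    headers = dict(headers or {})
    body = json.dumps(data).encode('utf8')
    headers.setdefault('Content-Type', 'application/json')
    req = Request(url, data=body, headers=headers, method='POST')
    with urlopen(req, timeout=timeout) as resp:
        raw = resp.read().decode('utf8')
        content_type = resp.headers.get('Content-Type', '')
    # Deserialize only when the server says the body is JSON, mirroring the check above.
    if content_type.startswith('application/json'):
        return json.loads(raw)
    return raw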

Example 24

Project: twarc
Source File: twarc.py
View license
def main():
    """
    The twarc command line.
    """
    parser = argparse.ArgumentParser("twarc")
    parser.add_argument('-v', '--version', action='version',
                        version='%(prog)s {version}'.format(
                            version=__version__))
    parser.add_argument("--search", dest="search",
                        help="search for tweets matching a query")
    parser.add_argument("--max_id", dest="max_id",
                        help="maximum tweet id to search for")
    parser.add_argument("--since_id", dest="since_id",
                        help="smallest id to search for")
    parser.add_argument("--result_type", dest="result_type",
                        choices=["mixed", "recent", "popular"],
                        default="recent", help="search result type")
    parser.add_argument("--lang", dest="lang",
                        help="limit to ISO 639-1 language code"),
    parser.add_argument("--geocode", dest="geocode",
                        help="limit by latitude,longitude,radius")
    parser.add_argument("--track", dest="track",
                        help="stream tweets matching track filter")
    parser.add_argument("--follow", dest="follow",
                        help="stream tweets from user ids")
    parser.add_argument("--locations", dest="locations",
                        help="stream tweets from a particular location")
    parser.add_argument("--sample", action="store_true",
                        help="stream sample live tweets")
    parser.add_argument("--timeline", dest="timeline",
                        help="get user timeline for a screen name")
    parser.add_argument("--timeline_user_id", dest="timeline_user_id",
                        help="get user timeline for a user id")
    parser.add_argument("--lookup_screen_names", dest="lookup_screen_names",
                        nargs='+', help="look up users by screen name; \
                                         returns user objects")
    parser.add_argument("--lookup_user_ids", dest="lookup_user_ids", nargs='+',
                        help="look up users by user id; returns user objects")
    parser.add_argument("--follower_ids", dest="follower_ids", nargs=1,
                        help="retrieve follower lists; returns follower ids")
    parser.add_argument("--friend_ids", dest="friend_ids", nargs=1,
                        help="retrieve friend (following) list; returns friend ids")
    parser.add_argument("--hydrate", action="append", dest="hydrate",
                        help="rehydrate tweets from a file of tweet ids, \
                              use - for stdin")
    parser.add_argument("--trends_available", action="store_true",
                        help="show all regions available for trend summaries")
    parser.add_argument("--trends_place", dest="trends_place", nargs=1,
                        type=int, metavar="WOEID",
                        help="recent trends for WOEID specified")
    parser.add_argument("--trends_closest", dest="trends_closest", nargs=1,
                        metavar="LAT,LONG",
                        help="show available trend regions for LAT,LONG")
    parser.add_argument("--trends_place_exclude",
                        dest="trends_place_exclude", nargs=1,
                        type=int, metavar="WOEID",
                        help="recent trends for WOEID specified sans hashtags")
    parser.add_argument("--log", dest="log",
                        default="twarc.log", help="log file")
    parser.add_argument("--consumer_key",
                        default=None, help="Twitter API consumer key")
    parser.add_argument("--consumer_secret",
                        default=None, help="Twitter API consumer secret")
    parser.add_argument("--access_token",
                        default=None, help="Twitter API access key")
    parser.add_argument("--access_token_secret",
                        default=None, help="Twitter API access token secret")
    parser.add_argument('-c', '--config',
                        default=default_config_filename(),
                        help="Config file containing Twitter keys and secrets")
    parser.add_argument('-p', '--profile', default='main',
                        help="Name of a profile in your configuration file")
    parser.add_argument('-w', '--warnings', action='store_true',
                        help="Include warning messages in output")
    parser.add_argument("--connection_errors", type=int, default="0",
                        help="Number of connection errors before giving up. Default is to keep trying.")
    parser.add_argument("--http_errors", type=int, default="0",
                        help="Number of http errors before giving up. Default is to keep trying.")

    args = parser.parse_args()

    logging.basicConfig(
        filename=args.log,
        level=logging.INFO,
        format="%(asctime)s %(levelname)s %(message)s"
    )

    consumer_key = args.consumer_key or os.environ.get('CONSUMER_KEY')
    consumer_secret = args.consumer_secret or os.environ.get('CONSUMER_SECRET')
    access_token = args.access_token or os.environ.get('ACCESS_TOKEN')
    access_token_secret = args.access_token_secret or os.environ.get('ACCESS_TOKEN_SECRET')

    if not (consumer_key and consumer_secret and
            access_token and access_token_secret):
        credentials = load_config(args.config, args.profile)
        if credentials:
            consumer_key = credentials['consumer_key']
            consumer_secret = credentials['consumer_secret']
            access_token = credentials['access_token']
            access_token_secret = credentials['access_token_secret']
        else:
            print("Please enter Twitter authentication credentials")
            consumer_key = get_input('consumer key: ')
            consumer_secret = get_input('consumer secret: ')
            access_token = get_input('access_token: ')
            access_token_secret = get_input('access token secret: ')
            save_keys(args.profile, consumer_key, consumer_secret,
                      access_token, access_token_secret)

    t = Twarc(consumer_key=consumer_key,
              consumer_secret=consumer_secret,
              access_token=access_token,
              access_token_secret=access_token_secret,
              connection_errors=args.connection_errors,
              http_errors=args.http_errors)

    tweets = []
    users = []
    user_ids = []
    trends_json = []

    # Calls that return tweets
    if args.search or args.geocode:
        tweets = t.search(
            args.search,
            since_id=args.since_id,
            max_id=args.max_id,
            lang=args.lang,
            result_type=args.result_type,
            geocode=args.geocode
        )
    elif args.track or args.follow or args.locations:
        tweets = t.filter(track=args.track, follow=args.follow,
                          locations=args.locations)
    elif args.hydrate:
        input_iterator = fileinput.FileInput(
            args.hydrate,
            mode='rU',
            openhook=fileinput.hook_compressed,
        )
        tweets = t.hydrate(input_iterator)
    elif args.sample:
        tweets = t.sample()
    elif args.timeline:
        tweets = t.timeline(screen_name=args.timeline)
    elif args.timeline_user_id:
        tweets = t.timeline(user_id=args.timeline_user_id)

    # Calls that return user profile objects
    elif args.lookup_user_ids:
        users = t.user_lookup(user_ids=args.lookup_user_ids)
    elif args.lookup_screen_names:
        users = t.user_lookup(screen_names=args.lookup_screen_names)

    # Calls that return lists of user ids
    elif args.follower_ids:
        # Note: only one at a time, so assume exactly one
        user_ids = t.follower_ids(screen_name=args.follower_ids[0])
    elif args.friend_ids:
        # Note: same here, only one at a time, so assume exactly one
        user_ids = t.friend_ids(screen_name=args.friend_ids[0])

    # Calls that return JSON relating to trends
    elif args.trends_available:
        trends_json = t.trends_available()
    elif args.trends_place:
        trends_json = t.trends_place(args.trends_place)
    elif args.trends_place_exclude:
        trends_json = t.trends_place(args.trends_place_exclude,
                                     exclude='hashtags')
    elif args.trends_closest:
        # Note: using "lon" as the variable name to avoid shadowing the Python 2 built-in "long"
        try:
            lat, lon = [float(s.strip())
                        for s in args.trends_closest[0].split(',')]
            if lat > 180 or lat < -180 or lon > 180 or lon < -180:
                raise "Unacceptable values"
        except Exception as e:
            parser.error('LAT and LONG must be floats within [-180.0, 180.0]')
        trends_json = t.trends_closest(lat, lon)

    else:
        raise argparse.ArgumentTypeError(
            'must supply one of:  --search --track --follow --locations'
            ' --timeline --timeline_user_id'
            ' --lookup_screen_names --lookup_user_ids'
            ' --follower_ids --friend_ids'
            ' --trends_available --trends_closest'
            ' --trends_place --trends_place_exclude'
            ' --sample --hydrate')

    # iterate through the tweets and write them to stdout
    for tweet in tweets:
        # include warnings in output only if they asked for it
        if 'id_str' in tweet or args.warnings:
            print(json.dumps(tweet))

        # add some info to the log
        if 'id_str' in tweet:
            if 'user' in tweet:
                logging.info("archived https://twitter.com/%s/status/%s",
                             tweet['user']['screen_name'], tweet['id_str'])
        elif 'limit' in tweet:
            ts = datetime.datetime.utcfromtimestamp(
                float(tweet['limit']['timestamp_ms']) / 1000)
            ts = ts.isoformat("T") + "Z"
            logging.warning("%s tweets undelivered at %s",
                            tweet['limit']['track'], ts)
        elif 'warning' in tweet:
            logging.warning(tweet['warning']['message'])
        else:
            logging.warning(json.dumps(tweet))

    # iterate through the user objects and write them to stdout
    for user in users:
        # include warnings in output only if they asked for it
        if 'id_str' in user or args.warnings:
            print(json.dumps(user))

            # add some info to the log
            if 'screen_name' in user:
                logging.info("archived user profile for @%s / id_str=%s",
                             user['screen_name'], user['id_str'])
        else:
            logging.warning(json.dumps(user))

    # iterate through the user ids and write them to stdout
    for user_id in user_ids:
        print(str(user_id))

    # iterate through trend JSON and write each to stdout
    for trend_info in trends_json:
        print(json.dumps(trend_info))
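
The command-line flow above writes one json.dumps result per line to stdout, so the archive is plain line-oriented JSON that can be read back with nothing more than json.loads. A minimal sketch of consuming such a dump (the filename tweets.jsonl is an assumed example, not part of the project above):

import json

def read_jsonl(path):
    """Yield one decoded object per non-empty line of a JSON Lines file."""
    with open(path) as fh:
        for line in fh:
            line = line.strip()
            if line:
                yield json.loads(line)

# Example: count archived objects that carry an id_str field.
count = sum(1 for obj in read_jsonl('tweets.jsonl') if 'id_str' in obj)
print(count)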

Example 25

Project: p2pool-n
Source File: web.py
View license
def get_web_root(wb, datadir_path, bitcoind_getinfo_var, stop_event=variable.Event()):
    node = wb.node
    start_time = time.time()
    
    web_root = resource.Resource()
    
    def get_users():
        height, last = node.tracker.get_height_and_last(node.best_share_var.value)
        weights, total_weight, donation_weight = node.tracker.get_cumulative_weights(node.best_share_var.value, min(height, 720), 65535*2**256)
        res = {}
        for script in sorted(weights, key=lambda s: weights[s]):
            res[bitcoin_data.script2_to_address(script, node.net.PARENT)] = weights[script]/total_weight
        return res
    
    def get_current_scaled_txouts(scale, trunc=0):
        txouts = node.get_current_txouts()
        total = sum(txouts.itervalues())
        results = dict((script, value*scale//total) for script, value in txouts.iteritems())
        if trunc > 0:
            total_random = 0
            random_set = set()
            for s in sorted(results, key=results.__getitem__):
                if results[s] >= trunc:
                    break
                total_random += results[s]
                random_set.add(s)
            if total_random:
                winner = math.weighted_choice((script, results[script]) for script in random_set)
                for script in random_set:
                    del results[script]
                results[winner] = total_random
        if sum(results.itervalues()) < int(scale):
            results[math.weighted_choice(results.iteritems())] += int(scale) - sum(results.itervalues())
        return results
    
    def get_patron_sendmany(total=None, trunc='0.01'):
        if total is None:
            return 'need total argument. go to patron_sendmany/<TOTAL>'
        total = int(float(total)*1e8)
        trunc = int(float(trunc)*1e8)
        return json.dumps(dict(
            (bitcoin_data.script2_to_address(script, node.net.PARENT), value/1e8)
            for script, value in get_current_scaled_txouts(total, trunc).iteritems()
            if bitcoin_data.script2_to_address(script, node.net.PARENT) is not None
        ))
    
    def get_global_stats():
        # averaged over last hour
        if node.tracker.get_height(node.best_share_var.value) < 10:
            return None
        lookbehind = min(node.tracker.get_height(node.best_share_var.value), 3600//node.net.SHARE_PERIOD)
        
        nonstale_hash_rate = p2pool_data.get_pool_attempts_per_second(node.tracker, node.best_share_var.value, lookbehind)
        stale_prop = p2pool_data.get_average_stale_prop(node.tracker, node.best_share_var.value, lookbehind)
        diff = bitcoin_data.target_to_difficulty(wb.current_work.value['bits'].target)

        return dict(
            pool_nonstale_hash_rate=nonstale_hash_rate,
            pool_hash_rate=nonstale_hash_rate/(1 - stale_prop),
            pool_stale_prop=stale_prop,
            min_difficulty=bitcoin_data.target_to_difficulty(node.tracker.items[node.best_share_var.value].max_target),
            network_block_difficulty=diff,
            network_hashrate=(diff * 2**32 // node.net.PARENT.BLOCK_PERIOD),
        )
    
    def get_local_stats():
        if node.tracker.get_height(node.best_share_var.value) < 10:
            return None
        lookbehind = min(node.tracker.get_height(node.best_share_var.value), 3600//node.net.SHARE_PERIOD)
        
        global_stale_prop = p2pool_data.get_average_stale_prop(node.tracker, node.best_share_var.value, lookbehind)
        
        my_unstale_count = sum(1 for share in node.tracker.get_chain(node.best_share_var.value, lookbehind) if share.hash in wb.my_share_hashes)
        my_orphan_count = sum(1 for share in node.tracker.get_chain(node.best_share_var.value, lookbehind) if share.hash in wb.my_share_hashes and share.share_data['stale_info'] == 'orphan')
        my_doa_count = sum(1 for share in node.tracker.get_chain(node.best_share_var.value, lookbehind) if share.hash in wb.my_share_hashes and share.share_data['stale_info'] == 'doa')
        my_share_count = my_unstale_count + my_orphan_count + my_doa_count
        my_stale_count = my_orphan_count + my_doa_count
        
        my_stale_prop = my_stale_count/my_share_count if my_share_count != 0 else None
        
        my_work = sum(bitcoin_data.target_to_average_attempts(share.target)
            for share in node.tracker.get_chain(node.best_share_var.value, lookbehind - 1)
            if share.hash in wb.my_share_hashes)
        actual_time = (node.tracker.items[node.best_share_var.value].timestamp -
            node.tracker.items[node.tracker.get_nth_parent_hash(node.best_share_var.value, lookbehind - 1)].timestamp)
        share_att_s = my_work / actual_time
        
        miner_hash_rates, miner_dead_hash_rates = wb.get_local_rates()
        (stale_orphan_shares, stale_doa_shares), shares, _ = wb.get_stale_counts()

        miner_last_difficulties = {}
        for addr in wb.last_work_shares.value:
            miner_last_difficulties[addr] = bitcoin_data.target_to_difficulty(wb.last_work_shares.value[addr].target)
        
        return dict(
            my_hash_rates_in_last_hour=dict(
                note="DEPRECATED",
                nonstale=share_att_s,
                rewarded=share_att_s/(1 - global_stale_prop),
                actual=share_att_s/(1 - my_stale_prop) if my_stale_prop is not None else 0, # 0 because we don't have any shares anyway
            ),
            my_share_counts_in_last_hour=dict(
                shares=my_share_count,
                unstale_shares=my_unstale_count,
                stale_shares=my_stale_count,
                orphan_stale_shares=my_orphan_count,
                doa_stale_shares=my_doa_count,
            ),
            my_stale_proportions_in_last_hour=dict(
                stale=my_stale_prop,
                orphan_stale=my_orphan_count/my_share_count if my_share_count != 0 else None,
                dead_stale=my_doa_count/my_share_count if my_share_count != 0 else None,
            ),
            miner_hash_rates=miner_hash_rates,
            miner_dead_hash_rates=miner_dead_hash_rates,
            miner_last_difficulties=miner_last_difficulties,
            efficiency_if_miner_perfect=(1 - stale_orphan_shares/shares)/(1 - global_stale_prop) if shares else None, # ignores dead shares because those are miner's fault and indicated by pseudoshare rejection
            efficiency=(1 - (stale_orphan_shares+stale_doa_shares)/shares)/(1 - global_stale_prop) if shares else None,
            peers=dict(
                incoming=sum(1 for peer in node.p2p_node.peers.itervalues() if peer.incoming),
                outgoing=sum(1 for peer in node.p2p_node.peers.itervalues() if not peer.incoming),
            ),
            shares=dict(
                total=shares,
                orphan=stale_orphan_shares,
                dead=stale_doa_shares,
            ),
            uptime=time.time() - start_time,
            attempts_to_share=bitcoin_data.target_to_average_attempts(node.tracker.items[node.best_share_var.value].max_target),
            attempts_to_block=bitcoin_data.target_to_average_attempts(node.bitcoind_work.value['bits'].target),
            block_value=node.bitcoind_work.value['subsidy']*1e-8,
            warnings=p2pool_data.get_warnings(node.tracker, node.best_share_var.value, node.net, bitcoind_getinfo_var.value, node.bitcoind_work.value),
            donation_proportion=wb.donation_percentage/100,
            version=p2pool.__version__,
            protocol_version=p2p.Protocol.VERSION,
            fee=wb.worker_fee,
        )
    
    class WebInterface(deferred_resource.DeferredResource):
        def __init__(self, func, mime_type='application/json', args=()):
            deferred_resource.DeferredResource.__init__(self)
            self.func, self.mime_type, self.args = func, mime_type, args
        
        def getChild(self, child, request):
            return WebInterface(self.func, self.mime_type, self.args + (child,))
        
        @defer.inlineCallbacks
        def render_GET(self, request):
            request.setHeader('Content-Type', self.mime_type)
            request.setHeader('Access-Control-Allow-Origin', '*')
            res = yield self.func(*self.args)
            defer.returnValue(json.dumps(res) if self.mime_type == 'application/json' else res)
    
    def decent_height():
        return min(node.tracker.get_height(node.best_share_var.value), 720)
    web_root.putChild('rate', WebInterface(lambda: p2pool_data.get_pool_attempts_per_second(node.tracker, node.best_share_var.value, decent_height())/(1-p2pool_data.get_average_stale_prop(node.tracker, node.best_share_var.value, decent_height()))))
    web_root.putChild('difficulty', WebInterface(lambda: bitcoin_data.target_to_difficulty(node.tracker.items[node.best_share_var.value].max_target)))
    web_root.putChild('users', WebInterface(get_users))
    web_root.putChild('user_stales', WebInterface(lambda: dict((bitcoin_data.pubkey_hash_to_address(ph, node.net.PARENT), prop) for ph, prop in
        p2pool_data.get_user_stale_props(node.tracker, node.best_share_var.value, node.tracker.get_height(node.best_share_var.value)).iteritems())))
    web_root.putChild('fee', WebInterface(lambda: wb.worker_fee))
    web_root.putChild('current_payouts', WebInterface(lambda: dict((bitcoin_data.script2_to_address(script, node.net.PARENT), value/1e8) for script, value in node.get_current_txouts().iteritems())))
    web_root.putChild('patron_sendmany', WebInterface(get_patron_sendmany, 'text/plain'))
    web_root.putChild('global_stats', WebInterface(get_global_stats))
    web_root.putChild('local_stats', WebInterface(get_local_stats))
    web_root.putChild('peer_addresses', WebInterface(lambda: ' '.join('%s%s' % (peer.transport.getPeer().host, ':'+str(peer.transport.getPeer().port) if peer.transport.getPeer().port != node.net.P2P_PORT else '') for peer in node.p2p_node.peers.itervalues())))
    web_root.putChild('peer_txpool_sizes', WebInterface(lambda: dict(('%s:%i' % (peer.transport.getPeer().host, peer.transport.getPeer().port), peer.remembered_txs_size) for peer in node.p2p_node.peers.itervalues())))
    web_root.putChild('pings', WebInterface(defer.inlineCallbacks(lambda: defer.returnValue(
        dict([(a, (yield b)) for a, b in
            [(
                '%s:%i' % (peer.transport.getPeer().host, peer.transport.getPeer().port),
                defer.inlineCallbacks(lambda peer=peer: defer.returnValue(
                    min([(yield peer.do_ping().addCallback(lambda x: x/0.001).addErrback(lambda fail: None)) for i in xrange(3)])
                ))()
            ) for peer in list(node.p2p_node.peers.itervalues())]
        ])
    ))))
    web_root.putChild('peer_versions', WebInterface(lambda: dict(('%s:%i' % peer.addr, peer.other_sub_version) for peer in node.p2p_node.peers.itervalues())))
    web_root.putChild('payout_addr', WebInterface(lambda: bitcoin_data.pubkey_hash_to_address(wb.my_pubkey_hash, node.net.PARENT)))
    web_root.putChild('recent_blocks', WebInterface(lambda: [dict(
        ts=s.timestamp,
        hash='%064x' % s.header_hash,
        number=pack.IntType(24).unpack(s.share_data['coinbase'][1:4]) if len(s.share_data['coinbase']) >= 4 else None,
        share='%064x' % s.hash,
    ) for s in node.tracker.get_chain(node.best_share_var.value, min(node.tracker.get_height(node.best_share_var.value), 24*60*60//node.net.SHARE_PERIOD)) if s.pow_hash <= s.header['bits'].target]))
    web_root.putChild('uptime', WebInterface(lambda: time.time() - start_time))
    web_root.putChild('stale_rates', WebInterface(lambda: p2pool_data.get_stale_counts(node.tracker, node.best_share_var.value, decent_height(), rates=True)))
    
    new_root = resource.Resource()
    web_root.putChild('web', new_root)
    
    stat_log = []
    if os.path.exists(os.path.join(datadir_path, 'stats')):
        try:
            with open(os.path.join(datadir_path, 'stats'), 'rb') as f:
                stat_log = json.loads(f.read())
        except:
            log.err(None, 'Error loading stats:')
    def update_stat_log():
        while stat_log and stat_log[0]['time'] < time.time() - 24*60*60:
            stat_log.pop(0)
        
        lookbehind = 3600//node.net.SHARE_PERIOD
        if node.tracker.get_height(node.best_share_var.value) < lookbehind:
            return None
        
        global_stale_prop = p2pool_data.get_average_stale_prop(node.tracker, node.best_share_var.value, lookbehind)
        (stale_orphan_shares, stale_doa_shares), shares, _ = wb.get_stale_counts()
        miner_hash_rates, miner_dead_hash_rates = wb.get_local_rates()
        
        stat_log.append(dict(
            time=time.time(),
            pool_hash_rate=p2pool_data.get_pool_attempts_per_second(node.tracker, node.best_share_var.value, lookbehind)/(1-global_stale_prop),
            pool_stale_prop=global_stale_prop,
            local_hash_rates=miner_hash_rates,
            local_dead_hash_rates=miner_dead_hash_rates,
            shares=shares,
            stale_shares=stale_orphan_shares + stale_doa_shares,
            stale_shares_breakdown=dict(orphan=stale_orphan_shares, doa=stale_doa_shares),
            current_payout=node.get_current_txouts().get(bitcoin_data.pubkey_hash_to_script2(wb.my_pubkey_hash), 0)*1e-8,
            peers=dict(
                incoming=sum(1 for peer in node.p2p_node.peers.itervalues() if peer.incoming),
                outgoing=sum(1 for peer in node.p2p_node.peers.itervalues() if not peer.incoming),
            ),
            attempts_to_share=bitcoin_data.target_to_average_attempts(node.tracker.items[node.best_share_var.value].max_target),
            attempts_to_block=bitcoin_data.target_to_average_attempts(node.bitcoind_work.value['bits'].target),
            block_value=node.bitcoind_work.value['subsidy']*1e-8,
        ))
        
        with open(os.path.join(datadir_path, 'stats'), 'wb') as f:
            f.write(json.dumps(stat_log))
    x = deferral.RobustLoopingCall(update_stat_log)
    x.start(5*60)
    stop_event.watch(x.stop)
    new_root.putChild('log', WebInterface(lambda: stat_log))
    
    def get_share(share_hash_str):
        if int(share_hash_str, 16) not in node.tracker.items:
            return None
        share = node.tracker.items[int(share_hash_str, 16)]
        
        return dict(
            parent='%064x' % share.previous_hash,
            children=['%064x' % x for x in sorted(node.tracker.reverse.get(share.hash, set()), key=lambda sh: -len(node.tracker.reverse.get(sh, set())))], # sorted from most children to least children
            type_name=type(share).__name__,
            local=dict(
                verified=share.hash in node.tracker.verified.items,
                time_first_seen=start_time if share.time_seen == 0 else share.time_seen,
                peer_first_received_from=share.peer_addr,
            ),
            share_data=dict(
                timestamp=share.timestamp,
                target=share.target,
                max_target=share.max_target,
                payout_address=bitcoin_data.script2_to_address(share.new_script, node.net.PARENT),
                donation=share.share_data['donation']/65535,
                stale_info=share.share_data['stale_info'],
                nonce=share.share_data['nonce'],
                desired_version=share.share_data['desired_version'],
                absheight=share.absheight,
                abswork=share.abswork,
            ),
            block=dict(
                hash='%064x' % share.header_hash,
                header=dict(
                    version=share.header['version'],
                    previous_block='%064x' % share.header['previous_block'],
                    merkle_root='%064x' % share.header['merkle_root'],
                    timestamp=share.header['timestamp'],
                    target=share.header['bits'].target,
                    nonce=share.header['nonce'],
                ),
                gentx=dict(
                    hash='%064x' % share.gentx_hash,
                    coinbase=share.share_data['coinbase'].ljust(2, '\x00').encode('hex'),
                    value=share.share_data['subsidy']*1e-8,
                    last_txout_nonce='%016x' % share.contents['last_txout_nonce'],
                ),
                other_transaction_hashes=['%064x' % x for x in share.get_other_tx_hashes(node.tracker)],
            ),
        )
    new_root.putChild('share', WebInterface(lambda share_hash_str: get_share(share_hash_str)))
    new_root.putChild('heads', WebInterface(lambda: ['%064x' % x for x in node.tracker.heads]))
    new_root.putChild('verified_heads', WebInterface(lambda: ['%064x' % x for x in node.tracker.verified.heads]))
    new_root.putChild('tails', WebInterface(lambda: ['%064x' % x for t in node.tracker.tails for x in node.tracker.reverse.get(t, set())]))
    new_root.putChild('verified_tails', WebInterface(lambda: ['%064x' % x for t in node.tracker.verified.tails for x in node.tracker.verified.reverse.get(t, set())]))
    new_root.putChild('best_share_hash', WebInterface(lambda: '%064x' % node.best_share_var.value))
    new_root.putChild('my_share_hashes', WebInterface(lambda: ['%064x' % my_share_hash for my_share_hash in wb.my_share_hashes]))
    def get_share_data(share_hash_str):
        if int(share_hash_str, 16) not in node.tracker.items:
            return ''
        share = node.tracker.items[int(share_hash_str, 16)]
        return p2pool_data.share_type.pack(share.as_share1a())
    new_root.putChild('share_data', WebInterface(lambda share_hash_str: get_share_data(share_hash_str), 'application/octet-stream'))
    new_root.putChild('currency_info', WebInterface(lambda: dict(
        symbol=node.net.PARENT.SYMBOL,
        block_explorer_url_prefix=node.net.PARENT.BLOCK_EXPLORER_URL_PREFIX,
        address_explorer_url_prefix=node.net.PARENT.ADDRESS_EXPLORER_URL_PREFIX,
        tx_explorer_url_prefix=node.net.PARENT.TX_EXPLORER_URL_PREFIX,
    )))
    new_root.putChild('version', WebInterface(lambda: p2pool.__version__))
    
    hd_path = os.path.join(datadir_path, 'graph_db')
    hd_data = _atomic_read(hd_path)
    hd_obj = {}
    if hd_data is not None:
        try:
            hd_obj = json.loads(hd_data)
        except Exception:
            log.err(None, 'Error reading graph database:')
    dataview_descriptions = {
        'last_hour': graph.DataViewDescription(150, 60*60),
        'last_day': graph.DataViewDescription(300, 60*60*24),
        'last_week': graph.DataViewDescription(300, 60*60*24*7),
        'last_month': graph.DataViewDescription(300, 60*60*24*30),
        'last_year': graph.DataViewDescription(300, 60*60*24*365.25),
    }
    hd = graph.HistoryDatabase.from_obj({
        'local_hash_rate': graph.DataStreamDescription(dataview_descriptions, is_gauge=False),
        'local_dead_hash_rate': graph.DataStreamDescription(dataview_descriptions, is_gauge=False),
        'local_share_hash_rates': graph.DataStreamDescription(dataview_descriptions, is_gauge=False,
            multivalues=True, multivalue_undefined_means_0=True,
            default_func=graph.make_multivalue_migrator(dict(good='local_share_hash_rate', dead='local_dead_share_hash_rate', orphan='local_orphan_share_hash_rate'),
                post_func=lambda bins: [dict((k, (v[0] - (sum(bin.get(rem_k, (0, 0))[0] for rem_k in ['dead', 'orphan']) if k == 'good' else 0), v[1])) for k, v in bin.iteritems()) for bin in bins])),
        'pool_rates': graph.DataStreamDescription(dataview_descriptions, multivalues=True,
            multivalue_undefined_means_0=True),
        'current_payout': graph.DataStreamDescription(dataview_descriptions),
        'current_payouts': graph.DataStreamDescription(dataview_descriptions, multivalues=True),
        'peers': graph.DataStreamDescription(dataview_descriptions, multivalues=True, default_func=graph.make_multivalue_migrator(dict(incoming='incoming_peers', outgoing='outgoing_peers'))),
        'miner_hash_rates': graph.DataStreamDescription(dataview_descriptions, is_gauge=False, multivalues=True),
        'miner_dead_hash_rates': graph.DataStreamDescription(dataview_descriptions, is_gauge=False, multivalues=True),
        'desired_version_rates': graph.DataStreamDescription(dataview_descriptions, multivalues=True,
            multivalue_undefined_means_0=True),
        'traffic_rate': graph.DataStreamDescription(dataview_descriptions, is_gauge=False, multivalues=True),
        'getwork_latency': graph.DataStreamDescription(dataview_descriptions),
        'memory_usage': graph.DataStreamDescription(dataview_descriptions),
    }, hd_obj)
    x = deferral.RobustLoopingCall(lambda: _atomic_write(hd_path, json.dumps(hd.to_obj())))
    x.start(100)
    stop_event.watch(x.stop)
    @wb.pseudoshare_received.watch
    def _(work, dead, user):
        t = time.time()
        hd.datastreams['local_hash_rate'].add_datum(t, work)
        if dead:
            hd.datastreams['local_dead_hash_rate'].add_datum(t, work)
        if user is not None:
            hd.datastreams['miner_hash_rates'].add_datum(t, {user: work})
            if dead:
                hd.datastreams['miner_dead_hash_rates'].add_datum(t, {user: work})
    @wb.share_received.watch
    def _(work, dead, share_hash):
        t = time.time()
        if not dead:
            hd.datastreams['local_share_hash_rates'].add_datum(t, dict(good=work))
        else:
            hd.datastreams['local_share_hash_rates'].add_datum(t, dict(dead=work))
        def later():
            res = node.tracker.is_child_of(share_hash, node.best_share_var.value)
            if res is None: res = False # share isn't connected to sharechain? assume orphaned
            if res and dead: # share was DOA, but is now in sharechain
                # move from dead to good
                hd.datastreams['local_share_hash_rates'].add_datum(t, dict(dead=-work, good=work))
            elif not res and not dead: # share wasn't DOA, and isn't in sharechain
                # move from good to orphan
                hd.datastreams['local_share_hash_rates'].add_datum(t, dict(good=-work, orphan=work))
        reactor.callLater(200, later)
    @node.p2p_node.traffic_happened.watch
    def _(name, bytes):
        hd.datastreams['traffic_rate'].add_datum(time.time(), {name: bytes})
    def add_point():
        if node.tracker.get_height(node.best_share_var.value) < 10:
            return None
        lookbehind = min(node.net.CHAIN_LENGTH, 60*60//node.net.SHARE_PERIOD, node.tracker.get_height(node.best_share_var.value))
        t = time.time()
        
        pool_rates = p2pool_data.get_stale_counts(node.tracker, node.best_share_var.value, lookbehind, rates=True)
        pool_total = sum(pool_rates.itervalues())
        hd.datastreams['pool_rates'].add_datum(t, pool_rates)
        
        current_txouts = node.get_current_txouts()
        hd.datastreams['current_payout'].add_datum(t, current_txouts.get(bitcoin_data.pubkey_hash_to_script2(wb.my_pubkey_hash), 0)*1e-8)
        miner_hash_rates, miner_dead_hash_rates = wb.get_local_rates()
        current_txouts_by_address = dict((bitcoin_data.script2_to_address(script, node.net.PARENT), amount) for script, amount in current_txouts.iteritems())
        hd.datastreams['current_payouts'].add_datum(t, dict((user, current_txouts_by_address[user]*1e-8) for user in miner_hash_rates if user in current_txouts_by_address))
        
        hd.datastreams['peers'].add_datum(t, dict(
            incoming=sum(1 for peer in node.p2p_node.peers.itervalues() if peer.incoming),
            outgoing=sum(1 for peer in node.p2p_node.peers.itervalues() if not peer.incoming),
        ))
        
        vs = p2pool_data.get_desired_version_counts(node.tracker, node.best_share_var.value, lookbehind)
        vs_total = sum(vs.itervalues())
        hd.datastreams['desired_version_rates'].add_datum(t, dict((str(k), v/vs_total*pool_total) for k, v in vs.iteritems()))
        try:
            hd.datastreams['memory_usage'].add_datum(t, memory.resident())
        except:
            if p2pool.DEBUG:
                traceback.print_exc()
    x = deferral.RobustLoopingCall(add_point)
    x.start(5)
    stop_event.watch(x.stop)
    @node.bitcoind_work.changed.watch
    def _(new_work):
        hd.datastreams['getwork_latency'].add_datum(time.time(), new_work['latency'])
    new_root.putChild('graph_data', WebInterface(lambda source, view: hd.datastreams[source].dataviews[view].get_data(time.time())))
    
    web_root.putChild('static', static.File(os.path.join(os.path.dirname(os.path.abspath(sys.argv[0])), 'web-static')))
    
    return web_root
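
The WebInterface class above boils down to "call a function, json.dumps the result, and serve it with an application/json Content-Type". A minimal Python 3 standard-library sketch of the same idea, independent of Twisted (the handler name, the stand-in get_stats function, and the port are illustrative assumptions):

import json
import time
from http.server import BaseHTTPRequestHandler, HTTPServer

START_TIME = time.time()

def get_stats():
    # Stand-in for the node statistics gathered in the example above.
    return {'uptime': time.time() - START_TIME}

class JSONHandler(BaseHTTPRequestHandler):
    def do_GET(self):
        # Serialize the callable's result and serve it as JSON.
        body = json.dumps(get_stats()).encode('utf-8')
        self.send_response(200)
        self.send_header('Content-Type', 'application/json')
        self.send_header('Access-Control-Allow-Origin', '*')
        self.end_headers()
        self.wfile.write(body)

# HTTPServer(('127.0.0.1', 8080), JSONHandler).serve_forever()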

Example 26

View license
def PDF(request):
    try:
        MD5 = request.GET['md5']
        TYP = request.GET['type']
        m = re.match('^[0-9a-f]{32}$', MD5)
        if m:
            if TYP in ['APK', 'ANDZIP']:
                DB = StaticAnalyzerAndroid.objects.filter(MD5=MD5)
                if DB.exists():
                    print "\n[INFO] Fetching data from DB for PDF Report Generation (Android)"
                    context = {
                        'title': DB[0].TITLE,
                        'name': DB[0].APP_NAME,
                        'size': DB[0].SIZE,
                        'md5': DB[0].MD5,
                        'sha1': DB[0].SHA1,
                        'sha256': DB[0].SHA256,
                        'packagename': DB[0].PACKAGENAME,
                        'mainactivity': DB[0].MAINACTIVITY,
                        'targetsdk': DB[0].TARGET_SDK,
                        'maxsdk': DB[0].MAX_SDK,
                        'minsdk': DB[0].MIN_SDK,
                        'androvername': DB[0].ANDROVERNAME,
                        'androver': DB[0].ANDROVER,
                        'manifest': DB[0].MANIFEST_ANAL,
                        'permissions': DB[0].PERMISSIONS,
                        'files': python_list(DB[0].FILES),
                        'certz': DB[0].CERTZ,
                        'activities': python_list(DB[0].ACTIVITIES),
                        'receivers': python_list(DB[0].RECEIVERS),
                        'providers': python_list(DB[0].PROVIDERS),
                        'services': python_list(DB[0].SERVICES),
                        'libraries': python_list(DB[0].LIBRARIES),
                        'act_count': DB[0].CNT_ACT,
                        'prov_count': DB[0].CNT_PRO,
                        'serv_count': DB[0].CNT_SER,
                        'bro_count': DB[0].CNT_BRO,
                        'certinfo': DB[0].CERT_INFO,
                        'issued': DB[0].ISSUED,
                        'native': DB[0].NATIVE,
                        'dynamic': DB[0].DYNAMIC,
                        'reflection': DB[0].REFLECT,
                        'crypto': DB[0].CRYPTO,
                        'obfus': DB[0].OBFUS,
                        'api': DB[0].API,
                        'dang': DB[0].DANG,
                        'urls': DB[0].URLS,
                        'domains': python_dict(DB[0].DOMAINS),
                        'emails': DB[0].EMAILS,
                        'strings': python_list(DB[0].STRINGS),
                        'zipped': DB[0].ZIPPED,
                        'mani': DB[0].MANI
                    }
                    if TYP == 'APK':
                        template = get_template("static_analysis_pdf.html")
                    else:
                        template = get_template("static_analysis_zip_pdf.html")
                else:
                    return HttpResponse(json.dumps({"report": "Report not Found"}),
                                        content_type="application/json; charset=utf-8")
            elif re.findall('IPA|IOSZIP', TYP):
                if TYP == 'IPA':
                    DB = StaticAnalyzerIPA.objects.filter(MD5=MD5)
                    if DB.exists():
                        print "\n[INFO] Fetching data from DB for PDF Report Generation (IOS IPA)"
                        context = {
                            'title': DB[0].TITLE,
                            'name': DB[0].APPNAMEX,
                            'size': DB[0].SIZE,
                            'md5': DB[0].MD5,
                            'sha1': DB[0].SHA1,
                            'sha256': DB[0].SHA256,
                            'plist': DB[0].INFOPLIST,
                            'bin_name': DB[0].BINNAME,
                            'id': DB[0].IDF,
                            'ver': DB[0].VERSION,
                            'sdk': DB[0].SDK,
                            'pltfm': DB[0].PLTFM,
                            'min': DB[0].MINX,
                            'bin_anal': DB[0].BIN_ANAL,
                            'libs': DB[0].LIBS,
                            'files': python_list(DB[0].FILES),
                            'file_analysis': DB[0].SFILESX,
                            'strings': DB[0].STRINGS
                        }
                        template = get_template("ios_binary_analysis_pdf.html")
                    else:
                        return HttpResponse(json.dumps({"report": "Report not Found"}),
                                            content_type="application/json; charset=utf-8")
                elif TYP == 'IOSZIP':
                    DB = StaticAnalyzerIOSZIP.objects.filter(MD5=MD5)
                    if DB.exists():
                        print "\n[INFO] Fetching data from DB for PDF Report Generation (IOS ZIP)"
                        context = {
                            'title': DB[0].TITLE,
                            'name': DB[0].APPNAMEX,
                            'size': DB[0].SIZE,
                            'md5': DB[0].MD5,
                            'sha1': DB[0].SHA1,
                            'sha256': DB[0].SHA256,
                            'plist': DB[0].INFOPLIST,
                            'bin_name': DB[0].BINNAME,
                            'id': DB[0].IDF,
                            'ver': DB[0].VERSION,
                            'sdk': DB[0].SDK,
                            'pltfm': DB[0].PLTFM,
                            'min': DB[0].MINX,
                            'bin_anal': DB[0].BIN_ANAL,
                            'libs': DB[0].LIBS,
                            'files': python_list(DB[0].FILES),
                            'file_analysis': DB[0].SFILESX,
                            'api': DB[0].HTML,
                            'insecure': DB[0].CODEANAL,
                            'urls': DB[0].URLnFile,
                            'domains': python_dict(DB[0].DOMAINS),
                            'emails': DB[0].EmailnFile
                        }
                        template = get_template("ios_source_analysis_pdf.html")
                    else:
                        return HttpResponse(json.dumps({"report": "Report not Found"}),
                                            content_type="application/json; charset=utf-8")
            elif re.findall('APPX',TYP):
                if TYP == 'APPX':
                    db_entry = StaticAnalyzerWindows.objects.filter( # pylint: disable-msg=E1101
                        MD5=MD5
                    )
                    if db_entry.exists():
                        print "\n[INFO] Fetching data from DB for PDF Report Generation (APPX)"
                        context = {
                            'title' : db_entry[0].TITLE,
                            'name' : db_entry[0].APP_NAME,
                            'pub_name' : db_entry[0].PUB_NAME,
                            'size' : db_entry[0].SIZE,
                            'md5': db_entry[0].MD5,
                            'sha1' : db_entry[0].SHA1,
                            'sha256' : db_entry[0].SHA256,
                            'bin_name' : db_entry[0].BINNAME,
                            'version' :  db_entry[0].VERSION,
                            'arch' :  db_entry[0].ARCH,
                            'compiler_version' :  db_entry[0].COMPILER_VERSION,
                            'visual_studio_version' :  db_entry[0].VISUAL_STUDIO_VERSION,
                            'visual_studio_edition' :  db_entry[0].VISUAL_STUDIO_EDITION,
                            'target_os' :  db_entry[0].TARGET_OS,
                            'appx_dll_version' :  db_entry[0].APPX_DLL_VERSION,
                            'proj_guid' :  db_entry[0].PROJ_GUID,
                            'opti_tool' :  db_entry[0].OPTI_TOOL,
                            'target_run' :  db_entry[0].TARGET_RUN,
                            'files' :  python_list(db_entry[0].FILES),
                            'strings' : db_entry[0].STRINGS,
                            'bin_an_results' : python_list(db_entry[0].BIN_AN_RESULTS),
                            'bin_an_warnings' : python_list(db_entry[0].BIN_AN_WARNINGS)
                        }
                        template = get_template("windows_binary_analysis_pdf.html")
                    else:
                        return HttpResponse(json.dumps({"report": "Report not Found"}),
                                            content_type="application/json; charset=utf-8")
            else:
                return HttpResponse(json.dumps({"type": "Type is not Allowed"}),
                                    content_type="application/json; charset=utf-8")
            html = template.render(context)
            result = StringIO()
            pdf = pisa.pisaDocument(StringIO("{0}".format(
                html.encode('utf-8'))), result, encoding='utf-8')
            if not pdf.err:
                return HttpResponse(result.getvalue(), content_type='application/pdf')
            else:
                return HttpResponseRedirect('/error/')
        else:
            return HttpResponse(json.dumps({"md5": "Invalid MD5"}),
                                content_type="application/json; charset=utf-8")
    except:
        PrintException("[ERROR] PDF Report Generation Error")
        return HttpResponseRedirect('/error/')
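
Every error path in the view above builds the same pair by hand: a json.dumps body plus the "application/json; charset=utf-8" content type. A small framework-agnostic sketch of factoring that pair out (the HttpResponse wrapper used by the project is deliberately not re-implemented here):

import json

def json_error(payload):
    """Return (body, content_type) for a JSON error response."""
    body = json.dumps(payload, ensure_ascii=False)
    return body, "application/json; charset=utf-8"

body, content_type = json_error({"report": "Report not Found"})
print(content_type)
print(body)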

Example 27

Project: wpJson4Harp
Source File: wp2json4harp.py
View license
def databaseMigrate():
	#Verify database connection
	try:
		db = MySQLdb.connect(host=MYSQL_HOST,user=MYSQL_USER,passwd=MYSQL_PASS,  db=MYSQL_DB)
		db.close()
	except Exception as err:
		print err
		print "Could not connect to database. Aborting..."
		sys.exit(1)

	db = MySQLdb.connect(host=MYSQL_HOST,user=MYSQL_USER,passwd=MYSQL_PASS,  db=MYSQL_DB)
	curs = db.cursor()

	curs.execute("SELECT p.ID, u.meta_value AS nickname , p.post_date_gmt, p.post_content, REPLACE(REPLACE(REPLACE(post_title, '\t', ''), '\r', ''), '\n', ''), p.post_status, pm.meta_key, pm.meta_value, p.post_type, p.post_name from %(WP_PREFIX)sposts p LEFT JOIN %(WP_PREFIX)spostmeta pm ON p.ID=pm.post_id LEFT JOIN %(WP_PREFIX)susermeta u ON p.post_author=u.user_id WHERE u.meta_key='nickname' ORDER BY p.ID"  % globals())

	post_types = sets.Set()
	placeholder = WP_Object()
	setattr(placeholder,'ID',-1)
	posts = [placeholder]
	for row in curs.fetchall():
		if posts[-1].ID != row[0]:
			posts.append(WP_Object())
		setattr(posts[-1],'ID',row[0])
		setattr(posts[-1],'author',row[1].decode(ENCODING))
		setattr(posts[-1],'date',row[2])
		setattr(posts[-1],'content',row[3].decode(ENCODING))
		setattr(posts[-1],'title',row[4].decode(ENCODING))
		setattr(posts[-1],'status',row[5].decode(ENCODING))
		if row[6] and row[7]:
			if not serializedArraySearch.match(row[7]):
				setattr(posts[-1],row[6].decode(ENCODING),row[7].decode(ENCODING))
			else:
				try:
					setattr(posts[-1],row[6], phpserialize.loads(row[7], object_hook=obj_hook))
				except ValueError, e:
					pass #If a failure occurs here it's probably a corrupt object
					print "Failed to load: ",row[7]
					if STOP_ON_ERR:
						curs.close()
						db.close()
						sys.exit()
		setattr(posts[-1],'ptype',row[8].decode(ENCODING))
		if row[9]:
			setattr(posts[-1],'slug',row[9].decode(ENCODING))
		else:
			setattr(posts[-1],'slug',row[0])
		post_types.add(row[8])
	del posts[0] #Remove placeholder
	

	if PULL_TYPES:
		ptypeCount = {}
		ptypeTotal = {}
		for ptype in post_types:
			checkAndMakeDir("%s%s" % (ROOT_DIR,ptype))
			p = open("%s%s/_data.json" % (ROOT_DIR, ptype),'w')
			writeEncoded(p,'{')
			p.close()
			ptypeCount[ptype] = 0
			ptypeTotal[ptype] = sum(map(lambda x: x.ptype == ptype ,posts))
		for post in posts:
			if GENERATE_POSTS:
				tmp = open("%s%s/%s.md" % (ROOT_DIR, post.ptype, post.slug),'w+')
				writeEncoded(tmp,"#%s\n\n" % json.dumps(post.title.strip()))
				if(hasattr(post,'content')):
					writeEncoded(tmp,post.content)
					delattr(post,'content')
				tmp.close()
			p = open("%s%s/_data.json" % (ROOT_DIR, post.ptype),'a')
			tobeprocessed = " %s : %s " % (json.dumps("%s%d" % (post.title.strip(),post.ID)), post.to_JSON())
			writeEncoded(p, tobeprocessed )
			if ptypeTotal[post.ptype]-1 != ptypeCount[post.ptype]:
				writeEncoded(p,',')
			ptypeCount[post.ptype]+=1
			p.close()
		for ptype in post_types:
			p = open("%s%s/_data.json" % (ROOT_DIR, ptype),'a+')
			writeEncoded(p,'}')
			p.close()
	else:
		checkAndMakeDir("%(ROOT_DIR)s%(PAGES_DIR)s" % globals())
		checkAndMakeDir("%(ROOT_DIR)s%(BLOG_DIR)s" % globals())
		checkAndMakeDir("%(ROOT_DIR)s%(NAV_DIR)s" % globals())

		p = open("%(ROOT_DIR)s%(PAGES_DIR)s/_data.json" % globals(),'w')
		b = open('%(ROOT_DIR)s%(BLOG_DIR)s/_data.json' % globals(),'w')
		n = open('%(ROOT_DIR)s%(NAV_DIR)s/_data.json' % globals(),'w')
		pcount = 0
		bcount = 0
		ncount = 0
		totalPages = sum(map(lambda x: x.ptype == "page",posts))
		totalPosts = sum(map(lambda x: x.ptype == "post",posts))
		totalNavs = sum(map(lambda x: x.ptype == "nav_menu_item" ,posts))
		
		writeEncoded(p,'{')
		writeEncoded(b,'{')
		writeEncoded(n,'{')
		for post in posts:
			if ONLY_PUBLISHED and hasattr(post,'status') and post.status != "publish":
					continue
			if post.ptype == "page":
				# Throw the id onto the string to ensure uniqueness of the title
				if GENERATE_PAGES:
					tmp = open("%s%s/%s.md" % (ROOT_DIR, PAGES_DIR, post.slug),'w')
					writeEncoded(tmp,"#%s\n\n" % post.title)
					writeEncoded(tmp,post.content)
					tmp.close()
					delattr(post,'content')
				writeEncoded(p," %s : %s " % (json.dumps("%s%d" % (post.title.strip(),post.ID)),  post.to_JSON()) )
				if totalPages-1 != pcount:
					writeEncoded(p,',')
				pcount+=1
			elif post.ptype == "post":
				if GENERATE_POSTS:
					tmp = open("%s%s/%s.md" % (ROOT_DIR, BLOG_DIR, post.slug),'w')
					writeEncoded(tmp,"#%s\n\n" % post.title)
					writeEncoded(tmp,post.content)
					tmp.close()
					delattr(post,'content')
				writeEncoded(b," %s : %s " % (json.dumps("%s%d" % (post.title.strip(),post.ID)), post.to_JSON()) )
				if totalPosts-1 != bcount:
					writeEncoded(b,',')
				bcount+=1
			elif post.ptype == "nav_menu_item" :
				if post._menu_item_object == "custom":
					post.slug = post._menu_item_url
				elif post._menu_item_object == "page":
					for temp_post in posts:
						if int(temp_post.ID) == int(post._menu_item_object_id):
							post.slug = "%s/%s" % (PAGES_DIR,temp_post.slug)
							post.title = temp_post.title
				writeEncoded(n,"%s : %s " % (json.dumps( "%s%d" % (post.title.strip(),post.ID) ),post.to_JSON()))
				if totalNavs-1 != ncount:
					writeEncoded(n,',')
				ncount+=1
			else:
				#I'm just printing to look at the objects to decide to convert them into something or not.
				#print(post.to_JSON())
				pass
				#do what you will with the other types

		writeEncoded(p,'}')
		writeEncoded(b,'}')
		writeEncoded(n,'}')
		p.close()
		b.close()
		n.close()

	curs.execute("SELECT c.comment_ID, c.comment_post_ID, c.comment_author, c.comment_author_email, c.comment_author_url, c.comment_date, c.comment_content, c.user_id, u.meta_value as nickname, cm.meta_key, cm.meta_value FROM %(WP_PREFIX)scomments c LEFT JOIN %(WP_PREFIX)scommentmeta cm ON c.comment_ID=cm.comment_id JOIN %(WP_PREFIX)susermeta u ON c.user_id in (u.user_id,0) WHERE u.meta_key='nickname' ORDER BY c.comment_post_ID " % globals())
	placeholder = WP_Object()
	setattr(placeholder,'ID',-1)
	comments = [placeholder]
	for row in curs.fetchall():
		if comments[-1].ID != row[0]:
			comments.append(WP_Object())
		setattr(comments[-1],'ID',row[0])
		setattr(comments[-1],'post_ID',row[1])
		setattr(comments[-1],'author',row[2].decode(ENCODING))
		setattr(comments[-1],'author_email',row[3].decode(ENCODING))
		setattr(comments[-1],'author_url',row[4].decode(ENCODING))
		setattr(comments[-1],'date',row[5])
		setattr(comments[-1],'content',row[6].decode(ENCODING))
		setattr(comments[-1],'user_id',row[7])
		setattr(comments[-1],'nickname',row[8].decode(ENCODING))
		if row[9] is not None and row[10] is not None:

			setattr(comments[-1],row[9], row[10])
	del comments[0]

	checkAndMakeDir("%(ROOT_DIR)s%(COMMENTS_DIR)s" % globals())
	c = open("%(ROOT_DIR)s%(COMMENTS_DIR)s/_data.json" % globals() ,'w' )
	writeEncoded(c,'{')
	ccount = 0
	cTotal = len(comments)
	for comment in comments:
		writeEncoded(c,"\"%d-%d-%d\" : %s" % (comment.post_ID, (comment.date - datetime.datetime(1970,1,1)).total_seconds(), comment.ID, comment.to_JSON()) )
		if cTotal-1 != ccount:
			writeEncoded(c,',')
		ccount+=1
	writeEncoded(c,'}')
	c.close()

	
	makeExampleFile(post_types)
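
Much of the bookkeeping above (writing '{', counting entries so the trailing comma is omitted, then writing '}') can be avoided by accumulating a plain dict and serializing it with a single json.dumps/json.dump call. A minimal sketch of that alternative, where the posts list is a simplified stand-in for the script's WP_Object instances:

import json

posts = [
    {'ID': 1, 'title': 'Hello world', 'slug': 'hello-world'},
    {'ID': 2, 'title': 'Second post', 'slug': 'second-post'},
]

# Key each entry by "<title><ID>" as the script above does, then dump once;
# json.dump handles key quoting and comma placement itself.
data = {'%s%d' % (p['title'].strip(), p['ID']): p for p in posts}

with open('_data.json', 'w') as fh:
    json.dump(data, fh, indent=2)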

Example 28

Project: edx-platform
Source File: video_module.py
View license
    def get_html(self):
        track_status = (self.download_track and self.track)
        transcript_download_format = self.transcript_download_format if not track_status else None
        sources = filter(None, self.html5_sources)

        download_video_link = None
        branding_info = None
        youtube_streams = ""

        # Determine if there is an alternative source for this video
        # based on user locale.  This exists to support cases where
        # we leverage a geography specific CDN, like China.
        cdn_url = getattr(settings, 'VIDEO_CDN_URL', {}).get(self.system.user_location)

        # If we have an edx_video_id, we prefer its values over what we store
        # internally for download links (source, html5_sources) and the youtube
        # stream.
        if self.edx_video_id and edxval_api:
            try:
                val_profiles = ["youtube", "desktop_webm", "desktop_mp4"]

                # strip edx_video_id to prevent ValVideoNotFoundError error if unwanted spaces are there. TNL-5769
                val_video_urls = edxval_api.get_urls_for_profiles(self.edx_video_id.strip(), val_profiles)

                # VAL will always give us the keys for the profiles we asked for, but
                # if it doesn't have an encoded video entry for that Video + Profile, the
                # value will map to `None`

                # add the non-youtube urls to the list of alternative sources
                # use the last non-None non-youtube url as the link to download the video
                for url in [val_video_urls[p] for p in val_profiles if p != "youtube"]:
                    if url:
                        if url not in sources:
                            sources.append(url)
                        if self.download_video:
                            # function returns None when the url cannot be re-written
                            rewritten_link = rewrite_video_url(cdn_url, url)
                            if rewritten_link:
                                download_video_link = rewritten_link
                            else:
                                download_video_link = url

                # set the youtube url
                if val_video_urls["youtube"]:
                    youtube_streams = "1.00:{}".format(val_video_urls["youtube"])

            except edxval_api.ValInternalError:
                # VAL raises this exception if it can't find data for the edx video ID. This can happen if the
                # course data is ported to a machine that does not have the VAL data. So for now, pass on this
                # exception and fallback to whatever we find in the VideoDescriptor.
                log.warning("Could not retrieve information from VAL for edx Video ID: %s.", self.edx_video_id)

        # If the user comes from China use China CDN for html5 videos.
        # 'CN' is China ISO 3166-1 country code.
        # Video caching is disabled for Studio. User_location is always None in Studio.
        # CountryMiddleware disabled for Studio.
        if getattr(self, 'video_speed_optimizations', True) and cdn_url:
            branding_info = BrandingInfoConfig.get_config().get(self.system.user_location)

            for index, source_url in enumerate(sources):
                new_url = rewrite_video_url(cdn_url, source_url)
                if new_url:
                    sources[index] = new_url

        # If there was no edx_video_id, or if there was no download specified
        # for it, we fall back on whatever we find in the VideoDescriptor
        if not download_video_link and self.download_video:
            if self.source:
                download_video_link = self.source
            elif self.html5_sources:
                download_video_link = self.html5_sources[0]

        track_url, transcript_language, sorted_languages = self.get_transcripts_for_student(self.get_transcripts_info())

        # CDN_VIDEO_URLS is only to be used here and will be deleted
        # TODO([email protected]): Delete this after the CDN experiment has completed.
        html_id = self.location.html_id()
        if self.system.user_location == 'CN' and \
                settings.FEATURES.get('ENABLE_VIDEO_BEACON', False) and \
                html_id in getattr(settings, 'CDN_VIDEO_URLS', {}).keys():
            cdn_urls = getattr(settings, 'CDN_VIDEO_URLS', {})[html_id]
            cdn_exp_group, new_source = random.choice(zip(range(len(cdn_urls)), cdn_urls))
            if cdn_exp_group > 0:
                sources[0] = new_source
            cdn_eval = True
        else:
            cdn_eval = False
            cdn_exp_group = None

        self.youtube_streams = youtube_streams or create_youtube_string(self)  # pylint: disable=W0201

        settings_service = self.runtime.service(self, 'settings')

        yt_api_key = None
        if settings_service:
            xblock_settings = settings_service.get_settings_bucket(self)
            if xblock_settings and 'YOUTUBE_API_KEY' in xblock_settings:
                yt_api_key = xblock_settings['YOUTUBE_API_KEY']

        metadata = {
            'saveStateUrl': self.system.ajax_url + '/save_user_state',
            'autoplay': settings.FEATURES.get('AUTOPLAY_VIDEOS', False),
            'streams': self.youtube_streams,
            'sub': self.sub,
            'sources': sources,

            # This won't work when we move to data that
            # isn't on the filesystem
            'captionDataDir': getattr(self, 'data_dir', None),

            'showCaptions': json.dumps(self.show_captions),
            'generalSpeed': self.global_speed,
            'speed': self.speed,
            'savedVideoPosition': self.saved_video_position.total_seconds(),
            'start': self.start_time.total_seconds(),
            'end': self.end_time.total_seconds(),
            'transcriptLanguage': transcript_language,
            'transcriptLanguages': sorted_languages,

            # TODO: Later on the value 1500 should be taken from some global
            # configuration setting field.
            'ytTestTimeout': 1500,

            'ytApiUrl': settings.YOUTUBE['API'],
            'ytMetadataUrl': settings.YOUTUBE['METADATA_URL'],
            'ytKey': yt_api_key,

            'transcriptTranslationUrl': self.runtime.handler_url(
                self, 'transcript', 'translation/__lang__'
            ).rstrip('/?'),
            'transcriptAvailableTranslationsUrl': self.runtime.handler_url(
                self, 'transcript', 'available_translations'
            ).rstrip('/?'),

            ## For now, the option "data-autohide-html5" is hard coded. This option
            ## either enables or disables autohiding of controls and captions on mouse
            ## inactivity. If set to true, controls and captions will autohide for
            ## HTML5 sources (non-YouTube) after a period of mouse inactivity over the
            ## whole video. When the mouse moves (or a key is pressed while any part of
            ## the video player is focused), the captions and controls will be shown
            ## once again.
            ##
            ## There is no option in the "Advanced Editor" to set this option. However,
            ## this option will have an effect if changed to "True". The code on
            ## front-end exists.
            'autohideHtml5': False,

            # This is the server's guess at whether youtube is available for
            # this user, based on what was recorded the last time we saw the
            # user, and defaulting to True.
            'recordedYoutubeIsAvailable': self.youtube_is_available,
        }

        bumperize(self)

        context = {
            'bumper_metadata': json.dumps(self.bumper['metadata']),  # pylint: disable=E1101
            'metadata': json.dumps(OrderedDict(metadata)),
            'poster': json.dumps(get_poster(self)),
            'branding_info': branding_info,
            'cdn_eval': cdn_eval,
            'cdn_exp_group': cdn_exp_group,
            'id': self.location.html_id(),
            'display_name': self.display_name_with_default,
            'handout': self.handout,
            'download_video_link': download_video_link,
            'track': track_url,
            'transcript_download_format': transcript_download_format,
            'transcript_download_formats_list': self.descriptor.fields['transcript_download_format'].values,
            'license': getattr(self, "license", None),
        }
        return self.system.render_template('video.html', context)
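
Each piece of front-end configuration above is run through json.dumps before it reaches the template, so the rendered page can embed it directly as a JavaScript literal or data attribute. A minimal sketch of why that matters (the keys shown are a reduced, illustrative subset of the metadata dict above):

import json
from collections import OrderedDict

metadata = OrderedDict([
    ('autoplay', False),
    ('speed', 1.0),
    ('showCaptions', True),
])

# The serialized string is what lands in the rendered page; Python booleans
# become JavaScript-compatible true/false literals.
serialized = json.dumps(metadata)
print(serialized)  # {"autoplay": false, "speed": 1.0, "showCaptions": true}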

Example 29

Project: ArcREST
Source File: _geoenrichment.py
View license
    def standardGeographyQuery(self,
                               sourceCountry=None,
                               optionalCountryDataset=None,
                               geographyLayers=None,
                               geographyIDs=None,
                               geographyQuery=None,
                               returnSubGeographyLayer=False,
                               subGeographyLayer=None,
                               subGeographyQuery=None,
                               outSR=4326,
                               returnGeometry=False,
                               returnCentroids=False,
                               generalizationLevel=0,
                               useFuzzySearch=False,
                               featureLimit=1000):
        """
        The GeoEnrichment service provides a helper method that returns
        standard geography IDs and features for the supported geographic
        levels in the United States and Canada.
        As indicated throughout this documentation guide, the GeoEnrichment
        service uses the concept of a study area to define the location of
        the point or area that you want to enrich with additional
        information. Locations can also be passed as one or many named
        statistical areas. This form of a study area lets you define an
        area by the ID of a standard geographic statistical feature, such
        as a census or postal area. For example, to obtain enrichment
        information for a U.S. state, county or ZIP Code or a Canadian
        province or postal code, the Standard Geography Query helper method
        allows you to search and query standard geography areas so that
        they can be used in the GeoEnrichment method to obtain facts about
        the location.
        The most common workflow for this service is to find a FIPS
        (standard geography ID) for a geographic name. For example, you can
        use this service to find the FIPS for the county of San Diego which
        is 06073. You can then use this FIPS ID within the GeoEnrichment
        service study area definition to get geometry and optional
        demographic data for the county. This study area definition is
        passed as a parameter to the GeoEnrichment service to return data
        defined in the enrichment pack and optionally return geometry for
        the feature.

        For examples and more help with this function see:
        http://resources.arcgis.com/en/help/arcgis-rest-api/#/Standard_geography_query/02r30000000q000000/

        Inputs:
           sourceCountry - Optional parameter to specify the source country
            for the search. Use this parameter to limit the search and
            query of standard geographic features to one country. This
            parameter supports both the two-digit and three-digit country
            codes illustrated in the coverage table.
           optionalCountryDataset - Optional parameter to specify a
            specific dataset within a defined country.
           geographyLayers - Optional parameter to specify which standard
            geography layers are being queried or searched. If this
            parameter is not provided, all layers within the defined
            country will be queried.
           geographyIDs - Optional parameter to specify which IDs for the
            standard geography layers are being queried or searched. You
            can use this parameter to return attributes and/or geometry for
            standard geographic areas for administrative areas where you
            already know the ID, for example, if you know the Federal
            Information Processing Standard (FIPS) Codes for a U.S. state
            or county; or, in Canada, to return the geometry and attributes
            for a Forward Sortation Area (FSA).
           geographyQuery - Optional parameter to specify the text to query
            and search the standard geography layers specified. You can use
            this parameter to query and find standard geography features
            that meet an input term, for example, for a list of all the
            U.S. counties that contain the word "orange". The
            geographyQuery parameter can be a string that contains one or
            more words.
           returnSubGeographyLayer - Use this optional parameter to return
            all the subgeographic areas that are within a parent geography.
            For example, you could return all the U.S. counties for a given
            U.S. state or you could return all the Canadian postal areas
            (FSAs) within a Census Metropolitan Area (city).
            When this parameter is set to true, the output features will be
            defined in the subGeographyLayer. The output geometries will be
            in the spatial reference system defined by outSR.
           subGeographyLayer - Optional parameter that specifies the
            subgeography layer whose features are returned when
            returnSubGeographyLayer is true. For example, to return all
            the U.S. counties within a given U.S. state, the subgeography
            layer would be the counties layer; to return the Canadian
            postal areas (FSAs) within a Census Metropolitan Area (city),
            it would be the FSA layer. The output geometries will be in
            the spatial reference system defined by outSR.
           subGeographyQuery - Optional parameter to filter the results of
            the subgeography features that are returned by a search term.
            You can use this parameter to query and find subgeography
            features that meet an input term. This parameter is used to
            filter the list of subgeography features that are within a
            parent geography. For example, you may want a list of all the
            ZIP Codes that are within "San Diego County" and filter the
            results so that only ZIP Codes that start with "921" are
            included in the output response. The subgeography query is a
            string that contains one or more words.
           outSR - Optional parameter to request the output geometries in a
            specified spatial reference system.
           returnGeometry - Optional parameter to request the output
            geometries in the response.
           returnCentroids - Optional Boolean parameter to request the
            output geometry to return the center point for each feature.
            Use this parameter to return all the geometries as points. For
            example, you could return all U.S. ZIP Code centroids (points)
            rather than providing the boundaries.
           generalizationLevel - Optional integer that specifies the level
            of generalization or detail in the area representations of the
            administrative boundary or standard geographic data layers.
            Values must be whole integers from 0 through 6, where 0 is most
            detailed and 6 is most generalized.
           useFuzzySearch - Optional Boolean parameter to define if text
            provided in the geographyQuery parameter should utilize fuzzy
            search logic. Fuzzy searches are based on the Levenshtein
            Distance or Edit Distance algorithm.
           featureLimit - Optional integer value where you can limit the
            number of features that are returned from the geographyQuery.
        """
        url = self._base_url + self._url_standard_geography_query_execute
        params = {
            "f": "json"
        }
        if sourceCountry is not None:
            params['sourceCountry'] = sourceCountry
        if optionalCountryDataset is not None:
            params['optionalCountryDataset'] = optionalCountryDataset
        if geographyLayers is not None:
            params['geographylayers'] = geographyLayers
        if geographyIDs is not None:
            # lists of IDs are sent to the REST endpoint as a JSON array
            params['geographyids'] = json.dumps(geographyIDs)
        if geographyQuery is not None:
            params['geographyQuery'] = geographyQuery
        if returnSubGeographyLayer is not None and \
           isinstance(returnSubGeographyLayer, bool):
            params['returnSubGeographyLayer'] = returnSubGeographyLayer
        if subGeographyLayer is not None:
            params['subGeographyLayer'] = json.dumps(subGeographyLayer)
        if subGeographyQuery is not None:
            params['subGeographyQuery'] = subGeographyQuery
        if outSR is not None and isinstance(outSR, int):
            params['outSR'] = outSR
        if returnGeometry is not None and isinstance(returnGeometry, bool):
            params['returnGeometry'] = returnGeometry
        if returnCentroids is not None and isinstance(returnCentroids, bool):
            params['returnCentroids'] = returnCentroids
        if generalizationLevel is not None and \
           isinstance(generalizationLevel, int):
            params['generalizationLevel'] = generalizationLevel
        if useFuzzySearch is not None and isinstance(useFuzzySearch, bool):
            # json.dumps renders the bool as the 'true'/'false' the REST API expects
            params['useFuzzySearch'] = json.dumps(useFuzzySearch)
        # fall back to the documented default of 1000 when the value is missing
        # or is not an integer
        if isinstance(featureLimit, int):
            params['featureLimit'] = featureLimit
        else:
            params['featureLimit'] = 1000
        return self._post(url=url,
                          param_dict=params,
                          securityHandler=self._securityHandler,
                          proxy_url=self._proxy_url,
                          proxy_port=self._proxy_port)
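
The docstring above describes a find-the-FIPS workflow (look up the standard geography ID for a named area, then reuse it in a GeoEnrichment study area). The sketch below is a minimal, hedged illustration of that call: it assumes `ge` is an already-constructed instance of the GeoEnrichment helper class this method belongs to (constructor, credentials and service URL are omitted because they depend on the surrounding package), and it only uses parameters that appear in the signature above.

# Hypothetical usage; `ge` is assumed to be an instance of the class that
# defines standardGeographyQuery above.
response = ge.standardGeographyQuery(
    sourceCountry="US",                 # limit the search to the United States
    geographyQuery="San Diego County",  # free-text search of the geography layers
    returnGeometry=False,
    featureLimit=10)
# The call returns the parsed JSON response from the service; the matching
# county entry is expected to carry the FIPS geography ID 06073.
print(response)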

Example 31

Project: fedocal
Source File: api.py
View license
@APP.route('/api/meetings/', methods=['GET', 'POST'])
@APP.route('/api/meetings', methods=['GET', 'POST'])
def api_meetings():
    """
Retrieve meetings
=================

The ``/api/meetings/`` endpoint returns the meetings meeting the
provided criteria.

Response format
----------------

Sample response:

.. code-block:: javascript

    {
        "meetings": [
            {
                "meeting_time_start": "23:00:00",
                "meeting_information": "",
                "meeting_time_stop": "23:00:00",
                "calendar_name": "test",
                "meeting_date_end": "2013-05-27",
                "meeting_manager": "pingou2,",
                "meeting_date": "2013-05-27",
                "meeting_name": "test1.5",
                "meeting_location": "None"
            },
            {
                "meeting_time_start": "06:00:00",
                "meeting_information": "",
                "meeting_time_stop": "07:00:00",
                "calendar_name": "test",
                "meeting_date_end": "2013-05-28",
                "meeting_manager": "pingou,",
                "meeting_date": "2013-05-28",
                "meeting_name": "test3",
                "meeting_location": null
            }
        ],
        "arguments": {
            "start": "2013-05-04",
            "calendar": "test",
            "end": "2013-11-30",
            "region": null
        }
    }


The ``arguments`` item in the root dictionary contains all possible
arguments, and displays the value used (the default if the argument
was not provided).

Time arguments
--------------

Below is a table describing the timeframe meetings are returned from,
depending on which combination of time options you provide.

========= ======= =================
``start`` ``end`` Meeting timeframe
========= ======= =================
no        no      the last 30 days and the coming 180 days
**yes**   no      from ``start`` until the coming 180 days
no        **yes** the last 30 days until ``end``
**yes**   **yes** between ``start`` and ``end``
========= ======= =================

``start``
  Return results starting at date ``start`` (preferred format is
  "+%Y-%m-%d" see ``date "+%Y-%m-%d"``).

  Default: 30 days ago ``date "+%Y-%m-%d" -d "30 days ago"``

``end``
  Return results ending at date ``end`` (preferred format is
  "+%Y-%m-%d" see ``date "+%Y-%m-%d"``).

  Default: coming 180 days ``date "+%Y-%m-%d" -d "180 days"``

Filter arguments
----------------

``calendar``
  Restrict the meetings to a specific calendar.

  Default: all calendars

``region``
  Restrict the meetings to a specific region.

  If the calendar does not have support for regions enabled, no
  meetings will be found matching this criteria and no meetings will
  be returned.

  Default: all regions

    """
    @flask.after_this_request
    def callback(response):
        """ Handle case the query was an JQuery ajax call. """
        return check_callback(response)

    startd = flask.request.args.get('start', None)
    if startd is None:
        startd = datetime.date.today() - datetime.timedelta(days=30)
    else:
        try:
            startd = parser.parse(startd).date()
        except (ValueError, TypeError):
            output = {"meetings": [],
                      "error": "Invalid start date format: %s" % startd}
            return flask.Response(
                response=json.dumps(output),
                status=400,
                mimetype='application/json')

    endd = flask.request.args.get('end', None)
    if endd is None:
        endd = datetime.date.today() + datetime.timedelta(days=180)
    else:
        try:
            endd = parser.parse(endd).date()
        except (ValueError, TypeError):
            output = {"meetings": [],
                      "error": "Invalid end date format: %s" % endd}
            return flask.Response(
                response=json.dumps(output),
                status=400,
                mimetype='application/json')

    calendar_name = flask.request.args.get('calendar', None)
    location = flask.request.args.get('location', None)
    region = flask.request.args.get('region', None)
    location = location or region

    if calendar_name:
        calendarobj = Calendar.by_id(SESSION, calendar_name)

        if not calendarobj:
            output = {
                "meetings": [],
                "error": "Invalid calendar provided: %s" % calendar_name}
            return flask.Response(
                response=json.dumps(output),
                status=400,
                mimetype='application/json')

    status = 200
    meetings = []
    try:
        if calendar_name:
            if location:
                # print "calendar and region"
                meetings = fedocallib.get_meetings_by_date_and_location(
                    SESSION, calendar_name, startd, endd, location)
            else:
                # print "calendar and no region"
                meetings = fedocallib.get_by_date(
                    SESSION, calendarobj, startd, endd)
        else:
            meetings = []
            if location:
                # print "no calendar and region"
                meetings.extend(
                    fedocallib.get_by_date_at_location(
                        SESSION, location, startd, endd)
                )
            else:
                # print "no calendar and no region"
                for calendar in fedocallib.get_calendars(SESSION):
                    meetings.extend(fedocallib.get_by_date(
                        SESSION, calendar, startd, endd))
    except SQLAlchemyError as err:  # pragma: no cover
        status = 500
        LOG.debug('Error in api_meetings')
        LOG.exception(err)

    output = {}
    output['arguments'] = {
        'start': startd.strftime('%Y-%m-%d'),
        'end': endd.strftime('%Y-%m-%d'),
        'calendar': calendar_name,
        'location': location,
    }

    meetings_json = []
    for meeting in meetings:
        meetings_json.append(meeting.to_json())
    output['meetings'] = meetings_json

    return flask.Response(
        response=json.dumps(output),
        status=status,
        mimetype='application/json'
    )
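
The endpoint documented above can be exercised with any HTTP client. Below is a short, hedged sketch using the requests library against a placeholder fedocal deployment (the host name and calendar name are illustrative only); the query arguments and response keys come from the docstring and sample response above.

import requests

# Placeholder base URL of a fedocal deployment; replace with a real instance.
BASE = "https://calendar.example.org"

resp = requests.get(BASE + "/api/meetings/",
                    params={"calendar": "test",
                            "start": "2013-05-04",
                            "end": "2013-11-30"})
data = resp.json()
for meeting in data.get("meetings", []):
    print("%s on %s at %s" % (meeting["meeting_name"],
                              meeting["meeting_date"],
                              meeting["meeting_time_start"]))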

Example 32

Project: labmanager
Source File: sample_data.py
View license
def add_sample_users(silence = False):

    init_db(drop = True, silence = silence)

    with app.app_context():
        #################################################
        # 
        #     RLMS 1: WebLab-Deusto
        #   
        weblabdeusto_configuration = {
            'remote_login' : 'labmanager',
            'password'     : 'password',
            'base_url'     : 'http://www.weblab.deusto.es/weblab/',
        }

        rlms_weblabdeusto = RLMS(kind = u"WebLab-Deusto",
                           location = u"Bilbao, Spain",
                           url = u"https://www.weblab.deusto.es/",
                           version = u"5.0",
                           configuration = unicode(json.dumps(weblabdeusto_configuration)) )
        db.session.add(rlms_weblabdeusto)

        robot_lab = Laboratory(name = u"[email protected] experiments",
                               laboratory_id = u"[email protected] experiments",
                               rlms = rlms_weblabdeusto)

        db.session.add(robot_lab)

        #######################################################
        # 
        #     RLMS 2: FCEIA UNR
        #   

        rlms_unr = RLMS(kind = u'FCEIA-UNR',
                           location = u'Rosario, Argentina',
                           url = u'http://labremf4a.fceia.unr.edu.ar/accesodeusto.aspx',
                           version = u"1.2.2",
                           configuration = unicode(json.dumps(dict(remote_login = 'login', password = 'password'))))
        db.session.add(rlms_unr)

        physics_lab = Laboratory(name = u'unr-physics',
                               laboratory_id = u'unr-physics',
                               rlms = rlms_unr)

        db.session.add(physics_lab)


        #######################################################
        # 
        #     RLMS 3: iLabs (not implemented at this moment)
        #   


        rlms_ilab = RLMS(kind = u'iLabs',
                           location = u'MIT',
                           url = u'http://ilab.mit.edu/wiki/',
                           version = u"1.0",
                           configuration = unicode(json.dumps(dict(
                                sb_guid = u'ISB-247A4591CA1443485D85657CF357',
                                sb_url  = u'http://ludi.mit.edu/iLabServiceBroker/iLabServiceBroker.asmx',
                                authority_guid = u'fakeGUIDforRMLStest-12345',
                                group_name = u'Experiment_Group',
                           ))))
        db.session.add(rlms_ilab)


        #######################################################
        #     
        #     LT 1: Using LTI
        #    

        lt1 = LearningTool(full_name = u"Deusto Moodle (LTI)", name = u"deusto",
                         url = u"http://alud2.deusto.es/")
        db.session.add(lt1)

        password = unicode(hashlib.new(u'sha', u'password').hexdigest())

        lt_admin      = LtUser(login=u"admin", full_name=u"Administrator", lt = lt1, access_level = u'admin')
        lt_admin.password = password
        lt_instructor1 = LtUser(login=u"instructor1", full_name=u"Instructor 1", lt = lt1, access_level = u'instructor')
        lt_instructor1.password = password
        lt_instructor2 = LtUser(login=u"instructor2", full_name=u"Instructor 2", lt = lt1, access_level = u'instructor')
        lt_instructor2.password = password

        permission_to_lt1 = PermissionToLt(lt = lt1, laboratory = robot_lab, configuration = u'', local_identifier = u'robot')
        db.session.add(permission_to_lt1)

        db.session.add(lt_admin)
        db.session.add(lt_instructor1)
        db.session.add(lt_instructor2)

        permission_instructor1 = PermissionToLtUser(permission_to_lt = permission_to_lt1, lt_user = lt_instructor1, key = u'deusto_moodle_instructor1_robot', secret = u'abcdefghijklmnopqrstuvwxyz')

        permission_instructor2 = PermissionToLtUser(permission_to_lt = permission_to_lt1, lt_user = lt_instructor2, key = u'deusto_moodle_instructor2_robot', secret = u'abcdefghijklmnopqrstuvwxyz')

        db.session.add(permission_instructor1)
        db.session.add(permission_instructor2)

        #######################################################
        #     
        #     LT 2: Using LTI, too
        #    


        lt2 = LearningTool(full_name = u"Ilias Stuttgart (LTI)", name = u"stuttgart",
                         url = u"https://ilias3.uni-stuttgart.de")
        db.session.add(lt2)

        lt_admin2   = LtUser(login=u"admin", full_name=u"Administrator", lt = lt2, access_level = u'admin')
        lt_admin2.password = password
        lt_instructor1b = LtUser(login=u"instructor1", full_name=u"Instructor 1 (at B)", lt = lt2, access_level = u'instructor')
        lt_instructor1b.password = password
        lt_instructor2b = LtUser(login=u"instructor2", full_name=u"Instructor 2 (at B)", lt = lt2, access_level = u'instructor')
        lt_instructor2b.password = password

        db.session.add(lt_admin2)
        db.session.add(lt_instructor1b)
        db.session.add(lt_instructor2b)

        permission_to_lt2 = PermissionToLt(lt = lt2, laboratory = robot_lab, configuration = u'', local_identifier = u'robot')
        db.session.add(permission_to_lt2)

        permission_instructor1b = PermissionToLtUser(permission_to_lt = permission_to_lt2, lt_user = lt_instructor1b, key = u'ilias_stuttgart_instructor1_robot', secret = u'abcdefghijklmnopqrstuvwxyz')

        permission_instructor2b = PermissionToLtUser(permission_to_lt = permission_to_lt2, lt_user = lt_instructor2b, key = u'ilias_stuttgart_instructor2_robot', secret = u'abcdefghijklmnopqrstuvwxyz')

        db.session.add(permission_instructor1b)
        db.session.add(permission_instructor2b)

        #######################################################
        #     
        #     LT 3: Using Basic HTTP
        #    

        lt3 = LearningTool(full_name = u"UNED aLF (HTTP)", name = u"uned",
                         url = u"https://www.innova.uned.es/")
        db.session.add(lt3)

        credential = BasicHttpCredentials(lt_login = u'uned', lt_password = password, lt = lt3, lt_url = u'http://localhost:5000/fake_list_courses/gateway4labs/list', labmanager_login = u'labmanager', labmanager_password = u'password')
        db.session.add(credential)

        lt_admin3   = LtUser(login=u"admin", full_name=u"Administrator", lt = lt3, access_level = u'admin')
        lt_admin3.password = password

        db.session.add(lt_admin3)

        permission_to_lt3 = PermissionToLt(lt = lt3, laboratory = robot_lab, configuration = u'', local_identifier = u'robot')
        db.session.add(permission_to_lt3)

        course1 = Course(name = u"Physics course", lt = lt3, context_id = u"physics")
        course2 = Course(name = u"Robots course", lt = lt3, context_id = u"robots")
        db.session.add(course1)
        db.session.add(course2)

        permission_to_course = PermissionToCourse(course = course2, permission_to_lt = permission_to_lt3)
        db.session.add(permission_to_course)

        #######################################################
        #     
        #     LT 4: Using Shindig, school 1
        #    

        lt4 = LearningTool(full_name = u"School 1 at Graasp", name = u"school1", url = u"http://graasp.epfl.ch/")
        db.session.add(lt4)

        credential = ShindigCredentials(lt = lt4, shindig_url = u'http://shindig2.epfl.ch')
        db.session.add(credential)

        lt_admin4   = LtUser(login=u"admin", full_name=u"Administrator", lt = lt4, access_level = u'admin')
        lt_admin4.password = password

        db.session.add(lt_admin4)

        permission_to_lt4 = PermissionToLt(lt = lt4, laboratory = robot_lab, configuration = u'', local_identifier = u'robot')
        db.session.add(permission_to_lt4)

        course1 = Course(name = u"Physics course", lt = lt4, context_id = u"1234")
        course2 = Course(name = u"Robots course", lt = lt4, context_id = u"1235")
        db.session.add(course1)
        db.session.add(course2)

        permission_to_course = PermissionToCourse(course = course2, permission_to_lt = permission_to_lt4)
        db.session.add(permission_to_course)

        #######################################################
        #     
        #     LT 5: Using Shindig, school 2
        #    

        lt5 = LearningTool(full_name = u"School 2 at Graasp", name = u"school2", url = u"http://graasp.epfl.ch/")
        db.session.add(lt5)

        credential = ShindigCredentials(lt = lt5, shindig_url = u'http://shindig2.epfl.ch')
        db.session.add(credential)

        lt_admin5  = LtUser(login=u"admin", full_name=u"Administrator", lt = lt5, access_level = u'admin')
        lt_admin5.password = password

        db.session.add(lt_admin5)

        permission_to_lt5 = PermissionToLt(lt = lt5, laboratory = robot_lab, configuration = u'', local_identifier = u'robot')
        db.session.add(permission_to_lt5)

        course1 = Course(name = u"Other physics course", lt = lt5, context_id = u"1236")
        course2 = Course(name = u"Other robots course", lt = lt5, context_id = u"1237")
        db.session.add(course1)
        db.session.add(course2)

        permission_to_course = PermissionToCourse(course = course2, permission_to_lt = permission_to_lt5)
        db.session.add(permission_to_course)

        db.session.commit()
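
The RLMS rows above persist their configuration as JSON text built with json.dumps. The standalone sketch below simply isolates that round trip (serialize on write, json.loads on read) using the same WebLab-Deusto configuration values; it is illustrative only and does not touch the database models.

import json

weblabdeusto_configuration = {
    'remote_login': 'labmanager',
    'password': 'password',
    'base_url': 'http://www.weblab.deusto.es/weblab/',
}

# What goes into the RLMS.configuration column ...
stored = json.dumps(weblabdeusto_configuration)
# ... and what a consumer reads back out of it.
restored = json.loads(stored)
assert restored == weblabdeusto_configuration
print(restored['base_url'])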

Example 33

Project: girder
Source File: collection_test.py
View license
    def testCollectionAccess(self):
        # Asking to change to an invalid access list should fail
        resp = self.request(path='/collection/%s/access' %
                            self.collection['_id'], method='PUT', params={
                                'access': 'not an access list',
                                'public': False
                            }, user=self.admin)
        self.assertStatus(resp, 400)

        # Create some folders underneath the collection
        folder1 = self.model('folder').createFolder(
            parentType='collection', parent=self.collection, creator=self.admin,
            public=False, name='top level')
        folder2 = self.model('folder').createFolder(
            parentType='folder', parent=folder1, creator=self.admin,
            public=False, name='subfolder')
        self.model('folder').createFolder(
            parentType='collection', parent=self.collection, creator=self.admin,
            public=False, name='another top level folder')

        # Admin should see two top level folders
        resp = self.request(path='/collection/%s/details' %
                            self.collection['_id'], user=self.admin)
        self.assertStatusOk(resp)
        self.assertEqual(resp.json['nFolders'], 2)
        self.assertNotIn('nItems', resp.json)

        # Normal user should see 0 folders
        resp = self.request(path='/collection/%s/details' %
                            self.collection['_id'], user=self.user)
        self.assertStatusOk(resp)
        self.assertEqual(resp.json['nFolders'], 0)

        # Add read access on one of the folders
        self.model('folder').setUserAccess(
            folder1, self.user, AccessType.READ, save=True)

        # Normal user should see one folder now
        resp = self.request(path='/collection/%s/details' %
                            self.collection['_id'], user=self.user)
        self.assertStatusOk(resp)
        self.assertEqual(resp.json['nFolders'], 1)

        # Change the access to allow just the user
        obj = {'users': [{'id': str(self.user['_id']),
                          'level': AccessType.WRITE}]}
        resp = self.request(path='/collection/%s/access' %
                            self.collection['_id'], method='PUT', params={
                                'access': json.dumps(obj),
                                'public': True
                            }, user=self.admin)
        self.assertStatusOk(resp)

        # Request the collection access
        resp = self.request(path='/collection/%s/access' %
                            self.collection['_id'], user=self.admin)
        self.assertStatusOk(resp)
        access = resp.json
        self.assertEqual(access['users'][0]['id'], str(self.user['_id']))
        self.assertEqual(access['users'][0]['level'], AccessType.WRITE)
        coll = self.model('collection').load(self.collection['_id'], force=True)
        folder1 = self.model('folder').load(folder1['_id'], force=True)
        folder2 = self.model('folder').load(folder2['_id'], force=True)
        self.assertEqual(coll['public'], True)
        self.assertEqual(folder1['public'], False)

        # Update the collection recursively to public
        resp = self.request(
            path='/collection/%s/access' % coll['_id'], method='PUT', params={
                'access': json.dumps(obj),
                'public': True,
                'recurse': True,
                'progress': True
            }, user=self.admin)
        self.assertStatusOk(resp)
        coll = self.model('collection').load(coll['_id'], force=True)
        folder1 = self.model('folder').load(folder1['_id'], force=True)
        folder2 = self.model('folder').load(folder2['_id'], force=True)
        self.assertEqual(coll['public'], True)
        self.assertEqual(folder1['public'], True)
        self.assertEqual(folder2['public'], True)
        self.assertEqual(folder1['access'], coll['access'])
        self.assertEqual(folder1['access'], folder2['access'])
        self.assertEqual(folder2['access'], {
            'users': [{
                'id': self.user['_id'],
                'level': AccessType.WRITE
            }],
            'groups': []
        })

        # Recursively drop the user's access level to READ
        obj['users'][0]['level'] = AccessType.READ
        resp = self.request(
            path='/collection/%s/access' % coll['_id'], method='PUT', params={
                'access': json.dumps(obj),
                'public': True,
                'recurse': True,
                'progress': True
            }, user=self.admin)
        coll = self.model('collection').load(coll['_id'], force=True)
        folder1 = self.model('folder').load(folder1['_id'], force=True)
        folder2 = self.model('folder').load(folder2['_id'], force=True)
        self.assertEqual(coll['public'], True)
        self.assertEqual(folder1['public'], True)
        self.assertEqual(folder2['public'], True)
        self.assertEqual(folder1['access'], coll['access'])
        self.assertEqual(folder1['access'], folder2['access'])
        self.assertEqual(folder2['access'], {
            'users': [{
                'id': self.user['_id'],
                'level': AccessType.READ
            }],
            'groups': []
        })

        # Recursively remove the user's access altogether, also make sure that
        # passing no "public" param just retains the current flag state
        obj['users'] = ()
        resp = self.request(
            path='/collection/%s/access' % coll['_id'], method='PUT', params={
                'access': json.dumps(obj),
                'recurse': True
            }, user=self.admin)
        coll = self.model('collection').load(coll['_id'], force=True)
        folder1 = self.model('folder').load(folder1['_id'], force=True)
        folder2 = self.model('folder').load(folder2['_id'], force=True)
        self.assertEqual(coll['public'], True)
        self.assertEqual(folder1['public'], True)
        self.assertEqual(folder2['public'], True)
        self.assertEqual(folder1['access'], coll['access'])
        self.assertEqual(folder1['access'], folder2['access'])
        self.assertEqual(folder2['access'], {
            'users': [],
            'groups': []
        })

        # Add group access to the collection
        group = self.model('group').createGroup('test', self.admin)
        obj = {
            'groups': [{
                'id': str(group['_id']),
                'level': AccessType.WRITE
            }]
        }

        resp = self.request(
            path='/collection/%s/access' % coll['_id'], method='PUT', params={
                'access': json.dumps(obj),
                'recurse': False
            }, user=self.admin)
        self.assertStatusOk(resp)

        # Create a new top-level folder, it should inherit the collection ACL.
        resp = self.request(path='/folder', method='POST', params={
            'name': 'top level 2',
            'parentId': coll['_id'],
            'parentType': 'collection'
        }, user=self.admin)
        self.assertStatusOk(resp)
        folder = self.model('folder').load(resp.json['_id'], force=True)
        coll = self.model('collection').load(coll['_id'], force=True)
        self.assertEqual(coll['access']['users'], [])
        self.assertEqual(folder['access']['users'], [{
            'id': self.admin['_id'],
            'level': AccessType.ADMIN
        }])
        self.assertEqual(folder['access']['groups'], [{
            'id': group['_id'],
            'level': AccessType.WRITE
        }])
        self.assertEqual(folder['access']['groups'], coll['access']['groups'])
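
In the test above, access control lists are passed to the REST layer as json.dumps-encoded strings inside ordinary form parameters. The fragment below isolates just that encoding step so the payload shape is visible outside the test harness; the user id is a placeholder, and the numeric values for READ/WRITE/ADMIN are an assumption for illustration rather than something taken from this test file.

import json

# Assumed AccessType values, spelled out for illustration only.
READ, WRITE, ADMIN = 0, 1, 2

access = {
    'users': [{'id': '000000000000000000000000', 'level': WRITE}],  # placeholder id
    'groups': [],
}
params = {
    'access': json.dumps(access),  # the ACL travels as a JSON string parameter
    'public': True,
    'recurse': True,
}
print(params['access'])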

Example 34

Project: kokoropy
Source File: classification.py
View license
    def action_result(self):
        import json
        import numpy as np
        import matplotlib.pyplot as plt
        import time

        result = {
                  'success' : True,
                  'message' : ''
            }
        # redirect if data not completed
        if 'training_csv' not in request.POST or 'classifier' not in request.POST:
            result['message'] = 'Undefined training data, testing data, or classifier'
            result['success'] = False
            return json.dumps(result)
       
        # preprocess POST data
        training_csv = request.POST['training_csv']
        if 'testing_csv' not in request.POST or request.POST['testing_csv'] == '':
            testing_csv =  '\n'.join(training_csv.split('\n')[1:])
        else:
            testing_csv = request.POST['testing_csv']
        classifier_name = request.POST['classifier']
        parameter_pair_list = []
        for parameter in self.classifiers[classifier_name]:
            value = request.POST['param_'+parameter]
            if (not self.is_number(value)) and value != 'True' and value != 'False' and value != 'None':
                # quote non-numeric, non-keyword values, escaping embedded quotes first
                value = '"' + value.replace('"', '\\"') + '"'
            parameter_pair_list.append(parameter + ' = ' + value)
        parameter_string = ", ".join(parameter_pair_list)
        if 'draw_plot' in request.POST and request.POST['draw_plot'] == 'true':
            draw_plot = True
        else:
            draw_plot = False

        if training_csv == '':
            result['message'] = 'Training data is empty'
            result['success'] = False
            return json.dumps(result)

        # preprocess csv
        try:
            training_data, training_target, caption_list, numeric_value = self.extract_csv(training_csv)
            testing_data, testing_target, caption_list, numeric_value = self.extract_csv(testing_csv, caption_list, numeric_value)
            if 'predict_csv' in request.POST and request.POST['predict_csv'] != '' :
                do_prediction = True
                predict_csv = request.POST['predict_csv']
                prediction_data, prediction_target, caption_list, numeric_value = self.extract_csv(predict_csv, caption_list, numeric_value)
                del prediction_target
            else:
                do_prediction = False
                predict_csv = ''
                prediction_data = []
        except Exception as e:
            result['success'] = False
            result['message'] = 'Unexpected error while extracting csv : '+ e.message
        if not result['success']:
            return json.dumps(result)

        # make classifier
        classifier = None
        try:
            import_module_name = '.'.join(classifier_name.split('.')[:-1])
            exec('import '+import_module_name)
            exec('classifier = '+classifier_name+'('+parameter_string+')')
        except Exception as e:
            result['success'] = False
            result['message'] = 'Unexpected error while define classifier : '+ e.message
        if not result['success']:
            return json.dumps(result)

        # learn
        try:
            classifier.fit(training_data, training_target)
        except Exception as e:
            result['success'] = False
            result['message'] = 'Unexpected error while fit classifier : '+ e.message
        if not result['success']:
            return json.dumps(result)

        # and test the classifier
        try:
            training_predict_target = classifier.predict(training_data)
            testing_predict_target = classifier.predict(testing_data)
            if do_prediction:
                prediction_predict_target = classifier.predict(prediction_data)
            else:
                prediction_predict_target = []
        except Exception as e:
            result['success'] = False
            result['message'] = 'Unexpected error while predicting target : '+ e.message
        if not result['success']:
            return json.dumps(result)

        # if the classes is not in numeric value, then use_alias = True
        use_alias = caption_list[-1] in numeric_value
        target_dict = {}
        if use_alias:
            target_numeric_value = numeric_value[caption_list[-1]]
            for label in target_numeric_value:
                target_dict[target_numeric_value[label]] = label

        # Available classes (called "groups" here, since "class" is a reserved word in Python)
        groups = []
        for target in (training_target, testing_target):
            for i in xrange(len(target)):
                if target[i] not in groups:
                    groups.append(target[i])

        # plotting
        dimensions = caption_list[:-1]
        dimension_count = len(dimensions)
        subplot_num = dimension_count * (dimension_count-1)
        for i in xrange(dimension_count):
            subplot_num -= i
        subplot_num *= 2
        if subplot_num == 1:
            row_count = 1
            col_count = 1
        else:
            row_count = np.ceil(subplot_num / 2)
            col_count = 2
        # make figure
        plot_url = ''
        if draw_plot:
            try:
                fig = plt.figure(figsize=(6.0*col_count, 6.0*row_count))
                fig.subplots_adjust(hspace = 0.2, wspace = 0.2)
                fig.suptitle('Dimension Projection')
                # subplot
                subplot_index = 1
                for mode in xrange(2):
                    if mode == 0:
                        data = training_data
                        target = training_target
                        caption = 'training'
                    else:
                        data = testing_data
                        target = testing_target
                        caption = 'testing'
                    second_dimension_start_index = 1
                    first_dimension_index = 0
                    for first_dimension in dimensions:
                        second_dimension_index = second_dimension_start_index
                        x = data[:,first_dimension_index]
                        # determine x_min and x_max for contour
                        x_min, x_max = x.min(), x.max()
                        x_range = x_max - x_min
                        x_max += 0.1 * x_range
                        x_min -= 0.1 * x_range
                        for second_dimension in dimensions[second_dimension_start_index:]:
                            ax = fig.add_subplot(row_count, col_count, subplot_index)
                            y = data[:,second_dimension_index]
                            y_min, y_max = y.min(), y.max()
                            y_range = y_max - y_min
                            y_max += 0.1 * y_range
                            y_min -= 0.1 * y_range
                            # xx, yy
                            xx, yy = np.meshgrid(np.arange(x_min, x_max, 0.01 * x_range),
                                 np.arange(y_min, y_max, 0.01 * y_range))
                            tup = ()
                            for i in xrange(len(data[0])):
                                if i == first_dimension_index:
                                    tup = tup + ( xx.ravel(), )
                                elif i == second_dimension_index:
                                    tup = tup + ( yy.ravel(), )
                                else:
                                    dimension_mean = data[:, i].mean()
                                    tup = tup + ([dimension_mean] * len(xx.ravel()) , )
                            Z = classifier.predict(np.c_[tup])
                            Z = Z.reshape(xx.shape)
                            ax.contourf(xx, yy, Z)
                            # scatter-plot the data
                            ax.scatter(x, y, c=target, cmap=plt.cm.gist_rainbow)
                            ax.set_title (first_dimension + ' vs ' + second_dimension + ' ('+caption+')')
                            ax.set_xlabel(first_dimension)
                            ax.set_ylabel(second_dimension)
                            subplot_index += 1
                            second_dimension_index += 1
                        first_dimension_index += 1
                        second_dimension_start_index += 1
                # make canvas
                file_name = 'classification/plot_'+str(np.random.randint(10000))+str(time.time())+'.png'
                plot_url = draw_matplotlib_figure(fig,file_name,'example')
            except Exception as e:
                result['success'] = False
                result['message'] = 'Unexpected error while creating plot : '+ e.message
            if not result['success']:
                return json.dumps(result)

        # initiate false positive, false negative, true positive, and true negative
        training_false_positive = {}
        training_false_negative = {}
        training_true_positive = {}
        training_true_negative = {}
        testing_false_positive = {}
        testing_false_negative = {}
        testing_true_positive = {}
        testing_true_negative = {}
        for group in groups:
            training_false_positive[group] = 0.0
            training_false_negative[group] = 0.0
            training_true_positive[group] = 0.0
            training_true_negative[group] = 0.0
            testing_false_positive[group] = 0.0
            testing_false_negative[group] = 0.0
            testing_true_positive[group] = 0.0
            testing_true_negative[group] = 0.0
        # determine true positive, true negative, false positive and false negative for training and testing
        for i in xrange(len(training_target)):
            for group in groups:
                if training_target[i] == group and training_predict_target[i] == group:
                    training_true_positive[group] += 1
                elif training_target[i] != group and training_predict_target[i] != group:
                    training_true_negative[group] += 1
                elif training_target[i] != group and training_predict_target[i] == group:
                    training_false_positive[group] += 1
                else:
                    training_false_negative[group] += 1
        for i in xrange(len(testing_target)):
            for group in groups:
                if testing_target[i] == group and testing_predict_target[i] == group:
                    testing_true_positive[group] += 1
                elif testing_target[i] != group and testing_predict_target[i] != group:
                    testing_true_negative[group] += 1
                elif testing_target[i] != group and testing_predict_target[i] == group:
                    testing_false_positive[group] += 1
                else:
                    testing_false_negative[group] += 1

        # use alias if needed   
        if use_alias:
            groups = []
            for key in target_dict:
                label = target_dict[key]
                groups.append(label)
                training_true_positive[label] = training_true_positive[key]
                training_true_positive.pop(key)
                training_true_negative[label] = training_true_negative[key]
                training_true_negative.pop(key)
                training_false_positive[label] = training_false_positive[key]
                training_false_positive.pop(key)
                training_false_negative[label] = training_false_negative[key]
                training_false_negative.pop(key)
                testing_true_positive[label] = testing_true_positive[key]
                testing_true_positive.pop(key)
                testing_true_negative[label] = testing_true_negative[key]
                testing_true_negative.pop(key)
                testing_false_positive[label] = testing_false_positive[key]
                testing_false_positive.pop(key)
                testing_false_negative[label] = testing_false_negative[key]
                testing_false_negative.pop(key)
            # prediction
            str_prediction_predict_target = []
            for i in xrange(len(prediction_predict_target)):
                str_prediction_predict_target.append(target_dict[prediction_predict_target[i]])
            prediction_predict_target = str_prediction_predict_target

        # further calculation (http://en.wikipedia.org/wiki/Accuracy_and_precision)
        total_false_positive = {}
        total_false_negative = {}
        total_true_positive = {}
        total_true_negative = {}
        training_sensitivity = {}
        training_specificity = {}
        training_precision = {}
        training_negative_predictive_value = {}
        training_accuracy = {}
        training_balanced_accuracy = {}
        training_informedness = {}
        testing_sensitivity = {}
        testing_specificity = {}
        testing_precision = {}
        testing_negative_predictive_value = {}
        testing_accuracy = {}
        testing_balanced_accuracy = {}
        testing_informedness = {}
        total_sensitivity = {}
        total_specificity = {}
        total_precision = {}
        total_negative_predictive_value = {}
        total_accuracy = {}
        total_balanced_accuracy = {}
        total_informedness = {}
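        # For reference, the per-class metrics computed in the loop below from the
        # true/false positive/negative counts accumulated above:
        #   sensitivity (recall)      = TP / (TP + FN)
        #   specificity               = TN / (TN + FP)
        #   precision                 = TP / (TP + FP)
        #   negative predictive value = TN / (TN + FN)
        #   accuracy                  = (TP + TN) / (TP + TN + FP + FN)
        #   balanced accuracy         = (sensitivity + specificity) / 2
        #   informedness              = sensitivity + specificity - 1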
        for group in groups:
            # total true and false positive and negative
            total_false_positive[group] = training_false_positive[group] + testing_false_positive[group]
            total_false_negative[group] = training_false_negative[group] + testing_false_negative[group]
            total_true_positive[group] = training_true_positive[group] + testing_true_positive[group]
            total_true_negative[group] = training_true_negative[group] + testing_true_negative[group]
       
            # training measurement
            #   sensitivity
            if (training_true_positive[group] + training_false_negative[group]) == 0:
                training_sensitivity[group] = 0
            else:
                training_sensitivity[group] = training_true_positive[group] / (training_true_positive[group] + training_false_negative[group])
            #   specificity
            if (training_true_negative[group] + training_false_positive[group]) == 0:
                training_specificity[group] = 0
            else:
                training_specificity[group] = training_true_negative[group] / (training_true_negative[group] + training_false_positive[group])
            #   precision
            if (training_true_positive[group] + training_false_positive[group]) == 0:
                training_precision[group] = 0
            else:
                training_precision[group] = training_true_positive[group] / (training_true_positive[group] + training_false_positive[group])
            #   negative prediction value
            if (training_true_negative[group] + training_false_negative[group]) == 0:
                training_negative_predictive_value[group] = 0
            else:
                training_negative_predictive_value[group] = training_true_negative[group] / (training_true_negative[group] + training_false_negative[group])
            #   accuracy
            if (training_true_positive[group] + training_true_negative[group] + training_false_positive[group] + training_false_negative[group]) == 0:
                training_accuracy[group] = 0
            else:
                training_accuracy[group] = (training_true_positive[group] + training_true_negative[group]) / (training_true_positive[group] + training_true_negative[group] + training_false_positive[group] + training_false_negative[group])
            #   balanced accuracy
            training_balanced_accuracy[group] = (training_sensitivity[group] + training_specificity[group])/2.0
            #   informedness
            training_informedness[group] = training_sensitivity[group] + training_specificity[group] - 1
       
            # testing measurement
            #   sensitivity
            if (testing_true_positive[group] + testing_false_negative[group]) == 0:
                testing_sensitivity[group] = 0
            else:
                testing_sensitivity[group] = testing_true_positive[group] / (testing_true_positive[group] + testing_false_negative[group])
            #   specificity
            if (testing_true_negative[group] + testing_false_positive[group]) == 0:
                testing_specificity[group] = 0
            else:
                testing_specificity[group] = testing_true_negative[group] / (testing_true_negative[group] + testing_false_positive[group])
            #   precision
            if (testing_true_positive[group] + testing_false_positive[group]) == 0:
                testing_precision[group] = 0
            else:
                testing_precision[group] = testing_true_positive[group] / (testing_true_positive[group] + testing_false_positive[group])
            #   negative prediction value
            if (testing_true_negative[group] + testing_false_negative[group]) == 0:
                testing_negative_predictive_value[group] = 0
            else:
                testing_negative_predictive_value[group] = testing_true_negative[group] / (testing_true_negative[group] + testing_false_negative[group])
            #   accuracy
            if (testing_true_positive[group] + testing_true_negative[group] + testing_false_positive[group] + testing_false_negative[group]) == 0:
                testing_accuracy[group] = 0
            else:
                testing_accuracy[group] = (testing_true_positive[group] + testing_true_negative[group]) / (testing_true_positive[group] + testing_true_negative[group] + testing_false_positive[group] + testing_false_negative[group])
            #   balanced accuracy
            testing_balanced_accuracy[group] = (testing_sensitivity[group] + testing_specificity[group])/2.0
            #   informedness
            testing_informedness[group] = testing_sensitivity[group] + testing_specificity[group] - 1
       
            # total measurement
            #   sensitivity
            if (total_true_positive[group] + total_false_negative[group]) == 0:
                total_sensitivity[group] = 0
            else:
                total_sensitivity[group] = total_true_positive[group] / (total_true_positive[group] + total_false_negative[group])
            #   specificity
            if (total_true_negative[group] + total_false_positive[group]) == 0:
                total_specificity[group] = 0
            else:
                total_specificity[group] = total_true_negative[group] / (total_true_negative[group] + total_false_positive[group])
            #   precision
            if (total_true_positive[group] + total_false_positive[group]) == 0:
                total_precision[group] = 0
            else:
                total_precision[group] = total_true_positive[group] / (total_true_positive[group] + total_false_positive[group])
            #   negative predictive value
            if (total_true_negative[group] + total_false_negative[group]) == 0:
                total_negative_predictive_value[group] = 0
            else:
                total_negative_predictive_value[group] = total_true_negative[group] / (total_true_negative[group] + total_false_negative[group])
            #   accuracy
            if (total_true_positive[group] + total_true_negative[group] + total_false_positive[group] + total_false_negative[group]) == 0:
                total_accuracy[group] = 0
            else:
                total_accuracy[group] = (total_true_positive[group] + total_true_negative[group]) / (total_true_positive[group] + total_true_negative[group] + total_false_positive[group] + total_false_negative[group])
            #   balanced accuracy
            total_balanced_accuracy[group] = (total_sensitivity[group] + total_specificity[group])/2.0
            #   informedness
            total_informedness[group] = total_sensitivity[group] + total_specificity[group] - 1

        # show it
        prediction_data = self.csv_to_list(predict_csv)
        result = {
                  'success' : True,
                  'message' : '',
                  'groups' : groups,
                  'training_true_positive' : training_true_positive,
                  'training_true_negative' : training_true_negative,
                  'training_false_positive': training_false_positive,
                  'training_false_negative': training_false_negative,
                  'testing_true_positive'  : testing_true_positive,
                  'testing_true_negative'  : testing_true_negative,
                  'testing_false_positive' : testing_false_positive,
                  'testing_false_negative' : testing_false_negative,
                  'total_true_positive'    : total_true_positive,
                  'total_true_negative'    : total_true_negative,
                  'total_false_positive'   : total_false_positive,
                  'total_false_negative'   : total_false_negative,
                  'training_sensitivity' : training_sensitivity,
                  'testing_sensitivity'  : testing_sensitivity,
                  'total_sensitivity'    : total_sensitivity,
                  'training_specificity' : training_specificity,
                  'testing_specificity'  : testing_specificity,
                  'total_specificity'    : total_specificity,
                  'training_precision' : training_precision,
                  'testing_precision'  : testing_precision,
                  'total_precision'    : total_precision,
                  'training_negative_predictive_value' : training_negative_predictive_value,
                  'testing_negative_predictive_value'  : testing_negative_predictive_value,
                  'total_negative_predictive_value'    : total_negative_predictive_value,
                  'training_accuracy' : training_accuracy,
                  'testing_accuracy'  : testing_accuracy,
                  'total_accuracy'    : total_accuracy,
                  'training_balanced_accuracy' : training_balanced_accuracy,
                  'testing_balanced_accuracy'  : testing_balanced_accuracy,
                  'total_balanced_accuracy'    : total_balanced_accuracy,
                  'training_informedness' : training_informedness,
                  'testing_informedness'  : testing_informedness,
                  'total_informedness'    : total_informedness,
                  'do_prediction'         : do_prediction,
                  'prediction_data'       : prediction_data,
                  'prediction_result'     : prediction_predict_target,
                  'plot_url'              : plot_url,
                  'draw_plot'             : draw_plot,
                  'dimensions'            : dimensions
            }
        return json.dumps(result)
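
The per-group arithmetic above can be written more compactly with a zero-safe division helper. The sketch below is only illustrative (safe_div and confusion_metrics are not names used in the project above); it follows the same guard-against-zero convention and serializes the resulting metrics with json.dumps.

from __future__ import division  # ensure float division under Python 2

import json

def safe_div(numerator, denominator):
    # Follow the example's convention of returning 0 when the denominator is 0.
    return numerator / denominator if denominator else 0

def confusion_metrics(tp, tn, fp, fn):
    sensitivity = safe_div(tp, tp + fn)
    specificity = safe_div(tn, tn + fp)
    return {
        'sensitivity': sensitivity,
        'specificity': specificity,
        'precision': safe_div(tp, tp + fp),
        'negative_predictive_value': safe_div(tn, tn + fn),
        'accuracy': safe_div(tp + tn, tp + tn + fp + fn),
        'balanced_accuracy': (sensitivity + specificity) / 2.0,
        'informedness': sensitivity + specificity - 1,
    }

print(json.dumps(confusion_metrics(tp=40, tn=45, fp=5, fn=10), indent=2))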

Example 35

View license
def upgrade():
  # drop Cycle, it doesn't exist
  set_permissions([
      'ObjectDocument',
      'ObjectObjective',
      'ObjectPerson',
      'ObjectSection',
      'Program',
      'ProgramControl',
      'ProgramDirective',
      'Relationship',
  ])
  # create join table for inferred permissions
  # define auditor role
  # define program owner privileges for audit context
  # define program editor privileges for audit context
  # define program reader privileges for audit context
  # set_audit_permissions([
  #     'Audit',
  #     'Request',
  #     'Response',
  # ])

  current_datetime = datetime.now()
  op.bulk_insert(
      roles_table,
      [
          {'name': 'AuditorReader',
           'description': 'A user with Auditor role for a program audit will '
              'also have this role in the default object context so that '
              'the auditor will have access to the objects required to '
              'perform the audit.',
           'permissions_json': json.dumps({
              'create': [],
              'read': reader_objects,
              'update': [],
              'delete': [],
           }),
           'scope': 'System',
           'created_at': current_datetime,
           'updated_at': current_datetime,
           'context_id': None},
          {'name': 'AuditorProgramReader',
           'description': 'A user with Auditor role for a program audit will '
              'also have this role in the program context so that '
              'the auditor will have access to the private program '
              'information and mappings required to perform the audit.',
           'permissions_json': json.dumps({
              'create': [],
              'read': program_reader_objects,
              'update': [],
              'delete': [],
           }),
           'scope': 'Private Program Implied',
           'created_at': current_datetime,
           'updated_at': current_datetime,
           'context_id': None},
          {'name': 'ProgramAuditOwner',
           'description': 'A user with the ProgramOwner role for a private '
              'program will also have this role in the audit context for any '
              'audit created for that program.',
           'permissions_json': json.dumps({
               'create': audit_owner_create,
               'read': audit_owner_create,
               'update': audit_update_objects,
               'delete': [],
           }),
           'scope': 'Audit Implied',
           'created_at': current_datetime,
           'updated_at': current_datetime,
           'context_id': None},
          {'name': 'ProgramAuditEditor',
           'description': 'A user with the ProgramEditor role for a private '
              'program will also have this role in the audit context for any '
              'audit created for that program.',
           'permissions_json': json.dumps({
              'create': audit_create_objects,
              'read': audit_read_objects,
              'update': audit_update_objects,
              'delete': [],
           }),
           'scope': 'Audit Implied',
           'created_at': current_datetime,
           'updated_at': current_datetime,
           'context_id': None},
          {'name': 'ProgramAuditReader',
           'description': 'A user with the ProgramReader role for a private '
              'program will also have this role in the audit context for any '
              'audit created for that program.',
           'permissions_json': json.dumps({
              'create': [],
              'read': audit_read_objects,
              'update': [],
              'delete': [],
           }),
           'scope': 'Audit Implied',
           'created_at': current_datetime,
           'updated_at': current_datetime,
           'context_id': None},
          {'name': 'Auditor',
           'description': 'The permissions required by an auditor to access '
              'relevant resources for the program being audited.',
           'permissions_json': json.dumps({
              'create': ['Request'],
              'read': auditor_read_objects,
              'update': ['Request', 'Response'],
              'delete': [],
           }),
           'scope': 'Audit',
           'created_at': current_datetime,
           'updated_at': current_datetime,
           'context_id': None},
      ])

  # Add role implications table
  # Defined within the context of the target so that authorization in the
  # target is a requirement to create the implication.
  op.create_table(
      'role_implications',
      sa.Column('id', sa.Integer(), nullable=False, primary_key=True),
      sa.Column('source_context_id', sa.Integer(), nullable=True),
      sa.Column('context_id', sa.Integer(), nullable=True),  # target
      sa.Column('source_role_id', sa.Integer(), nullable=False),
      sa.Column('role_id', sa.Integer(), nullable=False),  # target
      sa.Column('modified_by_id', sa.Integer(), nullable=False),
      sa.Column(
          'created_at', sa.DateTime(), default=sa.text('current_timestamp')),
      sa.Column(
          'updated_at',
          sa.DateTime(),
          default=sa.text('current_timestamp'),
          onupdate=sa.text('current_timestamp'),
      ),
      sa.ForeignKeyConstraint(['source_context_id'], ['contexts.id']),
      sa.ForeignKeyConstraint(['context_id'], ['contexts.id']),
      sa.ForeignKeyConstraint(['source_role_id'], ['roles.id']),
      sa.ForeignKeyConstraint(['role_id'], ['roles.id']),
  )
  op.create_unique_constraint(
      'uq_role_implications',
      'role_implications',
      ['source_context_id', 'context_id', 'source_role_id', 'role_id'])
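
The migration above stores each role's permission lists as a JSON string in the permissions_json column. A minimal sketch of that round trip (the permission values here are placeholders, not taken from the migration): the dict is serialized with json.dumps when the row is written and decoded with json.loads when the application reads it back.

import json

# Placeholder permission lists; the real migration builds these from
# reader_objects, audit_read_objects, and similar variables.
permissions = {
    'create': [],
    'read': ['Program', 'Audit'],
    'update': [],
    'delete': [],
}

permissions_json = json.dumps(permissions)   # string stored in roles.permissions_json
restored = json.loads(permissions_json)      # what the application decodes on read
assert restored == permissions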

Example 36

Project: chromium-dashboard
Source File: server.py
View license
  def get(self, path, feature_id=None):
    # Default to features page.
    # TODO: remove later when we want an index.html
    if not path:
      return self.redirect('/features')

    # Default /metrics to CSS ranking.
    # TODO: remove later when we want /metrics/index.html
    if path == 'metrics' or path == 'metrics/css':
      return self.redirect('/metrics/css/popularity')

    # Remove trailing slash from URL and redirect. e.g. /metrics/ -> /metrics
    if feature_id == '':
      return self.redirect(self.request.path.rstrip('/'))

    template_data = {}
    push_urls = [] # URLs to push in this response.

    template_data['embed'] = self.request.get('embed', None) is not None

    if path.startswith('features'):
      if path.endswith('.json'): # JSON request.
        KEY = '%s|all' % (models.Feature.DEFAULT_MEMCACHE_KEY)
        feature_list = memcache.get(KEY)
        if feature_list is None:
          feature_list = self.__get_feature_list()
          memcache.set(KEY, feature_list)
        return common.JSONHandler.get(self, feature_list, formatted=True)
      elif path.endswith('.xml'): # Atom feed request.
        status = self.request.get('status', None)
        if status:
          feature_list = models.Feature.get_all_with_statuses(status.split(','))
        else:
          filterby = None
          category = self.request.get('category', None)

          # Support setting larger-than-default Atom feed sizes so that web
          # crawlers can use this as a full site feed.
          try:
            max_items = int(self.request.get('max-items',
                                             settings.RSS_FEED_LIMIT))
          except TypeError:
            max_items = settings.RSS_FEED_LIMIT

          if category is not None:
            for k,v in models.FEATURE_CATEGORIES.iteritems():
              normalized = normalized_name(v)
              if category == normalized:
                filterby = ('category =', k)
                break

          feature_list = models.Feature.get_all( # Memcached
              limit=max_items,
              filterby=filterby,
              order='-updated')

        return self.render_atom_feed('Features', feature_list)
      else:
        # if settings.PROD:
        #   feature_list = self.__get_feature_list()
        # else:
        #   result = urlfetch.fetch(
        #     self.request.scheme + '://' + self.request.host +
        #     '/static/js/mockdata.json')
        #   feature_list = json.loads(result.content)

        # template_data['features'] = json.dumps(
        #     feature_list, separators=(',',':'))

        template_data['categories'] = [
          (v, normalized_name(v)) for k,v in
          models.FEATURE_CATEGORIES.iteritems()]
        template_data['IMPLEMENTATION_STATUSES'] = json.dumps([
          {'key': k, 'val': v} for k,v in
          models.IMPLEMENTATION_STATUS.iteritems()])
        template_data['VENDOR_VIEWS'] = json.dumps([
          {'key': k, 'val': v} for k,v in
          models.VENDOR_VIEWS.iteritems()])
        template_data['WEB_DEV_VIEWS'] = json.dumps([
          {'key': k, 'val': v} for k,v in
          models.WEB_DEV_VIEWS.iteritems()])
        template_data['STANDARDS_VALS'] = json.dumps([
          {'key': k, 'val': v} for k,v in
          models.STANDARDIZATION.iteritems()])
        template_data['TEMPLATE_CACHE_TIME'] = settings.TEMPLATE_CACHE_TIME

        push_urls = http2push.use_push_manifest('push_manifest_features.json')

    elif path.startswith('feature'):
      feature = None
      try:
        feature = models.Feature.get_feature(int(feature_id))
      except TypeError:
        pass
      if feature is None:
        self.abort(404)

      was_updated = False
      if self.request.referer:
        was_updated = (self.request.referer.endswith('/admin/features/new') or
                       '/admin/features/edit' in self.request.referer)

      template_data['feature'] = feature
      template_data['was_updated'] = was_updated

    elif path.startswith('metrics/css/timeline'):
      properties = sorted(
          models.CssPropertyHistogram.get_all().iteritems(), key=lambda x:x[1])
      template_data['CSS_PROPERTY_BUCKETS'] = json.dumps(
          properties, separators=(',',':'))
    elif path.startswith('metrics/feature/timeline'):
      properties = sorted(
          models.FeatureObserverHistogram.get_all().iteritems(), key=lambda x:x[1])
      template_data['FEATUREOBSERVER_BUCKETS'] = json.dumps(
          properties, separators=(',',':'))
    elif path.startswith('omaha_data'):
      omaha_data = self.__get_omaha_data()
      return common.JSONHandler.get(self, omaha_data, formatted=True)
    elif path.startswith('samples'):
      feature_list = models.Feature.get_shipping_samples() # Memcached

      if path.endswith('.json'): # JSON request.
        return common.JSONHandler.get(self, feature_list, formatted=True)
      elif path.endswith('.xml'): # Atom feed request.
        # Support setting larger-than-default Atom feed sizes so that web
        # crawlers can use this as a full site feed.
        try:
          max_items = int(self.request.get('max-items',
                                           settings.RSS_FEED_LIMIT))
        except TypeError:
          max_items = settings.RSS_FEED_LIMIT

        return self.render_atom_feed('Samples', feature_list)
      else:
        template_data['FEATURES'] = json.dumps(feature_list, separators=(',',':'))
        template_data['CATEGORIES'] = [
          (v, normalized_name(v)) for k,v in
          models.FEATURE_CATEGORIES.iteritems()]
        template_data['categories'] = dict([
          (v, normalized_name(v)) for k,v in
          models.FEATURE_CATEGORIES.iteritems()])

    if path.startswith('metrics/'):
      push_urls = http2push.use_push_manifest('push_manifest_metrics.json')

    # Add Link rel=preload header for h2 push on .html file requests.
    if push_urls:
      self.response.headers.add_header(
          'Link', self._generate_link_preload_headers(push_urls))

    self.render(data=template_data, template_path=os.path.join(path + '.html'))
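
Several of the template values above are built with json.dumps(..., separators=(',',':')). A small standalone sketch of why that keyword is used (the sample data is illustrative): the custom separators drop the whitespace that the default serializer inserts, so the JSON embedded in the rendered page stays as small as possible.

import json

feature_list = [{'key': 1, 'val': 'Shipped'}, {'key': 2, 'val': 'In development'}]

# Default separators put a space after ',' and ':'.
default_blob = json.dumps(feature_list)

# separators=(',', ':') removes that whitespace, which keeps the JSON blob
# embedded in the rendered HTML as compact as possible.
compact_blob = json.dumps(feature_list, separators=(',', ':'))

print(len(default_blob), len(compact_blob))  # the compact form is shorter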

Example 37

Project: osrframework
Source File: platforms.py
View license
    def getInfo(self, query=None, process = False, mode="phonefy", qURI=None):
        ''' 
            Method that checks the presence of a given query and recovers the first list of complaints.

            :param query:   Query to verify.
            :param process: Whether to call the processing function.
            :param mode:    Mode to be executed.
            :param qURI:    A query URI to be checked.

            :return:    Python structure for the processed HTML.
        '''
        # Defining variables for this process
        results = []
        data = ""
        if not self.modeIsValid(mode=mode):
            # TO-DO: InvalidModeException
            return json.dumps(results)
        
        # Verifying if the query is valid
        if not self._isValidQuery(query, mode=mode):
            # TO-DO: InvalidQueryException
            return json.dumps(results)

        # Verifying if the platform has an API defined
        try:
            if type(self.wrapperAPI) != "<type 'NoneType'>":
                if mode == "phonefy":
                    pass
                elif mode == "usufy":
                    results = self.wrapperAPI.get_user(query)
                    # Manually appending the URL
                    for r in results:
                        aux = {}
                        aux["type"]="i3visio.uri"
                        alias=r["value"].split(' - ')[1]
                        qURL, query = self.createURL(word=alias, mode="usufy")                         
                        aux["value"]= qURL
                        aux["attributes"]= []                        
                        r["attributes"].append(aux)
                        
                elif mode == "searchfy":                                
                    results = self.wrapperAPI.search_users(query) 
                    # Manually appending the URL
                    for r in results:
                        aux = {}
                        aux["type"]="i3visio.uri"
                        alias=r["value"].split(' - ')[1]
                        qURL, query = self.createURL(word=alias, mode="usufy")                         
                        aux["value"]= qURL
                        aux["attributes"]= []                        
                        r["attributes"].append(aux)                         
            else:
                # NoneType returned
                pass
        # The platform does not have a Wrapper defined for its API... Then we will use the traditional approach...
        except:
            # Creating the query URL for that mode
            if qURI != None:
                qURL = qURI
            else:
                qURL, query = self.createURL(word=query, mode=mode)           
            i3Browser = browser.Browser()            
            try:
                # check if it needs creds
                if self.needsCredentials[mode]:
                    authenticated = self._getAuthenticated(i3Browser)
                    if authenticated:
                        # Accessing the resources
                        data = i3Browser.recoverURL(qURL)
                else:
                    # Accessing the resources
                    data = i3Browser.recoverURL(qURL)
            except:
                # No information was found, then we return a null entity
                # TO-DO: i3BrowserException         
                return json.dumps(results)            
                   
            # Verifying if the platform exists
            if self.somethingFound(data, mode=mode):

                if mode == "phonefy":
                    r = {}
                    r["type"] = "i3visio.phone"
                    r["value"] = self.platformName + " - " + query
                    r["attributes"] = []      

                    # Appending platform URI
                    aux = {}
                    aux["type"] = "i3visio.uri"
                    aux["value"] = qURL
                    aux["attributes"] = []           
                    r["attributes"].append(aux)   
                    
                    # Appending platform name
                    aux = {}
                    aux["type"] = "i3visio.platform"
                    aux["value"] = self.platformName
                    aux["attributes"] = []
                    r["attributes"].append(aux)
                                  
                    # Iterating if requested to extract more entities from the URI
                    if process:                               
                        # This function returns a json text!
                        r["attributes"] += json.loads(self.processData(data=data, mode=mode))
                    # Appending the result to results: in this case only one profile will be grabbed
                    results.append(r)    
                                                              
                elif mode == "usufy":
                    r = {}            
                    r["type"] = "i3visio.profile"
                    r["value"] = self.platformName + " - " + query                
                    r["attributes"] = []   
                    
                    # Appending platform URI
                    aux = {}
                    aux["type"] = "i3visio.uri"
                    aux["value"] = qURL
                    aux["attributes"] = []           
                    r["attributes"].append(aux)  
                    # Appending the alias
                    aux = {}
                    aux["type"] = "i3visio.alias"
                    aux["value"] = query
                    aux["attributes"] = []           
                    r["attributes"].append(aux)                    
                    # Appending platform name
                    aux = {}
                    aux["type"] = "i3visio.platform"
                    aux["value"] = self.platformName
                    aux["attributes"] = []
                    r["attributes"].append(aux)
                    
                  
                    # Iterating if requested to extract more entities from the URI
                    if process:                               
                        # This function returns a json text!
                        r["attributes"] += json.loads(self.processData(data=data, mode=mode))
                
                    # Appending the result to results: in this case only one profile will be grabbed
                    results.append(r)                
                                                  
                elif mode == "searchfy":
                    # Recovering all the found aliases...
                    ids = re.findall(self.searchfyAliasRegexp, data, re.DOTALL)

                    for j, i in enumerate(ids):
                        r = {}            
                        r["type"] = "i3visio.profile"
                        r["value"] = self.platformName + " - " + i            
                        r["attributes"] = []   

                        # Appending platform URI
                        aux = {}
                        aux["type"] = "i3visio.uri"
                        # Creating the URI based on the base URL for the new profiles...
                        uri, alias = self.createURL(word=i, mode="base")
                        #uri=self.baseURL+i

                        aux["value"] = uri                    

                        aux["attributes"] = []           
                        r["attributes"].append(aux)  
                        # Appending the alias
                        aux = {}
                        aux["type"] = "i3visio.alias"
                        aux["value"] = alias
                        aux["attributes"] = []           
                        r["attributes"].append(aux)                      
                        # Appending platform name
                        aux = {}
                        aux["type"] = "i3visio.platform"
                        aux["value"] = self.platformName
                        aux["attributes"] = []
                        r["attributes"].append(aux)
                        # Appending the query performed to grab these items
                        aux = {}
                        aux["type"] = "i3visio.search"
                        aux["value"] = query
                        aux["attributes"] = []
                        r["attributes"].append(aux)
                        
                        # TO-DO:
                        # Perform additional processing
                        # Iterating the requested profiles to extract more entities from the URI would be slow!
                        """if process:                               
                            # This function returns a json text in usufy format for the returned objects.
                            r["attributes"] += json.loads(self.getInfo(process = True, mode="usufy", qURI=uri, query=i))                    
                        # Appending the result to results: in this case only one profile will be grabbed"""
                        results.append(r)  
        return json.dumps(results)
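
The usufy and searchfy branches above assemble results as nested dicts of type/value/attributes and serialize the whole list at the end. A minimal standalone sketch of that shape (the helper name build_profile_entity and the sample platform, alias and URI are illustrative, not part of osrframework):

import json

def build_profile_entity(platform_name, alias, uri):
    # Minimal entity in the same type/value/attributes shape used above.
    return {
        'type': 'i3visio.profile',
        'value': '%s - %s' % (platform_name, alias),
        'attributes': [
            {'type': 'i3visio.uri', 'value': uri, 'attributes': []},
            {'type': 'i3visio.alias', 'value': alias, 'attributes': []},
            {'type': 'i3visio.platform', 'value': platform_name, 'attributes': []},
        ],
    }

results = [build_profile_entity('Twitter', 'example_user',
                                'https://twitter.com/example_user')]
print(json.dumps(results, indent=2))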

Example 38

Project: inasafe
Source File: push_shake.py
View license
def push_shake_event_to_rest(shake_event, fail_silent=True):
    """Pushing shake event Grid.xml description files to REST server.

    :param shake_event: The shake event to push
    :type shake_event: ShakeEvent

    :param fail_silent: If set to True, the method will continue even when the
        push process fails. Default value is True. If False, this method will
        raise an exception.
    :type fail_silent: bool

    :return: Return True if successfully pushed data
    :rtype: bool
    """
    inasafe_django = InaSAFEDjangoREST()
    # check credentials exists in os.environ
    if not inasafe_django.is_configured():
        LOGGER.info('Insufficient information to push shake map to '
                    'Django Realtime')
        LOGGER.info('Please set environment for INASAFE_REALTIME_REST_URL, '
                    'INASAFE_REALTIME_REST_LOGIN_URL, '
                    'INASAFE_REALTIME_REST_USER, and '
                    'INASAFE_REALTIME_REST_PASSWORD')
        return

    event_dict = shake_event.event_dict()

    # set headers and cookie
    # begin communicating with server
    LOGGER.info('----------------------------------')
    LOGGER.info('Push data to REST server: %s', inasafe_django.base_url())
    try:
        session = inasafe_django.rest
        headers = {
            'X-CSRFTOKEN': inasafe_django.csrf_token,
            'Content-Type': 'application/json'
        }

        # build the data request:
        earthquake_data = {
            'shake_id': shake_event.event_id,
            'magnitude': float(event_dict.get('mmi')),
            'depth': float(event_dict.get('depth-value')),
            'time': str(shake_event.shake_grid.time),
            'location': {
                'type': 'Point',
                'coordinates': [
                    shake_event.shake_grid.longitude,
                    shake_event.shake_grid.latitude
                ]
            },
            'location_description': event_dict.get('shake-grid-location')
        }
        earthquake_file = {
            'shake_grid': (
                '%s-grid.xml' % shake_event.event_id,
                open(shake_event.grid_file_path())),
        }
        # check whether the shake event already exists
        response = session.earthquake(
            earthquake_data['shake_id']).GET()
        if response.status_code == requests.codes.ok:
            # event exists, so update it using the PUT URL
            response = session.earthquake(
                earthquake_data['shake_id']).PUT(
                data=json.dumps(earthquake_data),
                headers=headers)
        elif response.status_code == requests.codes.not_found:
            # event does not exist, so create it using the POST URL
            response = session.earthquake.POST(
                data=json.dumps(earthquake_data),
                headers=headers)

        # upload grid.xml
        headers = {
            'X-CSRFTOKEN': inasafe_django.csrf_token,
        }
        if response.status_code == requests.codes.ok:
            response = session.earthquake(
                earthquake_data['shake_id']).PUT(
                files=earthquake_file,
                headers=headers)

        if not (response.status_code == requests.codes.ok or
                response.status_code == requests.codes.created):
            # raise exceptions
            error = RESTRequestFailedError(
                url=response.url,
                status_code=response.status_code,
                data=json.dumps(earthquake_data))
            if fail_silent:
                LOGGER.info(error.message)
            else:
                raise error

        # post the report
        # build report data
        path_files = shake_event.generate_result_path_dict()
        event_report_dict = {
            'shake_id': shake_event.event_id,
            'language': shake_event.locale
        }
        event_report_files = {
            'report_pdf': open(path_files.get('pdf')),
            'report_image': open(path_files.get('image')),
            'report_thumbnail': open(path_files.get('thumbnail'))
        }
        # check report exists

        # build headers and cookies
        headers = {
            'X-CSRFTOKEN': inasafe_django.csrf_token,
        }
        response = session(
            'earthquake-report',
            event_report_dict['shake_id'],
            event_report_dict['language']).GET()
        if response.status_code == requests.codes.ok:
            # event exists, so update it using the PUT URL
            response = session(
                'earthquake-report',
                event_report_dict['shake_id'],
                event_report_dict['language']).PUT(
                data=event_report_dict,
                files=event_report_files,
                headers=headers)
        elif response.status_code == requests.codes.not_found:
            # event doesn't exist, so create it using the POST URL
            response = session(
                'earthquake-report',
                event_report_dict['shake_id']).POST(
                    data=event_report_dict,
                    files=event_report_files,
                    headers=headers)

        if not (response.status_code == requests.codes.ok or
                response.status_code == requests.codes.created):
            error = RESTRequestFailedError(
                url=response.url,
                status_code=response.status_code,
                data=event_report_dict,
                files=event_report_files)

            if fail_silent:
                LOGGER.info(error.message)
            else:
                raise error

        return True
    # pylint: disable=broad-except
    except Exception as exc:
        if fail_silent:
            LOGGER.warning(exc)
        else:
            raise exc
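
The example above serializes the payload itself with json.dumps and sets the Content-Type header by hand. A minimal sketch of the same pattern with plain requests (the URL and field values here are placeholders, not real endpoints):

import json
import requests

earthquake_data = {
    'shake_id': '20240101000000',   # placeholder values
    'magnitude': 6.2,
    'location': {'type': 'Point', 'coordinates': [106.8, -6.2]},
}

# Explicit serialization, as in the example above: the caller controls the
# body and must set the Content-Type header itself.
response = requests.post(
    'https://realtime.example.org/api/v1/earthquake/',   # placeholder URL
    data=json.dumps(earthquake_data),
    headers={'Content-Type': 'application/json'},
)

# Equivalent shortcut: requests serializes the dict and sets the header.
response = requests.post('https://realtime.example.org/api/v1/earthquake/',
                         json=earthquake_data)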

Example 39

Project: topic-explorer
Source File: server.py
View license
    def _setup_routes(self, **kwargs):
        @self.route('/<k:int>/doc_topics/<doc_id>')
        @_set_acao_headers
        def doc_topic_csv(k, doc_id):
            response.content_type = 'text/csv; charset=UTF8'

            doc_id = unquote(doc_id)

            data = self.v[k].doc_topics(doc_id)

            output = StringIO()
            writer = csv.writer(output)
            writer.writerow(['topic', 'prob'])
            writer.writerows([(t, "%6f" % p) for t, p in data])

            return output.getvalue()

        @self.route('/<k:int>/docs/<doc_id>')
        @_set_acao_headers
        def doc_csv(k, doc_id, threshold=0.2):
            response.content_type = 'text/csv; charset=UTF8'

            doc_id = unquote(doc_id)

            data = self.v[k].dist_doc_doc(doc_id)

            output = StringIO()
            writer = csv.writer(output)
            writer.writerow(['doc', 'prob'])
            writer.writerows([(d, "%6f" % p) for d, p in data if p > threshold])

            return output.getvalue()

        @self.route('/<k:int>/topics/<topic_no:int>.json')
        @_set_acao_headers
        def topic_json(k, topic_no, N=40):
            response.content_type = 'application/json; charset=UTF8'
            try:
                N = int(request.query.n)
            except:
                pass

            if N > 0:
                data = self.v[k].dist_top_doc([topic_no])[:N]
            else:
                data = self.v[k].dist_top_doc([topic_no])[N:]
                data = reversed(data)

            docs = [doc for doc, prob in data]
            doc_topics_mat = self.v[k].doc_topics(docs)
            docs = self.get_docs(docs, id_as_key=True)

            js = []
            for doc_prob, topics in zip(data, doc_topics_mat):
                doc, prob = doc_prob
                struct = docs[doc]
                struct.update({'prob': 1 - prob,
                               'topics': dict([(str(t), float(p)) for t, p in topics])})
                js.append(struct)

            return json.dumps(js)

        @self.route('/<k:int>/docs_topics/<doc_id:path>.json')
        @_set_acao_headers
        def doc_topics(k, doc_id, N=40):
            try:
                N = int(request.query.n)
            except:
                pass

            doc_id = unquote(doc_id)

            response.content_type = 'application/json; charset=UTF8'

            if N > 0:
                data = self.v[k].dist_doc_doc(doc_id)[:N]
            else:
                data = self.v[k].dist_doc_doc(doc_id)[N:]
                data = reversed(data)

            docs = [doc for doc, prob in data]
            doc_topics_mat = self.v[k].doc_topics(docs)
            docs = self.get_docs(docs, id_as_key=True)

            js = []
            for doc_prob, topics in zip(data, doc_topics_mat):
                doc, prob = doc_prob
                struct = docs[doc]
                struct.update({'prob': 1 - prob,
                               'topics': dict([(str(t), float(p)) for t, p in topics])})
                js.append(struct)

            return json.dumps(js)

        @self.route('/<k:int>/word_docs.json')
        @_set_acao_headers
        def word_docs(k, N=40):
            try:
                N = int(request.query.n)
            except:
                pass
            try:
                query = request.query.q.lower().split('|')
            except:
                raise Exception('Must specify a query')

            response.content_type = 'application/json; charset=UTF8'

            query = [word for word in query if word in self.c.words]

            # abort if there are no terms in the query
            if not query:
                response.status = 400  # Bad Request
                return "Search terms not in model"

            topics = self.v[k].dist_word_top(query, show_topics=False)
            data = self.v[k].dist_top_doc(topics['i'],
                                          weights=(topics['value'].max() - topics['value']))

            if N > 0:
                data = data[:N]
            else:
                data = data[N:]
                data = reversed(data)

            docs = [doc for doc, prob in data]
            doc_topics_mat = self.v[k].doc_topics(docs)
            docs = self.get_docs(docs, id_as_key=True)

            js = []
            for doc_prob, topics in zip(data, doc_topics_mat):
                doc, prob = doc_prob
                struct = docs[doc]
                struct.update({'prob': 1 - prob,
                               'topics': dict([(str(t), p) for t, p in topics])})
                js.append(struct)

            return json.dumps(js)

        @self.route('/<k:int>/topics.json')
        @_set_acao_headers
        def topics(k):
            from topicexplorer.lib.color import rgb2hex

            response.content_type = 'application/json; charset=UTF8'
            response.set_header('Expires', _cache_date())
            response.set_header('Cache-Control', 'max-age=86400')
            

            # populate partial jsd values
            data = self.v[k].topic_jsds()

            js = {}
            for rank, topic_H in enumerate(data):
                topic, H = topic_H
                if math.isnan(H): 
                    H = 0.0
                js[str(topic)] = {
                    "H": float(H),
                    "color": rgb2hex(self.colors[k][topic])
                }

            # populate word values
            data = self.v[k].topics()

            wordmax = 10  # for alphabetic languages
            if kwargs.get('lang', None) == 'cn':
                wordmax = 25  # for ideographic languages

            for i, topic in enumerate(data):
                js[str(i)].update({'words': dict([(unicode(w), float(p))
                                                  for w, p in topic[:wordmax]])})

            return json.dumps(js)

        @self.route('/topics.json')
        @_set_acao_headers
        def word_topic_distance():
            import numpy as np
            response.content_type = 'application/json; charset=UTF8'

            # parse query
            try:
                if '|' in request.query.q:
                    query = request.query.q.lower().split('|')
                else:
                    query = request.query.q.lower().split(' ')
            except:
                raise Exception('Must specify a query')

            query = [word for word in query if word in self.c.words]

            # abort if there are no terms in the query
            if not query:
                response.status = 400  # Bad Request
                return "Search terms not in model"


            # calculate distances
            distances = dict()
            for k, viewer in self.v.iteritems():
                d = viewer.dist_word_top(query, show_topics=False)
                distances[k] = np.fromiter(
                    ((k, row['i'], row['value']) for row in d),
                    dtype=[('k', '<i8'), ('i', '<i8'), ('value', '<f8')])

            # merge and sort all topics across all models
            merged_similarity = np.hstack(distances.values())
            sorted_topics = merged_similarity[np.argsort(merged_similarity['value'])]

            # return data
            data = [{'k' : t['k'],
                     't' : t['i'],
                     'distance' : t['value'] } for t in sorted_topics]
            return json.dumps(data)


        @self.route('/topics')
        @_set_acao_headers
        def view_clusters():
            with open(resource_filename(__name__, '../www/master.mustache.html'),
                      encoding='utf-8') as tmpl_file:
                template = tmpl_file.read()

            tmpl_params = {'body' : _render_template('cluster.mustache.html'),
                           'topic_range': self.topic_range}
            return self.renderer.render(template, tmpl_params)


        @self.route('/docs.json')
        @_set_acao_headers
        def docs(docs=None, q=None):
            response.content_type = 'application/json; charset=UTF8'
            response.set_header('Expires', _cache_date())

            try:
                if request.query.q:
                    q = unquote(request.query.q)
            except:
                pass

            try:
                if request.query.id:
                    docs = [unquote(request.query.id)]
            except:
                pass

            try:
                response.set_header('Expires', 0)
                response.set_header('Pragma', 'no-cache')
                response.set_header('Cache-Control', 'no-cache, no-store, must-revalidate')
                if request.query.random:
                    docs = [random.choice(self.labels)]
            except:
                pass

            js = self.get_docs(docs, query=q)

            return json.dumps(js)

        @self.route('/icons.js')
        def icons():
            with open(resource_filename(__name__, '../www/icons.js')) as icons:
                text = '{0}\n var icons = {1};'\
                    .format(icons.read(), json.dumps(self.icons))
            return text

        def _render_template(page):
            response.set_header('Expires', _cache_date())

            with open(resource_filename(__name__, '../www/' + page),
                      encoding='utf-8') as tmpl_file:
                template = tmpl_file.read()

            tmpl_params = {'corpus_name': kwargs.get('corpus_name', ''),
                           'corpus_link': kwargs.get('corpus_link', ''),
                           'context_type': self.context_type,
                           'topic_range': self.topic_range,
                           'doc_title_format': kwargs.get('doc_title_format', '{0}'),
                           'doc_url_format': kwargs.get('doc_url_format', ''),
                           'home_link': kwargs.get('home_link', '/')}
            return self.renderer.render(template, tmpl_params)

        @self.route('/<k:int>/')
        def index(k):
            with open(resource_filename(__name__, '../www/master.mustache.html'),
                      encoding='utf-8') as tmpl_file:
                template = tmpl_file.read()

            tmpl_params = {'body' : _render_template('bars.mustache.html'),
                           'topic_range': self.topic_range}
            return self.renderer.render(template, tmpl_params)

        @self.route('/cluster.csv')
        @_set_acao_headers
        def cluster_csv(second=False):
            filename = kwargs.get('cluster_path')
            print "Retrieving cluster.csv:", filename
            if not filename or not os.path.exists(filename):
                import topicexplorer.train
                filename = topicexplorer.train.cluster(10, self.config_file)
                kwargs['cluster_path'] = filename

            root, filename = os.path.split(filename)
            return static_file(filename, root=root)
        
        @self.route('/description.md')
        @_set_acao_headers
        def description():
            filename = kwargs.get('corpus_desc')
            if not filename:
                response.status = 404
                return "File not found"
            root, filename = os.path.split(filename)
            return static_file(filename, root=root)
        
        @self.route('/')
        @_set_acao_headers
        def cluster():
            with open(resource_filename(__name__, '../www/master.mustache.html'),
                      encoding='utf-8') as tmpl_file:
                template = tmpl_file.read()

            tmpl_params = {'body' : _render_template('splash.mustache.html'),
                           'topic_range': self.topic_range}
            return self.renderer.render(template, tmpl_params)

        @self.route('/<filename:path>')
        @_set_acao_headers
        def send_static(filename):
            return static_file(filename, root=resource_filename(__name__, '../www/'))
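
The routes above appear to follow Bottle's API (route decorators, response.content_type, static_file). A minimal self-contained sketch, assuming Bottle, of a route that hand-serializes its payload with json.dumps (the route path and data are illustrative):

import json
from bottle import Bottle, response

app = Bottle()

@app.route('/topics.json')
def topics():
    # Setting the content type explicitly, as the routes above do, tells the
    # client the body is JSON even though it is returned as a plain string.
    response.content_type = 'application/json; charset=UTF8'
    data = {'0': {'words': {'topic': 0.5, 'model': 0.3}}}
    return json.dumps(data)

# app.run(host='localhost', port=8080)  # uncomment to serve locally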

Example 40

View license
def gen_annotations(indir, in_doc_topics, in_topic_keys, in_topic_state,
                    outdir, min_topic_appearances, min_pointedness,
                    num_words_per_topic, resdir, bandwidth,
                    stopwords, extra_stopwords, subunits):

    topic_state = {}
    topic_appearances_by_doc = {}
    top_topics_by_doc = {}
    docs_by_topic = {}
    top_words_by_topic = {}

    # Load 'stopwords.txt' from the resource directory, as well as the
    # file with additional stopwords (if one is specified).
    if stopwords:
        stopwords_file = codecs.open(stopwords, 'r', 'utf-8')
        stopwords = stopwords_file.read().strip().split('\n')
    else:
        stopwords_file = open(os.path.join(resdir, 'stopwords.txt'))
        stopwords = stopwords_file.read().strip().split(' ')
    stopwords_file.close()
    if extra_stopwords:
        extra_stopwords_file = codecs.open(extra_stopwords, 'r', 'utf-8')
        stopwords += tokenize(extra_stopwords_file.read())
        extra_stopwords_file.close()
    stopwords = set(stopwords)

    # Load the data from the MALLET topic-state file.
    f = gzip.open(in_topic_state, 'r')
    f.readline(); f.readline(); f.readline()
    if subunits:
        subunit_topic_state = {}

        # The gzipped topic-state file will be organized by subunit, so we will
        # need to do some reconstruction.
        for line in f.readlines():
            line = unicode(line, 'utf-8').strip()
            subdocnum, subdoc, pos, wordtypeindex, wordtype, topic \
                = line.split(' ')
            topic = int(topic)
            # Figure out which of the original documents this is a subunit of.
            doc, subunit_index = parse_subdoc(subdoc)
            subunit_topic_state.setdefault(doc, {}) \
                .setdefault(subunit_index, []) \
                .append((wordtype, topic))
            topic_appearances_by_doc.setdefault(doc, set()).add(topic)

        # Construct topic state for the original documents.
        for doc in subunit_topic_state:
            for subunit_index in sorted(subunit_topic_state[doc].keys()):
                topic_state.setdefault(doc, [])
                topic_state[doc] += subunit_topic_state[doc][subunit_index]

    else:
        for line in f.readlines():
            line = unicode(line, 'utf-8').strip()
            docnum, doc, pos, wordtypeindex, wordtype, topic = line.split(' ')
            topic = int(topic)
            doc = os.path.split(doc)[-1]
            topic_state.setdefault(doc, []) \
                .append((wordtype, topic))
            topic_appearances_by_doc.setdefault(doc, set()).add(topic)

    # Load the data from the MALLET doc-topic file
    f = open(in_doc_topics, 'r')
    f.readline()
    topic_coefs_by_doc = {}
    for line in f.readlines():
        line = line.split('\t')
        doc = line[1].split('/')[-1].replace('%20', ' ').replace('%3F', '?')
        line = line[2:]
        ntopics = len(line) / 2
        if ntopics > 9:
            ntopics = 9
        if subunits:
            doc, subunit_index = parse_subdoc(doc)
        for i in xrange(0, ntopics):
            topic = int(line[i*2])
            coef = float(line[i*2 + 1])
            # Only include topics that account for at least one word.
            if topic in topic_appearances_by_doc[doc]:
                topic_coefs_by_doc.setdefault(doc, {}).setdefault(topic, 0.0)
                topic_coefs_by_doc[doc][topic] += coef
                # If we are in subunit mode, we will come across each
                # document multiple times, so we sum up all the coefs.

    # Sort out the top topics by document.
    for doc in topic_coefs_by_doc:
        for topic in sorted(topic_coefs_by_doc[doc],
                            key=lambda topic: -topic_coefs_by_doc[doc][topic])[:9]:
            top_topics_by_doc.setdefault(doc, []).append(topic)
            docs_by_topic.setdefault(topic, []).append(doc)

    # Load the data from the MALLET topic-keys file.
    f = codecs.open(in_topic_keys, 'r', 'utf-8')
    for line in f.readlines():
        line = line.strip()
        topic, n, words = line.split('\t') # What is the second value?
        topic = int(topic)
        top_words_by_topic[topic] = words.split(' ')[:num_words_per_topic]

    # Create the output directory (if necessary).
    if not os.path.exists(outdir):
        os.makedirs(outdir)
    elif not os.path.isdir(outdir):
        print >>sys.stderr, ("Error: '{0}' exists but is not a directory!"
                             .format(outdir))
        exit()

    # Generate and save topic names.
    topic_names = dict([(topic, ' '.join(top_words_by_topic[topic]
                                         [:num_words_per_topic]))
                        for topic in top_words_by_topic])
    outf = open(os.path.join(outdir, 'topic_names.js'), 'w')
    outf.write('topic_names = ' + json.dumps(topic_names) + ';\n')

    # Convert the individual documents to HTML and add annotations,
    # also saving the text of the lines where the link for a given
    # topic will plant you in the document, and getting a list of the
    # documents with links for each topic.
    extracts = {}
    firstlines = {}
    pointed_topics_by_doc = {}
    docs_by_pointed_topic = {}
    for doc in os.listdir(indir):
        state = list(topic_state.get(doc, []))
        pointed_topics_by_doc[doc] = []

        f = codecs.open(os.path.join(indir, doc), 'r', 'utf-8')
        text = f.read()
        f.close()
        lines = text.split(u'\n')
        nlines = len(lines)
        firstlines[doc] = lines[0]
        line_toks = [tokenize(line) for line in lines]

        # Scour the topic state to find the topic assignments for each
        # token in this document.  Also save the line numbers on which
        # words associated with each of the top topics appear.
        line_toks_annotated = []
        topic_appearances = {}
        ntoks = 0
        for i, toks in enumerate(line_toks):
            ntoks += len(toks)
            toks_annotated = []
            for tok in toks:
                match_tok = tok.lower()
                # The last condition is because the first line is supposed
                # to contain a title that is not included in a subunit.
                if match_tok.isalpha() and match_tok not in stopwords \
                        and not (subunits and i == 0):
                    wordtype, topic = state.pop(0)
                    if wordtype != match_tok:
                        print doc, 'line', i, \
                            u'- unable to match input file with MALLET token stream: expected \'' \
                            + unicode(wordtype) + u'\' but found \'' + unicode(match_tok) + u'\'.'
                        print 'Please check your MALLET tokenization settings.  If you are using a custom'
                        print 'stopwords list, you must specify it on the command line when running this'
                        print 'script.'
                        exit()
                else:
                    topic = None
                toks_annotated.append((tok, topic))
                topic_appearances.setdefault(topic, []) \
                    .append(i)
            line_toks_annotated.append(toks_annotated)

        # Compute estimates of the density of each top topic over the
        # lines of the document, and identify which topics are 'pointed'.
        topic_density_fcns = {}
        topic_density_maxima = {}
        for topic in top_words_by_topic:
            if not (topic in top_topics_by_doc.get(doc, [])
                    or topic in pointed_topics_by_doc[doc]):
                continue
            appearances = [float(x) for x in topic_appearances[topic]]
            try:
                # Compute the KDE if possible.
                kde = scipy.stats.gaussian_kde(appearances)
            except ValueError:
                continue
            except numpy.linalg.linalg.LinAlgError:
                continue
            # SciPy lets you set a bandwidth adjustment factor that gets 
            # squared and multiplied by the variance of the data to determine
            # the actual bandwidth.  We want to set the bandwidth directly,
            # so we need to work around this.
            kde.set_bandwidth(1.0)
            kde.set_bandwidth(math.sqrt(bandwidth / float(kde.covariance[0])))
            topic_density_fcns[topic] = [truncate(kde(float(i))[0])
                                         for i in xrange(nlines)]
            # Identify 'pointed' topics.
            if len(appearances) < min_topic_appearances:
                continue
            maximum = numpy.argmax(topic_density_fcns[topic])
            mean = float(kde.integrate_box_1d(0.0, nlines - 1.0)) \
                / nlines
            if topic_density_fcns[topic][maximum] \
                    > mean * min_pointedness:
                topic_density_maxima.setdefault(maximum, []).append(topic)
                pointed_topics_by_doc[doc].append(topic)
                docs_by_pointed_topic.setdefault(topic, []).append(doc)
    
        # Create an HTML document with all of the words associated with
        # top topics marked as such, and annotations added to the lines
        # of greatest density for each top topic.
        outf = codecs.open(os.path.join(outdir, doc + '.html'), 'w', 'utf-8')
        outf.write(html1.format(firstlines[doc], doc))
        # Save the density functions.
        outf.write('<script>\n')
        outf.write('density_fcns = ' + json.dumps(topic_density_fcns) + ';\n')
        outf.write('this_doc = "' + doc + '";\n')
        outf.write('top_topics = ' + json.dumps(top_topics_by_doc.get(doc, [])) + ';\n')
        outf.write('</script>\n')
        outf.write('<table id="text-table">')
        extracts[doc] = {}
        for i, toks in enumerate(line_toks_annotated):
            if i == 0:
                outf.write('<tr class="first-row"><td class="text-line">')
            elif i == nlines - 1:
                outf.write('<tr class="last-row"><td class="text-line">')
            else:
                outf.write('<tr><td class="text-line">')
            if i in topic_density_maxima:
                for topic in topic_density_maxima[i]:
                    outf.write('<a name="topic' + str(topic) + '">')
                    if len(toks) == 1 and toks[0][0] == u'':
                        # Avoid pulling blank lines as extracts.
                        if i < len(line_toks_annotated) - 1:
                            extract_toks = line_toks_annotated[i + 1]
                        else:
                            extract_toks = line_toks_annotated[i - 1]
                    else:
                        extract_toks = toks
                    extracts[doc][topic] = ''.join(tok for tok, topic
                                                   in extract_toks)
            for tok, topic in toks:
                if topic in top_topics_by_doc.get(doc, []) \
                        or topic in pointed_topics_by_doc[doc]:
                    outf.write('<span class="topic' + str(topic) + '">' +
                               tok + '</span>')
                else:
                    outf.write(tok)
            if i in topic_density_maxima:
                for topic in topic_density_maxima[i]:
                    outf.write('</a>')
            if i == 0:
                outf.write('&nbsp;</td><td rowspan="'
                           + str(nlines + 1)
                           + '" id="chart-cell" valign="top">'
                           + '<div id="chart-area"><div id="chart">'
                           + '</div></div></td>'
                           + '<td class="marginal-link-cell">')
            else:
                outf.write('&nbsp;</td><td class="marginal-link-cell">')
            if i in topic_density_maxima:
                for topic in topic_density_maxima[i]:
                    outf.write('<span class="marginal-link" id="'
                               + str(topic) + '"></span>')
            if i == 0:
                outf.write('</td><td valign="top" rowspan="'
                           + str(nlines)
                           + '" id="popup-cell">'
                           + '<div id="popup-area"></div></td></tr>\n')
            else:
                outf.write('</td></tr>\n')

        outf.write('</table>')
        outf.write(html2)

    # Sort the lists of top docs.
    for topic in docs_by_topic:
        d = docs_by_topic[topic]
        docs_by_topic[topic] = alphanumeric_sort(d)
    for topic in docs_by_pointed_topic:
        d = docs_by_pointed_topic[topic]
        docs_by_pointed_topic[topic] = alphanumeric_sort(d)

    # Save the list of documents to display for each topic.
    outf = open(os.path.join(outdir, 'docs_by_topic.js'), 'w')
    outf.write('docs_by_pointed_topic = ' + json.dumps(docs_by_pointed_topic) + ';\n')
    outf.write('docs_by_topic = ' + json.dumps(docs_by_topic) + ';\n')

    # Save the list of document names
    outf = open(os.path.join(outdir, 'doc_names.js'), 'w')
    outf.write('doc_names = ' + json.dumps(firstlines) + ';\n')

    # Save the extracts.
    outf = codecs.open(os.path.join(outdir, 'extracts.js'), 'w', 'utf-8')
    outf.write('extracts = ' + json.dumps(extracts) + ';\n')

    # Create the index file.
    outf = codecs.open(os.path.join(outdir, 'index.html'), 'w', 'utf-8')
    outf.write(index_html1)
    docs = alphanumeric_sort(os.listdir(indir))
    ndocs = len(docs)
    for i, doc in enumerate(docs):
        if i == 0:
            outf.write('<tr class="first-row">')
        elif i == ndocs - 1:
            outf.write('<tr class="last-row">')
        else:
            outf.write('<tr>')
        outf.write('<td class="index-entry"><a href="' + doc
                   + '.html">' + doc + '</a>: '
                   + firstlines[doc] + '</td></tr>')
    outf.write(index_html2)

    # Create the topic index file.
    outf = codecs.open(os.path.join(outdir, 'topic-index.html'), 'w', 'utf-8')
    outf.write(topic_index_html1)
    topic_list = sorted(top_words_by_topic.keys())
    ntopics = len(topic_list)
    for i, topic in enumerate(topic_list):
        if i == 0:
            outf.write('<tr class="first-row">')
        elif i == ntopics - 1:
            outf.write('<tr class="last-row">')
        else:
            outf.write('<tr>')
        outf.write('<td class="index-entry" id="' + str(topic)
                   + '"><a class="topic-link" '
                   + 'href="javascript:show_index_popup('
                   + str(topic) + ')">Topic ' + str(topic) + '</a>: '
                   + topic_names[topic].encode('ascii', 'xmlcharrefreplace')
                   + '</td>')
        if i == 0:
            outf.write('<td valign="top" rowspan="' + str(ntopics + 1)
                       + '" id="popup-cell"><div id="popup-area"></div></td>')
        outf.write('</tr>')
    outf.write('<tr><td class="blank-index-entry">&nbsp;</td></tr>')
    outf.write(topic_index_html2)

    # Copy the resource files to the output directory.
    for filename in resource_files:
        shutil.copy(os.path.join(resdir, filename), outdir)
    
    # Print a summary of the exemplary/pointed passages that were found.
    print 'Summary of links generated:'
    print 'Topic\tNum. linked passages'
    for topic in topic_list:
        print '{0}\t{1}'.format(topic, len(docs_by_pointed_topic.get(topic, [])))

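The three writes above share one pattern: serialize a Python mapping with json.dumps and wrap the result in a JavaScript assignment, presumably so the generated HTML pages can load it with a plain <script> tag. A minimal sketch of that pattern, with a hypothetical file name and made-up data:

# Sketch of the "JSON as a JavaScript assignment" pattern used above.
# The file name and the sample mapping are hypothetical.
import codecs
import json

docs_by_topic = {0: ['doc_a', 'doc_b'], 1: ['doc_c']}

with codecs.open('topics_demo.js', 'w', 'utf-8') as outf:
    # json.dumps coerces the integer keys to JSON string keys.
    outf.write('docs_by_topic = ' + json.dumps(docs_by_topic) + ';\n')
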
Example 41

Project: ANALYSE
Source File: psychoanalyze.py
View license
def generate_plots_for_problem(problem):

    pmdset = PsychometricData.objects.using(db).filter(
        studentmodule__module_state_key=BlockUsageLocator.from_string(problem)
    )
    nstudents = pmdset.count()
    msg = ""
    plots = []

    if nstudents < 2:
        msg += "%s nstudents=%d --> skipping, too few" % (problem, nstudents)
        return msg, plots

    max_grade = pmdset[0].studentmodule.max_grade

    agdat = pmdset.aggregate(Sum('attempts'), Max('attempts'))
    max_attempts = agdat['attempts__max']
    total_attempts = agdat['attempts__sum']  # not used yet

    msg += "max attempts = %d" % max_attempts

    xdat = range(1, max_attempts + 1)
    dataset = {'xdat': xdat}

    # compute grade statistics
    grades = [pmd.studentmodule.grade for pmd in pmdset]
    gsv = StatVar()
    for g in grades:
        gsv += g
    msg += "<br><p><font color='blue'>Grade distribution: %s</font></p>" % gsv

    # generate grade histogram
    ghist = []

    axisopts = """{
        xaxes: [{
            axisLabel: 'Grade'
        }],
        yaxes: [{
            position: 'left',
            axisLabel: 'Count'
         }]
         }"""

    if gsv.max > max_grade:
        msg += "<br/><p><font color='red'>Something is wrong: max_grade=%s, but max(grades)=%s</font></p>" % (max_grade, gsv.max)
        max_grade = gsv.max

    if max_grade > 1:
        ghist = make_histogram(grades, np.linspace(0, max_grade, max_grade + 1))
        ghist_json = json.dumps(ghist.items())

        plot = {'title': "Grade histogram for %s" % problem,
                'id': 'histogram',
                'info': '',
                'data': "var dhist = %s;\n" % ghist_json,
                'cmd': '[ {data: dhist, bars: { show: true, align: "center" }} ], %s' % axisopts,
                }
        plots.append(plot)
    else:
        msg += "<br/>Not generating histogram: max_grade=%s" % max_grade

    # histogram of time differences between checks
    # Warning: this is inefficient - doesn't scale to large numbers of students
    dtset = []  # time differences in minutes
    dtsv = StatVar()
    for pmd in pmdset:
        try:
            checktimes = eval(pmd.checktimes)  # update log of attempt timestamps
        except:
            continue
        if len(checktimes) < 2:
            continue
        ct0 = checktimes[0]
        for ct in checktimes[1:]:
            dt = (ct - ct0).total_seconds() / 60.0
            if dt < 20:  # ignore if dt too long
                dtset.append(dt)
                dtsv += dt
            ct0 = ct
    if dtsv.cnt > 2:
        msg += "<br/><p><font color='brown'>Time differences between checks: %s</font></p>" % dtsv
        bins = np.linspace(0, 1.5 * dtsv.sdv(), 30)
        dbar = bins[1] - bins[0]
        thist = make_histogram(dtset, bins)
        thist_json = json.dumps(sorted(thist.items(), key=lambda(x): x[0]))

        axisopts = """{ xaxes: [{ axisLabel: 'Time (min)'}], yaxes: [{position: 'left',axisLabel: 'Count'}]}"""

        plot = {'title': "Histogram of time differences between checks",
                'id': 'thistogram',
                'info': '',
                'data': "var thist = %s;\n" % thist_json,
                'cmd': '[ {data: thist, bars: { show: true, align: "center", barWidth:%f }} ], %s' % (dbar, axisopts),
                }
        plots.append(plot)

    # one IRT plot curve for each grade received (TODO: this assumes integer grades)
    for grade in range(1, int(max_grade) + 1):
        yset = {}
        gset = pmdset.filter(studentmodule__grade=grade)
        ngset = gset.count()
        if ngset == 0:
            continue
        ydat = []
        ylast = 0
        for x in xdat:
            y = gset.filter(attempts=x).count() / ngset
            ydat.append(y + ylast)
            ylast = y + ylast
        yset['ydat'] = ydat

        if len(ydat) > 3:  # try to fit to logistic function if enough data points
            try:
                cfp = curve_fit(func_2pl, xdat, ydat, [1.0, max_attempts / 2.0])
                yset['fitparam'] = cfp
                yset['fitpts'] = func_2pl(np.array(xdat), *cfp[0])
                yset['fiterr'] = [yd - yf for (yd, yf) in zip(ydat, yset['fitpts'])]
                fitx = np.linspace(xdat[0], xdat[-1], 100)
                yset['fitx'] = fitx
                yset['fity'] = func_2pl(np.array(fitx), *cfp[0])
            except Exception as err:
                log.debug('Error in psychoanalyze curve fitting: %s' % err)

        dataset['grade_%d' % grade] = yset

    axisopts = """{
        xaxes: [{
            axisLabel: 'Number of Attempts'
        }],
        yaxes: [{
            max:1.0,
            position: 'left',
            axisLabel: 'Probability of correctness'
         }]
         }"""

    # generate points for flot plot
    for grade in range(1, int(max_grade) + 1):
        jsdata = ""
        jsplots = []
        gkey = 'grade_%d' % grade
        if gkey in dataset:
            yset = dataset[gkey]
            jsdata += "var d%d = %s;\n" % (grade, json.dumps(zip(xdat, yset['ydat'])))
            jsplots.append('{ data: d%d, lines: { show: false }, points: { show: true}, color: "red" }' % grade)
            if 'fitpts' in yset:
                jsdata += 'var fit = %s;\n' % (json.dumps(zip(yset['fitx'], yset['fity'])))
                jsplots.append('{ data: fit,  lines: { show: true }, color: "blue" }')
                (a, b) = yset['fitparam'][0]
                irtinfo = "(2PL: D=1.7, a=%6.3f, b=%6.3f)" % (a, b)
            else:
                irtinfo = ""

            plots.append({'title': 'IRT Plot for grade=%s %s' % (grade, irtinfo),
                          'id': "irt%s" % grade,
                          'info': '',
                          'data': jsdata,
                          'cmd': '[%s], %s' % (','.join(jsplots), axisopts),
                          })

    #log.debug('plots = %s' % plots)
    return msg, plots

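The json.dumps calls in this example all turn Python pairs, either dict items or zip(x, y), into the array-of-[x, y] series format that flot consumes. A small self-contained sketch of that conversion (the histogram values are made up; list() is only needed on Python 3, where items() returns a view that json.dumps cannot serialize directly):

# Pairs-to-flot-series sketch: a list of (x, y) tuples serializes as a JSON
# array of two-element arrays, which is the series format flot expects.
import json

hist = {1: 12, 2: 7, 3: 3}       # bin -> count, made-up values
series = sorted(hist.items())     # [(1, 12), (2, 7), (3, 3)]

js_data = "var dhist = %s;\n" % json.dumps(list(series))
print(js_data)                    # var dhist = [[1, 12], [2, 7], [3, 3]];
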
Example 43

Project: ansible
Source File: module_common.py
View license
def _find_snippet_imports(module_name, module_data, module_path, module_args, task_vars, module_compression):
    """
    Given the source of the module, convert it to a Jinja2 template to insert
    module code and return whether it's a new or old style module.
    """

    module_substyle = module_style = 'old'

    # module_style is something important to calling code (ActionBase).  It
    # determines how arguments are formatted (json vs k=v) and whether
    # a separate arguments file needs to be sent over the wire.
    # module_substyle is extra information that's useful internally.  It tells
    # us what we have to look to substitute in the module files and whether
    # we're using module replacer or ansiballz to format the module itself.
    if _is_binary(module_data):
        module_substyle = module_style = 'binary'
    elif REPLACER in module_data:
        # Do REPLACER before the 'from ansible.module_utils.' check because we
        # need to make sure we substitute "from ansible.module_utils.basic import *"
        # for REPLACER
        module_style = 'new'
        module_substyle = 'python'
        module_data = module_data.replace(REPLACER, b'from ansible.module_utils.basic import *')
    elif b'from ansible.module_utils.' in module_data:
        module_style = 'new'
        module_substyle = 'python'
    elif REPLACER_WINDOWS in module_data:
        module_style = 'new'
        module_substyle = 'powershell'
    elif REPLACER_JSONARGS in module_data:
        module_style = 'new'
        module_substyle = 'jsonargs'
    elif b'WANT_JSON' in module_data:
        module_substyle = module_style = 'non_native_want_json'

    shebang = None
    # Neither old-style, non_native_want_json nor binary modules should be modified
    # except for the shebang line (Done by modify_module)
    if module_style in ('old', 'non_native_want_json', 'binary'):
        return module_data, module_style, shebang

    output = BytesIO()
    py_module_names = set()

    if module_substyle == 'python':
        params = dict(ANSIBLE_MODULE_ARGS=module_args,)
        python_repred_params = repr(json.dumps(params))

        try:
            compression_method = getattr(zipfile, module_compression)
        except AttributeError:
            display.warning(u'Bad module compression string specified: %s.  Using ZIP_STORED (no compression)' % module_compression)
            compression_method = zipfile.ZIP_STORED

        lookup_path = os.path.join(C.DEFAULT_LOCAL_TMP, 'ansiballz_cache')
        cached_module_filename = os.path.join(lookup_path, "%s-%s" % (module_name, module_compression))

        zipdata = None
        # Optimization -- don't lock if the module has already been cached
        if os.path.exists(cached_module_filename):
            display.debug('ANSIBALLZ: using cached module: %s' % cached_module_filename)
            zipdata = open(cached_module_filename, 'rb').read()
        else:
            if module_name in action_write_locks.action_write_locks:
                display.debug('ANSIBALLZ: Using lock for %s' % module_name)
                lock = action_write_locks.action_write_locks[module_name]
            else:
                # If the action plugin directly invokes the module (instead of
                # going through a strategy) then we don't have a cross-process
                # Lock specifically for this module.  Use the "unexpected
                # module" lock instead
                display.debug('ANSIBALLZ: Using generic lock for %s' % module_name)
                lock = action_write_locks.action_write_locks[None]

            display.debug('ANSIBALLZ: Acquiring lock')
            with lock:
                display.debug('ANSIBALLZ: Lock acquired: %s' % id(lock))
                # Check that no other process has created this while we were
                # waiting for the lock
                if not os.path.exists(cached_module_filename):
                    display.debug('ANSIBALLZ: Creating module')
                    # Create the module zip data
                    zipoutput = BytesIO()
                    zf = zipfile.ZipFile(zipoutput, mode='w', compression=compression_method)
                    # Note: If we need to import from release.py first,
                    # remember to catch all exceptions: https://github.com/ansible/ansible/issues/16523
                    zf.writestr('ansible/__init__.py',
                            b'from pkgutil import extend_path\n__path__=extend_path(__path__,__name__)\n__version__="' +
                            to_bytes(__version__) + b'"\n__author__="' +
                            to_bytes(__author__) + b'"\n')
                    zf.writestr('ansible/module_utils/__init__.py', b'from pkgutil import extend_path\n__path__=extend_path(__path__,__name__)\n')

                    zf.writestr('ansible_module_%s.py' % module_name, module_data)

                    py_module_cache = { ('__init__',): b'' }
                    recursive_finder(module_name, module_data, py_module_names, py_module_cache, zf)
                    zf.close()
                    zipdata = base64.b64encode(zipoutput.getvalue())

                    # Write the assembled module to a temp file (write to temp
                    # so that no one looking for the file reads a partially
                    # written file)
                    if not os.path.exists(lookup_path):
                        # Note -- if we have a global function to setup, that would
                        # be a better place to run this
                        os.makedirs(lookup_path)
                    display.debug('ANSIBALLZ: Writing module')
                    with open(cached_module_filename + '-part', 'wb') as f:
                        f.write(zipdata)

                    # Rename the file into its final position in the cache so
                    # future users of this module can read it off the
                    # filesystem instead of constructing from scratch.
                    display.debug('ANSIBALLZ: Renaming module')
                    os.rename(cached_module_filename + '-part', cached_module_filename)
                    display.debug('ANSIBALLZ: Done creating module')

            if zipdata is None:
                display.debug('ANSIBALLZ: Reading module after lock')
                # Another process wrote the file while we were waiting for
                # the write lock.  Go ahead and read the data from disk
                # instead of re-creating it.
                try:
                    zipdata = open(cached_module_filename, 'rb').read()
                except IOError:
                    raise AnsibleError('A different worker process failed to create module file.'
                    ' Look at traceback for that process for debugging information.')
        zipdata = to_text(zipdata, errors='surrogate_or_strict')

        shebang, interpreter = _get_shebang(u'/usr/bin/python', task_vars)
        if shebang is None:
            shebang = u'#!/usr/bin/python'

        # Enclose the parts of the interpreter in quotes because we're
        # substituting it into the template as a Python string
        interpreter_parts = interpreter.split(u' ')
        interpreter = u"'{0}'".format(u"', '".join(interpreter_parts))

        output.write(to_bytes(ACTIVE_ANSIBALLZ_TEMPLATE % dict(
            zipdata=zipdata,
            ansible_module=module_name,
            params=python_repred_params,
            shebang=shebang,
            interpreter=interpreter,
            coding=ENCODING_STRING,
        )))
        module_data = output.getvalue()

    elif module_substyle == 'powershell':
        # Module replacer for jsonargs and windows
        lines = module_data.split(b'\n')
        for line in lines:
            if REPLACER_WINDOWS in line:
                ps_data = _slurp(os.path.join(_SNIPPET_PATH, "powershell.ps1"))
                output.write(ps_data)
                py_module_names.add((b'powershell',))
                continue
            output.write(line + b'\n')
        module_data = output.getvalue()

        module_args_json = to_bytes(json.dumps(module_args))
        module_data = module_data.replace(REPLACER_JSONARGS, module_args_json)

        # Powershell/winrm don't actually make use of shebang so we can
        # safely set this here.  If we let the fallback code handle this
        # it can fail in the presence of the UTF8 BOM commonly added by
        # Windows text editors
        shebang = u'#!powershell'

        # Sanity check from 1.x days.  This is currently useless as we only
        # get here if we are going to substitute powershell.ps1 into the
        # module anyway.  Leaving it for when/if we add other powershell
        # module_utils files.
        if (b'powershell',) not in py_module_names:
            raise AnsibleError("missing required import in %s: # POWERSHELL_COMMON" % module_path)

    elif module_substyle == 'jsonargs':
        module_args_json = to_bytes(json.dumps(module_args))

        # these strings could be included in a third-party module but
        # officially they were included in the 'basic' snippet for new-style
        # python modules (which has been replaced with something else in
        # ansiballz) If we remove them from jsonargs-style module replacer
        # then we can remove them everywhere.
        python_repred_args = to_bytes(repr(module_args_json))
        module_data = module_data.replace(REPLACER_VERSION, to_bytes(repr(__version__)))
        module_data = module_data.replace(REPLACER_COMPLEX, python_repred_args)
        module_data = module_data.replace(REPLACER_SELINUX, to_bytes(','.join(C.DEFAULT_SELINUX_SPECIAL_FS)))

        # The main event -- substitute the JSON args string into the module
        module_data = module_data.replace(REPLACER_JSONARGS, module_args_json)

        facility = b'syslog.' + to_bytes(task_vars.get('ansible_syslog_facility', C.DEFAULT_SYSLOG_FACILITY), errors='surrogate_or_strict')
        module_data = module_data.replace(b'syslog.LOG_USER', facility)

    return (module_data, module_style, shebang)

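In the Python branch above, json.dumps serializes the module arguments and repr() then turns the JSON text into a quoted Python string literal that can be pasted into the generated wrapper source; the powershell and jsonargs branches instead substitute the raw JSON bytes for a placeholder. A toy illustration of the repr(json.dumps(...)) step, not Ansible's actual template, with an illustrative template and variable name:

# Toy illustration of embedding JSON into generated source via repr().
# The template and the PARAMS name below are illustrative, not Ansible's own.
import json

module_args = {'path': '/tmp/demo', 'state': 'present'}
params_literal = repr(json.dumps(dict(ANSIBLE_MODULE_ARGS=module_args)))

wrapper_template = "PARAMS = %(params)s\n"
generated = wrapper_template % dict(params=params_literal)
print(generated)
# PARAMS = '{"ANSIBLE_MODULE_ARGS": {"path": "/tmp/demo", "state": "present"}}'
# (key order may differ on older Python versions)
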
Example 45

Project: mining
Source File: build_admin.py
View license
def build(level=0):
    demo_path = os.path.abspath(os.path.dirname(__file__))
    try:
        os.remove(os.path.join(demo_path, 'demo.db'))
    except OSError:
        pass
    conn = sqlite3.connect('{}'.format(os.path.join(demo_path, 'demo.db')))
    cur = conn.cursor()
    f = open('{}'.format(os.path.join(demo_path, 'base.sql')), 'r')
    sql_str = f.read()
    print 'INSERT SQLITE DATA'
    cur.executescript(sql_str)
    conn.commit()
    f.close()
    if level > 0:
        l = open(os.path.join(demo_path, 'people.sql'), 'r').read()
        print 'INSERT SQLITE DATA LEVEL {}'.format(level)
        for i in xrange(level):
            cur.executescript(l)
            conn.commit()
            print "LEVEL {} COMMIT".format(i)
    cur.close()

    url_api = {
        'user': "http://127.0.0.1:8888/api/user",
        'connection': "http://127.0.0.1:8888/api/connection",
        'cube': "http://127.0.0.1:8888/api/cube",
        'element': "http://127.0.0.1:8888/api/element",
        'dashboard': "http://127.0.0.1:8888/api/dashboard"
    }
    data = {
        'user': {'username': 'admin', 'password': 'admin', 'rule': 'root'},
        'connection': {
            "connection": 'sqlite:///{}'.format(
                os.path.join(demo_path, 'demo.db')),
            "name": "DEMO"
        },
        'cube': [
            {
                "status": False,
                "run": False,
                "name": "Sales",
                "slug": "sales",
                "connection": "demo",
                "sql": "select * from SALE;",
                "scheduler_status": False,
                "type": "relational"
            },
            {
                "status": False,
                "run": False,
                "name": "People",
                "slug": "people",
                "connection": "demo",
                "sql": "select * from people;",
                "scheduler_status": False,
                "type": "relational"
            },
            {
                "status": False,
                "run": False,
                "name": "Product Sales",
                "slug": "product-sales",
                "connection": "demo",
                "sql": "select * from SALE_PRODUCT;",
                "scheduler_status": False,
                "type": "relational"
            },
            {
                "status": False,
                "run": False,
                "name": "Sales by month",
                "slug": "sales-by-month",
                "connection": "demo",
                "sql": "SELECT  strftime('%Y-%m', sale_at) as month, SUM(value) \
                        as total\nFROM    sale\n\
                        GROUP BY strftime('%Y-%m', sale_at)",
                "scheduler_status": False,
                "type": "relational",
                "slug": "sales-by-month"
            }
        ],
        'element': [
            {
                "alias": {

                },
                "cube": "people",
                "field_serie": None,
                "field_x": None,
                "field_y": None,
                "name": "People Grid",
                "orderby": [
                    "full_name"
                ],
                "orderby__order": [
                    "1"
                ],
                "scheduler_status": False,
                "show_fields": [
                    "id_people",
                    "full_name",
                    "gender",
                    "age",
                    "country",
                    "created_at"
                ],
                "type": "grid",
                "widgets": [
                    {
                        "field": "country",
                        "type": "distinct",
                        "label": "Country"
                    }
                ]
            },
            {
                "alias": {

                },
                "cube": "sales-by-month",
                "field_serie": None,
                "field_x": "month",
                "field_y": "total",
                "name": "Sales Bar",
                "scheduler_status": False,
                "show_fields": [
                    "month",
                    "total"
                ],
                "type": "chart_bar"
            },
            {
                "orderby": [
                    "sale_at"
                ],
                "cube": "sales",
                "name": "Sales Grid",
                "show_fields": [
                    "id_sale",
                    "id_people",
                    "value",
                    "paid",
                    "sale_at"
                ],
                "widgets": [],
                "alias": {

                },
                "field_x": None,
                "field_y": None,
                "scheduler_status": False,
                "orderby__order": [
                    "0"
                ],
                "type": "grid",
                "field_serie": None
            }
        ],
        'dashboard': {
            "scheduler_status": False,
            "element": [
                {
                    "id": "people-grid",
                    "label": "People Grid"
                },
                {
                    "id": "sales-bar",
                    "label": "Sales Bar"
                },
                {
                    "id": "sales-grid",
                    "label": "Sales Grid"
                }
            ],
            "slug": "demo",
            "name": "Demo"
        }
    }

    headers = {'Content-type': 'application/json', 'Accept': 'text/plain'}
    print 'CREATE USER admin'
    requests.post(url_api.get('user'), data=json.dumps(data.get('user')),
                  headers=headers)
    print 'CREATE connection'
    requests.post(url_api.get('connection'),
                  data=json.dumps(data.get('connection')),
                  headers=headers)
    print 'CREATE cube'
    for cb in data.get('cube'):
        requests.post(url_api.get('cube'), data=json.dumps(cb),
                      headers=headers)
        print 'RUNNING cube {}'.format(cb.get('slug'))
        process(cb)

    print 'CREATE element'
    for el in data.get('element'):
        print '--> {}'.format(el.get('name'))
        requests.post(url_api.get('element'), data=json.dumps(el),
                      headers=headers)

    print 'CREATE dashboard'
    requests.post(url_api.get('dashboard'),
                  data=json.dumps(data.get('dashboard')),
                  headers=headers)

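Every POST in this script serializes its payload by hand with json.dumps and sets the Content-type header explicitly. A minimal sketch of that call shape, reusing the user endpoint and payload from the example; note that recent versions of requests can do the same thing with the json= keyword, which handles both the serialization and the header:

# Minimal sketch of the POST-with-json.dumps pattern used above.
import json
import requests

url = "http://127.0.0.1:8888/api/user"
payload = {"username": "admin", "password": "admin", "rule": "root"}
headers = {"Content-type": "application/json", "Accept": "text/plain"}

# data= sends the pre-serialized JSON text; the header tells the API how
# to interpret it.
resp = requests.post(url, data=json.dumps(payload), headers=headers)
print(resp.status_code)
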
Example 46

Project: edx2bigquery
Source File: addmoduleid.py
View license
def guess_module_id(doc):

    event = doc['event']
    event_type = doc['event_type']
    path = doc.get('context', {}).get('path', '')
  
    # opaque keys

    rr = okre1.search(event_type)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        # sys.stderr.write("ok mid = %s\n" % mid)
        return mid
  
    rr = okre1.search(path)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        # sys.stderr.write("ok mid = %s\n" % mid)
        return mid

    if ('problem' in event_type and type(event) in [str, unicode] and event.startswith("input_")):
        page = doc.get('page', '') or ''
        # sys.stderr.write("page=%s\n" % page)
        rr = okre2.search(page)
        if (rr):
            rr2 = okre2a.search(event.split('&',1)[0])
            if rr2:
                mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), 'problem', rr2.group('id'))
                # sys.stderr.write("ok mid = %s\n" % mid)
                return mid
        rr2 = cidre11a.search(event)
        if (rr2):
            mid = "%s/%s/%s/%s" % (rr2.group('org'), rr2.group('course'), rr2.group('mtype'), rr2.group('id'))
            # sys.stderr.write("ok mid = %s\n" % mid)
            return mid
        sys.stderr.write("ok parse failed on %s" % json.dumps(doc, indent=4))
  
    if (event_type=="problem_graded" and type(event)==list and len(event)>0 and event[0].startswith("input_")):
        page = doc.get('page', '') or ''
        # sys.stderr.write("page=%s\n" % page)
        rr = okre2.search(page)
        if (rr):
            rr2 = okre2a.search(event[0])
            if rr2:
                mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), 'problem', rr2.group('id'))
                # sys.stderr.write("ok mid = %s\n" % mid)
                return mid
        rr2 = cidre11a.search(event[0])
        if (rr2):
            mid = "%s/%s/%s/%s" % (rr2.group('org'), rr2.group('course'), rr2.group('mtype'), rr2.group('id'))
            # sys.stderr.write("ok mid = %s\n" % mid)
            return mid
        sys.stderr.write("ok parse failed on %s" % json.dumps(doc, indent=4))

    rr = okre3.search(event_type)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        #sys.stderr.write("ok mid = %s\n" % mid)
        return mid
  
    rr = okre3.search(path)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        #sys.stderr.write("ok mid = %s\n" % mid)
        return mid

    if not type(event)==dict:
        event_dict = None
        try:
            event_dict = json.loads(event)
        except:
            pass
        if type(event_dict)==dict and 'id' in event_dict:
            event = event_dict

    if (type(event)==dict and 'id' in event and type(event['id']) in [str, unicode]):
        eid = event['id']
        rr = okre4.search(eid)
        if (rr):
            mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
            return mid
        rr = okre5a.search(eid)
        if (rr):
            mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
            #sys.stderr.write("ok mid = %s\n" % mid)
            return mid
        if event_type=='play_video' and '/' not in eid:
            rr = okre5.search(doc.get('page', '') or '')
            if (rr):
                mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), 'video', eid)
                return mid

    elif (type(event) in [str, unicode]):
        rr = okre4.search(event)
        if (rr):
            mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
            return mid

    # sys.stderr.write('path=%s\n' % json.dumps(path, indent=4))
    # sys.stderr.write('doc=%s\n' % json.dumps(doc, indent=4))
    # return

    # non-opaque keys

    if event_type in ['add_resource', 'delete_resource', 'recommender_upvote']:
        return None

    if type(event)==dict and ('id' in event) and not (type(event['id']) in [str, unicode]):
        return None

    if doc['event_source']=='browser':
        try:
            m = cidre3.search(event['id'])
            if m:
                # print "="*20 + "checking event id"
                # special handling for seq_goto or seq_next, so we append seq_num to sequential's module_id
                if ((event_type=='seq_goto') or (event_type=="seq_next")):
                    mid = m.group(1) + "/" + str(event['new'])
                    # sys.stderr.write("mid=" + mid + " for %s\n" % doc)
                    return mid
                return m.group(1)
        except Exception as err:
            pass

        if (event_type=='page_close'):
            rr = cidre7.search(doc['page'])
            # sys.stderr.write('checking page_close, rr=%s' % rr)
            if (rr):
                return rr.group(1) + "/sequential/" + rr.group(2) + '/'
            rr = cidre8.search(doc['page'])
            if (rr):
                return rr.group(1) + "/chapter/" + rr.group(2) + '/'
  
        try:
          rr = cidre3.search(event.problem)        # for problem_show ajax
          if (rr):
              return rr.group(1)
        except Exception as err:
            pass
  
        if (type(event)==str or type(event)==unicode):
            rr = cidre5.search(event)
            if (rr):
                return rr.group(1) + '/' + rr.group(2) + '/problem/' + rr.group(3)

        try:
            rr = cidre5.search(event[0])   # for problem_graded events from browser
            if (rr):
                return rr.group(1) + '/' + rr.group(2) + '/problem/' + rr.group(3)
        except Exception as err:
            pass
  
    # server events - ones which do not depend on event (just event_type)

    rr = fidre5.search(event_type)
    if (rr):
        return rr.group(1) + "/forum/" + rr.group(3)
  
    rr = fidre6.search(event_type)
    if (rr):
        return rr.group(1) + "/forum/" + rr.group(4)
  
    rr = fidre7.search(event_type)
    if (rr):
        return rr.group(1) + "/forum/new"
  
    rr = fidre8.search(event_type)
    if (rr):
        return rr.group(1) + "/forum/" + rr.group(4)
  
    # event of going to sequence and seeing last page visited, eg:
    rr = cidre6.search(event_type)
    if (rr):
        return rr.group(1) + "/sequential/" + rr.group(2) + '/'
  
    # event of opening a chapter
    rr = cidre8.search(event_type)
    if (rr):
        return rr.group(1) + "/chapter/" + rr.group(2)

    # event of jump_to_id
    rr = cidre9.search(event_type)
    if (rr):
        return rr.group(1) + "/jump_to_id/" + rr.group(2)

    # event of xblock with i4x
    rr = cidre10.match(event_type)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        # sys.stderr.write("ok mid = %s\n" % mid)
        return mid

    rr = cidre10.match(path)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        # sys.stderr.write("ok mid = %s\n" % mid)
        return mid

    if type(event) in [str, unicode] and event.startswith('input_'):
        #rr = cidre11.search(doc.get('page', ''))
        rr2 = cidre11a.search(event)
        if (rr2):
            mid = "%s/%s/%s/%s" % (rr2.group('org'), rr2.group('course'), rr2.group('mtype'), rr2.group('id'))
            # sys.stderr.write("ok mid = %s\n" % mid)
            return mid

    rr = cidre3.search(event_type)
    if (rr):
        if (cidre3a.search(event_type)):   # handle goto_position specially: append new seq position
            try:
                mid = rr.group(1) + '/' + event['POST']['position'][0]
                return mid
            except Exception as err:
                sys.stderr.write("Failed to handle goto_position for" + doc.get('_id', '<unknown>') + "\n")
                sys.stderr.write("%s\n" % json.dumps(doc, indent=4))
        return rr.group(1)
  
    rr = cidre3b.search(event_type)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        return mid
  
    if type(event) in [str, unicode]:
        rr = cidre3c.search(event)
        if (rr):
            mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
            return mid

    if (type(event)==str or type(event)==unicode):	# all the rest of the patterns need event to be a dict
        return

    if (type(event)==dict and event.get('problem_id')): # assumes event is js, not string
        rr = cidre3.search(event['problem_id'])
        if (rr):
            return rr.group(1)
  
    if (type(event)==dict and event.get('id')): # assumes event is js, not string
        rr = cidre4.search(event['id'])
        if (rr):
            return rr.group(1) + "/" + rr.group(2) + "/video/" + rr.group(3)

    return None

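In this example json.dumps is mostly a debugging aid: whenever a record cannot be matched as expected, the whole tracking-log document is pretty-printed to stderr with indent=4 so the unparsed record is easy to read. A small sketch of that logging pattern with a made-up document:

# Sketch of the pretty-printed debug output used above; the sample doc is
# made up, not a real tracking-log record.
import json
import sys

doc = {
    "event_type": "problem_check",
    "event": "input_i4x-Org-Course-problem-abc123_2_1=42",
    "page": "https://example.org/courses/Org/Course/run/courseware",
}

sys.stderr.write("parse failed on %s\n" % json.dumps(doc, indent=4, sort_keys=True))
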
Example 47

Project: edx2bigquery
Source File: addmoduleid.py
View license
def guess_module_id(doc):

    event = doc['event']
    event_type = doc['event_type']
    path = doc.get('context', {}).get('path', '')
  
    # opaque keys

    rr = okre1.search(event_type)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        # sys.stderr.write("ok mid = %s\n" % mid)
        return mid
  
    rr = okre1.search(path)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        # sys.stderr.write("ok mid = %s\n" % mid)
        return mid

    if ('problem' in event_type and type(event) in [str, unicode] and event.startswith("input_")):
        page = doc.get('page', '') or ''
        # sys.stderr.write("page=%s\n" % page)
        rr = okre2.search(page)
        if (rr):
            rr2 = okre2a.search(event.split('&',1)[0])
            if rr2:
                mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), 'problem', rr2.group('id'))
                # sys.stderr.write("ok mid = %s\n" % mid)
                return mid
        rr2 = cidre11a.search(event)
        if (rr2):
            mid = "%s/%s/%s/%s" % (rr2.group('org'), rr2.group('course'), rr2.group('mtype'), rr2.group('id'))
            # sys.stderr.write("ok mid = %s\n" % mid)
            return mid
        sys.stderr.write("ok parse failed on %s" % json.dumps(doc, indent=4))
  
    if (event_type=="problem_graded" and type(event)==list and len(event)>0 and event[0].startswith("input_")):
        page = doc.get('page', '') or ''
        # sys.stderr.write("page=%s\n" % page)
        rr = okre2.search(page)
        if (rr):
            rr2 = okre2a.search(event[0])
            if rr2:
                mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), 'problem', rr2.group('id'))
                # sys.stderr.write("ok mid = %s\n" % mid)
                return mid
        rr2 = cidre11a.search(event[0])
        if (rr2):
            mid = "%s/%s/%s/%s" % (rr2.group('org'), rr2.group('course'), rr2.group('mtype'), rr2.group('id'))
            # sys.stderr.write("ok mid = %s\n" % mid)
            return mid
        sys.stderr.write("ok parse failed on %s" % json.dumps(doc, indent=4))

    rr = okre3.search(event_type)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        #sys.stderr.write("ok mid = %s\n" % mid)
        return mid
  
    rr = okre3.search(path)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        #sys.stderr.write("ok mid = %s\n" % mid)
        return mid

    if not type(event)==dict:
        event_dict = None
        try:
            event_dict = json.loads(event)
        except:
            pass
        if type(event_dict)==dict and 'id' in event_dict:
            event = event_dict

    if (type(event)==dict and 'id' in event and type(event['id']) in [str, unicode]):
        eid = event['id']
        rr = okre4.search(eid)
        if (rr):
            mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
            return mid
        rr = okre5a.search(eid)
        if (rr):
            mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
            #sys.stderr.write("ok mid = %s\n" % mid)
            return mid
        if event_type=='play_video' and '/' not in eid:
            rr = okre5.search(doc.get('page', '') or '')
            if (rr):
                mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), 'video', eid)
                return mid

    elif (type(event) in [str, unicode]):
        rr = okre4.search(event)
        if (rr):
            mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
            return mid

    # sys.stderr.write('path=%s\n' % json.dumps(path, indent=4))
    # sys.stderr.write('doc=%s\n' % json.dumps(doc, indent=4))
    # return

    # non-opaque keys

    if event_type in ['add_resource', 'delete_resource', 'recommender_upvote']:
        return None

    if type(event)==dict and ('id' in event) and not (type(event['id']) in [str, unicode]):
        return None

    if doc['event_source']=='browser':
        try:
            m = cidre3.search(event['id'])
            if m:
                # print "="*20 + "checking event id"
                # special handling for seq_goto or seq_next, so we append seq_num to sequential's module_id
                if ((event_type=='seq_goto') or (event_type=="seq_next")):
                    mid = m.group(1) + "/" + str(event['new'])
                    # sys.stderr.write("mid=" + mid + " for %s\n" % doc)
                    return mid
                return m.group(1)
        except Exception as err:
            pass

        if (event_type=='page_close'):
            rr = cidre7.search(doc['page'])
            # sys.stderr.write('checking page_close, rr=%s' % rr)
            if (rr):
                return rr.group(1) + "/sequential/" + rr.group(2) + '/'
            rr = cidre8.search(doc['page'])
            if (rr):
                return rr.group(1) + "/chapter/" + rr.group(2) + '/'
  
        try:
          rr = cidre3.search(event.problem)        # for problem_show ajax
          if (rr):
              return rr.group(1)
        except Exception as err:
            pass
  
        if (type(event)==str or type(event)==unicode):
            rr = cidre5.search(event)
            if (rr):
                return rr.group(1) + '/' + rr.group(2) + '/problem/' + rr.group(3)

        try:
            rr = cidre5.search(event[0])   # for problem_graded events from browser
            if (rr):
                return rr.group(1) + '/' + rr.group(2) + '/problem/' + rr.group(3)
        except Exception as err:
            pass
  
    # server events - ones which do not depend on event (just event_type)

    rr = fidre5.search(event_type)
    if (rr):
        return rr.group(1) + "/forum/" + rr.group(3)
  
    rr = fidre6.search(event_type)
    if (rr):
        return rr.group(1) + "/forum/" + rr.group(4)
  
    rr = fidre7.search(event_type)
    if (rr):
        return rr.group(1) + "/forum/new"
  
    rr = fidre8.search(event_type)
    if (rr):
        return rr.group(1) + "/forum/" + rr.group(4)
  
    # event of going to sequence and seeing last page visited, eg:
    rr = cidre6.search(event_type)
    if (rr):
        return rr.group(1) + "/sequential/" + rr.group(2) + '/'
  
    # event of opening a chapter
    rr = cidre8.search(event_type)
    if (rr):
        return rr.group(1) + "/chapter/" + rr.group(2)

    # event of jump_to_id
    rr = cidre9.search(event_type)
    if (rr):
        return rr.group(1) + "/jump_to_id/" + rr.group(2)

    # event of xblock with i4x
    rr = cidre10.match(event_type)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        # sys.stderr.write("ok mid = %s\n" % mid)
        return mid

    rr = cidre10.match(path)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        # sys.stderr.write("ok mid = %s\n" % mid)
        return mid

    if type(event) in [str, unicode] and event.startswith('input_'):
        #rr = cidre11.search(doc.get('page', ''))
        rr2 = cidre11a.search(event)
        if (rr2):
            mid = "%s/%s/%s/%s" % (rr2.group('org'), rr2.group('course'), rr2.group('mtype'), rr2.group('id'))
            # sys.stderr.write("ok mid = %s\n" % mid)
            return mid

    rr = cidre3.search(event_type)
    if (rr):
        if (cidre3a.search(event_type)):   # handle goto_position specially: append new seq position
            try:
                mid = rr.group(1) + '/' + event['POST']['position'][0]
                return mid
            except Exception as err:
                sys.stderr.write("Failed to handle goto_position for" + doc.get('_id', '<unknown>') + "\n")
                sys.stderr.write("%s\n" % json.dumps(doc, indent=4))
        return rr.group(1)
  
    rr = cidre3b.search(event_type)
    if (rr):
        mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
        return mid
  
    if type(event) in [str, unicode]:
        rr = cidre3c.search(event)
        if (rr):
            mid = "%s/%s/%s/%s" % (rr.group('org'), rr.group('course'), rr.group('mtype'), rr.group('id'))
            return mid

    if (type(event)==str or type(event)==unicode):	# all the rest of the patterns need event to be a dict
        return

    if (type(event)==dict and event.get('problem_id')): # assumes event is js, not string
        rr = cidre3.search(event['problem_id'])
        if (rr):
            return rr.group(1)
  
    if (type(event)==dict and event.get('id')): # assumes event is js, not string
        rr = cidre4.search(event['id'])
        if (rr):
            return rr.group(1) + "/" + rr.group(2) + "/video/" + rr.group(3)

    return None
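
The commented-out sys.stderr.write(...) calls in this helper show the role json.dumps plays here: pretty-printing a parsed tracking-log entry when module-id extraction needs debugging. Below is a minimal, self-contained sketch of that pattern; the sample log line and its field values are illustrative stand-ins, not data from edx2bigquery.

import json
import sys

# A tracking log holds one JSON document per line; parse it into a dict.
sample_line = '{"event_source": "browser", "event_type": "play_video", "event": {"id": "i4x-MITx-6_002x-video-S1V1"}}'
doc = json.loads(sample_line)

# Pretty-print the parsed document so nested fields such as doc['event'] are easy to inspect.
sys.stderr.write('doc=%s\n' % json.dumps(doc, indent=4))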

Example 48

Project: edx2bigquery
Source File: edx2course_axis.py
View license
def make_axis(dir):
    '''
    return dict of {course_id : { policy, xbundle, axis (as list of Axel elements) }}
    '''
    
    courses = []
    log_msg = []

    def logit(msg, nolog=False):
        if not nolog:
            log_msg.append(msg)
        print msg

    dir = path(dir)

    if os.path.exists(dir / 'roots'):	# if roots directory exists, use that for different course versions
        # get roots
        roots = glob.glob(dir / 'roots/*.xml')
        courses = [ CourseInfo(fn, '', dir) for fn in roots ]

    else:    # single course.xml file - use the different policy files in the policies directory to find course runs

        fn = dir / 'course.xml'
    
        # get semesters
        policies = glob.glob(dir/'policies/*.json')
        assetsfn = dir / 'policies/assets.json'
        if str(assetsfn) in policies:
            policies.remove(assetsfn)
        if not policies:
            policies = glob.glob(dir/'policies/*/policy.json')
        if not policies:
            logit("Error: no policy files found!")
        
        courses = [ CourseInfo(fn, pfn) for pfn in policies ]


    logit("%d course runs found: %s" % (len(courses), [c.url_name for c in courses]))
    
    ret = {}

    # construct axis for each policy
    for cinfo in courses:
        policy = cinfo.policy
        semester = policy.semester
        org = cinfo.org
        course = cinfo.course
        cid = '%s/%s/%s' % (org, course, semester)
        logit('course_id=%s' %  cid)
    
        cfn = dir / ('course/%s.xml' % semester)
        
        # generate XBundle for course
        xml = etree.parse(cfn).getroot()
        xb = xbundle.XBundle(keep_urls=True, skip_hidden=True, keep_studio_urls=True)
        xb.policy = policy.policy
        cxml = xb.import_xml_removing_descriptor(dir, xml)

        # append metadata
        metadata = etree.Element('metadata')
        cxml.append(metadata)
        policy_xml = etree.Element('policy')
        metadata.append(policy_xml)
        policy_xml.text = json.dumps(policy.policy)
        grading_policy_xml = etree.Element('grading_policy')
        metadata.append(grading_policy_xml)
        grading_policy_xml.text = json.dumps(policy.grading_policy)
    
        bundle = etree.tostring(cxml, pretty_print=True)
        #print bundle[:500]
        index = [1]
        caxis = []
    
        def walk(x, seq_num=1, path=[], seq_type=None, parent_start=None, parent=None, chapter=None,
                 parent_url_name=None, split_url_name=None):
            '''
            Recursively traverse course tree.  
            
            x        = current etree element
            seq_num  = sequence of current element in its parent, starting from 1
            path     = list of url_name's to current element, following edX's hierarchy conventions
            seq_type = problemset, sequential, or videosequence
            parent_start = start date of parent of current etree element
            parent   = parent module
            chapter  = the last chapter module_id seen while walking through the tree
            parent_url_name = url_name of parent
            split_url_name   = url_name of split_test element if this subtree is in a split_test, otherwise None
            '''
            url_name = x.get('url_name',x.get('url_name_orig',''))
            if not url_name:
                dn = x.get('display_name')
                if dn is not None:
                    url_name = dn.strip().replace(' ','_')     # 2012 convention for converting display_name to url_name
                    url_name = url_name.replace(':','_')
                    url_name = url_name.replace('.','_')
                    url_name = url_name.replace('(','_').replace(')','_').replace('__','_')
            
            data = None
            start = None

            if not FORCE_NO_HIDE:
                hide = policy.get_metadata(x, 'hide_from_toc')
                if hide is not None and not hide=="false":
                    logit('[edx2course_axis] Skipping %s (%s), it has hide_from_toc=%s' % (x.tag, x.get('display_name','<noname>'), hide))
                    return

            if x.tag=='video':	# special: for video, let data = youtube ID(s)
                data = x.get('youtube','')
                if data:
                    # old ytid format - extract just the 1.0 part of this 
                    # 0.75:JdL1Vo0Hru0,1.0:lbaG3uiQ6IY,1.25:Lrj0G8RWHKw,1.50:54fs3-WxqLs
                    ytid = data.replace(' ','').split(',')
                    ytid = [z[1] for z in [y.split(':') for y in ytid] if z[0]=='1.0']
                    # print "   ytid: %s -> %s" % (x.get('youtube',''), ytid)
                    if ytid:
                        data = ytid
                if not data:
                    data = x.get('youtube_id_1_0', '')
                if data:
                    data = '{"ytid": "%s"}' % data

            if x.tag=="split_test":
                data = {}
                to_copy = ['group_id_to_child', 'user_partition_id']
                for tc in to_copy:
                    data[tc] = x.get(tc, None)

            if x.tag=='problem' and x.get('weight') is not None and x.get('weight'):
                try:
                    # Changed from string to dict. In next code block.
                    data = {"weight": "%f" % float(x.get('weight'))}
                except Exception as err:
                    logit("    Error converting weight %s" % x.get('weight'))

            ### Had a hard time making my code work within the try/except for weight. Happy to improve
            ### Also note, weight is typically missing in problems. So I find it weird that we throw an exception.
            if x.tag=='problem':
                # Initialize data if no weight
                if not data:
                    data = {}

                # meta will store all problem related metadata, then be used to update data
                meta = {}
                # Items is meant to help debug - an ordered list of encountered problem types with url names
                # Likely should not be pulled to Big Query 
                meta['items'] = []
                # Known Problem Types
                known_problem_types = ['multiplechoiceresponse','numericalresponse','choiceresponse',
                                       'optionresponse','stringresponse','formularesponse',
                                       'customresponse','fieldset']

                # Loop through all child nodes in a problem. If encountering a known problem type, add metadata.
                for a in x:
                    if a.tag in known_problem_types:
                        meta['items'].append({'itype':a.tag,'url_name':a.get('url_name')})

                ### Check for accompanying image
                images = x.findall('.//img')
                # meta['has_image'] = False
                
                if images and len(images)>0:
                    meta['has_image'] = True #Note, one can use a.get('src'), but needs to account for multiple images
                    # print meta['img'],len(images)

                ### Search for all solution tags in a problem
                solutions = x.findall('.//solution')
                # meta['has_solution'] = False

                if solutions and len(solutions)>0:
                    text = ''
                    for sol in solutions:
                        text = text.join(html.tostring(e, pretty_print=False) for e in sol)
                        # This if statement checks each solution. Note, many MITx problems have multiple solution tags.
                        # In 8.05x it is common to put an image in one solution tag and the text in a second, so we check each tag.
                        # If we find one solution with > 65 char, or one solution with an image, we set meta['solution'] = True
                        if len(text) > 65 or 'img src' in text:
                            meta['has_solution'] = True

                ### If meta is empty, log all tags for debugging later. 
                if len(meta['items'])==0:
                    logit('item type not found - here is the list of tags:['+','.join(a.tag if a else ' ' for a in x)+']')
                    # print 'problem type not found - here is the list of tags:['+','.join(a.tag for a in x)+']'

                ### Add easily accessible metadata for problems
                # num_items: number of items
                # itype: problem type - note, mixed is used when items are not of same type
                if len(meta['items']) > 0:
                    # Number of Items
                    meta['num_items'] = len(meta['items'])

                    # Problem Type
                    if all(meta['items'][0]['itype'] == item['itype'] for item in meta['items']):
                        meta['itype'] = meta['items'][0]['itype']
                        # print meta['items'][0]['itype']
                    else:
                        meta['itype'] = 'mixed'

                # Update data field
                ### ! For now, removing the items field. 
                del meta["items"]               

                data.update(meta)
                data = json.dumps(data)

            if x.tag=='html':
                iframe = x.find('.//iframe')
                if iframe is not None:
                    logit("   found iframe in html %s" % url_name)
                    src = iframe.get('src','')
                    if 'https://www.youtube.com/embed/' in src:
                        m = re.search('embed/([^"/?]+)', src)
                        if m:
                            data = '{"ytid": "%s"}' % m.group(1)
                            logit("    data=%s" % data)
                
            if url_name:              # url_name is mandatory if we are to do anything with this element
                # url_name = url_name.replace(':','_')
                dn = x.get('display_name', url_name)
                try:
                    #dn = dn.decode('utf-8')
                    dn = unicode(dn)
                    dn = fix_bad_unicode(dn)
                except Exception as err:
                    logit('unicode error, type(dn)=%s'  % type(dn))
                    raise
                pdn = policy.get_metadata(x, 'display_name')      # policy display_name - if given, let that override default
                if pdn is not None:
                    dn = pdn

                #start = date_parse(x.get('start', policy.get_metadata(x, 'start', '')))
                start = date_parse(policy.get_metadata(x, 'start', '', parent=True))
                
                if parent_start is not None and start < parent_start:
                    if VERBOSE_WARNINGS:
                        logit("    Warning: start of %s element %s happens before start %s of parent: using parent start" % (start, x.tag, parent_start), nolog=True)
                    start = parent_start
                #print "start for %s = %s" % (x, start)
                
                # drop bad due date strings
                if date_parse(x.get('due',None), retbad=True)=='Bad':
                    x.set('due', '')

                due = date_parse(policy.get_metadata(x, 'due', '', parent=True))
                if x.tag=="problem":
                    logit("    setting problem due date: for %s due=%s" % (url_name, due), nolog=True)

                gformat = x.get('format', policy.get_metadata(x, 'format', ''))
                if url_name=='hw0':
                    logit( "gformat for hw0 = %s" % gformat)

                graded = x.get('graded', policy.get_metadata(x, 'graded', ''))
                if not (type(graded) in [unicode, str]):
                    graded = str(graded)

                # compute path
                # The hierarchy goes: `course > chapter > (problemset | sequential | videosequence)`
                if x.tag=='chapter':
                    path = [url_name]
                elif x.tag in ['problemset', 'sequential', 'videosequence', 'proctor', 'randomize']:
                    seq_type = x.tag
                    path = [path[0], url_name]
                else:
                    path = path[:] + [str(seq_num)]      # note arrays are passed by reference, so copy, don't modify
                    
                # compute module_id
                if x.tag=='html':
                    module_id = '%s/%s/%s/%s' % (org, course, seq_type, '/'.join(path[1:3]))  # module_id which appears in tracking log
                else:
                    module_id = '%s/%s/%s/%s' % (org, course, x.tag, url_name)
                
                # debugging
                # print "     module %s gformat=%s" % (module_id, gformat)

                # done with getting all info for this axis element; save it
                path_str = '/' + '/'.join(path)
                ae = Axel(cid, index[0], url_name, x.tag, gformat, start, due, dn, path_str, module_id, data, chapter, graded,
                          parent_url_name,
                          not split_url_name==None,
                          split_url_name)
                caxis.append(ae)
                index[0] += 1
            else:
                if VERBOSE_WARNINGS:
                    if x.tag in ['transcript', 'wiki', 'metadata']:
                        pass
                    else:
                        logit("Missing url_name for element %s (attrib=%s, parent_tag=%s)" % (x, x.attrib, (parent.tag if parent is not None else '')))

            # chapter?
            if x.tag=='chapter':
                the_chapter = module_id
            else:
                the_chapter = chapter

            # done processing this element, now process all its children
            if (not x.tag in ['html', 'problem', 'discussion', 'customtag', 'poll_question', 'combinedopenended', 'metadata']):
                inherit_seq_num = (x.tag=='vertical' and not url_name)    # if <vertical> with no url_name then keep seq_num for children
                if not inherit_seq_num:
                    seq_num = 1
                for y in x:
                    if (not str(y).startswith('<!--')) and (not y.tag in ['discussion', 'source']):
                        if not split_url_name and x.tag=="split_test":
                            split_url_name = url_name
                                
                        walk(y, seq_num, path, seq_type, parent_start=start, parent=x, chapter=the_chapter,
                             parent_url_name=url_name,
                             split_url_name=split_url_name,
                        )
                        if not inherit_seq_num:
                            seq_num += 1
                
        walk(cxml)
        ret[cid] = dict(policy=policy.policy, 
                        bundle=bundle, 
                        axis=caxis, 
                        grading_policy=policy.grading_policy,
                        log_msg=log_msg,
                        )
    
    return ret
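
Note how json.dumps is used in make_axis: the course policy and grading policy dicts are serialized to JSON strings and stored as the text of <policy> and <grading_policy> elements inside the bundle's <metadata> node, so they travel with the course XML. A stripped-down sketch of that step follows; the policy contents are made-up placeholders, not real course data.

import json
from lxml import etree

policy = {"course/2013_Spring": {"start": "2013-02-04T12:00"}}
grading_policy = {"GRADER": [{"type": "Homework", "weight": 0.4}]}

cxml = etree.Element('course')
metadata = etree.SubElement(cxml, 'metadata')

# Embed each dict as a JSON text node, mirroring the metadata block built above.
etree.SubElement(metadata, 'policy').text = json.dumps(policy)
etree.SubElement(metadata, 'grading_policy').text = json.dumps(grading_policy)

bundle = etree.tostring(cxml, pretty_print=True)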

Example 50

Project: edx2bigquery
Source File: run_external.py
View license
def run_external_script(extcmd, param, ecinfo, course_id):
    """
    Run external script on specified course.

    extcmd = string specifying external command to run
    param = command line parameters, including extparam
    ecinfo = external command info from edx2bigquery_config
    course_id = course_id to run external command on
    """
    # use default for base set of parameters
    ed_name = ecinfo.get('default_parameters', 'DEFAULT')
    settings = ecinfo.get(ed_name, {})
    settings.update(ecinfo.get(extcmd))
    # print "settings: ", json.dumps(settings, indent=4)
    
    print settings['name']
    
    if param.verbose:
        print settings.get('description', '')

    cidns = course_id.replace('/', '__')
    cidns_nodots = course_id.replace('/', '__').replace('.', '_').replace('-', '_')

    mypath = path(os.path.realpath(__file__)).dirname()
    edx2bigquery_context = {'lib': mypath / "lib",
                            'bin': mypath / "bin",
                        }

    the_template = settings['template'].format(**edx2bigquery_context)
    fnpre = settings['filename_prefix']
    lfn = "%s-%s.log" % (fnpre, cidns)
    if settings.get('logs_dir'):
        lfn = path(settings['logs_dir']) / lfn

    try:
        ofn = settings['script_fn'].format(filename_prefix=fnpre, cidns=cidns)
    except Exception as err:
        print "oops, errr %s" % str(err)
        print "settings=", json.dumps(settings, indent=4)
        raise
    cwd = os.getcwd()

    the_date = str(datetime.datetime.now())

    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=param.use_dataset_latest)
    table_prefix = dataset

    if param.force_recompute:
        param.force_recompute = 1
    else:
        param.force_recompute = 0

    context = {'course_id': course_id,
               'script_name': ofn,
               'the_date': the_date,
               'cidns': cidns,
               'cidns_nodots': cidns_nodots,
               'template_file': the_template,
               'log_file': lfn,
               'filename_prefix': fnpre,
               'filename_prefix_cidns': "%s__%s" % (fnpre, cidns),
               'working_dir': cwd,
               'table_prefix': table_prefix,
               'lib_dir': edx2bigquery_context['lib'],
               'bin_dir': edx2bigquery_context['bin'],
    }
    context.update(settings)
    context.update(param.__dict__)

    rundir = settings['run_dir'].format(**context)
    runcmd = settings['script_cmd'].format(**context)

    tem = codecs.open(the_template).read()
    tem = unicode(tem)
    try:
        # script_file = tem.format(**context)
        script_file = Template(tem).render(**context)
    except Exception as err:
        print "Oops, cannot properly format template %s" % the_template
        print "Error %s" % str(err)
        print "context: ", json.dumps(context, indent=4)
        raise
    ofndir = path(ofn).dirname()
    if not os.path.exists(ofndir):
        print "[Warning] Directory %s doesn't exist - creating it" % ofndir
        os.mkdir(ofndir)
    fp = codecs.open(ofn, 'w', encoding="utf8")
    fp.write(script_file)
    fp.close()
    print "Generated %s" % ofn

    # if depends_on is defined, and force_recompute is not true, then skip
    # run if output already exists and is newer than all depends_on tables.

    depends_on = settings.get('depends_on')
    output_table = settings.get('output_table')
    if depends_on and not type(depends_on)==list:
        depends_on = [ depends_on ]
    do_compute = param.force_recompute
    if (not param.force_recompute) and depends_on and output_table:
        # does output already exist?
        has_output = False
        try:
            tinfo = bqutil.get_bq_table_info(dataset, output_table)
            if tinfo:
                has_output = True
        except:
            pass
        if not has_output:
            print "Output table %s.%s doesn't exist: running" % (dataset, output_table)
            do_compute = True
        else:
            table_date = tinfo['lastModifiedTime']
            for deptab in depends_on:
                try:
                    dtab_date = bqutil.get_bq_table_last_modified_datetime(dataset, deptab)
                except Exception as err:
                    raise Exception("[run_external] missing dependent table %s.%s" % (dataset, deptab))
                if not dtab_date:
                    raise Exception("[run_external] missing dependent table %s.%s" % (dataset, deptab))
                if table_date and dtab_date > table_date:
                    do_compute = True
                    break
            if not do_compute:
                print "Output table %s.%s exists and is newer than %s, skipping" % (dataset, output_table, depends_on)
            
    if do_compute:
        os.chdir(rundir)
        print "Working directory: %s" % rundir
        print "Logging to %s" % lfn
        print "Run command: %s" % runcmd
        sys.stdout.flush()
        if not param.skiprun:
            start = datetime.datetime.now()

            if param.submit_condor:
                condor_template_fn = settings.get('condor_job_template', '').format(**edx2bigquery_context)
                if not condor_template_fn:
                    raise Exception("[run_external] missing condor_job_template specification for %s" % (extcmd))
                condor_submit_fn = "CONDOR/{filename_prefix}-{cidns}.submit".format(**context)
                context.update({ 'MEMORY': 32768,
                                 'arguments': '{script_name}'.format(**context),
                                 'executable': context['script_cmd'],
                                 'input_file': '',
                                 'filename': condor_submit_fn,
                                 })
                condor_template = Template(open(condor_template_fn).read()).render(**context)
                dirs = ['CONDOR', 'JOBS']
                for dir in dirs:
                    if not os.path.exists(dir):
                        os.mkdir(dir)
                fp = open(condor_submit_fn, 'w')
                fp.write(condor_template)
                fp.close()
                cmd = "condor_submit %s" % condor_submit_fn
                print cmd
                jobid = None
                for k in os.popen(cmd):
                    m = re.search('submitted to cluster ([0-9]+)', k)
                    if m:
                        jobid = m.group(1)
                dt = str(datetime.datetime.now())
                jobfile = 'condor_jobs.csv'
                open(jobfile, 'a').write("%s,%s,%s,%s\n" % (course_id, dt, jobid, lfn))
                print "[%s] Submitted as condor job %s at %s" % (course_id, jobid, dt)
                # print "[run_external] submitted %s, job=%s" % (extcmd, jobnum)
                return
            else:
                os.system(runcmd)

            if settings.get('type')=="stata":
                # cleanup leftover log file after stata batch run
                batch_log = ofn.split('.')[0] + ".log"
                if os.path.exists(batch_log):
                    os.unlink(batch_log)
                    print "Removed old log file %s" % batch_log

            end = datetime.datetime.now()
            has_output = False
            try:
                tinfo = bqutil.get_bq_table_info(dataset, output_table)
                if tinfo:
                    has_output = True
            except:
                pass
            success = has_output
            dt = end-start
            print "[run_external] DONE WITH %s, success=%s, dt=%s" % (extcmd, success, dt)
            sys.stdout.flush()
            if param.parallel and not success:
                raise Exception("[run_external] External command %s failed on %s" % (extcmd, course_id))