datetime.datetime

Here are examples of the Python API datetime.datetime, taken from open source projects.

200 Examples
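
Before the project examples, here is a minimal, self-contained sketch (not taken from any of the projects below) of the most common ways datetime.datetime is constructed, parsed, and combined with timedelta. Only the standard library is used.

import datetime

# Build a datetime from explicit components: year, month, day, then optional
# hour, minute, second, microsecond.
dt = datetime.datetime(2006, 1, 3, 14, 30, 0)

# Parse a string with a known format, and render a datetime back to a string.
parsed = datetime.datetime.strptime("2010-05-13 02:29:40", "%Y-%m-%d %H:%M:%S")
print(parsed.strftime("%Y-%m-%d"))              # 2010-05-13

# Current time and simple arithmetic with timedelta.
soon = datetime.datetime.now() + datetime.timedelta(minutes=10)
print((soon - datetime.datetime.now()).total_seconds() > 0)   # True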

Example 1

Project: geraldo
Source File: query.py
def query_class(QueryClass, Database):
    """
    Returns a custom django.db.models.sql.query.Query subclass that is
    appropriate for Oracle.

    The 'Database' module (cx_Oracle) is passed in here so that all the setup
    required to import it only needs to be done by the calling module.
    """
    global _classes
    try:
        return _classes[QueryClass]
    except KeyError:
        pass

    class OracleQuery(QueryClass):
        def resolve_columns(self, row, fields=()):
            index_start = len(self.extra_select.keys())
            values = [self.convert_values(v, None) for v in row[:index_start]]
            for value, field in map(None, row[index_start:], fields):
                values.append(self.convert_values(value, field))
            return values

        def convert_values(self, value, field):
            from django.db.models.fields import DateField, DateTimeField, \
                 TimeField, BooleanField, NullBooleanField, DecimalField, Field
            if isinstance(value, Database.LOB):
                value = value.read()
            # Oracle stores empty strings as null. We need to undo this in
            # order to adhere to the Django convention of using the empty
            # string instead of null, but only if the field accepts the
            # empty string.
            if value is None and isinstance(field, Field) and field.empty_strings_allowed:
                value = u''
            # Convert 1 or 0 to True or False
            elif value in (1, 0) and isinstance(field, (BooleanField, NullBooleanField)):
                value = bool(value)
            # Convert floats to decimals
            elif value is not None and isinstance(field, DecimalField):
                value = util.typecast_decimal(field.format_number(value))
            # cx_Oracle always returns datetime.datetime objects for
            # DATE and TIMESTAMP columns, but Django wants to see a
            # python datetime.date, .time, or .datetime.  We use the type
            # of the Field to determine which to cast to, but it's not
            # always available.
            # As a workaround, we cast to date if all the time-related
            # values are 0, or to time if the date is 1/1/1900.
            # This could be cleaned a bit by adding a method to the Field
            # classes to normalize values from the database (the to_python
            # method is used for validation and isn't what we want here).
            elif isinstance(value, Database.Timestamp):
                # In Python 2.3, the cx_Oracle driver returns its own
                # Timestamp object that we must convert to a datetime class.
                if not isinstance(value, datetime.datetime):
                    value = datetime.datetime(value.year, value.month,
                            value.day, value.hour, value.minute, value.second,
                            value.fsecond)
                if isinstance(field, DateTimeField):
                    # DateTimeField subclasses DateField so must be checked
                    # first.
                    pass
                elif isinstance(field, DateField):
                    value = value.date()
                elif isinstance(field, TimeField) or (value.year == 1900 and value.month == value.day == 1):
                    value = value.time()
                elif value.hour == value.minute == value.second == value.microsecond == 0:
                    value = value.date()
            return value

        def as_sql(self, with_limits=True, with_col_aliases=False):
            """
            Creates the SQL for this query. Returns the SQL string and list
            of parameters.  This is overridden from the original Query class
            to handle the additional SQL Oracle requires to emulate LIMIT
            and OFFSET.

            If 'with_limits' is False, any limit/offset information is not
            included in the query.
            """

            # The `do_offset` flag indicates whether we need to construct
            # the SQL needed to use limit/offset with Oracle.
            do_offset = with_limits and (self.high_mark is not None
                                         or self.low_mark)
            if not do_offset:
                sql, params = super(OracleQuery, self).as_sql(with_limits=False,
                        with_col_aliases=with_col_aliases)
            else:
                # `get_columns` needs to be called before `get_ordering` to
                # populate `_select_alias`.
                self.pre_sql_setup()
                self.get_columns()
                ordering = self.get_ordering()

                # Oracle's ROW_NUMBER() function requires an ORDER BY clause.
                if ordering:
                    rn_orderby = ', '.join(ordering)
                else:
                    # Create a default ORDER BY since none was specified.
                    qn = self.quote_name_unless_alias
                    opts = self.model._meta
                    rn_orderby = '%s.%s' % (qn(opts.db_table),
                        qn(opts.fields[0].db_column or opts.fields[0].column))

                # Ensure the base query SELECTs our special "_RN" column
                self.extra_select['_RN'] = ('ROW_NUMBER() OVER (ORDER BY %s)'
                                            % rn_orderby, '')
                sql, params = super(OracleQuery, self).as_sql(with_limits=False,
                                                        with_col_aliases=True)

                # Wrap the base query in an outer SELECT * with boundaries on
                # the "_RN" column.  This is the canonical way to emulate LIMIT
                # and OFFSET on Oracle.
                sql = 'SELECT * FROM (%s) WHERE "_RN" > %d' % (sql, self.low_mark)
                if self.high_mark is not None:
                    sql = '%s AND "_RN" <= %d' % (sql, self.high_mark)

            return sql, params

        def set_limits(self, low=None, high=None):
            super(OracleQuery, self).set_limits(low, high)
            # We need to select the row number for the LIMIT/OFFSET sql.
            # A placeholder is added to extra_select now, because as_sql is
            # too late to be modifying extra_select.  However, the actual sql
            # depends on the ordering, so that is generated in as_sql.
            self.extra_select['_RN'] = ('1', '')

        def clear_limits(self):
            super(OracleQuery, self).clear_limits()
            if '_RN' in self.extra_select:
                del self.extra_select['_RN']

    _classes[QueryClass] = OracleQuery
    return OracleQuery
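
The convert_values method above falls back to a heuristic when the field type is unavailable: a TIMESTAMP value becomes a time if its date is 1900-01-01, a date if all its time components are zero, and otherwise stays a full datetime. Below is a minimal standalone sketch of that heuristic (not part of the geraldo/Django source), using only datetime:

import datetime

def collapse_timestamp(value):
    # Fallback rules when no field type is available to guide the cast.
    if value.year == 1900 and value.month == value.day == 1:
        return value.time()    # 1900-01-01 sentinel date -> keep only the time
    if value.hour == value.minute == value.second == value.microsecond == 0:
        return value.date()    # all time parts are zero -> keep only the date
    return value               # otherwise keep the full datetime

print(collapse_timestamp(datetime.datetime(2009, 7, 4)))           # 2009-07-04
print(collapse_timestamp(datetime.datetime(1900, 1, 1, 8, 15)))    # 08:15:00
print(collapse_timestamp(datetime.datetime(2009, 7, 4, 8, 15)))    # 2009-07-04 08:15:00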

Example 2

    def test_minute_buy_and_hold(self):
        with trading.TradingEnvironment():
            start_date = datetime.datetime(
                year=2006,
                month=1,
                day=3,
                hour=0,
                minute=0,
                tzinfo=pytz.utc)
            end_date = datetime.datetime(
                year=2006,
                month=1,
                day=5,
                hour=0,
                minute=0,
                tzinfo=pytz.utc)

            sim_params = SimulationParameters(
                period_start=start_date,
                period_end=end_date,
                emission_rate='daily',
                data_frequency='minute')

            algo = BuyAndHoldAlgorithm(
                sim_params=sim_params,
                data_frequency='minute')

            first_date = datetime.datetime(2006, 1, 3, tzinfo=pytz.utc)
            first_open, first_close = \
                trading.environment.get_open_and_close(first_date)

            second_date = datetime.datetime(2006, 1, 4, tzinfo=pytz.utc)
            second_open, second_close = \
                trading.environment.get_open_and_close(second_date)

            third_date = datetime.datetime(2006, 1, 5, tzinfo=pytz.utc)
            third_open, third_close = \
                trading.environment.get_open_and_close(third_date)

            benchmark_data = [
                Event({
                    'returns': 0.1,
                    'dt': first_close,
                    'source_id': 'test-benchmark-source',
                    'type': DATASOURCE_TYPE.BENCHMARK
                }),
                Event({
                    'returns': 0.2,
                    'dt': second_close,
                    'source_id': 'test-benchmark-source',
                    'type': DATASOURCE_TYPE.BENCHMARK
                }),
                Event({
                    'returns': 0.4,
                    'dt': third_close,
                    'source_id': 'test-benchmark-source',
                    'type': DATASOURCE_TYPE.BENCHMARK
                }),
            ]

            trade_bar_data = [
                Event({
                    'open_price': 10,
                    'close_price': 15,
                    'price': 15,
                    'volume': 1000,
                    'sid': 1,
                    'dt': first_open,
                    'source_id': 'test-trade-source',
                    'type': DATASOURCE_TYPE.TRADE
                }),
                Event({
                    'open_price': 10,
                    'close_price': 15,
                    'price': 15,
                    'volume': 1000,
                    'sid': 1,
                    'dt': first_open + datetime.timedelta(minutes=10),
                    'source_id': 'test-trade-source',
                    'type': DATASOURCE_TYPE.TRADE
                }),
                Event({
                    'open_price': 15,
                    'close_price': 20,
                    'price': 20,
                    'volume': 2000,
                    'sid': 1,
                    'dt': second_open,
                    'source_id': 'test-trade-source',
                    'type': DATASOURCE_TYPE.TRADE
                }),
                Event({
                    'open_price': 15,
                    'close_price': 20,
                    'price': 20,
                    'volume': 2000,
                    'sid': 1,
                    'dt': second_open + datetime.timedelta(minutes=10),
                    'source_id': 'test-trade-source',
                    'type': DATASOURCE_TYPE.TRADE
                }),
                Event({
                    'open_price': 20,
                    'close_price': 15,
                    'price': 15,
                    'volume': 1000,
                    'sid': 1,
                    'dt': third_open,
                    'source_id': 'test-trade-source',
                    'type': DATASOURCE_TYPE.TRADE
                }),
                Event({
                    'open_price': 20,
                    'close_price': 15,
                    'price': 15,
                    'volume': 1000,
                    'sid': 1,
                    'dt': third_open + datetime.timedelta(minutes=10),
                    'source_id': 'test-trade-source',
                    'type': DATASOURCE_TYPE.TRADE
                }),
            ]

            algo.benchmark_return_source = benchmark_data
            algo.sources = list([trade_bar_data])
            gen = algo._create_generator(sim_params)

            crm = algo.perf_tracker.cumulative_risk_metrics

            first_msg = gen.next()

            self.assertIsNotNone(first_msg,
                                 "There should be a message emitted.")

            # Protects against bug where the positions appeared to be
            # a day late, because benchmarks were triggering
            # calculations before the events for the day were
            # processed.
            self.assertEqual(1, len(algo.portfolio.positions), "There should "
                             "be one position after the first day.")

            self.assertEquals(
                0,
                crm.metrics.algorithm_volatility[algo.datetime.date()],
                "On the first day algorithm volatility does not exist.")

            second_msg = gen.next()

            self.assertIsNotNone(second_msg, "There should be a message "
                                 "emitted.")

            self.assertEqual(1, len(algo.portfolio.positions),
                             "Number of positions should stay the same.")

            # TODO: Hand derive. Current value is just a canary to
            # detect changes.
            np.testing.assert_almost_equal(
                0.050022510129558301,
                crm.algorithm_returns[-1],
                decimal=6)

            third_msg = gen.next()

            self.assertEqual(1, len(algo.portfolio.positions),
                             "Number of positions should stay the same.")

            self.assertIsNotNone(third_msg, "There should be a message "
                                 "emitted.")

            # TODO: Hand derive. Current value is just a canary to
            # detect changes.
            np.testing.assert_almost_equal(
                -0.047639464532418657,
                crm.algorithm_returns[-1],
                decimal=6)
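
The test above builds its period boundaries as timezone-aware datetimes by passing tzinfo=pytz.utc. A minimal sketch of that pattern, assuming pytz is installed (on Python 3 the standard library's datetime.timezone.utc gives an equivalent result):

import datetime
import pytz

start = datetime.datetime(2006, 1, 3, tzinfo=pytz.utc)
end = datetime.datetime(2006, 1, 5, tzinfo=pytz.utc)

print((end - start).days)          # 2
print(start.tzinfo is pytz.utc)    # True

# Standard-library equivalent (Python 3), no third-party dependency:
start_stdlib = datetime.datetime(2006, 1, 3, tzinfo=datetime.timezone.utc)
print(start_stdlib == start)       # True: both denote the same UTC instant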

Example 3

Project: zipline
Source File: test_slippage.py
    def test_orders_limit(self):
        slippage_model = VolumeShareSlippage()
        slippage_model.data_portal = self.data_portal

        # long, does not trade
        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': 100,
                'filled': 0,
                'sid': self.ASSET133,
                'limit': 3.5})
        ]

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[3],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        # long, does not trade - impacted price worse than limit price
        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': 100,
                'filled': 0,
                'sid': self.ASSET133,
                'limit': 3.5})
        ]

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[3],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        # long, does trade
        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': 100,
                'filled': 0,
                'sid': self.ASSET133,
                'limit': 3.6})
        ]

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[3],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 1)
        txn = orders_txns[0][1]

        expected_txn = {
            'price': float(3.50021875),
            'dt': datetime.datetime(
                2006, 1, 5, 14, 34, tzinfo=pytz.utc),
            # we ordered 100 shares, but default volume slippage only allows
            # for 2.5% of the volume.  2.5% * 2000 = 50 shares
            'amount': int(50),
            'sid': int(133),
            'order_id': open_orders[0].id
        }

        self.assertIsNotNone(txn)

        for key, value in expected_txn.items():
            self.assertEquals(value, txn[key])

        # short, does not trade
        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': -100,
                'filled': 0,
                'sid': self.ASSET133,
                'limit': 3.5})
        ]

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[0],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        # short, does not trade - impacted price worse than limit price
        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': -100,
                'filled': 0,
                'sid': self.ASSET133,
                'limit': 3.5})
        ]

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[0],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        # short, does trade
        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': -100,
                'filled': 0,
                'sid': self.ASSET133,
                'limit': 3.4})
        ]

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[1],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 1)
        _, txn = orders_txns[0]

        expected_txn = {
            'price': float(3.49978125),
            'dt': datetime.datetime(
                2006, 1, 5, 14, 32, tzinfo=pytz.utc),
            'amount': int(-50),
            'sid': int(133)
        }

        self.assertIsNotNone(txn)

        for key, value in expected_txn.items():
            self.assertEquals(value, txn[key])

Example 4

Project: inspectors-general
Source File: lsc.py
def report_from(result, landing_url, report_type, year_range):
  if not result.text or result.text in BLACKLIST_REPORT_TITLES:
    # There are a few empty links due to bad html and some links for alternative
    # formats (PDF) that we will just ignore.
    return

  link_text = None
  if result.name == 'a':
    report_url = result.get('href')
    link_text = inspector.sanitize(result.text)
    title = inspector.sanitize("%s %s" % (result.text, result.next_sibling))
  else:
    links = [link for link in result.find_all('a') if link.text.strip()]
    report_url = links[0].get('href')
    link_text = inspector.sanitize(result.a.text)
    title = inspector.sanitize(result.text)
  report_url = urljoin(landing_url, report_url)
  report_filename = os.path.basename(report_url)

  if title.endswith("PDF"):
    title = title[:-3]
  title = title.rstrip(" .")

  prev = result.previous_sibling
  if isinstance(prev, NavigableString) and "See, also:" in prev:
    return None

  report_no_match = REPORT_NO_RE.match(link_text)
  if report_no_match:
    report_id = report_no_match.group(0)
    if "fraud" in report_url.lower():
      report_id = "fraud-alert-" + report_id
    elif "Client_Trust_Fund" in report_url:
      report_id = "CTF-" + report_id
    elif report_filename.startswith("sr"):
      report_id = "special-report-" + report_id
  else:
    report_id, _ = os.path.splitext(report_filename)
    report_id = unquote(report_id)
  report_id = "-".join(report_id.split())
  report_id = report_id.replace("\\", "") # strip backslashes

  estimated_date = False
  published_on = None
  if report_id in REPORT_PUBLISHED_MAP:
    published_on = REPORT_PUBLISHED_MAP[report_id]
  elif link_text == "June 2015":
    published_on = datetime.datetime(2015, 6, 1)
  else:
    published_on_text = None
    try:
      published_on_text = re.search('(\d+/\d+/\d+)', title).groups()[0]
    except AttributeError:
      pass
    if not published_on_text:
      try:
        published_on_text = re.search('(\w+ \d+, \d+)', title).groups()[0]
      except AttributeError:
        pass
    if not published_on_text:
      try:
        published_on_text = re.search('(\d+/\d+)', title).groups()[0]
      except AttributeError:
        pass

    if not published_on_text:
      admin.log_no_date("lsc", report_id, title, report_url)
      return

    if not published_on:
      datetime_formats = [
        '%B %d, %Y',
        '%m/%d/%Y',
        '%m/%d/%y',
        '%m/%Y',
        '%m/%y'
      ]
      for datetime_format in datetime_formats:
        try:
          published_on = datetime.datetime.strptime(published_on_text, datetime_format)
        except ValueError:
          pass
        else:
          break

  if not published_on:
    admin.log_no_date("lsc", report_id, title, report_url)
    return

  if published_on.year not in year_range:
    logging.debug("[%s] Skipping, not in requested range." % report_url)
    return

  report = {
    'inspector': 'lsc',
    'inspector_url': 'https://www.oig.lsc.gov',
    'agency': 'lsc',
    'agency_name': 'Legal Services Corporation',
    'type': report_type,
    'report_id': report_id,
    'url': report_url,
    'title': title,
    'published_on': datetime.datetime.strftime(published_on, "%Y-%m-%d"),
  }

  if estimated_date:
    report['estimated_date'] = estimated_date

  if report_url in ("https://www.oig.lsc.gov/core-legal-services",):
    report['file_type'] = "html"

  if report_url.startswith("https://oig.lsc.gov/mapping/references/eval"):
    report['unreleased'] = True
    report['missing'] = True

  return report
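
The date handling in report_from tries several strptime formats in turn and keeps the first one that parses. A small standalone sketch of that loop (the sample strings below are hypothetical, not taken from the scraped pages):

import datetime

def parse_loose_date(text):
    formats = ['%B %d, %Y', '%m/%d/%Y', '%m/%d/%y', '%m/%Y', '%m/%y']
    for fmt in formats:
        try:
            return datetime.datetime.strptime(text, fmt)
        except ValueError:
            continue        # wrong format, try the next one
    return None             # nothing matched

print(parse_loose_date("June 15, 2015"))    # 2015-06-15 00:00:00
print(parse_loose_date("6/2015"))           # 2015-06-01 00:00:00 (day defaults to 1)
print(parse_loose_date("not a date"))       # None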

Example 5

    def test04__integration_1(self):
        core = mock.Mock()
        playlist = 'Playlist URI'
        core.playlists.lookup('Playlist URI').get().tracks = 'Tracks 811, 821, 823, 827, 829, 839'
        self.assertEqual(core.playlists.lookup.call_count, 1)  # First call when setting up the Mock
        threadcount = threading.active_count()

        am = AlarmManager()

        # Test get_core()
        self.assertTrue(am is am.get_core(core))

        # Test is_set() and threading when NOT set
        self.assertFalse(am.is_set())
        self.assertEqual(threading.active_count(), threadcount)

        # Set alarm to FAR future
        am.set_alarm(datetime.datetime(2055, 4, 28, 7, 59, 15, 324341), playlist, False, 41, 83)

        # Test when set
        self.assertTrue(am.is_set())
        self.assertEqual(threading.active_count(), threadcount + 1)
        self.assertEqual(am.get_ring_time(), b'07:59')
        self.assertFalse(am.random_mode)
        self.assertEqual(am.volume, 41)
        self.assertEqual(am.volume_increase_seconds, 83)

        # Cancel alarm
        am.cancel()

        # Test is_set() and threading when NOT set
        self.assertFalse(am.is_set())
        self.assertEqual(threading.active_count(), threadcount)

        # Set alarm to NEAR future
        am.set_alarm(datetime.datetime.now() + datetime.timedelta(seconds=29), playlist, False, 23, 127)

        # Tests a few seconds BEFORE alarm
        time.sleep(27)
        self.assertTrue(am.is_set())
        self.assertEqual(threading.active_count(), threadcount + 1)
        self.assertFalse(am.random_mode)
        self.assertEqual(am.volume, 23)
        self.assertEqual(am.volume_increase_seconds, 127)
        self.assertEqual(core.playlists.lookup.call_count, 1)  # First call when setting up the Mock

        # Cancel alarm
        am.cancel()

        # Test is_set() and threading when NOT set
        self.assertFalse(am.is_set())
        self.assertEqual(threading.active_count(), threadcount)

        # Sleep 20 seconds more to ensure that alarm will start if not cancelled
        time.sleep(20)

        # Set alarm to NEAR future
        am.set_alarm(datetime.datetime.now() + datetime.timedelta(seconds=31), playlist, True, 3, 17)

        # Test when set
        self.assertTrue(am.is_set())
        self.assertEqual(threading.active_count(), threadcount + 1)
        self.assertTrue(am.random_mode)
        self.assertEqual(am.volume, 3)
        self.assertEqual(am.volume_increase_seconds, 17)

        # Tests a few seconds BEFORE alarm
        time.sleep(29)
        self.assertTrue(am.is_set())
        self.assertEqual(threading.active_count(), threadcount + 1)
        self.assertIsInstance(core.tracklist.consume, mock.Mock)
        self.assertIsInstance(core.tracklist.single, mock.Mock)
        self.assertIsInstance(core.tracklist.repeat, mock.Mock)
        self.assertIsInstance(core.tracklist.random, mock.Mock)
        self.assertIsInstance(core.playback.mute, mock.Mock)
        self.assertIsInstance(core.playback.volume, mock.Mock)
        self.assertEqual(core.playback.stop.call_count, 0)
        self.assertEqual(core.tracklist.clear.call_count, 0)
        self.assertEqual(core.tracklist.add.call_count, 0)
        self.assertEqual(core.playback.next.call_count, 0)
        self.assertEqual(core.playback.play.call_count, 0)
        self.assertEqual(core.playlists.lookup.call_count, 1)  # First call when setting up the Mock

        # Tests a few seconds AFTER alarm START
        time.sleep(8)
        self.assertFalse(am.is_set())
        self.assertEqual(threading.active_count(), threadcount + 1)  # Additional thread is created by adjust_volume()
        self.assertEqual(core.tracklist.consume, False)
        self.assertEqual(core.tracklist.single, False)
        self.assertEqual(core.tracklist.repeat, True)
        self.assertEqual(core.tracklist.random, True)
        self.assertEqual(core.playback.mute, False)
        self.assertEqual(core.playback.volume, 1)  # First step of gradual volume increasing
        core.playback.stop.assert_called_once_with()
        core.tracklist.clear.assert_called_once_with()
        core.tracklist.add.assert_called_once_with('Tracks 811, 821, 823, 827, 829, 839')
        core.playback.next.assert_called_once_with()
        core.playback.play.assert_called_once_with()
        self.assertEqual(core.playlists.lookup.call_count, 2)

        # Further tests of gradual volume increasing
        time.sleep(5.67)  # Race conditions already prevented by previous sleep()
        self.assertEqual(core.playback.volume, 2)
        self.assertEqual(threading.active_count(), threadcount + 1)
        time.sleep(5.67)
        self.assertEqual(core.playback.volume, 2)
        self.assertEqual(threading.active_count(), threadcount + 1)
        time.sleep(5.67)
        self.assertEqual(core.playback.volume, 3)
        self.assertEqual(threading.active_count(), threadcount)
        time.sleep(20)  # More than 3x increase step time
        self.assertEqual(core.playback.volume, 3)
        self.assertEqual(threading.active_count(), threadcount)

        # Test alarm cancellation after alarm has been started
        self.assertFalse(am.is_set())
        am.cancel()
        self.assertFalse(am.is_set())
        self.assertEqual(threading.active_count(), threadcount)

        # Set alarm to FAR future
        am.set_alarm(datetime.datetime(2055, 4, 28, 7, 59, 15, 324341), playlist, False, 41, 83)

        # Test when set
        self.assertTrue(am.is_set())
        self.assertEqual(threading.active_count(), threadcount + 1)
        self.assertEqual(am.get_ring_time(), b'07:59')
        self.assertFalse(am.random_mode)
        self.assertEqual(am.volume, 41)
        self.assertEqual(am.volume_increase_seconds, 83)

        # Cancel alarm
        am.cancel()

        # Test is_set() and threading when NOT set
        self.assertFalse(am.is_set())
        self.assertEqual(threading.active_count(), threadcount)
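
The alarm tests schedule trigger times relative to the current moment with datetime.datetime.now() plus a timedelta. A minimal sketch of computing such a trigger time and formatting it like get_ring_time does (the '%H:%M' format is an assumption based on the asserted b'07:59' value):

import datetime

trigger = datetime.datetime.now() + datetime.timedelta(seconds=30)

remaining = trigger - datetime.datetime.now()
print(0 < remaining.total_seconds() <= 30)    # True: a little time has already elapsed
print(trigger.strftime('%H:%M'))              # e.g. '07:59'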

Example 6

Project: mediadrop
Source File: websetup.py
def add_default_data():
    log.info('Adding default data')

    settings = [
        (u'email_media_uploaded', None),
        (u'email_comment_posted', None),
        (u'email_support_requests', None),
        (u'email_send_from', u'[email protected]'),
        (u'wording_user_uploads', N_(u"Upload your media using the form below. We'll review it and get back to you.")),
        (u'wording_administrative_notes', None),
        (u'wording_display_administrative_notes', u''),
        (u'popularity_decay_exponent', u'4'),
        (u'popularity_decay_lifetime', u'36'),
        (u'rich_text_editor', u'tinymce'),
        (u'google_analytics_uacct', u''),
        (u'featured_category', u'1'),
        (u'max_upload_size', u'314572800'),
        (u'ftp_storage', u'false'),
        (u'ftp_server', u'ftp.someserver.com'),
        (u'ftp_user', u'username'),
        (u'ftp_password', u'password'),
        (u'ftp_upload_directory', u'media'),
        (u'ftp_download_url', u'http://www.someserver.com/web/accessible/media/'),
        (u'ftp_upload_integrity_retries', u'10'),
        (u'akismet_key', u''),
        (u'akismet_url', u''),
        (u'req_comment_approval', u''),
        (u'use_embed_thumbnails', u'true'),
        (u'api_secret_key_required', u'true'),
        (u'api_secret_key', random_string(20)),
        (u'api_media_max_results', u'50'),
        (u'api_tree_max_depth', u'10'),
        (u'general_site_name', u'MediaDrop'),
        (u'general_site_title_display_order', u'prepend'),
        (u'sitemaps_display', u'True'),
        (u'rss_display', u'True'),
        (u'vulgarity_filtered_words', u''),
        (u'primary_language', u'en'),
        (u'advertising_banner_html', u''),
        (u'advertising_sidebar_html', u''),
        (u'comments_engine', u'builtin'),
        (u'facebook_appid', u''),
        (u'youtube_apikey', u''),
    ]
    settings.extend(appearance_settings)

    for key, value in settings:
        s = Setting()
        s.key = key
        s.value = value
        DBSession.add(s)

    admin_user = User()
    admin_user.user_name = u'admin'
    admin_user.display_name = u'Admin'
    admin_user.email_address = u'[email protected]'
    admin_user.password = u'admin'
    DBSession.add(admin_user)

    admin_group = Group(name=u'admins', display_name=u'Admins')
    admin_group.users.append(admin_user)
    DBSession.add(admin_group)

    editor_group = Group(name=u'editors', display_name=u'Editors')
    DBSession.add(editor_group)

    anonymous_group = Group(name=u'anonymous', display_name=u'Everyone (including guests)')
    DBSession.add(anonymous_group)

    authenticated_group = Group(name=u'authenticated', display_name=u'Logged in users')
    DBSession.add(authenticated_group)

    admin_perm = Permission(name=u'admin', groups=[admin_group], 
        description=u'Grants access to the admin panel')
    DBSession.add(admin_perm)

    edit_perm = Permission(name=u'edit', groups=[admin_group, editor_group], 
        description=u'Grants access to edit site content')
    DBSession.add(edit_perm)
    
    view_perm = Permission(name=u'view', 
        groups=[admin_group, anonymous_group, editor_group], 
        description=u'View published media')
    DBSession.add(view_perm)

    upload_perm = Permission(name=u'upload', 
        groups=[admin_group, anonymous_group, editor_group], 
        description=u'Can upload new media')
    DBSession.add(upload_perm)
    media_upload_perm = Permission()
    media_upload_perm.permission_name = u'MEDIA_UPLOAD'
    media_upload_perm.description = u'Grants the ability to upload new media'
    media_upload_perm.groups.append(admin_group)
    media_upload_perm.groups.append(editor_group)
    media_upload_perm.groups.append(anonymous_group)
    DBSession.add(edit_perm)


    category = Category(name=u'Featured', slug=u'featured')
    DBSession.add(category)

    category2 = Category(name=u'Instructional', slug=u'instructional')
    DBSession.add(category2)

    podcast = Podcast()
    podcast.slug = u'hello-world'
    podcast.title = u'Hello World'
    podcast.subtitle = u'My very first podcast!'
    podcast.description = u"""<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>"""
    podcast.category = u'Technology'
    podcast.author = Author(admin_user.display_name, admin_user.email_address)
    podcast.explicit = None
    podcast.copyright = u'Copyright 2009 Xyz'
    podcast.itunes_url = None
    podcast.feedburner_url = None
    DBSession.add(podcast)

    comment = Comment()
    comment.subject = u'Re: New Media'
    comment.author = AuthorWithIP(name=u'John Doe', ip=2130706433)
    comment.body = u'<p>Hello to you too!</p>'
    DBSession.add(comment)

    media = Media()
    media.type = None
    media.slug = u'new-media'
    media.reviewed = True
    media.encoded = False
    media.publishable = False
    media.title = u'New Media'
    media.subtitle = None
    media.description = u"""<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>"""
    media.description_plain = u"""Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."""
    media.author = Author(admin_user.display_name, admin_user.email_address)
    media.categories.append(category)
    media.comments.append(comment)
    DBSession.add(media)

    #XXX The list of default players is actually defined in model.players
    # and should at some point be moved here to avoid inconsistency
    # between the default storage engines and default players.
    remote_url_storage = RemoteURLStorage()
    default_engines = [
        LocalFileStorage(),
        remote_url_storage,
        YoutubeStorage(),
        VimeoStorage(),
        BlipTVStorage(),
        DailyMotionStorage(),
    ]
    for engine in default_engines:
        DBSession.add(engine)

    import datetime
    instructional_media = [
        (u'workflow-in-mediadrop',
        u'Workflow in MediaDrop',
        u'<p>This sceencast explains the publish status feature in MediaDrop.</p><p>Initially all videos uploaded through the front-end or admin panel are placed under &quot;awaiting review&quot; status. Once the administrator hits the &quot;review complete&quot; button, they can upload media. Videos can be added in any format, however, they can only be published if they are in a web-ready format such as FLV, M4V, MP3, or MP4. Alternatively, if they are published through Youtube or Vimeo the encoding step is skipped</p><p>Once uploaded and encoded the administrator can then publish the video.</p>',
        u'This sceencast explains the publish status feature in MediaDrop.\nInitially all videos uploaded through the front-end or admin panel are placed under \"awaiting review\" status. Once the administrator hits the \"review complete\" button, they can upload media. Videos can be added in any format, however, they can only be published if they are in a web-ready format such as FLV, M4V, MP3, or MP4. Alternatively, if they are published through Youtube or Vimeo the encoding step is skipped\nOnce uploaded and encoded the administrator can then publish the video.',
        datetime.datetime(2010, 5, 13, 2, 29, 40),
        218,
        u'http://static.mediadrop.net/files/videos/tutorial-workflow-in-mediadrop.mp4',
        u'video',
        u'mp4',
        ),
        (u'creating-a-podcast-in-mediadrop',
        u'Creating a Podcast in MediaDrop',
        u'<p>This describes the process an administrator goes through in creating a podcast in MediaDrop. An administrator can enter information that will automatically generate the iTunes/RSS feed information. Any episodes published to a podcast will automatically publish to iTunes/RSS.</p>',
        u'This describes the process an administrator goes through in creating a podcast in MediaDrop. An administrator can enter information that will automatically generate the iTunes/RSS feed information. Any episodes published to a podcast will automatically publish to iTunes/RSS.',
        datetime.datetime(2010, 5, 13, 2, 33, 44),
        100,
        u'http://static.mediadrop.net/files/videos/tutorial-create-podcast-in-mediadrop.mp4',
        u'video',
        u'mp4',
        ),
        (u'adding-a-video-in-mediadrop',
        u'Adding a Video in MediaDrop',
        u'<p>This screencast shows how video or audio can be added in MediaDrop.</p><p>MediaDrop supports a wide range of formats including (but not limited to): YouTube, Vimeo, Amazon S3, Bits on the Run, BrightCove, Kaltura, and either your own server or someone else\'s.</p><p>Videos can be uploaded in any format, but can only be published in web-ready formats such as FLV, MP3, M4V, MP4 etc.</p>',
        u'This screencast shows how video or audio can be added in MediaDrop.\nMediaDrop supports a wide range of formats including (but not limited to): YouTube, Vimeo, Amazon S3, Bits on the Run, BrightCove, Kaltura, and either your own server or someone else\'s.\nVideos can be uploaded in any format, but can only be published in web-ready formats such as FLV, MP3, M4V, MP4 etc.',
        datetime.datetime(2010, 5, 13, 2, 37, 36),
        169,
        u'http://static.mediadrop.net/files/videos/tutorial-add-video-in-mediadrop.mp4',
        u'video',
        u'mp4',
        ),
    ]

    name = u'MediaDrop Team'
    email = u'[email protected]'
    for slug, title, desc, desc_plain, publish_on, duration, url, type_, container in instructional_media:
        media = Media()
        media.author = Author(name, email)
        media.description = desc
        media.description_plain = desc_plain
        media.duration = duration
        media.publish_on = publish_on
        media.slug = slug
        media.title = title
        media.type = type_

        media_file = MediaFile()
        media_file.container = container
        media_file.created_on = publish_on
        media_file.display_name = os.path.basename(url)
        media_file.duration = duration
        media_file.type = type_
        media_file.storage = remote_url_storage
        media_file.unique_id = url

        DBSession.add(media)
        DBSession.add(media_file)

        media.files.append(media_file)
        media.categories.append(category2)

        media.encoded = True
        media.reviewed = True
        media.publishable = True

Example 7

Project: PySAR
Source File: plot_network.py
def main(argv):

  lineWidth=2
  fontSize=12
  markerColor='orange'
  markerSize=16
  saveFig='no'
  if len(sys.argv)>2:

    try:
      opts, args = getopt.getopt(argv,"h:f:s:w:m:c:S:")
      
#      for i in range(opts):
 #        if '-f' in opts[0][i]:
  #          fileCheck
   #      Usage() ; sys.exit(1)

    except getopt.GetoptError:
#      print 'No input option by user'
#      print 'runing with default options'
      Usage() ; sys.exit(1)

    for opt,arg in opts:
      if opt in ("-h","--help"):
        Usage()
        sys.exit()
      elif opt == '-f':
        igramsFile = arg
      elif opt == '-s':
        fontSize = int(arg)
      elif opt == '-w':
        lineWidth=int(arg)
      elif opt == '-m':
        markerSize=int(arg)
      elif opt == '-c':
        markerColor=arg
      elif opt == '-S':
        saveFig=arg
    try:
      igramsFile
    except:
       Usage() ; sys.exit(1)

  elif len(sys.argv)==2:
     igramsFile = argv[0]
  else:
     Usage() ; sys.exit(1)

#  igramsFile=argv[0]

#############################################
#  Bp = ut.Baseline_timeseries(igramsFile)
 
  h5file = h5py.File(igramsFile)
  if h5file.keys()[0] != 'interferograms':
      print 'Input file should be interferograms'
      Usage() ; sys.exit(1)
   
 
  tbase,dateList,dateDict,dateList1=ut.date_list(h5file)
  dateIndex={}
  for ni in range(len(dateList)):
    dateIndex[dateList[ni]]=ni
  tbase=[]
  d1 = datetime.datetime(*time.strptime(dateList[0],"%Y%m%d")[0:5])

  for ni in range(len(dateList)):
    d2 = datetime.datetime(*time.strptime(dateList[ni],"%Y%m%d")[0:5])
    diff = d2-d1
    tbase.append(diff.days)

  dates=[]
  for ni in range(len(dateList)):
    d = datetime.datetime(*time.strptime(dateList[ni],"%Y%m%d")[0:5])
    dates.append(d)

  datevector=[]
  for i in range(len(dates)):
    datevector.append(np.float(dates[i].year) + np.float(dates[i].month-1)/12 + np.float(dates[i].day-1)/365)
  datevector2=[round(i,2) for i in datevector]

##################################################  
  Bp = ut.Baseline_timeseries(igramsFile)
  
  fig2 = plt.figure(2)
  ax2=fig2.add_subplot(111) 
  
  ax2.cla()
  ax2.plot(dates,Bp, '-ko',ms=markerSize, lw=lineWidth, alpha=0.7, mfc=markerColor)
  
  ax2.fmt_xdata = DateFormatter('%Y-%m-%d %H:%M:%S')
  ax2.set_ylabel('Bperp [m]',fontsize=fontSize)
  ax2.set_xlabel('Time [years]',fontsize=fontSize)
  ts=datevector[0]-0.2
  te=datevector[-1]+0.2
  ys=int(ts)
  ye=int(te)
  ms=int((ts-ys)*12)
  me=int((te-ye)*12)

  if ms>12:
       ys =ys+1
       ms=1
  if me>12:
       ye =ye+1
       me=1

  if ms<1:
       ys =ys-1
       ms=12
  if me<1:
       ye =ye-1
       me=12

  dss=datetime.datetime(ys,ms,1,0,0)
  dee=datetime.datetime(ye,me,1,0,0)
  ax2.set_xlim(dss,dee)
  ax2.set_ylim(min(Bp)-0.4*abs(min(Bp)),max(Bp)+0.4*max(Bp))

  xticklabels = getp(gca(), 'xticklabels')
  yticklabels = getp(gca(), 'yticklabels')
  setp(yticklabels, 'color', 'k', fontsize=fontSize)
  setp(xticklabels, 'color', 'k', fontsize=fontSize)
  
  fig2.autofmt_xdate()
  if saveFig=='yes':
     figName='baselineHistory.png'
     plt.savefig(figName)
#############################################################
 

  ifgramList = h5file['interferograms'].keys()
  print 'Number of interferograms: '+str(len(ifgramList))
  igram_pairs=np.zeros([len(ifgramList),2],np.int)
  i=0
  for ifgram in  ifgramList:
     date1,date2 = h5file['interferograms'][ifgram].attrs['DATE12'].split('-')
     igram_pairs[i][0]=dateList1.index(date1)
     igram_pairs[i][1]=dateList1.index(date2)
     i=i+1

##########################################################################
# For simulated interferograms only
# To plot the interferograms with unwrapping errors with a different color
  N_unw_err=0
  try:
    for ifgram in  ifgramList:
      if h5file['interferograms'][ifgram].attrs['unwrap_error']=='yes':
         N_unw_err=N_unw_err+1

  except:
    print ""
      
  if N_unw_err>0:
     igram_pairs_ue=np.zeros([N_unw_err,2],np.int)
     i=0
     for ifgram in  ifgramList:
       if h5file['interferograms'][ifgram].attrs['unwrap_error']=='yes':
         date1,date2 = h5file['interferograms'][ifgram].attrs['DATE12'].split('-')
         igram_pairs_ue[i][0]=dateList1.index(date1)
         igram_pairs_ue[i][1]=dateList1.index(date2)
         i=i+1


  h5file.close()
##########################################################################
  fig1 = plt.figure(1)
  ax1=fig1.add_subplot(111)

  ax1.cla()
  ax1.plot(dates,Bp, 'o',ms=markerSize, lw=lineWidth, alpha=0.7, mfc=markerColor)

  for ni in range(len(ifgramList)):
    ax1.plot(array([dates[igram_pairs[ni][0]],dates[igram_pairs[ni][1]]]),array([Bp[igram_pairs[ni][0]],Bp[igram_pairs[ni][1]]]),'k',lw=lineWidth)
  
  if N_unw_err>0:
     for ni in range(N_unw_err):
        ax1.plot(array([dates[igram_pairs_ue[ni][0]],dates[igram_pairs_ue[ni][1]]]),array([Bp[igram_pairs_ue[ni][0]],Bp[igram_pairs_ue[ni][1]]]),'r',lw=lineWidth)
  
  ax1.fmt_xdata = DateFormatter('%Y-%m-%d %H:%M:%S')
  ax1.set_ylabel('Bperp [m]',fontsize=fontSize)
  ax1.set_xlabel('Time [years]',fontsize=fontSize)
  ts=datevector[0]-0.2
  te=datevector[-1]+0.2
  ys=int(ts)
  ye=int(te)
  ms=int((ts-ys)*12)
  me=int((te-ye)*12)

  if ms>12:
       ys =ys+1
       ms=1
  if me>12:
       ye =ye+1
       me=1

  if ms<1:
       ys =ys-1
       ms=12
  if me<1:
       ye =ye-1
       me=12

  dss=datetime.datetime(ys,ms,1,0,0)
  dee=datetime.datetime(ye,me,1,0,0)
  ax1.set_xlim(dss,dee)
  ax1.set_ylim(min(Bp)-0.4*abs(min(Bp)),max(Bp)+0.4*max(Bp))

  xticklabels = getp(gca(), 'xticklabels')
  yticklabels = getp(gca(), 'yticklabels')
  setp(yticklabels, 'color', 'k', fontsize=fontSize)
  setp(xticklabels, 'color', 'k', fontsize=fontSize)
  fig1.autofmt_xdate()
 
  if saveFig=='yes':
     figName='igramsNetwork.png'
     plt.savefig(figName)

  plt.show() 
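
Both PySAR scripts convert '%Y%m%d' date strings by unpacking a time.strptime tuple into datetime.datetime, then derive day offsets and decimal years. A compact sketch of the same conversions (the sample dates are hypothetical):

import datetime
import time

date_list = ['20060103', '20060917', '20070512']

# Tuple-unpacking form used in the script (also works on very old Pythons).
dates = [datetime.datetime(*time.strptime(d, "%Y%m%d")[0:5]) for d in date_list]

# Equivalent, more idiomatic form.
assert dates == [datetime.datetime.strptime(d, "%Y%m%d") for d in date_list]

# Days elapsed since the first acquisition, as in tbase.
tbase = [(d - dates[0]).days for d in dates]
print(tbase)    # [0, 257, 494]

# Approximate decimal years, as in datevector.
datevector = [d.year + (d.month - 1) / 12.0 + (d.day - 1) / 365.0 for d in dates]
print([round(v, 2) for v in datevector])    # [2006.01, 2006.71, 2007.36]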

Example 8

Project: PySAR
Source File: modify_network.py
def main(argv):

  lineWidth   = 2
  fontSize    = 12
  markerColor = 'orange'
  markerSize  = 16
  networkDisplay = 'no'

  if len(sys.argv)>2:

    try:
      opts, args = getopt.getopt(argv,"h:f:C:s:w:m:c:t:b:d:l:n:N:T:")
    except getopt.GetoptError:
      Usage() ; sys.exit(1)

    for opt,arg in opts:
      if opt in ("-h","--help"):
        Usage();  sys.exit()
      elif opt == '-f':        igramsFile     = arg
      elif opt == '-C':        corFile        = arg
      elif opt == '-s':        fontSize       = int(arg)
      elif opt == '-w':        lineWidth      = int(arg)
      elif opt == '-m':        markerSize     = int(arg)
      elif opt == '-c':        markerColor    = arg
      elif opt == '-t':        temp_thr       = float(arg)
      elif opt == '-b':        base_thr       = float(arg)
      elif opt == '-d':        dates2Rmv      = arg
      elif opt == '-l':        ifgrams_to_rmv = arg
      elif opt == '-n':        networkDisplay = arg
      elif opt == '-N':        ifgrams_Number_to_rmv = arg.split()
      elif opt == '-T':        templateFile   = arg

    try:  igramsFile
    except:  Usage() ; sys.exit(1)

  elif len(sys.argv)==2:
    igramsFile = argv[0]
    networkDisplay = 'yes'
  else:   Usage() ; sys.exit(1)

  ## display network for modification, if no other limit is set
  try:
    temp_thr
    base_thr
    dates2Rmv
    ifgrams_to_rmv
    ifgrams_Number_to_rmv
    networkDisplay = 'yes'
  except: pass

###########################################################
  h5file = h5py.File(igramsFile)
  if h5file.keys()[0] != 'interferograms':
      print 'Input file should be interferograms'
      Usage() ; sys.exit(1)
  ifgramList=h5file['interferograms'].keys()

  try:     ifgrams_to_rmv
  except:  ifgrams_to_rmv=[]

###########################################################

  #####  T - templateFile, pysar.dropIfgIndex
  try:
    templateFile
    import pysar._readfile as readfile
    template = readfile.read_template(templateFile)
    drop_ifg_index = template['pysar.dropIfgIndex'].split(',')
    print 'drop interferogram index:'
    print drop_ifg_index
    try:    ifgrams_Number_to_rmv
    except: ifgrams_Number_to_rmv = []
    for index in drop_ifg_index:
       index_temp = [int(i) for i in index.split(':')];    index_temp.sort()
       if   len(index_temp)==2:
           for j in range(index_temp[0],index_temp[1]+1):  ifgrams_Number_to_rmv.append(str(j))
       elif len(index_temp)==1:                            ifgrams_Number_to_rmv.append(index)
       else: print 'Unrecognized input: '+index
  except: pass

  #####  N - interferogram number list
  try:
    for i in ifgrams_Number_to_rmv:
       print i+'    '+ifgramList[int(i)]
       ifgrams_to_rmv.append(ifgramList[int(i)])
  except: pass

  #####  b - perpendicular baseline limit
  try:
    base_thr
    print 'interferograms with the spatial baseline longer than '+ str(base_thr)+' m is removed'
    for ifgram in  ifgramList:
       Baseline = (float(h5file['interferograms'][ifgram].attrs['P_BASELINE_BOTTOM_HDR'])+\
                   float(h5file['interferograms'][ifgram].attrs['P_BASELINE_TOP_HDR']))/2
       if abs(Baseline) > base_thr:
         if not ifgram in ifgrams_to_rmv:   ifgrams_to_rmv.append(ifgram)
  except:    print 'No Spatial Baseline threshold applied'

  ##### d - dates to remove
  try:
    dates2Rmv
    print 'interferograms with any of following dates will be removed: '+ dates2Rmv
    for ifgram in  ifgramList:
      date1,date2 = h5file['interferograms'][ifgram].attrs['DATE12'].split('-')
      if (date1 in dates2Rmv) or (date2 in dates2Rmv):
         if not ifgram in ifgrams_to_rmv:   ifgrams_to_rmv.append(ifgram)
  except:   print 'No specific dates selected to remove'

  ##### t - temporal baseline limit
  tbase,dateList,dateDict,dateList1=ut.date_list(h5file)
  try:
    temp_thr
    print 'Applying the temporal baseline threshold with threshold of '+str(temp_thr)+' days'
    for ifgram in  ifgramList:
       date1,date2 = h5file['interferograms'][ifgram].attrs['DATE12'].split('-')      
       ind1 = dateList1.index(date1)
       ind2 = dateList1.index(date2)
       dt=tbase[ind2]-tbase[ind1]
       if dt>temp_thr:
          if not ifgram in ifgrams_to_rmv:
            ifgrams_to_rmv.append(ifgram)
  except:
    print 'No Temporal Baseline threshold applied'

############################################################
############################################################
  if networkDisplay=='yes':
  
    tbase,dateList,dateDict,dateList1=ut.date_list(h5file)
    dateIndex={}
    for ni in range(len(dateList)):
      dateIndex[dateList[ni]]=ni
    tbase=[]
    d1 = datetime.datetime(*time.strptime(dateList[0],"%Y%m%d")[0:5])

    for ni in range(len(dateList)):
      d2 = datetime.datetime(*time.strptime(dateList[ni],"%Y%m%d")[0:5])
      diff = d2-d1
      tbase.append(diff.days)

    dates=[]
    for ni in range(len(dateList)):
      d = datetime.datetime(*time.strptime(dateList[ni],"%Y%m%d")[0:5])
      dates.append(d)

    datevector=[]
    for i in range(len(dates)):
      datevector.append(np.float(dates[i].year) + np.float(dates[i].month-1)/12 + np.float(dates[i].day-1)/365)
    datevector2=[round(i,2) for i in datevector]

##################################################  
    Bp = ut.Baseline_timeseries(igramsFile)
#############################################################
 
    ifgramList = h5file['interferograms'].keys()
    igram_pairs=np.zeros([len(ifgramList),2],np.int)
    i=0
    for ifgram in  ifgramList:
      date1,date2 = h5file['interferograms'][ifgram].attrs['DATE12'].split('-')
      igram_pairs[i][0]=dateList1.index(date1)
      igram_pairs[i][1]=dateList1.index(date2)
      i=i+1

############################################################
    import matplotlib.pyplot as plt
    fig1 = plt.figure(1)
    ax1=fig1.add_subplot(111)

    ax1.cla()
    # ax1.plot(dates,Bp, 'o',ms=markerSize, lw=lineWidth, alpha=0.7, mfc=markerColor)
    print tbase
    ax1.plot(tbase,Bp, 'o',ms=markerSize, lw=lineWidth, alpha=0.7, mfc=markerColor)
    for ni in range(len(ifgramList)):
      ax1.plot(array([tbase[igram_pairs[ni][0]],tbase[igram_pairs[ni][1]]]),\
               array([Bp[igram_pairs[ni][0]],Bp[igram_pairs[ni][1]]]),'k',lw=4) 
    # ax1.fmt_xdata = DateFormatter('%Y-%m-%d %H:%M:%S')
    ax1.set_ylabel('Bperp [m]',fontsize=fontSize)
    ax1.set_xlabel('Time [years]',fontsize=fontSize)
    ts=datevector[0]+0.2
    te=datevector[-1]+0.2
    ys=int(ts)
    ye=int(te)
    ms=int((ts-ys)*12)
    me=int((te-ye)*12)
    if ms>12:       ys =ys+1;       ms=1
    if me>12:       ye =ye+1;       me=1
    if ms<1:        ys =ys-1;       ms=12
    if me<1:        ye =ye-1;       me=12

    dss=datetime.datetime(ys,ms,1,0,0)
    dee=datetime.datetime(ye,me,1,0,0)
    ax1.set_ylim(min(Bp)-0.4*abs(min(Bp)),max(Bp)+0.4*max(Bp))

    xticklabels = getp(gca(), 'xticklabels')
    yticklabels = getp(gca(), 'yticklabels')
    setp(yticklabels, 'color', 'k', fontsize=fontSize)
    setp(xticklabels, 'color', 'k', fontsize=fontSize)

##########################################  
    x=[]
    y=[]
    Master_index_torremove=[]
    Slave_index_torremove=[]
    a_tbase=array(tbase)
    a_Bp=array(Bp)
    def onclick(event):
      if event.button==1:
        print 'click'
        xClick = event.xdata
        yClick = event.ydata
        idx=nearest_neighbor(xClick,yClick, a_tbase, a_Bp)       
        xr = a_tbase[idx]
        yr = a_Bp[idx]
        ix=tbase.index(xr)+1
        print ix
        x.append(xr)
        y.append(yr)
        if mod(len(x),2)==0:
           Master_index_torremove.append(tbase.index(xr))
           ax1.plot([x[len(x)-1],x[len(x)-2]],[y[len(x)-1],y[len(x)-2]],'r',lw=4)
        else:
           Slave_index_torremove.append(tbase.index(xr))
      plt.show()
    cid = fig1.canvas.mpl_connect('button_press_event', onclick)


    plt.show()
    print Master_index_torremove
    print Slave_index_torremove

    if len(Master_index_torremove) == len(Slave_index_torremove):
       R=np.vstack((Master_index_torremove,Slave_index_torremove))
    else:
       R=np.vstack((Master_index_torremove[:-1],Slave_index_torremove))

    R.sort(0)
    print R
    print dateList1
    numIgrams_rmv=np.shape(R)[1]
    for ifgram in  ifgramList:
       date1,date2 = h5file['interferograms'][ifgram].attrs['DATE12'].split('-')
       for i in range(numIgrams_rmv):
           if dateList1[R[0][i]]==date1 and dateList1[R[1][i]]==date2:
               ifgrams_to_rmv.append(ifgram)

  else:
    print 'No network display.'
############################################################
############################################################

  print 'The list of interferograms to remove:' 
  print ifgrams_to_rmv
  igramsFile_modified='Modified_'+igramsFile
  h5filem = h5py.File(igramsFile_modified,'w')
  gg = h5filem.create_group('interferograms')
  ifgram=ifgramList[0]
  unw = h5file['interferograms'][ifgram].get(ifgram)
  MaskZero=np.ones([unw.shape[0],unw.shape[1]])

  print 'writing the modified interferogram file ...'
  for ifgram in  ifgramList:
     if not ifgram in ifgrams_to_rmv:
        print ifgram
        unwSet = h5file['interferograms'][ifgram].get(ifgram)
        unw = unwSet[0:unwSet.shape[0],0:unwSet.shape[1]]        
        MaskZero=unw*MaskZero
        group = gg.create_group(ifgram)
        dset = group.create_dataset(ifgram, data=unw, compression='gzip')
        for key, value in h5file['interferograms'][ifgram].attrs.iteritems():
           group.attrs[key] = value

  Mask=np.ones([unwSet.shape[0],unwSet.shape[1]])
  Mask[MaskZero==0]=0

  # updating Coherence file
  # convert ifgrams_to_rmv to cor_to_rmv
  date12_to_rmv=[]
  for igram in ifgrams_to_rmv:
     date12_to_rmv.append(igram.split('-sim')[0].split('filt_')[-1])

  try:
     corFile
     h5fileCor=h5py.File(corFile)
     corList=h5fileCor['coherence'].keys()

     corFile_modified='Modified_'+corFile
     h5fileCorm=h5py.File(corFile_modified,'w')
     gc = h5fileCorm.create_group('coherence')
     print 'writing the modified coherence file ...'
     for cor in corList:
        date12=cor.split('-sim')[0].split('filt_')[-1]
        if not date12 in date12_to_rmv:
           print cor
           unwSet = h5fileCor['coherence'][cor].get(cor)
           unw = unwSet[0:unwSet.shape[0],0:unwSet.shape[1]]
           group = gc.create_group(cor)
           dset = group.create_dataset(cor, data=unw, compression='gzip')
           for key, value in h5fileCor['coherence'][cor].attrs.iteritems():
              group.attrs[key] = value  
  except:
     print 'No coherence file to be updated.'

########################################################################

  print 'writing Modified_Mask.h5'
  
  h5mask = h5py.File('Modified_Mask.h5','w')
  group=h5mask.create_group('mask')
  dset = group.create_dataset(os.path.basename('mask'), data=Mask, compression='gzip')
  h5mask.close()      

  gm = h5filem.create_group('mask')
  dset = gm.create_dataset('mask', data=Mask, compression='gzip')

  h5file.close()
  h5filem.close()

Example 9

Project: MininGit
Source File: BzrParser.py
    def _parse_line(self, line):
        if line is None or line == '':
            return

        # Separator
        match = self.patterns['separator'].match(line)
        if match:
            self.flush()

            return

        # Ignore details about merges
        match = self.patterns['ignore'].match(line)
        if match:
            self.state = BzrParser.UNKNOWN

            return

        # Commit
        match = self.patterns['commit'].match(line)
        if match:
            self.flush()
            self.commit = Commit()
            self.commit.revision = match.group(1)

            self.state = BzrParser.COMMIT

            return

        # Committer
        match = self.patterns['committer'].match(line)
        if match:
            self.commit.committer = Person()
            self.commit.committer.name = match.group(1)
            self.commit.committer.email = match.group(2)
            self.handler.committer(self.commit.committer)

            return

        # Author
        match = self.patterns['author'].match(line)
        if match:
            self.commit.author = Person()
            self.commit.author.name = match.group(1)
            self.commit.author.email = match.group(2)
            self.handler.author(self.commit.author)

            return

        # Date
        match = self.patterns['commit-date'].match(line)
        if match:
            self.commit.commit_date = datetime.datetime(*(time.strptime
                                                   (match.group(1).strip(" "),
                                                    "%Y-%m-%d %H:%M:%S")[0:6]))
            # datetime.datetime.strptime not supported by Python2.4
            #self.commit.commit_date = datetime.datetime.strptime(\
            #    match.group(1).strip(" "), "%Y-%m-%d %H:%M:%S")

            return

        # Message
        match = self.patterns['message'].match(line)
        if match:
            self.state = BzrParser.MESSAGE

            return

        # Added files
        match = self.patterns['added'].match(line)
        if match:
            self.state = BzrParser.ADDED

            return

        # Modified files
        match = self.patterns['modified'].match(line)
        if match:
            self.state = BzrParser.MODIFIED

            return

        # Removed files
        match = self.patterns['removed'].match(line)
        if match:
            self.state = BzrParser.REMOVED

            return

        # Renamed files
        match = self.patterns['renamed'].match(line)
        if match:
            self.state = BzrParser.RENAMED

            return

        if self.state == BzrParser.MESSAGE:
            self.commit.message += line.lstrip() + '\n'
        elif self.state == BzrParser.ADDED or \
             self.state == BzrParser.MODIFIED or \
             self.state == BzrParser.REMOVED:
            action = Action()
            if self.state == BzrParser.ADDED:
                action.type = 'A'
            elif self.state == BzrParser.MODIFIED:
                action.type = 'M'
            elif self.state == BzrParser.REMOVED:
                action.type = 'D'
            action.f1 = line.strip()

            self.commit.actions.append(action)
            self.handler.file(action.f1)
        elif self.state == BzrParser.RENAMED:
            m = re.compile("^[ \t]+(.*) => (.*)$").match(line)
            if not m:
                return

            action = Action()
            action.type = 'V'
            action.f1 = m.group(2)
            action.f2 = m.group(1)

            self.commit.actions.append(action)
            self.handler.file(action.f1)
        else:
            self.state = BzrParser.UNKNOWN
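
Note: the commit-date handling above builds the datetime from time.strptime because datetime.datetime.strptime was not available before Python 2.5. A minimal sketch showing that the two constructions agree on later versions:

import datetime
import time

s, fmt = "2011-03-07 14:05:09", "%Y-%m-%d %H:%M:%S"

# Python 2.4-compatible construction, as used by the parser above
d1 = datetime.datetime(*(time.strptime(s, fmt)[0:6]))
# Direct equivalent available from Python 2.5 onwards
d2 = datetime.datetime.strptime(s, fmt)
assert d1 == d2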

Example 10

Project: python-devicecloud
Source File: streams.py
View license
    def read(self, start_time=None, end_time=None, use_client_timeline=True, newest_first=True,
             rollup_interval=None, rollup_method=None, timezone=None, page_size=1000):
        """Read one or more DataPoints from a stream

        .. warning::
           The data points from the device cloud form a paged data set.  When iterating over the
           result set there could be delays when we hit the end of a page.  If this is undesirable,
           the caller should collect all results into a data structure first before iterating over
           the result set.

        :param start_time: The start time for the window of data points to read.  None means
            that we should start with the oldest data available.
        :type start_time: :class:`datetime.datetime` or None
        :param end_time: The end time for the window of data points to read.  None means
            that we should include all points received until this point in time.
        :type end_time: :class:`datetime.datetime` or None
        :param bool use_client_timeline: If True, the times used will be those provided by
              clients writing data points into the cloud (which also default to server time
              if a timestamp was not included by the client).  This is usually what you
              want.  If False, the server timestamp will be used which records when the data
              point was received.
        :param bool newest_first: If True, results will be ordered from newest to oldest (descending order).
            If False, results will be returned oldest to newest.
        :param rollup_interval: the roll-up interval that should be used if one is desired at all.  Rollups
            will not be performed if None is specified for the interval.  Valid roll-up interval values
            are None, "half", "hourly", "day", "week", and "month".  See `DataPoints documentation
            <http://ftp1.digi.com/support/documentation/html/90002008/90002008_P/Default.htm#ProgrammingTopics/DataStreams.htm#DataPoints>`_
            for additional details on these values.
        :type rollup_interval: str or None
        :param rollup_method: The aggregation applied to values in the points within the specified
            rollup_interval.  Available methods are None, "sum", "average", "min", "max", "count", and
            "standarddev".  See `DataPoint documentation
            <http://ftp1.digi.com/support/documentation/html/90002008/90002008_P/Default.htm#ProgrammingTopics/DataStreams.htm#DataPoints>`_
            for additional details on these values.
        :type rollup_method: str or None
        :param timezone: timezone for calculating roll-ups. This determines roll-up interval
            boundaries and only applies to roll-ups of a day or larger (for example, day,
            week, or month). Note that it does not apply to the startTime and endTime parameters.
            See the `Timestamps <http://ftp1.digi.com/support/documentation/html/90002008/90002008_P/Default.htm#ProgrammingTopics/DataStreams.htm#timestamp>`_
            and `Supported Time Zones <http://ftp1.digi.com/support/documentation/html/90002008/90002008_P/Default.htm#ProgrammingTopics/DataStreams.htm#TimeZones>`_
            sections for more information.
        :type timezone: str or None
        :param int page_size: The number of results that we should attempt to retrieve from the
            device cloud in each page.  Generally, this can be left at its default value unless
            you have a good reason to change the parameter for performance reasons.
        :returns: A generator object which one can iterate over the DataPoints read.

        """

        is_rollup = False
        if (rollup_interval is not None) or (rollup_method is not None):
            is_rollup = True
            numeric_types = [
                STREAM_TYPE_INTEGER,
                STREAM_TYPE_LONG,
                STREAM_TYPE_FLOAT,
                STREAM_TYPE_DOUBLE,
                STREAM_TYPE_STRING,
                STREAM_TYPE_BINARY,
                STREAM_TYPE_UNKNOWN,
            ]

            if self.get_data_type(use_cached=True) not in numeric_types:
                raise InvalidRollupDatatype('Rollups only support numerical DataPoints')

        # Validate function inputs
        start_time = to_none_or_dt(validate_type(start_time, datetime.datetime, type(None)))
        end_time = to_none_or_dt(validate_type(end_time, datetime.datetime, type(None)))
        use_client_timeline = validate_type(use_client_timeline, bool)
        newest_first = validate_type(newest_first, bool)
        rollup_interval = validate_type(rollup_interval, type(None), *six.string_types)
        if not rollup_interval in {None,
                                   ROLLUP_INTERVAL_HALF,
                                   ROLLUP_INTERVAL_HOUR,
                                   ROLLUP_INTERVAL_DAY,
                                   ROLLUP_INTERVAL_WEEK,
                                   ROLLUP_INTERVAL_MONTH, }:
            raise ValueError("Invalid rollup_interval %r provided" % (rollup_interval, ))
        rollup_method = validate_type(rollup_method, type(None), *six.string_types)
        if not rollup_method in {None,
                                 ROLLUP_METHOD_SUM,
                                 ROLLUP_METHOD_AVERAGE,
                                 ROLLUP_METHOD_MIN,
                                 ROLLUP_METHOD_MAX,
                                 ROLLUP_METHOD_COUNT,
                                 ROLLUP_METHOD_STDDEV}:
            raise ValueError("Invalid rollup_method %r provided" % (rollup_method, ))
        timezone = validate_type(timezone, type(None), *six.string_types)
        page_size = validate_type(page_size, *six.integer_types)

        # Remember that there could be multiple pages of data and we want to provide
        # an iterator over the result set.  To start the process out, we need to make
        # an initial request without a page cursor.  We should get one in response to
        # our first request which we will use to page through the result set
        query_parameters = {
            'timeline': 'client' if use_client_timeline else 'server',
            'order': 'descending' if newest_first else 'ascending',
            'size': page_size
        }
        if start_time is not None:
            query_parameters["startTime"] = isoformat(start_time)
        if end_time is not None:
            query_parameters["endTime"] = isoformat(end_time)
        if rollup_interval is not None:
            query_parameters["rollupInterval"] = rollup_interval
        if rollup_method is not None:
            query_parameters["rollupMethod"] = rollup_method
        if timezone is not None:
            query_parameters["timezone"] = timezone

        result_size = page_size
        while result_size == page_size:
            # request the next page of data or first if pageCursor is not set as query param
            try:
                result = self._conn.get_json("/ws/DataPoint/{stream_id}?{query_params}".format(
                    stream_id=self.get_stream_id(),
                    query_params=urllib.parse.urlencode(query_parameters)
                ))
            except DeviceCloudHttpException as http_exception:
                if http_exception.response.status_code == 404:
                    raise NoSuchStreamException()
                raise http_exception

            result_size = int(result["resultSize"])  # how many are actually included here?
            query_parameters["pageCursor"] = result.get("pageCursor")  # will not be present if result set is empty
            for item_info in result.get("items", []):
                if is_rollup:
                    data_point = DataPoint.from_rollup_json(self, item_info)
                else:
                    data_point = DataPoint.from_json(self, item_info)
                yield data_point
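
Note: a hypothetical usage sketch of the read() method above, reading a one-day window bounded by datetime.datetime values with an hourly average roll-up; the `stream` argument is an assumed DataStream instance and is not part of the listing.

import datetime

def dump_one_day(stream):
    # `stream` is an assumed DataStream instance (hypothetical); only the
    # read() signature shown above is taken from the listing.
    start = datetime.datetime(2015, 6, 1)
    end = datetime.datetime(2015, 6, 2)
    for point in stream.read(start_time=start, end_time=end,
                             rollup_interval="hourly", rollup_method="average",
                             newest_first=False):
        print(point)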

Example 11

Project: theyworkforyou
Source File: query.py
View license
def query_class(QueryClass, Database):
    """
    Returns a custom django.db.models.sql.query.Query subclass that is
    appropriate for Oracle.

    The 'Database' module (cx_Oracle) is passed in here so that all the setup
    required to import it only needs to be done by the calling module.
    """
    global _classes
    try:
        return _classes[QueryClass]
    except KeyError:
        pass

    class OracleQuery(QueryClass):
        def __reduce__(self):
            """
            Enable pickling for this class (normal pickling handling doesn't
            work as Python can only pickle module-level classes by default).
            """
            if hasattr(QueryClass, '__getstate__'):
                assert hasattr(QueryClass, '__setstate__')
                data = self.__getstate__()
            else:
                data = self.__dict__
            return (unpickle_query_class, (QueryClass,), data)

        def resolve_columns(self, row, fields=()):
            # If this query has limit/offset information, then we expect the
            # first column to be an extra "_RN" column that we need to throw
            # away.
            if self.high_mark is not None or self.low_mark:
                rn_offset = 1
            else:
                rn_offset = 0
            index_start = rn_offset + len(self.extra_select.keys())
            values = [self.convert_values(v, None)
                      for v in row[rn_offset:index_start]]
            for value, field in map(None, row[index_start:], fields):
                values.append(self.convert_values(value, field))
            return tuple(values)

        def convert_values(self, value, field):
            if isinstance(value, Database.LOB):
                value = value.read()
                if field and field.get_internal_type() == 'TextField':
                    value = force_unicode(value)

            # Oracle stores empty strings as null. We need to undo this in
            # order to adhere to the Django convention of using the empty
            # string instead of null, but only if the field accepts the
            # empty string.
            if value is None and field and field.empty_strings_allowed:
                value = u''
            # Convert 1 or 0 to True or False
            elif value in (1, 0) and field and field.get_internal_type() in ('BooleanField', 'NullBooleanField'):
                value = bool(value)
            # Force floats to the correct type
            elif value is not None and field and field.get_internal_type() == 'FloatField':
                value = float(value)
            # Convert floats to decimals
            elif value is not None and field and field.get_internal_type() == 'DecimalField':
                value = util.typecast_decimal(field.format_number(value))
            # cx_Oracle always returns datetime.datetime objects for
            # DATE and TIMESTAMP columns, but Django wants to see a
            # python datetime.date, .time, or .datetime.  We use the type
            # of the Field to determine which to cast to, but it's not
            # always available.
            # As a workaround, we cast to date if all the time-related
            # values are 0, or to time if the date is 1/1/1900.
            # This could be cleaned a bit by adding a method to the Field
            # classes to normalize values from the database (the to_python
            # method is used for validation and isn't what we want here).
            elif isinstance(value, Database.Timestamp):
                # In Python 2.3, the cx_Oracle driver returns its own
                # Timestamp object that we must convert to a datetime class.
                if not isinstance(value, datetime.datetime):
                    value = datetime.datetime(value.year, value.month,
                            value.day, value.hour, value.minute, value.second,
                            value.fsecond)
                if field and field.get_internal_type() == 'DateTimeField':
                    pass
                elif field and field.get_internal_type() == 'DateField':
                    value = value.date()
                elif field and field.get_internal_type() == 'TimeField' or (value.year == 1900 and value.month == value.day == 1):
                    value = value.time()
                elif value.hour == value.minute == value.second == value.microsecond == 0:
                    value = value.date()
            return value

        def as_sql(self, with_limits=True, with_col_aliases=False):
            """
            Creates the SQL for this query. Returns the SQL string and list
            of parameters.  This is overridden from the original Query class
            to handle the additional SQL Oracle requires to emulate LIMIT
            and OFFSET.

            If 'with_limits' is False, any limit/offset information is not
            included in the query.
            """

            # The `do_offset` flag indicates whether we need to construct
            # the SQL needed to use limit/offset with Oracle.
            do_offset = with_limits and (self.high_mark is not None
                                         or self.low_mark)
            if not do_offset:
                sql, params = super(OracleQuery, self).as_sql(with_limits=False,
                        with_col_aliases=with_col_aliases)
            else:
                sql, params = super(OracleQuery, self).as_sql(with_limits=False,
                                                        with_col_aliases=True)

                # Wrap the base query in an outer SELECT * with boundaries on
                # the "_RN" column.  This is the canonical way to emulate LIMIT
                # and OFFSET on Oracle.
                high_where = ''
                if self.high_mark is not None:
                    high_where = 'WHERE ROWNUM <= %d' % (self.high_mark,)
                sql = 'SELECT * FROM (SELECT ROWNUM AS "_RN", "_SUB".* FROM (%s) "_SUB" %s) WHERE "_RN" > %d' % (sql, high_where, self.low_mark)

            return sql, params

    _classes[QueryClass] = OracleQuery
    return OracleQuery
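
Note: when no Field type is available, the Timestamp branch above falls back on a heuristic: a sentinel date of 1900-01-01 marks a value that was really a TIME column, and an all-zero time component marks a DATE column. A minimal standalone sketch of that rule:

import datetime

def normalize_timestamp(value):
    # Heuristic from the code above, applied when no Field type is available:
    # the sentinel date 1900-01-01 means the column was really a TIME, and a
    # zeroed time component means it was really a DATE.
    if value.year == 1900 and value.month == value.day == 1:
        return value.time()
    if value.hour == value.minute == value.second == value.microsecond == 0:
        return value.date()
    return value

print(normalize_timestamp(datetime.datetime(2009, 5, 4)))         # 2009-05-04
print(normalize_timestamp(datetime.datetime(1900, 1, 1, 8, 30)))  # 08:30:00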

Example 12

Project: parsedatetime
Source File: icu.py
View license
def get_icu(locale):
    from . import base
    result = dict([(key, getattr(base, key))
                   for key in dir(base) if not key.startswith('_')])
    result['icu'] = None

    if pyicu is None:
        return icu_object(result)

    if locale is None:
        locale = 'en_US'
    result['icu'] = icu = pyicu.Locale(locale)

    if icu is None:
        return icu_object(result)

    # grab spelled out format of all numbers from 0 to 100
    rbnf = pyicu.RuleBasedNumberFormat(pyicu.URBNFRuleSetTag.SPELLOUT, icu)
    result['numbers'].update([(rbnf.format(i), i) for i in range(0, 100)])

    symbols = result['symbols'] = pyicu.DateFormatSymbols(icu)

    # grab ICU list of weekdays, skipping first entry which
    # is always blank
    wd = [w.lower() for w in symbols.getWeekdays()[1:]]
    swd = [sw.lower() for sw in symbols.getShortWeekdays()[1:]]

    # store them in our list with Monday first (ICU puts Sunday first)
    result['Weekdays'] = merge_weekdays(result['Weekdays'],
                                        wd[1:] + wd[0:1])
    result['shortWeekdays'] = merge_weekdays(result['shortWeekdays'],
                                             swd[1:] + swd[0:1])
    result['Months'] = [m.lower() for m in symbols.getMonths()]
    result['shortMonths'] = [sm.lower() for sm in symbols.getShortMonths()]
    keys = ['full', 'long', 'medium', 'short']

    createDateInstance = pyicu.DateFormat.createDateInstance
    createTimeInstance = pyicu.DateFormat.createTimeInstance
    icu_df = result['icu_df'] = {
        'full': createDateInstance(pyicu.DateFormat.kFull, icu),
        'long': createDateInstance(pyicu.DateFormat.kLong, icu),
        'medium': createDateInstance(pyicu.DateFormat.kMedium, icu),
        'short': createDateInstance(pyicu.DateFormat.kShort, icu),
    }
    icu_tf = result['icu_tf'] = {
        'full': createTimeInstance(pyicu.DateFormat.kFull, icu),
        'long': createTimeInstance(pyicu.DateFormat.kLong, icu),
        'medium': createTimeInstance(pyicu.DateFormat.kMedium, icu),
        'short': createTimeInstance(pyicu.DateFormat.kShort, icu),
    }

    result['dateFormats'] = {}
    result['timeFormats'] = {}
    for x in keys:
        result['dateFormats'][x] = icu_df[x].toPattern()
        result['timeFormats'][x] = icu_tf[x].toPattern()

    am = pm = ts = ''

    # ICU doesn't seem to provide directly the date or time separator
    # so we have to figure it out
    o = result['icu_tf']['short']
    s = result['timeFormats']['short']

    result['usesMeridian'] = 'a' in s
    result['uses24'] = 'H' in s

    # '11:45 AM' or '11:45'
    s = o.format(datetime.datetime(2003, 10, 30, 11, 45))

    # ': AM' or ':'
    s = s.replace('11', '').replace('45', '')

    if len(s) > 0:
        ts = s[0]

    if result['usesMeridian']:
        # '23:45 AM' or '23:45'
        am = s[1:].strip()
        s = o.format(datetime.datetime(2003, 10, 30, 23, 45))

        if result['uses24']:
            s = s.replace('23', '')
        else:
            s = s.replace('11', '')

            # 'PM' or ''
        pm = s.replace('45', '').replace(ts, '').strip()

    result['timeSep'] = [ts]
    result['meridian'] = [am, pm] if am and pm else []

    o = result['icu_df']['short']
    s = o.format(datetime.datetime(2003, 10, 30, 11, 45))
    s = s.replace('10', '').replace('30', '').replace(
        '03', '').replace('2003', '')

    if len(s) > 0:
        ds = s[0]
    else:
        ds = '/'

    result['dateSep'] = [ds]
    s = result['dateFormats']['short']
    l = s.lower().split(ds)
    dp_order = []

    for s in l:
        if len(s) > 0:
            dp_order.append(s[:1])

    result['dp_order'] = dp_order
    return icu_object(result)
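
Note: the separator detection above works by formatting the known timestamp datetime.datetime(2003, 10, 30, 11, 45) and stripping the known digits; whatever remains is the time separator and, if present, the meridian string. A minimal sketch of the same trick with strftime standing in for the ICU short time format:

import datetime

sample = datetime.datetime(2003, 10, 30, 11, 45)
s = sample.strftime("%I:%M %p")                 # e.g. '11:45 AM'
s = s.replace("11", "").replace("45", "")       # e.g. ': AM'
time_sep = s[0] if s else ":"
meridian = s[1:].strip()
print((time_sep, meridian))                     # (':', 'AM')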

Example 13

Project: airmozilla
Source File: dashboard.py
View license
@staff_required
@json_view
def dashboard_data_graphs(request):  # pragma: no cover
    """experimental"""
    YEARS = 3
    now = timezone.now()

    def get_events(years_back):
        first_date = datetime.datetime(now.year - years_back + 1, 1, 1)

        objects = (
            Event.objects
            .filter(archive_time__lt=now)
            .filter(created__gt=first_date.replace(tzinfo=timezone.utc))
            .order_by('created')
        )
        buckets = {}
        for each in objects.values_list('created'):
            created, = each
            year = created.year
            if year not in buckets:
                buckets[year] = defaultdict(int)
            next_monday = created + datetime.timedelta(
                days=7 - created.weekday()
            )
            key = next_monday.strftime('%Y-%m-%d')
            buckets[year][key] += 1
        legends = sorted(buckets.keys())

        last_year = legends[-1]

        def fake_year(date_str, year):
            return date_str.replace(str(year), str(last_year))

        data = []
        for year in legends:
            group = sorted(
                {'date': fake_year(k, year), 'value': v}
                for k, v in buckets[year].items()
            )
            data.append(group)
        return {
            'type': 'events',
            'title': 'New Events',
            'data': data,
            'description': 'Number of added events per year',
            'legends': legends,
        }

    def get_revisions(years_back):
        first_date = datetime.datetime(now.year - years_back + 1, 1, 1)

        objects = (
            EventRevision.objects
            .filter(created__gt=first_date.replace(tzinfo=timezone.utc))
            .order_by('created')
        )
        buckets = {}
        for each in objects.values_list('created'):
            created, = each
            year = created.year
            if year not in buckets:
                buckets[year] = defaultdict(int)
            next_monday = created + datetime.timedelta(
                days=7 - created.weekday()
            )
            key = next_monday.strftime('%Y-%m-%d')
            buckets[year][key] += 1
        legends = sorted(buckets.keys())

        last_year = legends[-1]

        def fake_year(date_str, year):
            return date_str.replace(str(year), str(last_year))

        data = []
        for year in legends:
            group = sorted(
                {'date': fake_year(k, year), 'value': v}
                for k, v in buckets[year].items()
            )
            data.append(group)
        return {
            'type': 'revisions',
            'title': 'Event Revisions',
            'data': data,
            'description': 'Number of event edits per year',
            'legends': legends,
        }

    def get_users(years_back):
        first_date = datetime.datetime(now.year - years_back + 1, 1, 1)

        objects = (
            User.objects
            .filter(date_joined__gt=first_date.replace(tzinfo=timezone.utc))
            .order_by('date_joined')
        )
        buckets = {}
        for each in objects.values_list('date_joined'):
            created, = each
            year = created.year
            if year not in buckets:
                buckets[year] = defaultdict(int)
            next_monday = created + datetime.timedelta(
                days=7 - created.weekday()
            )
            key = next_monday.strftime('%Y-%m-%d')
            buckets[year][key] += 1
        legends = sorted(buckets.keys())

        last_year = legends[-1]

        def fake_year(date_str, year):
            return date_str.replace(str(year), str(last_year))

        data = []
        for year in legends:
            group = sorted(
                {'date': fake_year(k, year), 'value': v}
                for k, v in buckets[year].items()
            )
            data.append(group)
        return {
            'type': 'users',
            'title': 'New Users',
            'data': data,
            'description': 'Number of first joining users per year',
            'legends': legends,
        }

    groups = []
    groups.append(get_events(YEARS))
    groups.append(get_users(YEARS))
    groups.append(get_revisions(2))
    return {'groups': groups}
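
Note: all three helpers above bucket rows by week using the date of the Monday following each created timestamp. A minimal sketch of that bucketing:

import datetime
from collections import defaultdict

created_times = [
    datetime.datetime(2015, 3, 3, 10, 0),    # Tuesday
    datetime.datetime(2015, 3, 5, 12, 0),    # Thursday of the same week
    datetime.datetime(2015, 3, 11, 9, 0),    # Wednesday of the next week
]
buckets = defaultdict(int)
for created in created_times:
    next_monday = created + datetime.timedelta(days=7 - created.weekday())
    buckets[next_monday.strftime('%Y-%m-%d')] += 1
print(sorted(buckets.items()))   # [('2015-03-09', 2), ('2015-03-16', 1)]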

Example 14

Project: CVSAnalY
Source File: GitParser.py
View license
    def _parse_line(self, line):
        if line is None or line == '':
            return

        # Ignore
        for patt in self.patterns['ignore']:
            if patt.match(line):
                return

        # Commit
        match = self.patterns['commit'].match(line)
        if match:
            if self.commit is not None and self.branch is not None:
                if self.branch.tail.svn_tag is None:  # Skip commits on svn tags
                    self.handler.commit(self.branch.tail.commit)

            self.commit = Commit()
            self.commit.revision = match.group(1)

            parents = match.group(3)
            if parents:
                parents = parents.split()
                self.commit.parents = parents
            git_commit = self.GitCommit(self.commit, parents)

            decorate = match.group(5)
            branch = None
            if decorate:
                # Remote branch
                m = re.search(self.patterns['branch'], decorate)
                if m:
                    branch = self.GitBranch(self.GitBranch.REMOTE, m.group(1), git_commit)
                    printdbg("Branch '%s' head at acommit %s", (branch.name, self.commit.revision))
                else:
                    # Local Branch
                    m = re.search(self.patterns['local-branch'], decorate)
                    if m:
                        branch = self.GitBranch(self.GitBranch.LOCAL, m.group(1), git_commit)
                        printdbg("Commit %s on local branch '%s'", (self.commit.revision, branch.name))
                        # If local branch was merged we just ignore this decoration
                        if self.branch and self.branch.is_my_parent(git_commit):
                            printdbg("Local branch '%s' was merged", (branch.name,))
                            branch = None
                    else:
                        # Stash
                        m = re.search(self.patterns['stash'], decorate)
                        if m:
                            branch = self.GitBranch(self.GitBranch.STASH, "stash", git_commit)
                            printdbg("Commit %s on stash", (self.commit.revision,))
                # Tag
                m = re.search(self.patterns['tag'], decorate)
                if m:
                    self.commit.tags = [m.group(1)]
                    printdbg("Commit %s tagged as '%s'", (self.commit.revision, self.commit.tags[0]))

            if not branch and not self.branch:
                branch = self.GitBranch(self.GitBranch.LOCAL, "(no-branch)", git_commit)
                printdbg("Commit %s on unknown local branch '%s'", (self.commit.revision, branch.name))

            # This part of the code looks weird at first, so here is a small description of what it does:
            #
            # * self.branch is the branch to which the last inspected commit belonged to
            # * branch is the branch of the current parsed commit
            #
            # This check is only to find branches which are fully merged into an already analyzed branch
            #
            # For more detailed information see https://github.com/MetricsGrimoire/CVSAnalY/issues/64
            if branch is not None and self.branch is not None:
                # Detect empty branches.
                # Ideally, the head of a branch can't have children.
                # When this happens, it is because the branch is empty, so we just ignore that branch.
                if self.branch.is_my_parent(git_commit):
                    printout(
                        "Info: Branch '%s' will be ignored, because it was already merged in an active one.",
                        (branch.name,)
                    )
                    branch = None

            if len(self.branches) >= 2:
                # If current commit is the start point of a new branch
                # we have to look at all the current branches since
                # we haven't inserted the new branch yet.
                # If not, look at all other branches excluding the current one
                for i, b in enumerate(self.branches):
                    if i == 0 and branch is None:
                        continue

                    if b.is_my_parent(git_commit):
                        # We assume current branch is always the last one
                        # AFAIK there's no way to make sure this is right
                        printdbg("Start point of branch '%s' at commit %s",
                                 (self.branches[0].name, self.commit.revision))
                        self.branches.pop(0)
                        self.branch = b

            if self.branch and self.branch.tail.svn_tag is not None and self.branch.is_my_parent(git_commit):
                # There's a pending tag in previous commit
                pending_tag = self.branch.tail.svn_tag
                printdbg("Move pending tag '%s' from previous commit %s to current %s", (pending_tag,
                                                                                         self.branch.tail.commit.revision,
                                                                                         self.commit.revision))
                if self.commit.tags and pending_tag not in self.commit.tags:
                    self.commit.tags.append(pending_tag)
                else:
                    self.commit.tags = [pending_tag]
                self.branch.tail.svn_tag = None

            if branch is not None:
                self.branch = branch

                # Insert master always at the end
                if branch.name == 'master':
                    self.branches.append(self.branch)
                else:
                    self.branches.insert(0, self.branch)
            else:
                if self.branch is not None:
                    self.branch.set_tail(git_commit)
            return

        # Committer
        match = self.patterns['committer'].match(line)
        if match:
            self.commit.committer = Person()
            self.commit.committer.name = match.group(1)
            self.commit.committer.email = match.group(2)
            self.handler.committer(self.commit.committer)
            return

        # Author
        match = self.patterns['author'].match(line)
        if match:
            self.commit.author = Person()
            self.commit.author.name = match.group(1)
            self.commit.author.email = match.group(2)
            self.handler.author(self.commit.author)
            return

        # Commit date
        match = self.patterns['date'].match(line)
        if match:
            self.commit.date = datetime.datetime(
                *(time.strptime(match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))
            # datetime.datetime.strptime not supported by Python2.4
            #self.commit.date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")

            # match.group(2) represents the timezone. E.g. -0300, +0200, +0430 (Afghanistan)
            # This string will be parsed to int and recalculated into seconds (60 * 60)
            self.commit.date_tz = (((int(match.group(2))) * 60 * 60) / 100)
            return

        # Author date
        match = self.patterns['author_date'].match(line)
        if match:
            self.commit.author_date = datetime.datetime(
                *(time.strptime(match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))
            # datetime.datetime.strptime not supported by Python2.4
            #self.commit.author_date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")

            # match.group(2) represents the timezone. E.g. -0300, +0200, +0430 (Afghanistan)
            # This string will be parsed to int and recalculated into seconds (60 * 60)
            self.commit.author_date_tz = (((int(match.group(2))) * 60 * 60) / 100)
            return

        # File
        match = self.patterns['file'].match(line)
        if match:
            action = Action()
            type = match.group(1)
            if len(type) > 1:
                # merge actions
                if 'M' in type:
                    type = 'M'
                else:
                    # ignore merge actions without 'M'
                    return

            action.type = type
            action.f1 = match.group(2)

            self.commit.actions.append(action)
            self.handler.file(action.f1)
            return

        # File moved/copied
        match = self.patterns['file-moved'].match(line)
        if match:
            action = Action()
            type = match.group(1)
            if type == 'R':
                action.type = 'V'
            else:
                action.type = type
            action.f1 = match.group(3)
            action.f2 = match.group(2)
            action.rev = self.commit.revision

            self.commit.actions.append(action)
            self.handler.file(action.f1)

            return

        # This is a workaround for a bug in the GNOME Git migration
        # There are commits on tags not correctly detected like this one:
        # http://git.gnome.org/cgit/evolution/commit/?id=b8e52acac2b9fc5414a7795a73c74f7ee4eeb71f
        # We want to ignore commits on tags since it doesn't make any sense in Git
        if self.is_gnome:
            match = self.patterns['svn-tag'].match(line.strip())
            if match:
                printout("Warning: detected a commit on a svn tag: %s", (match.group(0),))
                tag = match.group(1)
                if self.commit.tags and tag in self.commit.tags:
                    # The commit will be ignored, so move the tag
                    # to the next (previous in history) commit
                    self.branch.tail.svn_tag = tag

        # Message
        self.commit.message += line + '\n'

        assert True, "Not match for line %s" % (line)
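
Note: the date handlers above convert the "+HHMM"/"-HHMM" timezone suffix to seconds with int(...) * 60 * 60 / 100, which is exact only for whole-hour offsets. A sketch of a conversion (illustrative only, not the project's code) that also covers offsets with a minutes component such as +0430:

def utc_offset_seconds(tz):
    # Convert a git-style "+HHMM"/"-HHMM" offset string into seconds,
    # handling the minutes component (e.g. "+0430") explicitly.
    sign = -1 if tz.startswith('-') else 1
    hours, minutes = int(tz[1:3]), int(tz[3:5])
    return sign * (hours * 3600 + minutes * 60)

print(utc_offset_seconds("+0200"))   # 7200
print(utc_offset_seconds("-0300"))   # -10800
print(utc_offset_seconds("+0430"))   # 16200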

Example 15

Project: edx2bigquery
Source File: analyze_content.py
View license
def analyze_course_content(course_id, 
                           listings_file=None,
                           basedir="X-Year-2-data-sql", 
                           datedir="2013-09-21", 
                           use_dataset_latest=False,
                           do_upload=False,
                           courses=None,
                           verbose=True,
                           pin_date=None,
                           ):
    '''
    Compute course_content table, which quantifies:

    - number of chapter, sequential, vertical modules
    - number of video modules
    - number of problem, *openended, mentoring modules
    - number of discussion, annotatable, word_cloud modules

    Do this using the course "xbundle" file, produced when the course axis is computed.

    Include only modules which had nontrivial use, to rule out staff-only and un-shown content. 
    Do the exclusion based on count of module appearing in the studentmodule table, based on 
    stats_module_usage for each course.

    Also, from the course listings file, compute the number of weeks the course was open.

    If do_upload (triggered by --force-recompute) then upload all accumulated data to the course report dataset 
    as the "stats_course_content" table.  Also generate a "course_summary_stats" table, stored in the
    course_report_ORG or course_report_latest dataset.  The course_summary_stats table combines
    data from many reports, including stats_course_content, the medians report, the listings file,
    broad_stats_by_course, and time_on_task_stats_by_course.
    
    '''

    if do_upload:
        if use_dataset_latest:
            org = "latest"
        else:
            org = courses[0].split('/',1)[0]	# extract org from first course_id in courses

        crname = 'course_report_%s' % org

        gspath = gsutil.gs_path_from_course_id(crname)
        gsfnp = gspath / CCDATA
        gsutil.upload_file_to_gs(CCDATA, gsfnp)
        tableid = "stats_course_content"
        dataset = crname

        mypath = os.path.dirname(os.path.realpath(__file__))
        SCHEMA_FILE = '%s/schemas/schema_content_stats.json' % mypath

        try:
            the_schema = json.loads(open(SCHEMA_FILE).read())[tableid]
        except Exception as err:
            print "Oops!  Failed to load schema file for %s.  Error: %s" % (tableid, str(err))
            raise

        if 0:
            bqutil.load_data_to_table(dataset, tableid, gsfnp, the_schema, wait=True, verbose=False,
                                      format='csv', skiprows=1)

        table = 'course_metainfo'
        course_tables = ',\n'.join([('[%s.course_metainfo]' % bqutil.course_id2dataset(x)) for x in courses])
        sql = "select * from {course_tables}".format(course_tables=course_tables)
        print "--> Creating %s.%s using %s" % (dataset, table, sql)

        if 1:
            metainfo_dataset = bqutil.get_bq_table(dataset, table, sql=sql, 
                                          newer_than=datetime.datetime(2015, 1, 16, 3, 0),
                                          )
            # bqutil.create_bq_table(dataset, table, sql, overwrite=True)


        #-----------------------------------------------------------------------------
        # make course_summary_stats table
        #
        # This is a combination of the broad_stats_by_course table (if that exists), and course_metainfo.
        # Also use (and create if necessary) the nregistered_by_wrap table.

        # get the broad_stats_by_course data
        bsbc = bqutil.get_table_data(dataset, 'broad_stats_by_course')

        table_list = bqutil.get_list_of_table_ids(dataset)

        latest_person_course = max([ x for x in table_list if x.startswith('person_course_')])
        print "Latest person_course table in %s is %s" % (dataset, latest_person_course)
        
        sql = """
                SELECT pc.course_id as course_id, 
                    cminfo.wrap_date as wrap_date,
                    count(*) as nregistered,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) nregistered_by_wrap,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) / nregistered * 100 nregistered_by_wrap_pct,
                FROM
                    [{dataset}.{person_course}] as pc
                left join (
                 SELECT course_id,
                      TIMESTAMP(concat(wrap_year, "-", wrap_month, '-', wrap_day, ' 23:59:59')) as wrap_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as wrap_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as wrap_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as wrap_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Course Wrap'
                 )) as cminfo
                on pc.course_id = cminfo.course_id
                
                group by course_id, wrap_date
                order by course_id
        """.format(dataset=dataset, person_course=latest_person_course)

        nr_by_wrap = bqutil.get_bq_table(dataset, 'nregistered_by_wrap', sql=sql, key={'name': 'course_id'})

        # rates for registrants before and during course
        
        sql = """
                SELECT 
                    *,
                    ncertified / nregistered * 100 as pct_certified_of_reg,
                    ncertified_and_registered_before_launch / nregistered_before_launch * 100 as pct_certified_reg_before_launch,
                    ncertified_and_registered_during_course / nregistered_during_course * 100 as pct_certified_reg_during_course,
                    ncertified / nregistered_by_wrap * 100 as pct_certified_of_reg_by_wrap,
                    ncertified / nviewed * 100 as pct_certified_of_viewed,
                    ncertified / nviewed_by_wrap * 100 as pct_certified_of_viewed_by_wrap,
                    ncertified_by_ewrap / nviewed_by_ewrap * 100 as pct_certified_of_viewed_by_ewrap,
                FROM
                (
                # ------------------------
                # get aggregate data
                SELECT pc.course_id as course_id, 
                    cminfo.wrap_date as wrap_date,
                    count(*) as nregistered,
                    sum(case when pc.certified then 1 else 0 end) ncertified,
                    sum(case when (TIMESTAMP(pc.cert_created_date) < cminfo.ewrap_date) and (pc.certified and pc.viewed) then 1 else 0 end) ncertified_by_ewrap,
                    sum(case when pc.viewed then 1 else 0 end) nviewed,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) nregistered_by_wrap,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) / nregistered * 100 nregistered_by_wrap_pct,
                    sum(case when (pc.start_time < cminfo.wrap_date) and pc.viewed then 1 else 0 end) nviewed_by_wrap,
                    sum(case when (pc.start_time < cminfo.ewrap_date) and pc.viewed then 1 else 0 end) nviewed_by_ewrap,
                    sum(case when pc.start_time < cminfo.launch_date then 1 else 0 end) nregistered_before_launch,
                    sum(case when pc.start_time < cminfo.launch_date 
                              and pc.certified
                              then 1 else 0 end) ncertified_and_registered_before_launch,
                    sum(case when (pc.start_time >= cminfo.launch_date) 
                              and (pc.start_time < cminfo.wrap_date) then 1 else 0 end) nregistered_during_course,
                    sum(case when (pc.start_time >= cminfo.launch_date) 
                              and (pc.start_time < cminfo.wrap_date) 
                              and pc.certified
                              then 1 else 0 end) ncertified_and_registered_during_course,
                FROM
                    [{dataset}.{person_course}] as pc
                left join (
                
                # --------------------
                #  get course launch and wrap dates from course_metainfo

       SELECT AA.course_id as course_id, 
              AA.wrap_date as wrap_date,
              AA.launch_date as launch_date,
              BB.ewrap_date as ewrap_date,
       FROM (
               #  inner get course launch and wrap dates from course_metainfo
                SELECT A.course_id as course_id,
                  A.wrap_date as wrap_date,
                  B.launch_date as launch_date,
                from
                (
                 SELECT course_id,
                      TIMESTAMP(concat(wrap_year, "-", wrap_month, '-', wrap_day, ' 23:59:59')) as wrap_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as wrap_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as wrap_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as wrap_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Course Wrap'
                 )
                ) as A
                left outer join 
                (
                 SELECT course_id,
                      TIMESTAMP(concat(launch_year, "-", launch_month, '-', launch_day)) as launch_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as launch_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as launch_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as launch_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Course Launch'
                 )
                ) as B
                on A.course_id = B.course_id 
                # end inner course_metainfo subquery
            ) as AA
            left outer join
            (
                 SELECT course_id,
                      TIMESTAMP(concat(wrap_year, "-", wrap_month, '-', wrap_day, ' 23:59:59')) as ewrap_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as wrap_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as wrap_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as wrap_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Empirical Course Wrap'
                 )
            ) as BB
            on AA.course_id = BB.course_id

                # end course_metainfo subquery
                # --------------------
                
                ) as cminfo
                on pc.course_id = cminfo.course_id
                
                group by course_id, wrap_date
                order by course_id
                # ---- end get aggregate data
                )
                order by course_id
        """.format(dataset=dataset, person_course=latest_person_course)

        print "--> Assembling course_summary_stats from %s" % 'stats_cert_rates_by_registration'
        sys.stdout.flush()
        cert_by_reg = bqutil.get_bq_table(dataset, 'stats_cert_rates_by_registration', sql=sql, 
                                          newer_than=datetime.datetime(2015, 1, 16, 3, 0),
                                          key={'name': 'course_id'})

        # start assembling course_summary_stats

        c_sum_stats = defaultdict(OrderedDict)
        for entry in bsbc['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            cmci.update(entry)
            cnbw = nr_by_wrap['data_by_key'][course_id]
            nbw = int(cnbw['nregistered_by_wrap'])
            cmci['nbw_wrap_date'] = cnbw['wrap_date']
            cmci['nregistered_by_wrap'] = nbw
            cmci['nregistered_by_wrap_pct'] = cnbw['nregistered_by_wrap_pct']
            cmci['frac_female'] = float(entry['n_female_viewed']) / (float(entry['n_male_viewed']) + float(entry['n_female_viewed']))
            ncert = float(cmci['certified_sum'])
            if ncert:
                cmci['certified_of_nregistered_by_wrap_pct'] = nbw / ncert * 100.0
            else:
                cmci['certified_of_nregistered_by_wrap_pct'] = None
            cbr = cert_by_reg['data_by_key'][course_id]
            for field, value in cbr.items():
                cmci['cbr_%s' % field] = value

        # add medians for viewed, explored, and certified

        msbc_tables = {'msbc_viewed': "viewed_median_stats_by_course",
                       'msbc_explored': 'explored_median_stats_by_course',
                       'msbc_certified': 'certified_median_stats_by_course',
                       'msbc_verified': 'verified_median_stats_by_course',
                       }
        for prefix, mtab in msbc_tables.items():
            print "--> Merging median stats data from %s" % mtab
            sys.stdout.flush()
            bqdat = bqutil.get_table_data(dataset, mtab)
            for entry in bqdat['data']:
                course_id = entry['course_id']
                cmci = c_sum_stats[course_id]
                for field, value in entry.items():
                    cmci['%s_%s' % (prefix, field)] = value

        # add time on task data

        tot_table = "time_on_task_stats_by_course"
        prefix = "ToT"
        print "--> Merging time on task data from %s" % tot_table
        sys.stdout.flush()
        try:
            bqdat = bqutil.get_table_data(dataset, tot_table)
        except Exception as err:
            bqdat = {'data': {}}
        for entry in bqdat['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            for field, value in entry.items():
                if field=='course_id':
                    continue
                cmci['%s_%s' % (prefix, field)] = value

        # add serial time on task data

        tot_table = "time_on_task_serial_stats_by_course"
        prefix = "SToT"
        print "--> Merging serial time on task data from %s" % tot_table
        sys.stdout.flush()
        try:
            bqdat = bqutil.get_table_data(dataset, tot_table)
        except Exception as err:
            bqdat = {'data': {}}
        for entry in bqdat['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            for field, value in entry.items():
                if field=='course_id':
                    continue
                cmci['%s_%s' % (prefix, field)] = value

        # add show_answer stats

        tot_table = "show_answer_stats_by_course"
        prefix = "SAS"
        print "--> Merging show_answer stats data from %s" % tot_table
        sys.stdout.flush()
        try:
            bqdat = bqutil.get_table_data(dataset, tot_table)
        except Exception as err:
            bqdat = {'data': {}}
        for entry in bqdat['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            for field, value in entry.items():
                if field=='course_id':
                    continue
                cmci['%s_%s' % (prefix, field)] = value

        # setup list of keys, for CSV output

        css_keys = c_sum_stats.values()[0].keys()

        # retrieve course_metainfo table, pivot, add that to summary_stats

        print "--> Merging course_metainfo from %s" % table
        sys.stdout.flush()
        bqdat = bqutil.get_table_data(dataset, table)

        listings_keys = map(make_key, ["Institution", "Semester", "New or Rerun", "Andrew Recodes New/Rerun", 
                                       "Course Number", "Short Title", "Andrew's Short Titles", "Title", 
                                       "Instructors", "Registration Open", "Course Launch", "Course Wrap", "course_id",
                                       "Empirical Course Wrap", "Andrew's Order", "certifies", "MinPassGrade",
                                       '4-way Category by name', "4-way (CS, STEM, HSocSciGov, HumHistRel)"
                                       ])
        listings_keys.reverse()
        
        for lk in listings_keys:
            css_keys.insert(1, "listings_%s" % lk)

        COUNTS_TO_KEEP = ['discussion', 'problem', 'optionresponse', 'checkboxgroup', 'optioninput', 
                          'choiceresponse', 'video', 'choicegroup', 'vertical', 'choice', 'sequential', 
                          'multiplechoiceresponse', 'numericalresponse', 'chapter', 'solution', 'img', 
                          'formulaequationinput', 'responseparam', 'selfassessment', 'track', 'task', 'rubric', 
                          'stringresponse', 'combinedopenended', 'description', 'textline', 'prompt', 'category', 
                          'option', 'lti', 'annotationresponse', 
                          'annotatable', 'colgroup', 'tag_prompt', 'comment', 'annotationinput', 'image', 
                          'options', 'comment_prompt', 'conditional', 
                          'answer', 'poll_question', 'section', 'wrapper', 'map', 'area', 
                          'customtag', 'transcript', 
                          'split_test', 'word_cloud', 
                          'openended', 'openendedparam', 'answer_display', 'code', 
                          'drag_and_drop_input', 'customresponse', 'draggable', 'mentoring', 
                          'textannotation', 'imageannotation', 'videosequence', 
                          'feedbackprompt', 'assessments', 'openassessment', 'assessment', 'explanation', 'criterion']

        for entry in bqdat['data']:
            thekey = make_key(entry['key'])
            # if thekey.startswith('count_') and thekey[6:] not in COUNTS_TO_KEEP:
            #     continue
            if thekey.startswith('listings_') and thekey[9:] not in listings_keys:
                # print "dropping key=%s for course_id=%s" % (thekey, entry['course_id'])
                continue
            c_sum_stats[entry['course_id']][thekey] = entry['value']
            #if 'certifies' in thekey:
            #    print "course_id=%s, key=%s, value=%s" % (entry['course_id'], thekey, entry['value'])
            if thekey not in css_keys:
                css_keys.append(thekey)

        # compute forum_posts_per_week
        for course_id, entry in c_sum_stats.items():
            nfps = entry.get('nforum_posts_sum', 0)
            if nfps:
                fppw = int(nfps) / float(entry['nweeks'])
                entry['nforum_posts_per_week'] = fppw
                print "    course: %s, assessments_per_week=%s, forum_posts_per_week=%s" % (course_id, entry['total_assessments_per_week'], fppw)
            else:
                entry['nforum_posts_per_week'] = None
        css_keys.append('nforum_posts_per_week')

        # read in listings file and merge that in also
        if listings_file:
            if listings_file.endswith('.csv'):
                listings = csv.DictReader(open(listings_file))
            else:
                listings = [ json.loads(x) for x in open(listings_file) ]
            for entry in listings:
                course_id = entry['course_id']
                if course_id not in c_sum_stats:
                    continue
                cmci = c_sum_stats[course_id]
                for field, value in entry.items():
                    lkey = "listings_%s" % make_key(field)
                    if not (lkey in cmci) or (not cmci[lkey]):
                        cmci[lkey] = value

        print "Storing these fields: %s" % css_keys

        # get schema
        mypath = os.path.dirname(os.path.realpath(__file__))
        the_schema = json.loads(open('%s/schemas/schema_combined_course_summary_stats.json' % mypath).read())
        schema_dict = { x['name'] : x for x in the_schema }

        # write out CSV
        css_table = "course_summary_stats"
        ofn = "%s__%s.csv" % (dataset, css_table)
        ofn2 = "%s__%s.json" % (dataset, css_table)
        print "Writing data to %s and %s" % (ofn, ofn2)

        ofp = open(ofn, 'w')
        ofp2 = open(ofn2, 'w')
        dw = csv.DictWriter(ofp, fieldnames=css_keys)
        dw.writeheader()
        for cid, entry in c_sum_stats.items():
            for ek in entry:
                if ek not in schema_dict:
                    entry.pop(ek)
                # entry[ek] = str(entry[ek])	# coerce to be string
            ofp2.write(json.dumps(entry) + "\n")
            for key in css_keys:
                if key not in entry:
                    entry[key] = None
            dw.writerow(entry)
        ofp.close()
        ofp2.close()

        # upload to bigquery
        # the_schema = [ { 'type': 'STRING', 'name': x } for x in css_keys ]
        if 1:
            gsfnp = gspath / dataset / (css_table + ".json")
            gsutil.upload_file_to_gs(ofn2, gsfnp)
            # bqutil.load_data_to_table(dataset, css_table, gsfnp, the_schema, wait=True, verbose=False,
            #                           format='csv', skiprows=1)
            bqutil.load_data_to_table(dataset, css_table, gsfnp, the_schema, wait=True, verbose=False)

        return

    
    print "-"*60 + " %s" % course_id

    # get nweeks from listings
    lfn = path(listings_file)
    if not lfn.exists():
        print "[analyze_content] course listings file %s doesn't exist!" % lfn
        return

    data = None
    if listings_file.endswith('.json'):
        data_feed = map(json.loads, open(lfn))
    else:
        data_feed = csv.DictReader(open(lfn))
    for k in data_feed:
        if not 'course_id' in k:
            print "Strange course listings row, no course_id in %s" % k
            raise Exception("Missing course_id")
        if k['course_id']==course_id:
            data = k
            break

    if not data:
        print "[analyze_content] no entry for %s found in course listings file %s!" % (course_id, lfn)
        return

    def date_parse(field):
        (m, d, y) = map(int, data[field].split('/'))
        return datetime.datetime(y, m, d)

    launch = date_parse('Course Launch')
    wrap = date_parse('Course Wrap')
    ndays = (wrap - launch).days
    nweeks = ndays / 7.0

    print "Course length = %6.2f weeks (%d days)" % (nweeks, ndays)

    if pin_date:
        datedir = pin_date
    course_dir = find_course_sql_dir(course_id, basedir, datedir, use_dataset_latest and not pin_date)
    cfn = gsutil.path_from_course_id(course_id)

    xbfn = course_dir / ("xbundle_%s.xml" % cfn)
    
    if not xbfn.exists():
        print "[analyze_content] cannot find xbundle file %s for %s!" % (xbfn, course_id)

        if use_dataset_latest:
            # try looking in earlier directories for xbundle file
            import glob
            spath = course_dir / ("../*/xbundle_%s.xml" % cfn)
            files = list(glob.glob(spath))
            if files:
                xbfn = path(files[-1])
            if not xbfn.exists():
                print "   --> also cannot find any %s ; aborting!" % spath
            else:
                print "   --> Found and using instead: %s " % xbfn
        if not xbfn.exists():
            raise Exception("[analyze_content] missing xbundle file %s" % xbfn)

    # if there is an xbundle*.fixed file, use that instead of the normal one
    if os.path.exists(str(xbfn) + ".fixed"):
        xbfn = path(str(xbfn) + ".fixed")

    print "[analyze_content] For %s using %s" % (course_id, xbfn)
    
    # get module usage data
    mudata = get_stats_module_usage(course_id, basedir, datedir, use_dataset_latest)

    xml = etree.parse(open(xbfn)).getroot()
    
    counts = defaultdict(int)
    nexcluded = defaultdict(int)

    IGNORE = ['html', 'p', 'div', 'iframe', 'ol', 'li', 'ul', 'blockquote', 'h1', 'em', 'b', 'h2', 'h3', 'body', 'span', 'strong',
              'a', 'sub', 'strike', 'table', 'td', 'tr', 's', 'tbody', 'sup', 'sub', 'strike', 'i', 's', 'pre', 'policy', 'metadata',
              'grading_policy', 'br', 'center',  'wiki', 'course', 'font', 'tt', 'it', 'dl', 'startouttext', 'endouttext', 'h4', 
              'head', 'source', 'dt', 'hr', 'u', 'style', 'dd', 'script', 'th', 'p', 'P', 'TABLE', 'TD', 'small', 'text', 'title']

    problem_stats = defaultdict(int)

    def does_problem_have_random_script(problem):
        '''
        return 1 if problem has a script with "random." in it
        else return 0
        '''
        for elem in problem.findall('.//script'):
            if elem.text and ('random.' in elem.text):
                return 1
        return 0

    # walk through xbundle 
    def walk_tree(elem, policy=None):
        '''
        Walk XML tree recursively.
        elem = current element
        policy = dict of attributes for children to inherit, with fields like due, graded, showanswer
        '''
        policy = policy or {}
        if  type(elem.tag)==str and (elem.tag.lower() not in IGNORE):
            counts[elem.tag.lower()] += 1
        if elem.tag in ["sequential", "problem", "problemset", "course", "chapter"]:	# very old courses may use inheritance from course & chapter
            keys = ["due", "graded", "format", "showanswer", "start"]
            for k in keys:		# copy inheritable attributes, if they are specified
                val = elem.get(k)
                if val:
                    policy[k] = val
        if elem.tag=="problem":	# accumulate statistics about problems: how many have show_answer = [past_due, closed] ?  have random. in script?
            problem_stats['n_capa_problems'] += 1
            if policy.get('showanswer'):
                problem_stats["n_showanswer_%s" % policy.get('showanswer')] += 1
            else:
                problem_stats['n_shownanswer_finished'] += 1	# DEFAULT showanswer = finished  (make sure this remains true)
                # see https://github.com/edx/edx-platform/blob/master/common/lib/xmodule/xmodule/capa_base.py#L118
                # finished = Show the answer after the student has answered the problem correctly, the student has no attempts left, or the problem due date has passed.
            problem_stats['n_random_script'] += does_problem_have_random_script(elem)

            if policy.get('graded')=='true' or policy.get('graded')=='True':
                problem_stats['n_capa_problems_graded'] += 1
                problem_stats['n_graded_random_script'] += does_problem_have_random_script(elem)
                if policy.get('showanswer'):
                    problem_stats["n_graded_showanswer_%s" % policy.get('showanswer')] += 1
                else:
                    problem_stats['n_graded_shownanswer_finished'] += 1	# DEFAULT showanswer = finished  (make sure this remains true)
            
        for k in elem:
            midfrag = (k.tag, k.get('url_name_orig', None))
            if (midfrag in mudata) and int(mudata[midfrag]['ncount']) < 20:
                nexcluded[k.tag] += 1
                if verbose:
                    try:
                        print "    -> excluding %s (%s), ncount=%s" % (k.get('display_name', '<no_display_name>').encode('utf8'), 
                                                                       midfrag, 
                                                                       mudata.get(midfrag, {}).get('ncount'))
                    except Exception as err:
                        print "    -> excluding ", k
                continue
            walk_tree(k, policy.copy())

    walk_tree(xml)
    print "--> Count of individual element tags throughout XML: ", counts
    
    print "--> problem_stats:", json.dumps(problem_stats, indent=4)

    # combine some into "qual_axis" and others into "quant_axis"
    qual_axis = ['openassessment', 'optionresponse', 'multiplechoiceresponse', 
                 # 'discussion', 
                 'choiceresponse', 'word_cloud', 
                 'combinedopenended', 'choiceresponse', 'stringresponse', 'textannotation', 'openended', 'lti']
    quant_axis = ['formularesponse', 'numericalresponse', 'customresponse', 'symbolicresponse', 'coderesponse',
                  'imageresponse']

    nqual = 0
    nquant = 0
    for tag, count in counts.items():
        if tag in qual_axis:
            nqual += count
        if tag in quant_axis:
            nquant += count
    
    print "nqual=%d, nquant=%d" % (nqual, nquant)

    nqual_per_week = nqual / nweeks
    nquant_per_week = nquant / nweeks
    total_per_week = nqual_per_week + nquant_per_week

    print "per week: nqual=%6.2f, nquant=%6.2f total=%6.2f" % (nqual_per_week, nquant_per_week, total_per_week)

    # save this overall data in CCDATA
    lock_file(CCDATA)
    ccdfn = path(CCDATA)
    ccd = {}
    if ccdfn.exists():
        for k in csv.DictReader(open(ccdfn)):
            ccd[k['course_id']] = k
    
    ccd[course_id] = {'course_id': course_id,
                      'nweeks': nweeks,
                      'nqual_per_week': nqual_per_week,
                      'nquant_per_week': nquant_per_week,
                      'total_assessments_per_week' : total_per_week,
                      }

    # fields = ccd[ccd.keys()[0]].keys()
    fields = ['course_id', 'nquant_per_week', 'total_assessments_per_week', 'nqual_per_week', 'nweeks']
    cfp = open(ccdfn, 'w')
    dw = csv.DictWriter(cfp, fieldnames=fields)
    dw.writeheader()
    for cid, entry in ccd.items():
        dw.writerow(entry)
    cfp.close()
    lock_file(CCDATA, release=True)

    # store data in course_metainfo table, which has one (course_id, key, value) on each line
    # keys include nweeks, nqual, nquant, count_* for module types *

    cmfields = OrderedDict()
    cmfields['course_id'] = course_id
    cmfields['course_length_days'] = str(ndays)
    cmfields.update({ make_key('listings_%s' % key) : value for key, value in data.items() })	# from course listings
    cmfields.update(ccd[course_id].copy())

    # cmfields.update({ ('count_%s' % key) : str(value) for key, value in counts.items() })	# from content counts

    cmfields['filename_xbundle'] = xbfn
    cmfields['filename_listings'] = lfn

    for key in sorted(counts):	# store counts in sorted order, so that the later generated CSV file can have a predictable structure
        value = counts[key]
        cmfields['count_%s' % key] =  str(value) 	# from content counts

    for key in sorted(problem_stats):	# store problem stats
        value = problem_stats[key]
        cmfields['problem_stat_%s' % key] =  str(value)

    cmfields.update({ ('nexcluded_sub_20_%s' % key) : str(value) for key, value in nexcluded.items() })	# from content counts

    course_dir = find_course_sql_dir(course_id, basedir, datedir, use_dataset_latest)
    csvfn = course_dir / CMINFO

    # manual overriding of the automatically computed fields can be done by storing course_id,key,value data
    # in the CMINFO_OVERRIDES file

    csvfn_overrides = course_dir / CMINFO_OVERRIDES
    if csvfn_overrides.exists():
        print "--> Loading manual override information from %s" % csvfn_overrides
        for ovent in csv.DictReader(open(csvfn_overrides)):
            if not ovent['course_id']==course_id:
                print "===> ERROR! override file has entry with wrong course_id: %s" % ovent
                continue
            print "    overriding key=%s with value=%s" % (ovent['key'], ovent['value'])
            cmfields[ovent['key']] = ovent['value']

    print "--> Course metainfo writing to %s" % csvfn

    fp = open(csvfn, 'w')

    cdw = csv.DictWriter(fp, fieldnames=['course_id', 'key', 'value'])
    cdw.writeheader()

    for k, v in cmfields.items():
        cdw.writerow({'course_id': course_id, 'key': k, 'value': v})
        
    fp.close()

    # build and output course_listings_and_metainfo 

    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)

    mypath = os.path.dirname(os.path.realpath(__file__))
    clm_table = "course_listing_and_metainfo"
    clm_schema_file = '%s/schemas/schema_%s.json' % (mypath, clm_table)
    clm_schema = json.loads(open(clm_schema_file).read())

    clm = {}
    for finfo in clm_schema:
        field = finfo['name']
        clm[field] = cmfields.get(field)
    clm_fnb = clm_table + ".json"
    clm_fn = course_dir / clm_fnb
    open(clm_fn, 'w').write(json.dumps(clm))

    gsfnp = gsutil.gs_path_from_course_id(course_id, use_dataset_latest=use_dataset_latest) / clm_fnb
    print "--> Course listing + metainfo uploading to %s then to %s.%s" % (gsfnp, dataset, clm_table)
    sys.stdout.flush()
    gsutil.upload_file_to_gs(clm_fn, gsfnp)
    bqutil.load_data_to_table(dataset, clm_table, gsfnp, clm_schema, wait=True, verbose=False)

    # output course_metainfo

    table = 'course_metainfo'
    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)

    gsfnp = gsutil.gs_path_from_course_id(course_id, use_dataset_latest=use_dataset_latest) / CMINFO
    print "--> Course metainfo uploading to %s then to %s.%s" % (gsfnp, dataset, table)
    sys.stdout.flush()

    gsutil.upload_file_to_gs(csvfn, gsfnp)

    mypath = os.path.dirname(os.path.realpath(__file__))
    SCHEMA_FILE = '%s/schemas/schema_course_metainfo.json' % mypath
    the_schema = json.loads(open(SCHEMA_FILE).read())[table]

    bqutil.load_data_to_table(dataset, table, gsfnp, the_schema, wait=True, verbose=False, format='csv', skiprows=1)
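
The only datetime.datetime use in this example is the date_parse helper: listing dates arrive as M/D/Y strings, get turned into datetime objects, and their difference gives the course length in days and weeks. A minimal standalone sketch of that pattern, with made-up dates standing in for the real 'Course Launch' / 'Course Wrap' listing fields:

import datetime

def parse_mdy(s):
    # "9/4/2012" -> datetime.datetime(2012, 9, 4, 0, 0)
    m, d, y = map(int, s.split('/'))
    return datetime.datetime(y, m, d)

launch = parse_mdy('9/4/2012')     # stands in for data['Course Launch']
wrap   = parse_mdy('12/21/2012')   # stands in for data['Course Wrap']
ndays  = (wrap - launch).days
nweeks = ndays / 7.0
print("Course length = %6.2f weeks (%d days)" % (nweeks, ndays))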

Example 16

Project: hortonworks-sandbox
Source File: 0001_initial.py
View license
    def forwards(self, orm):

        # Adding model 'Job'
        db.create_table('oozie_job', (
            ('is_shared', self.gf('django.db.models.fields.BooleanField')(default=False, db_index=True, blank=True)),
            ('description', self.gf('django.db.models.fields.CharField')(max_length=1024, blank=True)),
            ('parameters', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('deployment_dir', self.gf('django.db.models.fields.CharField')(max_length=1024, blank=True)),
            ('schema_version', self.gf('django.db.models.fields.CharField')(max_length=128)),
            ('last_modified', self.gf('django.db.models.fields.DateTimeField')(auto_now=True, db_index=True, blank=True)),
            ('owner', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['auth.User'])),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=40)),
        ))
        db.send_create_signal('oozie', ['Job'])

        # Adding model 'Workflow'
        db.create_table('oozie_workflow', (
            ('job_xml', self.gf('django.db.models.fields.CharField')(default='', max_length=512, blank=True)),
            ('end', self.gf('django.db.models.fields.related.ForeignKey')(blank=True, related_name='end_workflow', null=True, to=orm['oozie.Node'])),
            ('is_single', self.gf('django.db.models.fields.BooleanField')(default=False, blank=True)),
            ('job_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Job'], unique=True, primary_key=True)),
            ('job_properties', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('start', self.gf('django.db.models.fields.related.ForeignKey')(blank=True, related_name='start_workflow', null=True, to=orm['oozie.Node'])),
        ))
        db.send_create_signal('oozie', ['Workflow'])

        # Adding model 'Link'
        db.create_table('oozie_link', (
            ('comment', self.gf('django.db.models.fields.CharField')(default='', max_length=1024, blank=True)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=40)),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('parent', self.gf('django.db.models.fields.related.ForeignKey')(related_name='child_node', to=orm['oozie.Node'])),
            ('child', self.gf('django.db.models.fields.related.ForeignKey')(related_name='parent_node', to=orm['oozie.Node'])),
        ))
        db.send_create_signal('oozie', ['Link'])

        # Adding model 'Node'
        db.create_table('oozie_node', (
            ('description', self.gf('django.db.models.fields.CharField')(default='', max_length=1024, blank=True)),
            ('workflow', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['oozie.Workflow'])),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('node_type', self.gf('django.db.models.fields.CharField')(max_length=64)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=40)),
        ))
        db.send_create_signal('oozie', ['Node'])

        # Adding model 'Mapreduce'
        db.create_table('oozie_mapreduce', (
            ('files', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('job_xml', self.gf('django.db.models.fields.CharField')(default='', max_length=512, blank=True)),
            ('jar_path', self.gf('django.db.models.fields.CharField')(max_length=512)),
            ('job_properties', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('archives', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True)),
            ('prepares', self.gf('django.db.models.fields.TextField')(default='[]')),
        ))
        db.send_create_signal('oozie', ['Mapreduce'])

        # Adding model 'Streaming'
        db.create_table('oozie_streaming', (
            ('files', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('mapper', self.gf('django.db.models.fields.CharField')(max_length=512)),
            ('reducer', self.gf('django.db.models.fields.CharField')(max_length=512)),
            ('job_properties', self.gf('django.db.models.fields.TextField')(default='[{"name":"oozie.use.system.libpath","value":"true"}]')),
            ('archives', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
        ))
        db.send_create_signal('oozie', ['Streaming'])

        # Adding model 'Java'
        db.create_table('oozie_java', (
            ('files', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('job_xml', self.gf('django.db.models.fields.CharField')(default='', max_length=512, blank=True)),
            ('jar_path', self.gf('django.db.models.fields.CharField')(max_length=512)),
            ('java_opts', self.gf('django.db.models.fields.CharField')(max_length=256, blank=True)),
            ('args', self.gf('django.db.models.fields.CharField')(max_length=4096, blank=True)),
            ('job_properties', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('prepares', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('archives', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
            ('main_class', self.gf('django.db.models.fields.CharField')(max_length=256)),
        ))
        db.send_create_signal('oozie', ['Java'])

        # Adding model 'Pig'
        db.create_table('oozie_pig', (
            ('files', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('job_xml', self.gf('django.db.models.fields.CharField')(default='', max_length=512, blank=True)),
            ('job_properties', self.gf('django.db.models.fields.TextField')(default='[{"name":"oozie.use.system.libpath","value":"true"}]')),
            ('params', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('archives', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
            ('prepares', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('script_path', self.gf('django.db.models.fields.CharField')(max_length=256)),
        ))
        db.send_create_signal('oozie', ['Pig'])

        # Adding model 'Start'
        db.create_table('oozie_start', (
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True)),
        ))
        db.send_create_signal('oozie', ['Start'])

        # Adding model 'End'
        db.create_table('oozie_end', (
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
        ))
        db.send_create_signal('oozie', ['End'])

        # Adding model 'Kill'
        db.create_table('oozie_kill', (
            ('message', self.gf('django.db.models.fields.CharField')(default='Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]', max_length=256)),
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
        ))
        db.send_create_signal('oozie', ['Kill'])

        # Adding model 'Fork'
        db.create_table('oozie_fork', (
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
        ))
        db.send_create_signal('oozie', ['Fork'])

        # Adding model 'Join'
        db.create_table('oozie_join', (
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
        ))
        db.send_create_signal('oozie', ['Join'])

        # Adding model 'Coordinator'
        db.create_table('oozie_coordinator', (
            ('end', self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime(2012, 9, 7, 15, 12, 23, 992784))),
            ('concurrency', self.gf('django.db.models.fields.PositiveSmallIntegerField')(null=True, blank=True)),
            ('frequency_number', self.gf('django.db.models.fields.SmallIntegerField')(default=1)),
            ('workflow', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['oozie.Workflow'], null=True)),
            ('job_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Job'], unique=True, primary_key=True)),
            ('frequency_unit', self.gf('django.db.models.fields.CharField')(default='days', max_length=20)),
            ('start', self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime(2012, 9, 4, 15, 12, 23, 992735))),
            ('timeout', self.gf('django.db.models.fields.SmallIntegerField')(null=True, blank=True)),
            ('timezone', self.gf('django.db.models.fields.CharField')(default='America/Los_Angeles', max_length=24)),
            ('throttle', self.gf('django.db.models.fields.PositiveSmallIntegerField')(null=True, blank=True)),
            ('execution', self.gf('django.db.models.fields.CharField')(max_length=10, null=True, blank=True)),
        ))
        db.send_create_signal('oozie', ['Coordinator'])

        # Adding model 'Dataset'
        db.create_table('oozie_dataset', (
            ('description', self.gf('django.db.models.fields.CharField')(default='', max_length=1024, blank=True)),
            ('frequency_number', self.gf('django.db.models.fields.SmallIntegerField')(default=1)),
            ('coordinator', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['oozie.Coordinator'])),
            ('frequency_unit', self.gf('django.db.models.fields.CharField')(default='days', max_length=20)),
            ('uri', self.gf('django.db.models.fields.CharField')(default='/data/${YEAR}${MONTH}${DAY}', max_length=1024)),
            ('start', self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime(2012, 9, 4, 15, 12, 23, 993608))),
            ('timezone', self.gf('django.db.models.fields.CharField')(default='America/Los_Angeles', max_length=24)),
            ('done_flag', self.gf('django.db.models.fields.CharField')(default='', max_length=64, blank=True)),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=40)),
        ))
        db.send_create_signal('oozie', ['Dataset'])

        # Adding model 'DataInput'
        db.create_table('oozie_datainput', (
            ('coordinator', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['oozie.Coordinator'])),
            ('dataset', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Dataset'], unique=True)),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=40)),
        ))
        db.send_create_signal('oozie', ['DataInput'])

        # Adding model 'DataOutput'
        db.create_table('oozie_dataoutput', (
            ('coordinator', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['oozie.Coordinator'])),
            ('dataset', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Dataset'], unique=True)),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=40)),
        ))
        db.send_create_signal('oozie', ['DataOutput'])

        # Adding model 'History'
        db.create_table('oozie_history', (
            ('submission_date', self.gf('django.db.models.fields.DateTimeField')(auto_now=True, db_index=True, blank=True)),
            ('job', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['oozie.Job'])),
            ('properties', self.gf('django.db.models.fields.TextField')()),
            ('oozie_job_id', self.gf('django.db.models.fields.CharField')(max_length=128)),
            ('submitter', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['auth.User'])),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
        ))
        db.send_create_signal('oozie', ['History'])
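
In this migration, datetime.datetime only appears in column defaults: the Coordinator and Dataset tables get literal timestamps that South froze into the migration when it was generated. The 'end' default sits three days after 'start', which looks like a now() / now()-plus-three-days expression captured at generation time, though the migration itself does not say so. A tiny sketch just showing those literals and their spacing:

import datetime

start = datetime.datetime(2012, 9, 4, 15, 12, 23, 992735)   # Coordinator 'start' default
end   = datetime.datetime(2012, 9, 7, 15, 12, 23, 992784)   # Coordinator 'end' default
print(end - start)   # 3 days, 0:00:00.000049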

Example 17

Project: PySAR
Source File: tsviewer.py
View license
def main(argv):

  #default settings
  markerSize=16
  markerSize2=16
  markerColor='g'
  markerColor2='red'
  lineWidth=2
  fontSize=16
  unit='cm'
  Save_timeseries='no'
  dispTsFig='yes'
  dispVelFig='yes'
  dispContour='only'
  contour_step=200
  smoothContour='no'
  radius=0;
  edgeWidth=1.5
  fig_dpi=300

  if len(sys.argv)>2:
    try:
      opts, args = getopt.getopt(argv,"f:F:v:a:b:s:m:c:w:u:l:h:S:D:C:V:t:T:d:r:x:y:P:p:")
    except getopt.GetoptError:
      Usage() ; sys.exit(1)
 
    for opt,arg in opts:
      if   opt == '-f':     timeSeriesFile = arg
      elif opt == '-F':     timeSeriesFile_2 = arg
      elif opt == '-v':     velocityFile = arg
      elif opt == '-a':     vmin = float(arg)
      elif opt == '-b':     vmax = float(arg)
      elif opt == '-s':     fontSize = int(arg)
      elif opt == '-m':     markerSize=int(arg);       markerSize2=int(arg)
      elif opt == '-S':     Save_timeseries=arg
      elif opt == '-c':     markerColor=arg
      elif opt == '-w':     lineWidth=int(arg)
      elif opt == '-u':     unit=arg
      elif opt == '-l':     lbound=float(arg)
      elif opt == '-h':     hbound=float(arg)
      elif opt == '-D':     demFile=arg
      elif opt == '-C':     dispContour=arg
      elif opt == '-V':     contour_step=float(arg)
      elif opt == '-t':     minDate=arg
      elif opt == '-T':     maxDate=arg
      elif opt == '-d':     datesNot2show = arg.split()
      elif opt == '-r':     radius=abs(int(arg))
      elif opt == '-x':     xsub = [int(i) for i in arg.split(':')];   xsub.sort();   dispVelFig='no'
      elif opt == '-y':     ysub = [int(i) for i in arg.split(':')];   ysub.sort();   dispVelFig='no'
      elif opt == '-P':     dispTsFig=arg
      elif opt == '-p':     dispVelFig=arg


  elif len(sys.argv)==2:
    if argv[0]=='-h':
       Usage(); sys.exit(1)
    elif os.path.isfile(argv[0]):
       timeSeriesFile = argv[0]
       h5timeseries = h5py.File(timeSeriesFile)
       if not 'timeseries' in h5timeseries.keys():
          print 'ERROR'
          Usage(); sys.exit(1)
    else:  Usage(); sys.exit(1)
  elif len(sys.argv)<2:
    Usage(); sys.exit(1)

  if   unit in ('m','M'):              unitFac=1
  elif unit in ('cm','Cm','CM'):       unitFac=100
  elif unit in ('mm','Mm','MM','mM'):  unitFac=1000
  else:
     print 'Warning:'
     print 'wrong unit input!'
     print 'cm is considered to display the displacement'

##############################################################
# Read time series file info

  if not os.path.isfile(timeSeriesFile):
     Usage();sys.exit(1)

  h5timeseries = h5py.File(timeSeriesFile)
  if not 'timeseries' in h5timeseries.keys():
     Usage(); sys.exit(1)
 
  dateList1 = h5timeseries['timeseries'].keys()

##############################################################
# Dates to show time series plot

  import matplotlib.dates as mdates
  years    = mdates.YearLocator()   # every year
  months   = mdates.MonthLocator()  # every month
  yearsFmt = mdates.DateFormatter('%Y')

  print '*******************'
  print 'All dates existed:'
  print dateList1
  print '*******************'

  try:
     datesNot2show
     print 'dates not to show: '+str(datesNot2show)
  except:  datesNot2show=[]

  try:
    minDate
    minDateyy=yyyymmdd2years(minDate)
    print 'minimum date: '+minDate
    for date in dateList1:
       yy=yyyymmdd2years(date)
       if yy < minDateyy:
           datesNot2show.append(date)
  except:  pass
  try:
    maxDate
    maxDateyy=yyyymmdd2years(maxDate)
    print 'maximum date: '+maxDate
    for date in dateList1:
       yy=yyyymmdd2years(date)
       if yy > maxDateyy:
           datesNot2show.append(date)
  except:  pass

  try:
     dateList=[]
     for date in dateList1:
        if date not in datesNot2show:
           dateList.append(date)
     print '--------------------------------------------'
     print 'dates used to show time series displacements:'
     print dateList
     print '--------------------------------------------'
  except:
     dateList=dateList1
     print 'using all dates to show time series displacement'

###################################################################
# Date info

  dateIndex={}
  for ni in range(len(dateList)):
     dateIndex[dateList[ni]]=ni
  tbase=[]
  d1 = datetime.datetime(*time.strptime(dateList[0],"%Y%m%d")[0:5])

  for ni in range(len(dateList)):
     d2 = datetime.datetime(*time.strptime(dateList[ni],"%Y%m%d")[0:5])
     diff = d2-d1
     tbase.append(diff.days)

  dates=[]
  for ni in range(len(dateList)):
     d = datetime.datetime(*time.strptime(dateList[ni],"%Y%m%d")[0:5])
     dates.append(d)
  
  datevector=[]
  for i in range(len(dates)):
     datevector.append(np.float(dates[i].year) + np.float(dates[i].month-1)/12 + np.float(dates[i].day-1)/365)
  datevector2=[round(i,2) for i in datevector]


###########################################
# Plot Fig 1 - Velocity / last epoch of time series / DEM

  import matplotlib.pyplot as plt
  if dispVelFig in ('yes','Yes','y','Y','YES'):
     fig = plt.figure()
     ax=fig.add_subplot(111)

     try:
        velocityFile
        h5file=h5py.File(velocityFile,'r')
        k=h5file.keys()
        dset= h5file[k[0]].get(k[0])
        print 'display: ' + k[0]
     except:
        dset = h5timeseries['timeseries'].get(h5timeseries['timeseries'].keys()[-1])
        print 'display: last epoch of timeseries'

     #DEM/contour option
     try:
        demFile
        import _readfile as readfile
        if   os.path.basename(demFile).split('.')[1]=='hgt':  amp,dem,demRsc = readfile.read_float32(demFile)
        elif os.path.basename(demFile).split('.')[1]=='dem':  dem,demRsc = readfile.read_dem(demFile)

        if dispContour in ('no','No','n','N','NO','yes','Yes','y','Y','YES'):
           print 'show DEM as basemap'
           cmap_dem=plt.get_cmap('gray')
           import _pysar_utilities as ut
           plt.imshow(ut.hillshade(dem,50.0),cmap=cmap_dem)
        if dispContour in ('only','Only','o','O','ONLY','yes','Yes','y','Y','YES'):
           print 'show contour'
           if smoothContour in ('yes','Yes','y','Y','YES'):
              import scipy.ndimage as ndimage
              dem=ndimage.gaussian_filter(dem,sigma=10.0,order=0)
           contour_sequence=np.arange(-6000,9000,contour_step)
           plt.contour(dem,contour_sequence,origin='lower',colors='black',alpha=0.5)
     except: print 'No DEM file' 

     try:     img=ax.imshow(dset,vmin=vmin,vmax=vmax)
     except:  img=ax.imshow(dset)

     import matplotlib.patches as patches      # need for draw rectangle of points selected on VelFig

########################################## 
# Plot Fig 2 - Time series plot
  import scipy.stats as stats
  fig2 = plt.figure(2)
  ax2=fig2.add_subplot(111) 

  try:
     timeSeriesFile_2
     h5timeseries_2=h5py.File(timeSeriesFile_2)
     print 'plot 2nd time series'
  except:  pass   

  ########### Plot Time Series with x/y ##########
  try:
     xsub
     ysub
     try:     xmin=xsub[0];         xmax=xsub[1]+1;         print 'x='+str(xsub[0])+':'+str(xsub[1])
     except:  xmin=xsub[0]-radius;  xmax=xsub[0]+radius+1;  print 'x='+str(xsub[0])+'+/-'+str(radius)
     try:     ymin=ysub[0];         ymax=ysub[1]+1;         print 'y='+str(ysub[0])+':'+str(ysub[1])
     except:  ymin=ysub[0]-radius;  ymax=ysub[0]+radius+1;  print 'y='+str(ysub[0])+'+/-'+str(radius)
     try:
        fig
        rectSelect=patches.Rectangle((xmin,ymin),radius*2+1,radius*2+1,fill=False,lw=edgeWidth)
        ax.add_patch(rectSelect)
     except: pass

     Dis=[]
     for date in dateList:  Dis.append(h5timeseries['timeseries'].get(date)[ymin:ymax,xmin:xmax])
     Dis0=array(Dis)
     dis=Dis0*unitFac
     dis=reshape(dis,(len(dateList),-1))
     dis_mean=stats.nanmean(dis,1)
     if (xmax-xmin)*(ymax-ymin)==1:  dis_std=[0]*len(dateList)
     else:                           dis_std=stats.nanstd(dis,1)
     (_, caps, _)=ax2.errorbar(dates,dis_mean,yerr=dis_std,fmt='-ko',\
                               ms=markerSize, lw=lineWidth, alpha=1, mfc=markerColor,\
                               elinewidth=edgeWidth,ecolor='black',capsize=markerSize*0.5)
     for cap in caps:  cap.set_markeredgewidth(edgeWidth)
     print dis_mean

     # x axis format
     ax2.fmt_xdata = DateFormatter('%Y-%m-%d %H:%M:%S')
     if unitFac==100:     ax2.set_ylabel('Displacement [cm]',fontsize=fontSize)
     elif unitFac==1000:  ax2.set_ylabel('Displacement [mm]',fontsize=fontSize)
     else:                ax2.set_ylabel('Displacement [m]' ,fontsize=fontSize)
     ax2.set_xlabel('Time [years]',fontsize=fontSize)
     ax2.set_title('x='+str(xmin)+':'+str(xmax-1)+', y='+str(ymin)+':'+str(ymax-1))
     ax2.xaxis.set_major_locator(years)
     ax2.xaxis.set_major_formatter(yearsFmt)
     ax2.xaxis.set_minor_locator(months)
     datemin = datetime.date(int(datevector[0]),1,1)
     datemax = datetime.date(int(datevector[-1])+1,1,1)
     ax2.set_xlim(datemin, datemax)

     # y axis format
     try:
        lbound
        hbound
        ax2.set_ylim(lbound,hbound)
     except:
        ax2.set_ylim(nanmin(dis_mean-dis_std)-0.4*abs(nanmin(dis_mean)),\
                     nanmax(dis_mean+dis_std)+0.4*abs(nanmax(dis_mean)))

     for tick in ax2.xaxis.get_major_ticks():  tick.label.set_fontsize(fontSize)
     for tick in ax2.yaxis.get_major_ticks():  tick.label.set_fontsize(fontSize)
     #fig2.autofmt_xdate()     #adjust x overlap by rotating, may enable again

     if Save_timeseries in ('yes','Yes','Y','y','YES'):
        import scipy.io as sio
        Delay={}
        Delay['displacement']=Dis0
        Delay['unit']='m'
        Delay['time']=datevector
        tsNameBase='ts_x'+str(xmin)+'_'+str(xmax-1)+'y'+str(ymin)+'_'+str(ymax-1)
        sio.savemat(tsNameBase+'.mat', {'displacement': Delay})
        print 'saved data to '+tsNameBase+'.mat'
        plt.savefig(tsNameBase+'.pdf',dpi=fig_dpi)
        print 'saved plot to '+tsNameBase+'.pdf'

  except:  print 'No x/y input' ; pass

  ########### Plot Time Series with Click ##########
  def onclick(event):
    if event.button==1:
      xClick = int(event.xdata)
      yClick = int(event.ydata)
      print 'x='+str(xClick)+'+/-'+str(radius)+', y='+str(yClick)+'+/-'+str(radius)
      xmin=xClick-radius;  xmax=xClick+radius+1;
      ymin=yClick-radius;  ymax=yClick+radius+1;
      try:
         fig
         rectSelect=patches.Rectangle((xmin,ymin),radius*2+1,radius*2+1,fill=False,lw=edgeWidth)
         ax.add_patch(rectSelect)
      except: pass

      ax2.cla()

      #plot 1st time series
      Dis=[]
      for date in dateList:  Dis.append(h5timeseries['timeseries'].get(date)[ymin:ymax,xmin:xmax])
      Dis0=array(Dis)
      dis=Dis0*unitFac
      dis=reshape(dis,(len(dateList),-1))
      dis_mean=stats.nanmean(dis,1)
      if (xmax-xmin)*(ymax-ymin)==1:  dis_std=[0]*len(dateList)
      else:                           dis_std=stats.nanstd(dis,1)
      (_, caps, _)=ax2.errorbar(dates,dis_mean,yerr=dis_std,fmt='-ko',\
                                ms=markerSize, lw=lineWidth, alpha=1, mfc=markerColor,\
                                elinewidth=edgeWidth,ecolor='black',capsize=markerSize*0.5)
      for cap in caps:  cap.set_markeredgewidth(edgeWidth)
      print dis_mean

      #plot 2nd time series
      try:
         timeSeriesFile_2
         Dis2=[]
         for date in dateList:  Dis2.append(h5timeseries_2['timeseries'].get(date)[ymin:ymax,xmin:xmax])
         dis2=array(Dis2)
         dis2=dis2*unitFac
         dis2=reshape(dis2,(len(dateList),-1))
         dis2_mean=stats.nanmean(dis2,1)
         if (xmax-xmin)*(ymax-ymin)==1:  dis2_std=[0]*len(dateList)
         else:                           dis2_std=stats.nanstd(dis2,1)
         (_, caps, _)=ax2.errorbar(dates,dis2_mean,yerr=dis2_std,fmt='^',\
                                   ms=markerSize2, lw=lineWidth, alpha=1, mfc=markerColor2,\
                                   elinewidth=edgeWidth,ecolor='black',capsize=markerSize*0.5)
         for cap in caps:  cap.set_markeredgewidth(edgeWidth)
      except:  Dis2=[]

      #axis formating
      ax2.fmt_xdata = DateFormatter('%Y-%m-%d %H:%M:%S')
      if unitFac==100:     ax2.set_ylabel('Displacement [cm]',fontsize=fontSize)
      elif unitFac==1000:  ax2.set_ylabel('Displacement [mm]',fontsize=fontSize)
      else:                ax2.set_ylabel('Displacement [m]' ,fontsize=fontSize)
      ax2.set_xlabel('Time [years]',fontsize=fontSize)
      ax2.set_title('x='+str(xClick)+'+/-'+str(radius)+', y='+str(yClick)+'+/-'+str(radius))
      #ds=datevector[0]-0.2
      #de=datevector[-1]+0.2
      #ys=int(ds)
      #ye=int(de)
      #ms=int((ds-ys)*12)+1
      #me=int((de-ye)*12)+1
      #dss=datetime.datetime(ys,ms,1,0,0)
      #dee=datetime.datetime(ye,me,1,0,0)
      #ax2.set_xlim(dss,dee)
      ax2.xaxis.set_major_locator(years)
      ax2.xaxis.set_major_formatter(yearsFmt)
      ax2.xaxis.set_minor_locator(months)
      datemin = datetime.date(int(datevector[0]),1,1)
      datemax = datetime.date(int(datevector[-1])+1,1,1)
      ax2.set_xlim(datemin, datemax)

      try:
        lbound
        hbound
        ax2.set_ylim(lbound,hbound)
      except:
        ax2.set_ylim(nanmin(dis_mean-dis_std)-0.4*abs(nanmin(dis_mean)),\
                     nanmax(dis_mean+dis_std)+0.4*abs(nanmax(dis_mean)))

      for tick in ax2.xaxis.get_major_ticks():  tick.label.set_fontsize(fontSize)
      for tick in ax2.yaxis.get_major_ticks():  tick.label.set_fontsize(fontSize)
      #fig2.autofmt_xdate()     #adjust x overlap by rotating, may enable again

      if Save_timeseries in ('yes','Yes','Y','y','YES'):
         import scipy.io as sio
         Delay={}
         Delay['displacement']=Dis0
         Delay['unit']='m'
         Delay['time']=datevector
         tsNameBase='ts_x'+str(xmin)+'_'+str(xmax-1)+'y'+str(ymin)+'_'+str(ymax-1)
         sio.savemat(tsNameBase+'.mat', {'displacement': Delay})
         print 'saved data to '+tsNameBase+'.mat'
         plt.savefig(tsNameBase+'.pdf',dpi=fig_dpi)
         print 'saved plot to '+tsNameBase+'.pdf'

      if dispTsFig in ('yes','Yes','Y','y','YES'):  plt.show()
  try:
     cid = fig.canvas.mpl_connect('button_press_event', onclick)       # Click function is available when VelFig is shown
  except: pass

  if dispTsFig in ('yes','Yes','Y','y','YES'):  plt.show()
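
The datetime.datetime work in this viewer is the YYYYMMDD-to-datetime conversion via time.strptime, the day offsets relative to the first acquisition, and the decimal-year values used for labelling. A small standalone sketch of just those steps (the date strings are invented, not taken from a real PySAR file):

import datetime
import time

date_list = ['20090214', '20090320', '20100101']   # hypothetical acquisition dates

# Same construction the script uses: unpack the first five struct_time fields
dates = [datetime.datetime(*time.strptime(d, "%Y%m%d")[0:5]) for d in date_list]

d0 = dates[0]
tbase = [(d - d0).days for d in dates]                                           # days since first date
datevector = [d.year + (d.month - 1) / 12.0 + (d.day - 1) / 365.0 for d in dates]

print(tbase)                                 # [0, 34, 321]
print([round(v, 2) for v in datevector])     # [2009.12, 2009.22, 2010.0]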

Example 18

Project: tp-libvirt
Source File: virsh_domtime.py
View license
@error.context_aware
def run(test, params, env):
    """
    This test virsh domtime command and its options.

    1) Start a guest with/without guest agent configured;
    2) Record guest times;
    3) Do some operation to stop VM;
    4) Run virsh domtime command with different options;
    5) Check the command result;
    6) Check the guest times against expectation;
    7) Cleanup test environment.
    """
    epoch = datetime.datetime(1970, 1, 1, 0, 0, 0)
    # Max time can be set with domtime successfully in newer qemu-ga
    time_max_1 = 3155731199
    # Max time can be set with domtime successfully in older qemu-ga
    time_max_2 = 3155759999
    # Max time can be set with domtime bug failed to set RTC in older qemu-ga
    time_max_3 = 9223372035

    def init_time(session):
        """
        Initialize guest RTC time to epoch + 1234567890 and system time
        one day later.

        :param session: Session from which to access guest
        """
        res = virsh.domtime(vm_name, time=1234567890)
        if res.exit_status:
            logging.debug("Failed to init time to 1234567890:\n%s", res)
        status, output = session.cmd_status_output('date -s "1 day"')
        if status:
            raise error.TestError("Failed to set guest time:\n%s" % output)

    def get_host_utc_time():
        """
        Get host UTC time from date command.
        """
        res = utils.run("date -u")
        # Strip timezone info from output
        # e.g. 'Sun Feb 15 07:31:40 CST 2009' -> 'Sun Feb 15 07:31:40 2009'
        time_str = re.sub(r'\S+ (?=\S+$)', '', res.stdout.strip())
        return datetime.datetime.strptime(time_str,
                                          r"%a %b %d %H:%M:%S %Y")

    def run_cmd(session, cmd):
        """
        Run a command in a session and record duration of call.
        """
        start = time.time()
        output = session.cmd_output(cmd)
        duration = time.time() - start
        logging.info('Result of command "%s". Duration: %s. Output:%s',
                     cmd, duration, output.strip())
        return output, duration

    def get_guest_times(session):
        """
        Retrieve different guest times as a dict for checking.
        Keys:
            local_hw: Guest RTC time in local timezone
            local_sys: Guest system time in local timezone
            utc_sys: Guest system time in UTC
            domtime: Guest system time in UTC got from virsh domtime command

        :param session: Session from which to access guest
        """
        times = {}
        get_begin = time.time()
        # Guest RTC local timezone time
        output, _ = run_cmd(session, 'hwclock')
        time_str, _ = re.search(r"(.+)  (\S+ seconds)", output).groups()

        try:
            # output format 1: Tue 01 Mar 2016 01:53:46 PM CST
            # Remove timezone info from output
            new_str = re.sub(r'\S+$', '', time_str)
            times['local_hw'] = datetime.datetime.strptime(
                new_str, r"%a %d %b %Y %I:%M:%S %p")
        except ValueError:
            # There are two possible output format for `hwclock`
            # output format 2: Sat Feb 14 07:31:33 2009
            times['local_hw'] = datetime.datetime.strptime(
                time_str, r"%a %b %d %H:%M:%S %Y")
        delta = time.time() - get_begin
        times['local_hw'] -= datetime.timedelta(seconds=delta)

        # Guest system local timezone time
        output, _ = run_cmd(session, 'date')
        # Strip timezone info from output
        # e.g. 'Sun Feb 15 07:31:40 CST 2009' -> 'Sun Feb 15 07:31:40 2009'
        time_str = re.sub(r'\S+ (?=\S+$)', '', output.strip())
        times['local_sys'] = datetime.datetime.strptime(
            time_str, r"%a %b %d %H:%M:%S %Y")
        delta = time.time() - get_begin
        times['local_sys'] -= datetime.timedelta(seconds=delta)

        # Guest system UTC timezone time
        output, _ = run_cmd(session, 'date -u')
        # Strip timezone info from output
        # e.g. 'Sun Feb 15 07:31:40 CST 2009' -> 'Sun Feb 15 07:31:40 2009'
        time_str = re.sub(r'\S+ (?=\S+$)', '', output.strip())
        times['utc_sys'] = datetime.datetime.strptime(
            time_str, r"%a %b %d %H:%M:%S %Y")
        delta = time.time() - get_begin
        times['utc_sys'] -= datetime.timedelta(seconds=delta)

        # Guest UTC time from virsh domtime
        res = virsh.domtime(vm_name, pretty=True, ignore_status=True)
        if not res.exit_status:
            logging.info('Result of "domtime". Duration: %s. Output:%s',
                         res.duration, res.stdout.strip())
            _, time_str = res.stdout.split(" ", 1)
            times['domtime'] = datetime.datetime.strptime(
                time_str.strip(), r"%Y-%m-%d %H:%M:%S")
            delta = time.time() - get_begin
            times['domtime'] -= datetime.timedelta(seconds=delta)
        else:
            logging.debug("Unable to get domain time:\n%s", res)
            times['domtime'] = None

        return times, time.time() - get_begin

    def check_get_success(expected_times):
        """
        Check virsh command get result against expected times

        :param expected_times: Expected time for checking
        """
        _, time_str = res.stdout.split(" ", 1)
        if pretty:
            # Time: 2015-01-13 06:29:18
            domtime = datetime.datetime.strptime(time_str.strip(),
                                                 r"%Y-%m-%d %H:%M:%S")
        else:
            # Time: 1421130740
            domtime = epoch + datetime.timedelta(seconds=int(time_str))
        time_shift = time.time() - start
        logging.debug("Time shift is %s", time_shift)
        result_diff = (domtime - expected_times['domtime']).total_seconds()
        if abs(result_diff) > 2.0:
            raise error.TestFail("Expect get time %s, but got %s, time "
                                 "diff: %s" % (org_times['domtime'],
                                               domtime, result_diff))

    def check_guest_times(expected_times, cur_times):
        """
        Check guest times after test against expected times

        :param expected_times: Expected time for checking
        """
        time_shift = time.time() - start
        logging.debug("Time shift is %s", time_shift)

        error_msgs = []
        for key in cur_times:
            if cur_times[key] is not None:
                cur = cur_times[key]
                expect = expected_times[key]

                diff = (cur - expect).total_seconds()
                msg = "For %s, expect get time %s, got %s, time diff: %s" % (
                    key, expect, cur, diff)
                logging.debug(msg)
                if abs(diff) > 2.0:
                    error_msgs.append(msg)
        if error_msgs:
            raise error.TestFail('\n'.join(error_msgs))

    def check_time(result, org_times, cur_times):
        """
        Check whether domain time has been changed accordingly.

        :param result: virsh domtime CmdResult instance
        :param org_times: Original guest times
        """
        action = "get"
        if now or sync or (set_time is not None):
            action = "set"

        tz_diff = org_times['local_sys'] - org_times['utc_sys']
        logging.debug("Timezone diff on guest is %d hours.",
                      (tz_diff.total_seconds() / 3600))

        # Hardware time will never stop
        logging.info('Add %ss to expected guest time', interval)
        if action == 'get':
            expected_times = org_times
        elif action == 'set':
            if result.exit_status:
                # Time not change if domtime fails
                expected_times = org_times
            else:
                # Time change accordingly if succeed.
                if now:
                    utc_time = org_host_time
                    local_time = utc_time + tz_diff
                elif sync:
                    local_time = org_times["local_hw"]
                    utc_time = local_time - tz_diff
                elif set_time is not None:
                    utc_time = epoch + datetime.timedelta(
                        seconds=(int(set_time) - guest_duration))
                    local_time = utc_time + tz_diff
                expected_times = {}
                expected_times['local_hw'] = local_time
                expected_times['local_sys'] = local_time
                expected_times["utc_sys"] = utc_time
                expected_times["domtime"] = utc_time

        # Add interval between two checks of guest time
        for key in expected_times:
            if expected_times[key] is not None:
                expected_times[key] += interval

        # Hardware time will never stop
        # Software time will stop if suspended or managed-saved
        if suspend or managedsave:
            logging.info('Remove %ss from expected guest software time',
                         stop_time)
            expected_times["domtime"] -= stop_time
            expected_times["local_sys"] -= stop_time
            expected_times["utc_sys"] -= stop_time

        # Check guest time if domtime succeeded
        check_guest_times(expected_times, cur_times)

        # Check if output of domtime is correct
        if action == 'get' and not result.exit_status:
            check_get_success(expected_times)

    def prepare_fail_patts():
        """
        Predict fail pattern from test parameters.
        """
        fail_patts = []
        if not channel:
            fail_patts.append(r"QEMU guest agent is not configured")
        if not agent:
            # For older version
            fail_patts.append(r"Guest agent not available for now")
            # For newer version
            fail_patts.append(r"Guest agent is not responding")
        if int(now) + int(sync) + int(bool(set_time)) > 1:
            fail_patts.append(r"Options \S+ and \S+ are mutually exclusive")
        if shutdown:
            fail_patts.append(r"domain is not running")

        if set_time is not None:
            if int(set_time) < 0:
                fail_patts.append(r"Invalid argument")
            elif time_max_1 < int(set_time) <= time_max_2:
                fail_patts.append(r"Invalid time")
            elif time_max_2 < int(set_time) <= time_max_3:
                fail_patts.append(r"Invalid time")
            elif time_max_3 < int(set_time):
                fail_patts.append(r"too big for guest agent")
        return fail_patts

    def stop_vm():
        """
        Suspend, managedsave, pmsuspend or shutdown a VM for a period of time
        """
        stop_start = time.time()
        if suspend:
            vm.pause()
            time.sleep(10)
            vm.resume()
        elif managedsave:
            vm.managedsave()
            time.sleep(10)
            vm.start()
            vm.wait_for_login()
        elif pmsuspend:
            vm.pmsuspend()
            time.sleep(10)
            vm.pmwakeup()
            vm.wait_for_login()
        elif shutdown:
            vm.destroy()

        # Check real guest stop time
        stop_seconds = time.time() - stop_start
        stop_time = datetime.timedelta(seconds=stop_seconds)
        logging.debug("Guest stopped: %s", stop_time)
        return stop_time

    # Check availability of virsh command domtime
    if not virsh.has_help_command('domtime'):
        raise error.TestNAError("This version of libvirt does not support "
                                "the domtime test")

    channel = (params.get("prepare_channel", "yes") == 'yes')
    agent = (params.get("start_agent", "yes") == 'yes')
    pretty = (params.get("domtime_pretty", "no") == 'yes')
    now = (params.get("domtime_now", "no") == 'yes')
    sync = (params.get("domtime_sync", "no") == 'yes')
    set_time = params.get("domtime_time", None)

    shutdown = (params.get("shutdown_vm", "no") == 'yes')
    suspend = (params.get("suspend_vm", "no") == 'yes')
    managedsave = (params.get("managedsave_vm", "no") == 'yes')
    pmsuspend = (params.get("pmsuspend_vm", "no") == 'yes')

    vm_name = params.get("main_vm")
    vm = env.get_vm(vm_name)

    # Backup domain XML
    xml_backup = vm_xml.VMXML.new_from_inactive_dumpxml(vm_name)
    try:
        if pmsuspend:
            vm_xml.VMXML.set_pm_suspend(vm_name)
        # Add or remove qemu-agent from guest before test
        vm.prepare_guest_agent(channel=channel, start=agent)
        session = vm.wait_for_login()
        try:
            if channel and agent:
                init_time(session)

            # Expected fail message patterns
            fail_patts = prepare_fail_patts()

            # Message patterns test should skip when met
            skip_patts = [
                r'The command \S+ has not been found',
            ]

            # Record start time
            start = time.time()

            # Record host time before testing
            org_host_time = get_host_utc_time()
            # Get original guest times
            org_times, guest_duration = get_guest_times(session)

            # Run some operations to stop guest system
            stop_time = stop_vm()

            # Run command with specified options.
            res = virsh.domtime(vm_name, now=now, pretty=pretty, sync=sync,
                                time=set_time)
            libvirt.check_result(res, fail_patts, skip_patts)

            # Check interval between two check of guest time
            interval = datetime.timedelta(
                seconds=(time.time() - start))
            logging.debug("Interval between guest checking: %s", interval)

            if not shutdown:
                # Get current guest times
                cur_times, _ = get_guest_times(session)

                check_time(res, org_times, cur_times)
        finally:
            # Sync guest time with host
            if channel and agent and not shutdown:
                res = virsh.domtime(vm_name, now=True)
                if res.exit_status:
                    session.close()
                    raise error.TestError("Failed to recover guest time:\n%s"
                                          % res)
            session.close()
    finally:
        # Restore VM XML
        xml_backup.sync()
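
The datetime.datetime parts of this test are a Unix-epoch constant, strptime parsing of date / hwclock output with the timezone token stripped, and timedelta arithmetic for converting an integer domtime value and comparing clocks within a tolerance. A minimal sketch of those conversions, using invented sample strings and values:

import datetime
import re

epoch = datetime.datetime(1970, 1, 1, 0, 0, 0)

# Strip the timezone field from 'date -u'-style output,
# e.g. 'Sun Feb 15 07:31:40 CST 2009' -> 'Sun Feb 15 07:31:40 2009'
raw = 'Sun Feb 15 07:31:40 CST 2009'
time_str = re.sub(r'\S+ (?=\S+$)', '', raw)
utc_sys = datetime.datetime.strptime(time_str, r"%a %b %d %H:%M:%S %Y")

# An integer domtime value is seconds since the epoch
domtime = epoch + datetime.timedelta(seconds=1421130740)

# The checks above accept a clock difference of up to 2 seconds
diff = (domtime - utc_sys).total_seconds()
print("%s  %s  within 2s: %s" % (utc_sys, domtime, abs(diff) <= 2.0))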

Example 19

Project: geraldo
Source File: query.py
View license
def query_class(QueryClass, Database):
    """
    Returns a custom django.db.models.sql.query.Query subclass that is
    appropriate for Oracle.

    The 'Database' module (cx_Oracle) is passed in here so that all the setup
    required to import it only needs to be done by the calling module.
    """
    global _classes
    try:
        return _classes[QueryClass]
    except KeyError:
        pass

    class OracleQuery(QueryClass):
        def resolve_columns(self, row, fields=()):
            index_start = len(self.extra_select.keys())
            values = [self.convert_values(v, None) for v in row[:index_start]]
            for value, field in map(None, row[index_start:], fields):
                values.append(self.convert_values(value, field))
            return values

        def convert_values(self, value, field):
            from django.db.models.fields import DateField, DateTimeField, \
                 TimeField, BooleanField, NullBooleanField, DecimalField, Field
            if isinstance(value, Database.LOB):
                value = value.read()
            # Oracle stores empty strings as null. We need to undo this in
            # order to adhere to the Django convention of using the empty
            # string instead of null, but only if the field accepts the
            # empty string.
            if value is None and isinstance(field, Field) and field.empty_strings_allowed:
                value = u''
            # Convert 1 or 0 to True or False
            elif value in (1, 0) and isinstance(field, (BooleanField, NullBooleanField)):
                value = bool(value)
            # Convert floats to decimals
            elif value is not None and isinstance(field, DecimalField):
                value = util.typecast_decimal(field.format_number(value))
            # cx_Oracle always returns datetime.datetime objects for
            # DATE and TIMESTAMP columns, but Django wants to see a
            # python datetime.date, .time, or .datetime.  We use the type
            # of the Field to determine which to cast to, but it's not
            # always available.
            # As a workaround, we cast to date if all the time-related
            # values are 0, or to time if the date is 1/1/1900.
            # This could be cleaned a bit by adding a method to the Field
            # classes to normalize values from the database (the to_python
            # method is used for validation and isn't what we want here).
            elif isinstance(value, Database.Timestamp):
                # In Python 2.3, the cx_Oracle driver returns its own
                # Timestamp object that we must convert to a datetime class.
                if not isinstance(value, datetime.datetime):
                    value = datetime.datetime(value.year, value.month,
                            value.day, value.hour, value.minute, value.second,
                            value.fsecond)
                if isinstance(field, DateTimeField):
                    # DateTimeField subclasses DateField so must be checked
                    # first.
                    pass
                elif isinstance(field, DateField):
                    value = value.date()
                elif isinstance(field, TimeField) or (value.year == 1900 and value.month == value.day == 1):
                    value = value.time()
                elif value.hour == value.minute == value.second == value.microsecond == 0:
                    value = value.date()
            return value

        def as_sql(self, with_limits=True, with_col_aliases=False):
            """
            Creates the SQL for this query. Returns the SQL string and list
            of parameters.  This is overridden from the original Query class
            to handle the additional SQL Oracle requires to emulate LIMIT
            and OFFSET.

            If 'with_limits' is False, any limit/offset information is not
            included in the query.
            """

            # The `do_offset` flag indicates whether we need to construct
            # the SQL needed to use limit/offset with Oracle.
            do_offset = with_limits and (self.high_mark is not None
                                         or self.low_mark)
            if not do_offset:
                sql, params = super(OracleQuery, self).as_sql(with_limits=False,
                        with_col_aliases=with_col_aliases)
            else:
                # `get_columns` needs to be called before `get_ordering` to
                # populate `_select_alias`.
                self.pre_sql_setup()
                self.get_columns()
                ordering = self.get_ordering()

                # Oracle's ROW_NUMBER() function requires an ORDER BY clause.
                if ordering:
                    rn_orderby = ', '.join(ordering)
                else:
                    # Create a default ORDER BY since none was specified.
                    qn = self.quote_name_unless_alias
                    opts = self.model._meta
                    rn_orderby = '%s.%s' % (qn(opts.db_table),
                        qn(opts.fields[0].db_column or opts.fields[0].column))

                # Ensure the base query SELECTs our special "_RN" column
                self.extra_select['_RN'] = ('ROW_NUMBER() OVER (ORDER BY %s)'
                                            % rn_orderby, '')
                sql, params = super(OracleQuery, self).as_sql(with_limits=False,
                                                        with_col_aliases=True)

                # Wrap the base query in an outer SELECT * with boundaries on
                # the "_RN" column.  This is the canonical way to emulate LIMIT
                # and OFFSET on Oracle.
                sql = 'SELECT * FROM (%s) WHERE "_RN" > %d' % (sql, self.low_mark)
                if self.high_mark is not None:
                    sql = '%s AND "_RN" <= %d' % (sql, self.high_mark)

            return sql, params

        def set_limits(self, low=None, high=None):
            super(OracleQuery, self).set_limits(low, high)
            # We need to select the row number for the LIMIT/OFFSET sql.
            # A placeholder is added to extra_select now, because as_sql is
            # too late to be modifying extra_select.  However, the actual sql
            # depends on the ordering, so that is generated in as_sql.
            self.extra_select['_RN'] = ('1', '')

        def clear_limits(self):
            super(OracleQuery, self).clear_limits()
            if '_RN' in self.extra_select:
                del self.extra_select['_RN']

    _classes[QueryClass] = OracleQuery
    return OracleQuery
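
The Timestamp branch above falls back to a value-based heuristic when no Field is available: an all-zero time marks a date, and the sentinel date 1900-01-01 marks a pure time. A hedged, Oracle-free sketch of just that fallback rule (field handling omitted):

import datetime

def normalize(value):
    # Mirror of the fallback used when the Field type is unknown:
    # 1900-01-01 marks a time-only value, an all-zero time marks a date.
    if value.year == 1900 and value.month == value.day == 1:
        return value.time()
    if value.hour == value.minute == value.second == value.microsecond == 0:
        return value.date()
    return value

print(normalize(datetime.datetime(2009, 7, 27)))         # -> 2009-07-27
print(normalize(datetime.datetime(1900, 1, 1, 20, 0)))   # -> 20:00:00
print(normalize(datetime.datetime(2009, 7, 27, 20, 0)))  # unchanged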

Example 20

Project: iCal-Analyzer
Source File: reader.py
View license
    @classmethod
    def read_event_file(klass, fname):
        """
        Reads calendar info file, which looks like:
        
        BEGIN:VCALENDAR
        VERSION:2.0
        PRODID:-//Apple Inc.//iCal 3.0//EN
        CALSCALE:GREGORIAN
        BEGIN:VEVENT
        SEQUENCE:3
        TRANSP:OPAQUE
        UID:7AE01F84-F885-49B2-A35B-9CF73771B797
        DTSTART;TZID=US/Eastern:20090727T200000
        DTSTAMP:20090809T180349Z
        SUMMARY:[email protected]
        CREATED:20090809T180331Z
        DTEND;TZID=US/Eastern:20090727T220000
        (( or if traveling...
        DTSTART;TZID=Europe/Berlin:20090804T230000
        ))
        RRULE:FREQ=WEEKLY;INTERVAL=1
        END:VEVENT
        END:VCALENDAR
        
        @return: Event
        """
        # only record key values whose keys exist in this dictionary.
        # use these dictionary values as self field names
        def split_event_if_crosses_midnight(event):
            """
            Events that cross midnight become two events.
            This is not what we want to do. We actually want to make days
            that end with sleep, regardless of midnight.
            """
            ret = []
            if event.end.day != event.start.day:
                midnight_day1 = datetime.datetime(event.start.year,
                                                  event.start.month,
                                                  event.start.day,
                                                  23, 59, 59)
                midnight_day2 = datetime.datetime(event.end.year,
                                                  event.end.month,
                                                  event.end.day,
                                                  0, 0, 0)
                event2 = Event(start=midnight_day2,
                               end=event.end,
                               calendar=event.calendar,
                               summary=event.summary)
                event.end = midnight_day1
                ret.append(event)
                ret.append(event2)
            else:
                ret.append(event)
            return ret
        
        def smart_split(item_to_split, splitch):
            """
            Splits a string into components on the split character 'splitch'.
            Each component is stripped; components that look like iCal timestamps become datetime objects.
            """
            items = item_to_split.split(splitch)
            ret = []
            for item in items:
                item = item.strip()
                try:
                    #20090727T200000
                    item = datetime.datetime.strptime(item, "%Y%m%dT%H%M%S")
                except ValueError:
                    pass
                ret.append(item)
            return ret
        
        def smart_name(name):
            """
            remaps name using file_key_value_map
            """
            if name in file_key_value_map:
                return file_key_value_map[name]
            else:
                return name
    
        file_key_value_map = {'SUMMARY':'summary',
                              'DTSTART':'start',
                              'DTEND':'end',
                              'TZID':'tz',
                              'BEGIN':'begin'}

        event_fields = {'start':None,
                        'end':None,
                        'calendar':None,
                        'summary':None,
                        'begin':None}
        f = open(fname, 'r')
        last_key_is_summary = False
        for line in f.readlines():
            """
            NOTE: keys can exist multiple times in the file
            
            every line should have this form:: 
                <key>:<value>
            where key is an expression of this form::
                <key_name>(;<sub_key_name>=<sub_value>)*
            and value is an expression of the same form but with different names::
                <value> | <sub_value_name>=<sub_value>;(<sub_value_name>=<sub_value>)+
            """
            """
                DTSTART;TZID=US/Eastern:20090727T200000
            yields
                self.start = datetime(20090727T200000)
                self.start_tz = US/Eastern
                
                RRULE:FREQ=WEEKLY;INTERVAL=1
            yields
                self.rrule =  {'freq':'weekly', 'interval'=1}
            """
            # parse the line
            if line.startswith(' ') and last_key_is_summary:
                event_fields['summary'] += line.strip()
                continue
            key_value = smart_split(line, ':')
            if len(key_value) == 1:
                print "skipping line because ':' not found", key_value
                continue
            key = key_value[0]
            value = key_value[1]
            # find sub keys:: <key>;<subkey>=<subvalue>;...:<value>
            if isinstance(key, str) and ';' in key:
                sub_keys = smart_split(key, ';')
                key = sub_keys[0]
                sub_keys = sub_keys[1:]
            else:
                sub_keys = []
            # find sub values:: <key>:<va;<subkey>=<subvalue>;...:<value>
            if isinstance(value, str) and ';' in value and not '\;' in value:
                sub_values = smart_split(value, ';')
                value = None
            else:
                sub_values = []
            
            # now add properties to this event
            if key in file_key_value_map:
                # create sub value dictionary if necessary and set to values
                if sub_values:
                    value = {}
                    for sub_value in sub_values:
                        key_value = smart_split(sub_value, '=')
                        value[key_value[0].lower()] = key_value[1].lower()
                event_fields[smart_name(key)] = value
                # set sub key  properties if necessary
                if sub_keys:
                    for sub_key in sub_keys:
                        key_value = smart_split(sub_key, '=')
                        event_fields["%s_%s" % (smart_name(key),
                                                smart_name(key_value[0]))] = key_value[1]
                last_key_is_summary = (key == 'SUMMARY')
        f.close()
        if event_fields['begin'] != 'VEVENT':
            return None
        
        #events = split_event_if_crosses_midnight(event)
        
        if 'start_tz' in event_fields and event_fields['start_tz'] == 'Europe/Berlin':
            event_fields['start'] = event_fields['start'] - datetime.timedelta(hours=6)
        if 'end_tz' in event_fields and event_fields['end_tz'] == 'Europe/Berlin':
            event_fields['end'] = event_fields['end'] - datetime.timedelta(hours=6)
        
        if not klass.current_calendar:
            print "NO CALENDAR ERROR"
        
        return Event(event_fields['start'],
                     event_fields['end'],
                     event_fields['summary'],
                     klass.current_calendar)
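
The smart_split helper above quietly converts any component that matches the iCal timestamp layout and leaves everything else as a string. A small sketch of that try/except pattern on its own, assuming the same "%Y%m%dT%H%M%S" layout:

import datetime

def parse_maybe_timestamp(item):
    # Values like '20090727T200000' become datetimes; anything else is
    # returned unchanged, matching smart_split()'s fallback behaviour.
    try:
        return datetime.datetime.strptime(item.strip(), "%Y%m%dT%H%M%S")
    except ValueError:
        return item.strip()

print(parse_maybe_timestamp("20090727T200000"))  # 2009-07-27 20:00:00
print(parse_maybe_timestamp("US/Eastern"))       # left as a plain string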

Example 21

View license
	def extractSeriesReleases(self, seriesPageUrl, soup):

		titletg  = soup.find("h4", class_='seriestitle')
		altnametg  = soup.find("div", id='editassociated')
		descrtg  = soup.find("div", id='editdescription')

		link_sets = {
			'authortg'        : soup.find("div", id='showauthors'),
			'artisttg'        : soup.find("div", id='showartists'),
			'langtg'          : soup.find("div", id='showlang'),
			'genretg'         : soup.find("div", id='seriesgenre'),
			'tagstg'          : soup.find("div", id='showtags'),
			'typetg'          : soup.find("div", id='showtype'),
			'orig_pub_tg'     : soup.find("div", id='showopublisher'),
			'eng_pub_tg'      : soup.find("div", id='showepublisher'),
		}

		text_sets = {
			'transcompletetg' : soup.find("div", id='showtranslated'),
			'yeartg'          : soup.find("div", id='edityear'),
			'coostatustg'     : soup.find("div", id='editstatus'),
			'licensedtg'      : soup.find("div", id='showlicensed'),
			}

		if not titletg:
			self.log.warn("Could not find item title!")
			return []
		if not altnametg:
			self.log.warn("Could not find alt-name container tag!")
			return []
		if not descrtg:
			self.log.warn("Could not find description container tag!")
			return []

		data_sets = {}
		for key in list(link_sets.keys()):
			if not link_sets[key]:
				self.log.warn("Could not find tag for name: '%s'", key)
				return []
			data_sets[key] = [tag.get_text() for tag in link_sets[key].find_all("a")]

		for key in list(text_sets.keys()):
			if not text_sets[key]:
				self.log.warn("Could not find tag for name: '%s'", key)
				return []
			data_sets[key] = [tmp.strip() for tmp in text_sets[key].contents if isinstance(tmp, bs4.NavigableString)]

		title  = titletg.get_text().strip()

		data_sets['title'] = title
		data_sets['altnames'] = [tmp.strip() for tmp in altnametg.contents if isinstance(tmp, bs4.NavigableString)]

		# Scrub incoming markup
		for key in list(data_sets.keys()):
			if isinstance(data_sets[key], list):
				data_sets[key] = [bleach.clean(val, tags=[], attributes=[], styles=[], strip=True, strip_comments=True).strip() for val in data_sets[key]]
			else:
				data_sets[key] = bleach.clean(data_sets[key], tags=[], attributes=[], styles=[], strip=True, strip_comments=True).strip()


		if data_sets['yeartg'] and data_sets['yeartg'][0]:
			# print("Non-null data_sets['yeartg']:", data_sets['yeartg'])
			tmp_d = datetime.datetime(year=int(data_sets['yeartg'].pop()), month=1, day=1)
			data_sets['yeartg'] = calendar.timegm(tmp_d.timetuple())
		else:
			data_sets['yeartg'] = None

		# {
		# 	'coostatustg': ['3 Volumes (Ongoing)', '5 Web Volumes (Ongoing)'],
		# 	'orig_pub_tg': ['Media Factory'],
		# 	'eng_pub_tg': [],
		# 	'typetg': ['Web Novel'],
		# 	'genretg': ['Action', 'Adventure', 'Comedy', 'Ecchi', 'Fantasy', 'Romance', 'Seinen'],
		# 	'licensedtg': ['No'],
		# 	'altnames': ['Sendai Yuusha wa Inkyoshitai', 'The Previous Hero wants to Retire', '先代勇者は隠居したい'],
		# 	'authortg': ['Iida K'],
		# 	'artisttg': ['Shimotsuki Eito'],
		# 	'title': 'Sendai Yuusha wa Inkyou Shitai',
		# 	'description': '<p>\n  Three years ago, in the land of Reinbulk, a Legendary Hero was summoned in the Kindom of Leezalion and he succeeded in repelling the Demon King. Now, five students are summoned back into Reinbulk by the Kingdom of Luxeria to fight against the Demon King and the demon army. Unlike the other heroes, Yashiro Yuu has no magical affinity and the Luxeria Kingdom has no intention on acknowledging his existence or returning him to his world.\n </p>\n <p>\n  However, Yuu is actually the previous Hero that had fought the Demon King. Moreover, he is perplexed at the situation since he knows the Demon King has not returned since he sealed him. If the seal was ever broken then he would be automatically summoned instead of normal summoned. Since he already saved the world once and the Demon King hasn’t been unsealed, Yuu decides to leave the demons to the new heroes and retire from the Hero business. So he decides to become an adventurer.\n </p>',
		# 	'tagstg': ['Elves', 'Heroes', 'Magic', 'Monsters', 'Multiple Narrators', 'Protagonist Strong from the Start', 'Strong Male Lead', 'Sword and Sorcery', 'Transported to Another World'],
		# 	'langtg': ['Japanese'],
		# 	'yeartg': ['2013']

		# 	'transcompletetg': ['No'],
		# }

		data_sets['description'] = bleach.clean(descrtg.prettify(), tags=['a', 'abbr', 'acronym', 'b', 'blockquote', 'code', 'em', 'i', 'li', 'ol', 'strong', 'ul', 'p'], strip=True).strip()

		series_message = {
			'update_only'   : False,
			'sourcesite'    : "NovelUpdates",
			'title'         : data_sets['title'],
			'alt_titles'    : data_sets['altnames'] + [data_sets['title'], ],

			'desc'          : data_sets['description'],
			# 'homepage'      : data_sets[''],
			'author'        : data_sets['authortg'],
			'illust'        : data_sets['artisttg'],

			'pubdate'       : data_sets['yeartg'],
			'pubnames'      : data_sets['orig_pub_tg'] + data_sets['eng_pub_tg'],
			# 'sourcesite'    : data_sets[''],
			'tags'          : data_sets['tagstg'],

			# AFAICT, NovelUpdates doesn't have any English items, but check anyway.
			'tl_type'       : "translated" if 'English' not in data_sets['langtg'] else "oel",

			# New:
			'coostate'      : data_sets['coostatustg'],
			'type'          : data_sets['typetg'],
			'genres'        : data_sets['genretg'],
			'licensed'      : data_sets['licensedtg'],
			'transcomplete' : data_sets['transcompletetg'],

			'create_tags'   : True,
		}
		# pprint.pprint(series_message)
		series_info_packet = msgpackers.createSeriesInfoPacket(series_message, matchAuthor=True, beta=self.is_beta)
		# print(series_info_packet)

		extra = {}
		extra['tags']     = data_sets['tagstg']
		# extra['homepage'] = seriesPageUrl
		extra['sourcesite']  = 'Unknown'


		chapter_tbl = soup.find("table", id='myTable')
		if not chapter_tbl:
			self.log.error("No chapter table!")
			return

		releases = chapter_tbl.find_all("tr")

		valid_releases = 0
		for release in releases:

			items = release.find_all("td")
			if len(items) != 3:
				continue

			date_tg, group_tg, chp_tg = items

			rel = datetime.datetime.strptime(date_tg.get_text().strip(), '%m/%d/%y')
			if rel.date() == datetime.date.today():
				reldate = datetime.datetime.now()
			else:
				reldate = datetime.datetime.fromtimestamp(calendar.timegm(rel.timetuple()))

			release_info  = chp_tg.get_text().strip()
			group_name = group_tg.get_text().strip()
			group_name = msgpackers.fixSmartQuotes(group_name)


			upsertNuItem(self.raw_cur,
				{
					'seriesname'       : title,
					'releaseinfo'      : release_info,
					'groupinfo'        : group_name,
					'referrer'         : seriesPageUrl,
					'outbound_wrapper' : chp_tg.a['href'],
					'first_seen'       : reldate,
				})


			valid_releases += 1


		self.log.info("Committing!")
		self.raw_cur.execute("COMMIT;")
		self.log.info("Committed!")
		# Do not add series without 3 chapters.
		if valid_releases < 3:
			self.log.warning("Less then three chapters!")
			return

		self.amqp_put_item(series_info_packet)
		return
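
Two datetime conversions carry the scraper above: a bare publication year becomes a UTC epoch via calendar.timegm, and each release date in "%m/%d/%y" form becomes either "now" (when it falls on today) or the corresponding timestamp. A stand-alone sketch of both conversions, outside the scraper:

import calendar
import datetime

# Bare publication year -> seconds since the epoch (UTC), as done for yeartg.
year_epoch = calendar.timegm(datetime.datetime(year=2013, month=1, day=1).timetuple())

# Release-table date -> datetime, promoting today's date to "now".
rel = datetime.datetime.strptime("07/27/09", "%m/%d/%y")
if rel.date() == datetime.date.today():
    reldate = datetime.datetime.now()
else:
    reldate = datetime.datetime.fromtimestamp(calendar.timegm(rel.timetuple()))

print(year_epoch)
print(reldate)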

Example 22

Project: django-compositepks
Source File: query.py
View license
def query_class(QueryClass, Database):
    """
    Returns a custom django.db.models.sql.query.Query subclass that is
    appropriate for Oracle.

    The 'Database' module (cx_Oracle) is passed in here so that all the setup
    required to import it only needs to be done by the calling module.
    """
    global _classes
    try:
        return _classes[QueryClass]
    except KeyError:
        pass

    class OracleQuery(QueryClass):
        def __reduce__(self):
            """
            Enable pickling for this class (normal pickling handling doesn't
            work as Python can only pickle module-level classes by default).
            """
            if hasattr(QueryClass, '__getstate__'):
                assert hasattr(QueryClass, '__setstate__')
                data = self.__getstate__()
            else:
                data = self.__dict__
            return (unpickle_query_class, (QueryClass,), data)

        def resolve_columns(self, row, fields=()):
            # If this query has limit/offset information, then we expect the
            # first column to be an extra "_RN" column that we need to throw
            # away.
            if self.high_mark is not None or self.low_mark:
                rn_offset = 1
            else:
                rn_offset = 0
            index_start = rn_offset + len(self.extra_select.keys())
            values = [self.convert_values(v, None)
                      for v in row[rn_offset:index_start]]
            for value, field in map(None, row[index_start:], fields):
                values.append(self.convert_values(value, field))
            return values

        def convert_values(self, value, field):
            from django.db.models.fields import DateField, DateTimeField, \
                 TimeField, BooleanField, NullBooleanField, DecimalField, Field
            if isinstance(value, Database.LOB):
                value = value.read()
            # Oracle stores empty strings as null. We need to undo this in
            # order to adhere to the Django convention of using the empty
            # string instead of null, but only if the field accepts the
            # empty string.
            if value is None and isinstance(field, Field) and field.empty_strings_allowed:
                value = u''
            # Convert 1 or 0 to True or False
            elif value in (1, 0) and isinstance(field, (BooleanField, NullBooleanField)):
                value = bool(value)
            # Convert floats to decimals
            elif value is not None and isinstance(field, DecimalField):
                value = util.typecast_decimal(field.format_number(value))
            # cx_Oracle always returns datetime.datetime objects for
            # DATE and TIMESTAMP columns, but Django wants to see a
            # python datetime.date, .time, or .datetime.  We use the type
            # of the Field to determine which to cast to, but it's not
            # always available.
            # As a workaround, we cast to date if all the time-related
            # values are 0, or to time if the date is 1/1/1900.
            # This could be cleaned a bit by adding a method to the Field
            # classes to normalize values from the database (the to_python
            # method is used for validation and isn't what we want here).
            elif isinstance(value, Database.Timestamp):
                # In Python 2.3, the cx_Oracle driver returns its own
                # Timestamp object that we must convert to a datetime class.
                if not isinstance(value, datetime.datetime):
                    value = datetime.datetime(value.year, value.month,
                            value.day, value.hour, value.minute, value.second,
                            value.fsecond)
                if isinstance(field, DateTimeField):
                    # DateTimeField subclasses DateField so must be checked
                    # first.
                    pass
                elif isinstance(field, DateField):
                    value = value.date()
                elif isinstance(field, TimeField) or (value.year == 1900 and value.month == value.day == 1):
                    value = value.time()
                elif value.hour == value.minute == value.second == value.microsecond == 0:
                    value = value.date()
            return value

        def as_sql(self, with_limits=True, with_col_aliases=False):
            """
            Creates the SQL for this query. Returns the SQL string and list
            of parameters.  This is overridden from the original Query class
            to handle the additional SQL Oracle requires to emulate LIMIT
            and OFFSET.

            If 'with_limits' is False, any limit/offset information is not
            included in the query.
            """

            # The `do_offset` flag indicates whether we need to construct
            # the SQL needed to use limit/offset with Oracle.
            do_offset = with_limits and (self.high_mark is not None
                                         or self.low_mark)
            if not do_offset:
                sql, params = super(OracleQuery, self).as_sql(with_limits=False,
                        with_col_aliases=with_col_aliases)
            else:
                sql, params = super(OracleQuery, self).as_sql(with_limits=False,
                                                        with_col_aliases=True)

                # Wrap the base query in an outer SELECT * with boundaries on
                # the "_RN" column.  This is the canonical way to emulate LIMIT
                # and OFFSET on Oracle.
                high_where = ''
                if self.high_mark is not None:
                    high_where = 'WHERE ROWNUM <= %d' % (self.high_mark,)
                sql = 'SELECT * FROM (SELECT ROWNUM AS "_RN", "_SUB".* FROM (%s) "_SUB" %s) WHERE "_RN" > %d' % (sql, high_where, self.low_mark)

            return sql, params

    _classes[QueryClass] = OracleQuery
    return OracleQuery
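
The as_sql override above emulates LIMIT/OFFSET by wrapping the inner query and filtering on ROWNUM. A hedged sketch of just that string-wrapping step, with hypothetical low/high marks:

def wrap_with_rownum(sql, low_mark, high_mark=None):
    # Standalone helper mirroring the wrapping performed in as_sql() above.
    high_where = ''
    if high_mark is not None:
        high_where = 'WHERE ROWNUM <= %d' % (high_mark,)
    return ('SELECT * FROM (SELECT ROWNUM AS "_RN", "_SUB".* FROM (%s) "_SUB" %s) '
            'WHERE "_RN" > %d') % (sql, high_where, low_mark)

print(wrap_with_rownum('SELECT "ID" FROM "PACKAGES"', low_mark=20, high_mark=30))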

Example 23

Project: courtlistener
Source File: import_law_box.py
View license
def get_date_filed(clean_html_tree, citations, case_path=None, court=None):
    path = ('//center[descendant::text()[not('
              'starts-with(normalize-space(.), "No.") or '
              'starts-with(normalize-space(.), "Case No.") or '
              'starts-with(normalize-space(.), "Record No.")'
            ')]]')

    # Get a reasonable date range based on reporters in the citations.
    reporter_keys = [citation.reporter for citation in citations]
    range_dates = []
    for reporter_key in reporter_keys:
        for reporter in REPORTERS.get(EDITIONS.get(reporter_key)):
            try:
                range_dates.extend(reporter['editions'][reporter_key])
            except KeyError:
                # Fails when a reporter_key points to more than one reporter,
                # one of which doesn't have the edition queried. For example,
                # Wash. 2d isn't in REPORTERS['Wash.']['editions'][0].
                pass
    if range_dates:
        start, end = min(range_dates) - timedelta(weeks=(20 * 52)), max(
            range_dates) + timedelta(weeks=20 * 52)
        if end > now():
            end = now()

    dates = []
    for e in clean_html_tree.xpath(path):
        text = tostring(e, method='text', encoding='unicode')
        # Items like "February 4, 1991, at 9:05 A.M." stump the lexer in the
        # date parser. Consequently, we purge the word at, and anything after
        # it.
        text = re.sub(' at .*', '', text)

        # The parser recognizes numbers like 121118 as a date. This corpus
        # does not have dates in that format.
        text = re.sub('\d{5,}', '', text)

        # The parser can't handle 'Sept.' so we tweak it.
        text = text.replace('Sept.', 'Sep.')

        # The parser recognizes dates like December 3, 4, 1908 as
        # 2004-12-3 19:08.
        re_match = re.search('\d{1,2}, \d{1,2}, \d{4}', text)
        if re_match:
            # These are always date argued, thus continue.
            continue

        # The parser recognizes dates like October 12-13, 1948 as 2013-10-12,
        # not as 1948-10-12
        # See: https://www.courtlistener.com/scotus/9ANY/x/
        re_match = re.search('\d{1,2}-\d{1,2}, \d{4}', text)
        if re_match:
            # These are always date argued, thus continue.
            continue

        # Sometimes there's a string like: "Review Denied July 26, 2006.
        # Skip this.
        if 'denied' in text.lower():
            continue

        try:
            if range_dates:
                found = parse_dates.parse_dates(text, sane_start=start,
                                                sane_end=end)
            else:
                found = parse_dates.parse_dates(text, sane_end=now())
            if found:
                dates.extend(found)
        except UnicodeEncodeError:
            # If it has unicode, it crashes dateutil's parser, but it is unlikely
            # to be the date.
            pass

    # Get the date from our SCOTUS date table
    scotus_dates_found = []
    if not dates and court == 'scotus':
        for citation in citations:
            try:
                # Scotus dates are in the form of a list, since a single
                # citation can refer to several dates.
                found = scotus_dates["%s %s %s" % (
                    citation.volume, citation.reporter, citation.page)]
                if len(found) == 1:
                    scotus_dates_found.extend(found)
            except KeyError:
                pass
        if len(scotus_dates_found) == 1:
            dates = scotus_dates_found

    if not dates:
        # Try to grab the year from the citations, if it's the same in all of
        # them.
        years = set([citation.year for citation in citations if citation.year])
        if len(years) == 1:
            dates.append(datetime.datetime(list(years)[0], 1, 1))

    if not dates:
        try:
            dates = fixes[case_path]['dates']
        except KeyError:
            if 'input_dates' in DEBUG:
                # subprocess.Popen(
                #     ['firefox', 'file://%s' % case_path],
                #     shell=False
                # ).communicate()
                print '  No date found for: file://%s' % case_path
                input_date = raw_input('  What should be here (YYYY-MM-DD)? ')
                add_fix(case_path, {
                    'dates': [datetime.datetime.strptime(input_date, '%Y-%m-%d')]})
                dates = [datetime.datetime.strptime(input_date, '%Y-%m-%d')]
            if 'log_bad_dates' in DEBUG:
                # Write the failed case out to file.
                with open('missing_dates.txt', 'a') as out:
                    out.write('%s\n' % case_path)

    if dates:
        if 'date' in DEBUG:
            log_print(
                "  Using date: %s of dates found: %s" % (max(dates), dates))
        return max(dates)
    else:
        if 'date' in DEBUG:
            log_print("  No dates found")
        return []
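
The extraction above only trusts parsed dates that fall inside a "sane" window derived from the citations: roughly twenty years either side of the reporters' edition dates, capped at the current time. A simplified sketch of building that window (parse_dates itself is project-specific and not reproduced; the edition dates here are illustrative):

import datetime
from datetime import timedelta

def sane_window(edition_dates, now=None):
    # edition_dates: datetimes taken from the citations' reporter editions.
    now = now or datetime.datetime.now()
    start = min(edition_dates) - timedelta(weeks=20 * 52)
    end = min(max(edition_dates) + timedelta(weeks=20 * 52), now)
    return start, end

print(sane_window([datetime.datetime(1948, 1, 1), datetime.datetime(1960, 1, 1)]))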

Example 24

Project: vcs
Source File: inmemory.py
View license
    def commit(self, message, author, parents=None, branch=None, date=None,
               **kwargs):
        """
        Performs in-memory commit (doesn't check workdir in any way) and
        returns newly created ``Changeset``. Updates repository's
        ``revisions``.

        :param message: message of the commit
        :param author: full username, i.e. "Joe Doe <joe.doe@example.com>"
        :param parents: single parent or sequence of parents from which commit
          would be derived
        :param date: ``datetime.datetime`` instance. Defaults to
          ``datetime.datetime.now()``.
        :param branch: branch name, as a string. If none is given, the backend's
          default branch is used.

        :raises ``CommitError``: if any error occurs while committing
        """
        self.check_integrity(parents)

        from .repository import GitRepository
        if branch is None:
            branch = GitRepository.DEFAULT_BRANCH_NAME

        repo = self.repository._repo
        object_store = repo.object_store

        ENCODING = "UTF-8"
        DIRMOD = 040000

        # Create the tree and populate it with blobs
        commit_tree = self.parents[0] and repo[self.parents[0]._commit.tree] or\
            objects.Tree()
        for node in self.added + self.changed:
            # Compute subdirs if needed
            dirpath, nodename = posixpath.split(node.path)
            dirnames = dirpath and dirpath.split('/') or []
            parent = commit_tree
            ancestors = [('', parent)]

            # Tries to dig for the deepest existing tree
            while dirnames:
                curdir = dirnames.pop(0)
                try:
                    dir_id = parent[curdir][1]
                except KeyError:
                    # put curdir back into dirnames and stop
                    dirnames.insert(0, curdir)
                    break
                else:
                    # If found, updates parent
                    parent = self.repository._repo[dir_id]
                    ancestors.append((curdir, parent))
            # Now parent is deepest existing tree and we need to create subtrees
            # for dirnames (in reverse order) [this only applies for nodes from added]
            new_trees = []

            if not node.is_binary:
                content = node.content.encode(ENCODING)
            else:
                content = node.content
            blob = objects.Blob.from_string(content)

            node_path = node.name.encode(ENCODING)
            if dirnames:
                # If there are trees which should be created we need to build
                # them now (in reverse order)
                reversed_dirnames = list(reversed(dirnames))
                curtree = objects.Tree()
                curtree[node_path] = node.mode, blob.id
                new_trees.append(curtree)
                for dirname in reversed_dirnames[:-1]:
                    newtree = objects.Tree()
                    #newtree.add(DIRMOD, dirname, curtree.id)
                    newtree[dirname] = DIRMOD, curtree.id
                    new_trees.append(newtree)
                    curtree = newtree
                parent[reversed_dirnames[-1]] = DIRMOD, curtree.id
            else:
                parent.add(name=node_path, mode=node.mode, hexsha=blob.id)

            new_trees.append(parent)
            # Update ancestors
            for parent, tree, path in reversed([(a[1], b[1], b[0]) for a, b in
                zip(ancestors, ancestors[1:])]):
                parent[path] = DIRMOD, tree.id
                object_store.add_object(tree)

            object_store.add_object(blob)
            for tree in new_trees:
                object_store.add_object(tree)
        for node in self.removed:
            paths = node.path.split('/')
            tree = commit_tree
            trees = [tree]
            # Traverse deep into the forest...
            for path in paths:
                try:
                    obj = self.repository._repo[tree[path][1]]
                    if isinstance(obj, objects.Tree):
                        trees.append(obj)
                        tree = obj
                except KeyError:
                    break
            # Cut down the blob and all rotten trees on the way back...
            for path, tree in reversed(zip(paths, trees)):
                del tree[path]
                if tree:
                    # This tree still has elements - don't remove it or any
                    # of its parents
                    break

        object_store.add_object(commit_tree)

        # Create commit
        commit = objects.Commit()
        commit.tree = commit_tree.id
        commit.parents = [p._commit.id for p in self.parents if p]
        commit.author = commit.committer = safe_str(author)
        commit.encoding = ENCODING
        commit.message = safe_str(message)

        # Compute date
        if date is None:
            date = time.time()
        elif isinstance(date, datetime.datetime):
            date = time.mktime(date.timetuple())

        author_time = kwargs.pop('author_time', date)
        commit.commit_time = int(date)
        commit.author_time = int(author_time)
        tz = time.timezone
        author_tz = kwargs.pop('author_timezone', tz)
        commit.commit_timezone = tz
        commit.author_timezone = author_tz

        object_store.add_object(commit)

        ref = 'refs/heads/%s' % branch
        repo.refs[ref] = commit.id

        # Update vcs repository object & recreate dulwich repo
        self.repository.revisions.append(commit.id)
        # invalidate parsed refs after commit
        self.repository._parsed_refs = self.repository._get_parsed_refs()
        tip = self.repository.get_changeset()
        self.reset()
        return tip
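
The commit() above accepts None, an epoch number, or a datetime for `date` and normalizes them all to the integer seconds that dulwich expects for commit_time. A minimal sketch of just that normalization:

import time
import datetime

def normalize_commit_time(date=None):
    # None -> "now"; datetime -> local epoch seconds; plain numbers pass through.
    if date is None:
        date = time.time()
    elif isinstance(date, datetime.datetime):
        date = time.mktime(date.timetuple())
    return int(date)

print(normalize_commit_time())                                        # now
print(normalize_commit_time(datetime.datetime(2012, 1, 28, 13, 38)))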

Example 25

Project: crate-site
Source File: 0001_initial.py
View license
    def forwards(self, orm):
        # Adding model 'TroveClassifier'
        db.create_table('packages_troveclassifier', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('trove', self.gf('django.db.models.fields.CharField')(unique=True, max_length=350)),
        ))
        db.send_create_signal('packages', ['TroveClassifier'])

        # Adding model 'Package'
        db.create_table('packages_package', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('created', self.gf('model_utils.fields.AutoCreatedField')(default=datetime.datetime(2012, 1, 28, 13, 38, 31, 227535))),
            ('modified', self.gf('model_utils.fields.AutoLastModifiedField')(default=datetime.datetime(2012, 1, 28, 13, 38, 31, 227680))),
            ('name', self.gf('django.db.models.fields.SlugField')(unique=True, max_length=150)),
        ))
        db.send_create_signal('packages', ['Package'])

        # Adding model 'PackageURI'
        db.create_table('packages_packageuri', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('package', self.gf('django.db.models.fields.related.ForeignKey')(related_name='package_links', to=orm['packages.Package'])),
            ('uri', self.gf('django.db.models.fields.URLField')(max_length=400)),
        ))
        db.send_create_signal('packages', ['PackageURI'])

        # Adding unique constraint on 'PackageURI', fields ['package', 'uri']
        db.create_unique('packages_packageuri', ['package_id', 'uri'])

        # Adding model 'Release'
        db.create_table('packages_release', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('created', self.gf('model_utils.fields.AutoCreatedField')(default=datetime.datetime(2012, 1, 28, 13, 38, 31, 229663), db_index=True)),
            ('modified', self.gf('model_utils.fields.AutoLastModifiedField')(default=datetime.datetime(2012, 1, 28, 13, 38, 31, 229762))),
            ('package', self.gf('django.db.models.fields.related.ForeignKey')(related_name='releases', to=orm['packages.Package'])),
            ('version', self.gf('django.db.models.fields.CharField')(max_length=512)),
            ('hidden', self.gf('django.db.models.fields.BooleanField')(default=False)),
            ('order', self.gf('django.db.models.fields.IntegerField')(default=0)),
            ('platform', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('summary', self.gf('django.db.models.fields.TextField')()),
            ('description', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('keywords', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('license', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('author', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('author_email', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('maintainer', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('maintainer_email', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('requires_python', self.gf('django.db.models.fields.CharField')(max_length=25, blank=True)),
            ('download_uri', self.gf('django.db.models.fields.URLField')(max_length=1024, blank=True)),
            ('raw_data', self.gf('crate.fields.json.JSONField')(null=True, blank=True)),
        ))
        db.send_create_signal('packages', ['Release'])

        # Adding unique constraint on 'Release', fields ['package', 'version']
        db.create_unique('packages_release', ['package_id', 'version'])

        # Adding M2M table for field classifiers on 'Release'
        db.create_table('packages_release_classifiers', (
            ('id', models.AutoField(verbose_name='ID', primary_key=True, auto_created=True)),
            ('release', models.ForeignKey(orm['packages.release'], null=False)),
            ('troveclassifier', models.ForeignKey(orm['packages.troveclassifier'], null=False))
        ))
        db.create_unique('packages_release_classifiers', ['release_id', 'troveclassifier_id'])

        # Adding model 'ReleaseFile'
        db.create_table('packages_releasefile', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('created', self.gf('model_utils.fields.AutoCreatedField')(default=datetime.datetime(2012, 1, 28, 13, 38, 31, 228759), db_index=True)),
            ('modified', self.gf('model_utils.fields.AutoLastModifiedField')(default=datetime.datetime(2012, 1, 28, 13, 38, 31, 228860))),
            ('release', self.gf('django.db.models.fields.related.ForeignKey')(related_name='files', to=orm['packages.Release'])),
            ('type', self.gf('django.db.models.fields.CharField')(max_length=25)),
            ('file', self.gf('django.db.models.fields.files.FileField')(max_length=512)),
            ('filename', self.gf('django.db.models.fields.CharField')(default=None, max_length=200, null=True, blank=True)),
            ('digest', self.gf('django.db.models.fields.CharField')(max_length=512)),
            ('python_version', self.gf('django.db.models.fields.CharField')(max_length=25)),
            ('downloads', self.gf('django.db.models.fields.PositiveIntegerField')(default=0)),
            ('comment', self.gf('django.db.models.fields.TextField')(blank=True)),
        ))
        db.send_create_signal('packages', ['ReleaseFile'])

        # Adding unique constraint on 'ReleaseFile', fields ['release', 'type', 'python_version', 'filename']
        db.create_unique('packages_releasefile', ['release_id', 'type', 'python_version', 'filename'])

        # Adding model 'ReleaseURI'
        db.create_table('packages_releaseuri', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('release', self.gf('django.db.models.fields.related.ForeignKey')(related_name='uris', to=orm['packages.Release'])),
            ('label', self.gf('django.db.models.fields.CharField')(max_length=64)),
            ('uri', self.gf('django.db.models.fields.URLField')(max_length=500)),
        ))
        db.send_create_signal('packages', ['ReleaseURI'])

        # Adding model 'ReleaseRequire'
        db.create_table('packages_releaserequire', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('release', self.gf('django.db.models.fields.related.ForeignKey')(related_name='requires', to=orm['packages.Release'])),
            ('kind', self.gf('django.db.models.fields.CharField')(max_length=50)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=150)),
            ('version', self.gf('django.db.models.fields.CharField')(max_length=50)),
            ('environment', self.gf('django.db.models.fields.TextField')(blank=True)),
        ))
        db.send_create_signal('packages', ['ReleaseRequire'])

        # Adding model 'ReleaseProvide'
        db.create_table('packages_releaseprovide', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('release', self.gf('django.db.models.fields.related.ForeignKey')(related_name='provides', to=orm['packages.Release'])),
            ('kind', self.gf('django.db.models.fields.CharField')(max_length=50)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=150)),
            ('version', self.gf('django.db.models.fields.CharField')(max_length=50)),
            ('environment', self.gf('django.db.models.fields.TextField')(blank=True)),
        ))
        db.send_create_signal('packages', ['ReleaseProvide'])

        # Adding model 'ReleaseObsolete'
        db.create_table('packages_releaseobsolete', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('release', self.gf('django.db.models.fields.related.ForeignKey')(related_name='obsoletes', to=orm['packages.Release'])),
            ('kind', self.gf('django.db.models.fields.CharField')(max_length=50)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=150)),
            ('version', self.gf('django.db.models.fields.CharField')(max_length=50)),
            ('environment', self.gf('django.db.models.fields.TextField')(blank=True)),
        ))
        db.send_create_signal('packages', ['ReleaseObsolete'])
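
The datetime.datetime(2012, 1, 28, ...) literals above are how South freezes dynamic field defaults at the moment the migration is generated: the value is evaluated once and baked into the file. A small sketch of the difference between such a frozen default and a callable one (names here are illustrative, not from the project):

import datetime

# Frozen at definition time, as in the generated migration above.
frozen_default = datetime.datetime(2012, 1, 28, 13, 38, 31, 227535)

# Re-evaluated on every call, which is what a callable default would give.
def callable_default():
    return datetime.datetime.now()

print(frozen_default)      # always the same instant
print(callable_default())  # the current time at each call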

Example 26

Project: prov
Source File: provxml.py
View license
    def serialize_bundle(self, bundle, element=None, force_types=False):
        """
        Serializes a bundle or document to PROV XML.

        :param bundle: The bundle or document.
        :param element: The XML element to write to. Will be created if None.
        :type force_types: boolean, optional
        :param force_types: Will force xsd:types to be written for most
            attributes mainly PROV-"attributes", e.g. tags not in the
            PROV namespace. Off by default meaning xsd:type attributes will
            only be set for prov:type, prov:location, and prov:value as is
            done in the official PROV-XML specification. Furthermore the
            types will always be set if the Python type requires it. False
            is a good default and it should rarely require changing.
        """
        # Build the namespace map for lxml and attach it to the root XML
        # element. No dictionary comprehension in Python 2.6!
        nsmap = dict((ns.prefix, ns.uri) for ns in
                     self.document._namespaces.get_registered_namespaces())
        if self.document._namespaces._default:
            nsmap[None] = self.document._namespaces._default.uri
        for namespace in bundle.namespaces:
            if namespace not in nsmap:
                nsmap[namespace.prefix] = namespace.uri

        for key, value in DEFAULT_NAMESPACES.items():
            uri = value.uri
            if value.prefix == "xsd":
                # The XSD namespace for some reason has no hash at the end
                # for PROV XML, but for all other serializations it does.
                uri = uri.rstrip("#")
            nsmap[value.prefix] = uri

        if element is not None:
            xml_bundle_root = etree.SubElement(
                element, _ns_prov("bundleContent"), nsmap=nsmap)
        else:
            xml_bundle_root = etree.Element(_ns_prov("document"), nsmap=nsmap)

        if bundle.identifier:
            xml_bundle_root.attrib[_ns_prov("id")] = \
                six.text_type(bundle.identifier)

        for record in bundle._records:
            rec_type = record.get_type()
            identifier = six.text_type(record._identifier) \
                if record._identifier else None

            if identifier:
                attrs = {_ns_prov("id"): identifier}
            else:
                attrs = None

            # Derive the record label from its attributes which is sometimes
            # needed.
            attributes = list(record.attributes)
            rec_label = self._derive_record_label(rec_type, attributes)

            elem = etree.SubElement(xml_bundle_root,
                                    _ns_prov(rec_label), attrs)

            for attr, value in sorted_attributes(rec_type, attributes):
                subelem = etree.SubElement(
                    elem, _ns(attr.namespace.uri, attr.localpart))
                if isinstance(value, prov.model.Literal):
                    if value.datatype not in \
                            [None, PROV["InternationalizedString"]]:
                        subelem.attrib[_ns_xsi("type")] = "%s:%s" % (
                            value.datatype.namespace.prefix,
                            value.datatype.localpart)
                    if value.langtag is not None:
                        subelem.attrib[_ns_xml("lang")] = value.langtag
                    v = value.value
                elif isinstance(value, prov.model.QualifiedName):
                    if attr not in PROV_ATTRIBUTE_QNAMES:
                        subelem.attrib[_ns_xsi("type")] = "xsd:QName"
                    v = six.text_type(value)
                elif isinstance(value, datetime.datetime):
                    v = value.isoformat()
                else:
                    v = six.text_type(value)

                # xsd type inference.
                #
                # This is a bit messy and there are all kinds of special
                # rules but it appears to get the job done.
                #
                # If it is a type element and does not yet have an
                # associated xsi type, try to infer it from the value.
                # The not startswith("prov:") check is a little bit hacky to
                # avoid type inference when the type is a standard prov
                # type.
                #
                # To enable a mapping of Python types to XML and back,
                # the XSD type must be written for these types.
                ALWAYS_CHECK = [bool, datetime.datetime, float,
                                prov.identifier.Identifier]
                # Add long and int on Python 2, only int on Python 3.
                ALWAYS_CHECK.extend(six.integer_types)
                ALWAYS_CHECK = tuple(ALWAYS_CHECK)
                if (force_types or
                        type(value) in ALWAYS_CHECK or
                        attr in [PROV_TYPE, PROV_LOCATION, PROV_VALUE]) and \
                        _ns_xsi("type") not in subelem.attrib and \
                        not six.text_type(value).startswith("prov:") and \
                        not (attr in PROV_ATTRIBUTE_QNAMES and v) and \
                        attr not in [PROV_ATTR_TIME, PROV_LABEL]:
                    xsd_type = None
                    if isinstance(value, bool):
                        xsd_type = XSD_BOOLEAN
                        v = v.lower()
                    elif isinstance(value, six.string_types):
                        xsd_type = XSD_STRING
                    elif isinstance(value, float):
                        xsd_type = XSD_DOUBLE
                    elif isinstance(value, six.integer_types):
                        xsd_type = XSD_INT
                    elif isinstance(value, datetime.datetime):
                        # Exception of the exception, while technically
                        # still correct, do not write XSD dateTime type for
                        # attributes in the PROV namespaces as the type is
                        # already declared in the XSD and PROV XML also does
                        # not specify it in the docs.
                        if attr.namespace.prefix != "prov" \
                                or "time" not in attr.localpart.lower():
                            xsd_type = XSD_DATETIME
                    elif isinstance(value, prov.identifier.Identifier):
                        xsd_type = XSD_ANYURI

                    if xsd_type is not None:
                        subelem.attrib[_ns_xsi("type")] = \
                            six.text_type(xsd_type)

                if attr in PROV_ATTRIBUTE_QNAMES and v:
                    subelem.attrib[_ns_prov("ref")] = v
                else:
                    subelem.text = v
        return xml_bundle_root
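
Two datetime rules drive the serializer above: datetime values are written with isoformat(), and an xsd:dateTime type attribute is added unless the attribute already sits in the prov namespace with "time" in its local part. A stripped-down sketch of those two rules, with hypothetical ns_prefix/localpart arguments standing in for the attribute object:

import datetime

def serialize_value(value, ns_prefix="ex", localpart="generatedAtTime"):
    # Text form: ISO 8601 for datetimes, plain str() otherwise.
    text = value.isoformat() if isinstance(value, datetime.datetime) else str(value)
    # Type annotation: skip xsd:dateTime for prov:*time attributes, as above.
    xsd_type = None
    if isinstance(value, datetime.datetime) and not (
            ns_prefix == "prov" and "time" in localpart.lower()):
        xsd_type = "xsd:dateTime"
    return text, xsd_type

print(serialize_value(datetime.datetime(2015, 4, 1, 12, 30)))
# ('2015-04-01T12:30:00', 'xsd:dateTime')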

Example 27

Project: zipline
Source File: test_slippage.py
View license
    def test_orders_stop_limit(self):
        slippage_model = VolumeShareSlippage()
        slippage_model.data_portal = self.data_portal

        # long, does not trade
        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': 100,
                'filled': 0,
                'sid': self.ASSET133,
                'stop': 4.0,
                'limit': 3.0})
        ]

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[2],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[3],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        # long, does not trade - impacted price worse than limit price
        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': 100,
                'filled': 0,
                'sid': self.ASSET133,
                'stop': 4.0,
                'limit': 3.5})
        ]

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[2],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[3],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        # long, does trade
        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': 100,
                'filled': 0,
                'sid': self.ASSET133,
                'stop': 4.0,
                'limit': 3.6})
        ]

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[2],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[3],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 1)
        _, txn = orders_txns[0]

        expected_txn = {
            'price': float(3.50021875),
            'dt': datetime.datetime(
                2006, 1, 5, 14, 34, tzinfo=pytz.utc),
            'amount': int(50),
            'sid': int(133)
        }

        for key, value in expected_txn.items():
            self.assertEquals(value, txn[key])

        # short, does not trade

        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': -100,
                'filled': 0,
                'sid': self.ASSET133,
                'stop': 3.0,
                'limit': 4.0})
        ]

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[0],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[1],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        # short, does not trade - impacted price worse than limit price
        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': -100,
                'filled': 0,
                'sid': self.ASSET133,
                'stop': 3.0,
                'limit': 3.5})
        ]

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[0],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[1],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        # short, does trade
        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': -100,
                'filled': 0,
                'sid': self.ASSET133,
                'stop': 3.0,
                'limit': 3.4})
        ]

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[0],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 0)

        bar_data = self.create_bardata(
            simulation_dt_func=lambda: self.minutes[1],
        )

        orders_txns = list(slippage_model.simulate(
            bar_data,
            self.ASSET133,
            open_orders,
        ))

        self.assertEquals(len(orders_txns), 1)
        _, txn = orders_txns[0]

        expected_txn = {
            'price': float(3.49978125),
            'dt': datetime.datetime(
                2006, 1, 5, 14, 32, tzinfo=pytz.utc),
            'amount': int(-50),
            'sid': int(133)
        }

        for key, value in expected_txn.items():
            self.assertEquals(value, txn[key])
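
The expected transactions in this test carry timezone-aware datetime.datetime values, built by passing tzinfo=pytz.utc directly. A minimal sketch of that pattern (the US/Eastern conversion below is illustrative and not part of the test):

import datetime
import pytz

# pytz.utc can be passed directly as tzinfo (as the test above does); other
# pytz zones should be attached via localize() instead.
utc_dt = datetime.datetime(2006, 1, 5, 14, 34, tzinfo=pytz.utc)

# Illustrative conversion only: aware datetimes compare by instant, so the
# equality checks against expected_txn['dt'] do not depend on the zone used.
eastern = pytz.timezone('US/Eastern')
local_dt = eastern.localize(datetime.datetime(2006, 1, 5, 9, 34))
assert utc_dt == local_dt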

Example 28

Project: Memacs
Source File: simplephonelogs.py
View license
    def _generateOrgentry(self, e_time, e_name, e_batt, e_uptime,
                          e_last_opposite_occurrence, e_last_occurrence,
                          prev_office_sum, prev_office_first_begin, office_lunchbreak):
        """
        takes the data from the parameters and generates an Org-mode entry.

        @param e_time: time-stamp of the entry
        @param e_name: entry name/description
        @param e_batt: battery level
        @param e_uptime: uptime in seconds
        @param e_last_opposite_occurrence: time-stamp of previous opposite occurrence (if not False)
        @param e_last_occurrence: time-stamp of previous occurrence
        @param additional_paren_string: string that gets appended to the parenthesis
        @param prev_office_sum: holds the sum of all previous working duration today
        @param prev_office_first_begin: holds the first time-stamp of wifi-office for today
        @param office_lunchbreak: array of begin- and end-time-stamp of lunch-break (if any)
        """

        assert e_time.__class__ == datetime.datetime
        assert e_name.__class__ == unicode
        assert e_batt.__class__ == unicode
        assert e_uptime.__class__ == unicode
        assert (e_last_opposite_occurrence.__class__ == datetime.datetime or not e_last_opposite_occurrence)
        assert (e_last_occurrence.__class__ == datetime.datetime or not e_last_occurrence)

        last_info = u''
        in_between_hms = u''
        in_between_s = u''
        ignore_occurrence = False

        ## convert parameters to be writable:
        office_sum = prev_office_sum
        office_first_begin = prev_office_first_begin

        if e_last_opposite_occurrence:

            in_between_s = (e_time - e_last_opposite_occurrence).seconds + \
                (e_time - e_last_opposite_occurrence).days * 3600 * 24
            in_between_hms = unicode(OrgFormat.get_hms_from_sec(in_between_s))

            if e_name == u'boot':
                last_info = u' (off for '
            elif e_name == u'shutdown':
                last_info = u' (on for '
            elif e_name.endswith(u'-end'):
                last_info = u' (' + e_name[0:-4].replace('wifi-', '') + u' for '
            else:
                last_info = u' (not ' + e_name.replace('wifi-', '') + u' for '

            ## handle special case: office hours
            additional_paren_string = ""
            if e_name == 'wifi-office-end':
                office_total = None
                ## calculate office_sum and office_total
                if not office_sum:
                    office_sum = (e_time - e_last_opposite_occurrence).seconds
                    office_total = office_sum
                else:
                    assert(office_first_begin)
                    assert(office_sum)
                    office_sum = office_sum + (e_time - e_last_opposite_occurrence).seconds
                    office_total = int(time.mktime(e_time.timetuple()) - time.mktime(office_first_begin.timetuple()))

                assert(type(office_total) == int)
                assert(type(office_sum) == int)
                assert(type(in_between_s) == int)

                ## come up with the additional office-hours string:
                additional_paren_string = u'; today ' + OrgFormat.get_hms_from_sec(office_sum) + \
                    '; today total ' + OrgFormat.get_hms_from_sec(office_total)

            if additional_paren_string:
                last_info += unicode(OrgFormat.get_dhms_from_sec(in_between_s)) + additional_paren_string + u')'
            else:
                last_info += unicode(OrgFormat.get_dhms_from_sec(in_between_s)) + u')'

        ## handle special case: office hours
        if e_name == 'wifi-office':
            if not office_sum or not office_first_begin:
                ## new day
                office_first_begin = e_time
            else:
                ## check if we've found a lunch-break (first wifi-office between 11:30-13:00 where not office for > 17min)
                if e_time.time() > datetime.time(11, 30) and e_time.time() < datetime.time(13, 00) and e_last_opposite_occurrence:
                    if e_last_opposite_occurrence.date() == e_time.date() and in_between_s > (17 * 60) and in_between_s < (80 * 60):
                        #import pdb; pdb.set_trace()
                        office_lunchbreak = [e_last_opposite_occurrence.time(), e_time.time()]

        ## handle special case: boot without previous shutdown = crash
        if (e_name == u'boot') and \
                (e_last_occurrence and e_last_opposite_occurrence) and \
                (e_last_occurrence > e_last_opposite_occurrence):
            ## last boot is more recent than last shutdown -> crash has happened
            last_info = u' after crash'
            in_between_hms = u''
            in_between_s = u''
            ignore_occurrence = True

        properties = OrgProperties()
        properties.add("IN-BETWEEN", in_between_hms)
        properties.add("IN-BETWEEN-S", unicode(in_between_s))
        properties.add("BATT-LEVEL", e_batt)
        properties.add("UPTIME", OrgFormat.get_hms_from_sec(int(e_uptime)))
        properties.add("UPTIME-S", e_uptime)
        if e_name == 'wifi-office-end' and office_lunchbreak:
            properties.add("OFFICE-SUMMARY",
                           e_last_opposite_occurrence.strftime('| %Y-%m-%d | %a ') +
                           prev_office_first_begin.strftime('| %H:%M ') +
                           office_lunchbreak[0].strftime('| %H:%M ') +
                           office_lunchbreak[1].strftime('| %H:%M ') +
                           e_time.strftime('| %H:%M | | |'))
        elif e_name == 'wifi-office-end' and not office_lunchbreak:
            properties.add("OFFICE-SUMMARY",
                           e_last_opposite_occurrence.strftime('| %Y-%m-%d | %a ') +
                           prev_office_first_begin.strftime('| %H:%M | 11:30 | 12:00 ') +
                           e_time.strftime('| %H:%M | | |'))
        self._writer.write_org_subitem(timestamp=e_time.strftime('<%Y-%m-%d %a %H:%M>'),
                                       output=e_name + last_info,
                                       properties=properties)

        return u'** ' + e_time.strftime('<%Y-%m-%d %a %H:%M>') + u' ' + e_name + last_info + \
            u'\n:PROPERTIES:\n:IN-BETWEEN: ' + in_between_hms + \
            u'\n:IN-BETWEEN-S: ' + unicode(in_between_s) + \
            u'\n:BATT-LEVEL: ' + e_batt + \
            u'\n:UPTIME: ' + unicode(OrgFormat.get_hms_from_sec(int(e_uptime))) + \
            u'\n:UPTIME-S: ' + unicode(e_uptime) + u'\n:END:\n', \
            ignore_occurrence, office_sum, office_first_begin, office_lunchbreak
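
This example totals the gap between two timestamps by hand from the timedelta's .seconds and .days fields (the code targets older Python versions). A small sketch with illustrative timestamps, showing the manual arithmetic agrees with timedelta.total_seconds() on Python 2.7+/3.x:

import datetime

start = datetime.datetime(2013, 4, 5, 13, 39)
end = datetime.datetime(2013, 4, 6, 14, 0)

delta = end - start
# Manual total, as computed for in_between_s above:
manual_seconds = delta.seconds + delta.days * 3600 * 24
# Same value for non-negative deltas (total_seconds() returns a float):
assert manual_seconds == int(delta.total_seconds())
print(manual_seconds)  # 87660: one day plus 21 minutes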

Example 29

Project: airmozilla
Source File: dashboard.py
View license
@staff_required
@json_view
def dashboard_data_graphs(request):  # pragma: no cover
    """experimental"""
    YEARS = 3
    now = timezone.now()

    def get_events(years_back):
        first_date = datetime.datetime(now.year - years_back + 1, 1, 1)

        objects = (
            Event.objects
            .filter(archive_time__lt=now)
            .filter(created__gt=first_date.replace(tzinfo=timezone.utc))
            .order_by('created')
        )
        buckets = {}
        for each in objects.values_list('created'):
            created, = each
            year = created.year
            if year not in buckets:
                buckets[year] = defaultdict(int)
            next_monday = created + datetime.timedelta(
                days=7 - created.weekday()
            )
            key = next_monday.strftime('%Y-%m-%d')
            buckets[year][key] += 1
        legends = sorted(buckets.keys())

        last_year = legends[-1]

        def fake_year(date_str, year):
            return date_str.replace(str(year), str(last_year))

        data = []
        for year in legends:
            group = sorted(
                {'date': fake_year(k, year), 'value': v}
                for k, v in buckets[year].items()
            )
            data.append(group)
        return {
            'type': 'events',
            'title': 'New Events',
            'data': data,
            'description': 'Number of added events per year',
            'legends': legends,
        }

    def get_revisions(years_back):
        first_date = datetime.datetime(now.year - years_back + 1, 1, 1)

        objects = (
            EventRevision.objects
            .filter(created__gt=first_date.replace(tzinfo=timezone.utc))
            .order_by('created')
        )
        buckets = {}
        for each in objects.values_list('created'):
            created, = each
            year = created.year
            if year not in buckets:
                buckets[year] = defaultdict(int)
            next_monday = created + datetime.timedelta(
                days=7 - created.weekday()
            )
            key = next_monday.strftime('%Y-%m-%d')
            buckets[year][key] += 1
        legends = sorted(buckets.keys())

        last_year = legends[-1]

        def fake_year(date_str, year):
            return date_str.replace(str(year), str(last_year))

        data = []
        for year in legends:
            group = sorted(
                {'date': fake_year(k, year), 'value': v}
                for k, v in buckets[year].items()
            )
            data.append(group)
        return {
            'type': 'revisions',
            'title': 'Event Revisions',
            'data': data,
            'description': 'Number of event edits per year',
            'legends': legends,
        }

    def get_users(years_back):
        first_date = datetime.datetime(now.year - years_back + 1, 1, 1)

        objects = (
            User.objects
            .filter(date_joined__gt=first_date.replace(tzinfo=timezone.utc))
            .order_by('date_joined')
        )
        buckets = {}
        for each in objects.values_list('date_joined'):
            created, = each
            year = created.year
            if year not in buckets:
                buckets[year] = defaultdict(int)
            next_monday = created + datetime.timedelta(
                days=7 - created.weekday()
            )
            key = next_monday.strftime('%Y-%m-%d')
            buckets[year][key] += 1
        legends = sorted(buckets.keys())

        last_year = legends[-1]

        def fake_year(date_str, year):
            return date_str.replace(str(year), str(last_year))

        data = []
        for year in legends:
            group = sorted(
                {'date': fake_year(k, year), 'value': v}
                for k, v in buckets[year].items()
            )
            data.append(group)
        return {
            'type': 'users',
            'title': 'New Users',
            'data': data,
            'description': 'Number of first joining users per year',
            'legends': legends,
        }

    groups = []
    groups.append(get_events(YEARS))
    groups.append(get_users(YEARS))
    groups.append(get_revisions(2))
    return {'groups': groups}
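
Each of the three helpers above buckets rows into weeks keyed by the Monday following the created timestamp. A minimal sketch of that datetime.timedelta arithmetic, using an illustrative date:

import datetime

created = datetime.datetime(2014, 7, 3, 15, 30)  # a Thursday: weekday() == 3

# Advance to the following Monday, as the bucketing loops above do.
next_monday = created + datetime.timedelta(days=7 - created.weekday())
print(next_monday.strftime('%Y-%m-%d'))  # '2014-07-07'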

Example 31

Project: CVSAnalY
Source File: GitParser.py
View license
    def _parse_line(self, line):
        if line is None or line == '':
            return

        # Ignore
        for patt in self.patterns['ignore']:
            if patt.match(line):
                return

        # Commit
        match = self.patterns['commit'].match(line)
        if match:
            if self.commit is not None and self.branch is not None:
                if self.branch.tail.svn_tag is None:  # Skip commits on svn tags
                    self.handler.commit(self.branch.tail.commit)

            self.commit = Commit()
            self.commit.revision = match.group(1)

            parents = match.group(3)
            if parents:
                parents = parents.split()
                self.commit.parents = parents
            git_commit = self.GitCommit(self.commit, parents)

            decorate = match.group(5)
            branch = None
            if decorate:
                # Remote branch
                m = re.search(self.patterns['branch'], decorate)
                if m:
                    branch = self.GitBranch(self.GitBranch.REMOTE, m.group(1), git_commit)
                    printdbg("Branch '%s' head at acommit %s", (branch.name, self.commit.revision))
                else:
                    # Local Branch
                    m = re.search(self.patterns['local-branch'], decorate)
                    if m:
                        branch = self.GitBranch(self.GitBranch.LOCAL, m.group(1), git_commit)
                        printdbg("Commit %s on local branch '%s'", (self.commit.revision, branch.name))
                        # If local branch was merged we just ignore this decoration
                        if self.branch and self.branch.is_my_parent(git_commit):
                            printdbg("Local branch '%s' was merged", (branch.name,))
                            branch = None
                    else:
                        # Stash
                        m = re.search(self.patterns['stash'], decorate)
                        if m:
                            branch = self.GitBranch(self.GitBranch.STASH, "stash", git_commit)
                            printdbg("Commit %s on stash", (self.commit.revision,))
                # Tag
                m = re.search(self.patterns['tag'], decorate)
                if m:
                    self.commit.tags = [m.group(1)]
                    printdbg("Commit %s tagged as '%s'", (self.commit.revision, self.commit.tags[0]))

            if not branch and not self.branch:
                branch = self.GitBranch(self.GitBranch.LOCAL, "(no-branch)", git_commit)
                printdbg("Commit %s on unknown local branch '%s'", (self.commit.revision, branch.name))

            # This part of the code looks weird at first, so here is a small description of what it does:
            #
            # * self.branch is the branch to which the last inspected commit belonged
            # * branch is the branch of the currently parsed commit
            #
            # This check is only to find branches which are fully merged into an already analyzed branch
            #
            # For more detailed information see https://github.com/MetricsGrimoire/CVSAnalY/issues/64
            if branch is not None and self.branch is not None:
                # Detect empty branches.
                # Ideally, the head of a branch can't have children.
                # When this happens, it is because the branch is empty, so we just ignore such a branch.
                if self.branch.is_my_parent(git_commit):
                    printout(
                        "Info: Branch '%s' will be ignored, because it was already merged in an active one.",
                        (branch.name,)
                    )
                    branch = None

            if len(self.branches) >= 2:
                # If current commit is the start point of a new branch
                # we have to look at all the current branches since
                # we haven't inserted the new branch yet.
                # If not, look at all other branches excluding the current one
                for i, b in enumerate(self.branches):
                    if i == 0 and branch is None:
                        continue

                    if b.is_my_parent(git_commit):
                        # We assume current branch is always the last one
                        # AFAIK there's no way to make sure this is right
                        printdbg("Start point of branch '%s' at commit %s",
                                 (self.branches[0].name, self.commit.revision))
                        self.branches.pop(0)
                        self.branch = b

            if self.branch and self.branch.tail.svn_tag is not None and self.branch.is_my_parent(git_commit):
                # There's a pending tag in previous commit
                pending_tag = self.branch.tail.svn_tag
                printdbg("Move pending tag '%s' from previous commit %s to current %s", (pending_tag,
                                                                                         self.branch.tail.commit.revision,
                                                                                         self.commit.revision))
                if self.commit.tags and pending_tag not in self.commit.tags:
                    self.commit.tags.append(pending_tag)
                else:
                    self.commit.tags = [pending_tag]
                self.branch.tail.svn_tag = None

            if branch is not None:
                self.branch = branch

                # Insert master always at the end
                if branch.name == 'master':
                    self.branches.append(self.branch)
                else:
                    self.branches.insert(0, self.branch)
            else:
                if self.branch is not None:
                    self.branch.set_tail(git_commit)
            return

        # Committer
        match = self.patterns['committer'].match(line)
        if match:
            self.commit.committer = Person()
            self.commit.committer.name = match.group(1)
            self.commit.committer.email = match.group(2)
            self.handler.committer(self.commit.committer)
            return

        # Author
        match = self.patterns['author'].match(line)
        if match:
            self.commit.author = Person()
            self.commit.author.name = match.group(1)
            self.commit.author.email = match.group(2)
            self.handler.author(self.commit.author)
            return

        # Commit date
        match = self.patterns['date'].match(line)
        if match:
            self.commit.date = datetime.datetime(
                *(time.strptime(match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))
            # datetime.datetime.strptime not supported by Python2.4
            #self.commit.date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")

            # match.group(2) represents the timezone. E.g. -0300, +0200, +0430 (Afghanistan)
            # This string will be parsed to int and recalculated into seconds (60 * 60)
            self.commit.date_tz = (((int(match.group(2))) * 60 * 60) / 100)
            return

        # Author date
        match = self.patterns['author_date'].match(line)
        if match:
            self.commit.author_date = datetime.datetime(
                *(time.strptime(match.group(1).strip(" "), "%a %b %d %H:%M:%S %Y")[0:6]))
            # datetime.datetime.strptime not supported by Python2.4
            #self.commit.author_date = datetime.datetime.strptime (match.group (1).strip (" "), "%a %b %d %H:%M:%S %Y")

            # match.group(2) represents the timezone. E.g. -0300, +0200, +0430 (Afghanistan)
            # This string will be parsed to int and recalculated into seconds (60 * 60)
            self.commit.author_date_tz = (((int(match.group(2))) * 60 * 60) / 100)
            return

        # File
        match = self.patterns['file'].match(line)
        if match:
            action = Action()
            type = match.group(1)
            if len(type) > 1:
                # merge actions
                if 'M' in type:
                    type = 'M'
                else:
                    # ignore merge actions without 'M'
                    return

            action.type = type
            action.f1 = match.group(2)

            self.commit.actions.append(action)
            self.handler.file(action.f1)
            return

        # File moved/copied
        match = self.patterns['file-moved'].match(line)
        if match:
            action = Action()
            type = match.group(1)
            if type == 'R':
                action.type = 'V'
            else:
                action.type = type
            action.f1 = match.group(3)
            action.f2 = match.group(2)
            action.rev = self.commit.revision

            self.commit.actions.append(action)
            self.handler.file(action.f1)

            return

        # This is a workaround for a bug in the GNOME Git migration
        # There are commits on tags not correctly detected like this one:
        # http://git.gnome.org/cgit/evolution/commit/?id=b8e52acac2b9fc5414a7795a73c74f7ee4eeb71f
        # We want to ignore commits on tags since it doesn't make any sense in Git
        if self.is_gnome:
            match = self.patterns['svn-tag'].match(line.strip())
            if match:
                printout("Warning: detected a commit on a svn tag: %s", (match.group(0),))
                tag = match.group(1)
                if self.commit.tags and tag in self.commit.tags:
                    # The commit will be ignored, so move the tag
                    # to the next (previous in history) commit
                    self.branch.tail.svn_tag = tag

        # Message
        self.commit.message += line + '\n'

        assert True, "Not match for line %s" % (line)

Example 32

Project: edx2bigquery
Source File: analyze_content.py
View license
def analyze_course_content(course_id, 
                           listings_file=None,
                           basedir="X-Year-2-data-sql", 
                           datedir="2013-09-21", 
                           use_dataset_latest=False,
                           do_upload=False,
                           courses=None,
                           verbose=True,
                           pin_date=None,
                           ):
    '''
    Compute course_content table, which quantifies:

    - number of chapter, sequential, vertical modules
    - number of video modules
    - number of problem, *openended, mentoring modules
    - number of discussion, annotatable, word_cloud modules

    Do this using the course "xbundle" file, produced when the course axis is computed.

    Include only modules which had nontrivial use, to rule out staff and un-shown content.
    Do the exclusion based on the count of each module appearing in the studentmodule table,
    as recorded in stats_module_usage for each course.

    Also, from the course listings file, compute the number of weeks the course was open.

    If do_upload (triggered by --force-recompute) then upload all accumulated data to the course report dataset 
    as the "stats_course_content" table.  Also generate a "course_summary_stats" table, stored in the
    course_report_ORG or course_report_latest dataset.  The course_summary_stats table combines
    data from many reports, including stats_course_content, the medians report, the listings file,
    broad_stats_by_course, and time_on_task_stats_by_course.
    
    '''

    if do_upload:
        if use_dataset_latest:
            org = "latest"
        else:
            org = courses[0].split('/',1)[0]	# extract org from first course_id in courses

        crname = 'course_report_%s' % org

        gspath = gsutil.gs_path_from_course_id(crname)
        gsfnp = gspath / CCDATA
        gsutil.upload_file_to_gs(CCDATA, gsfnp)
        tableid = "stats_course_content"
        dataset = crname

        mypath = os.path.dirname(os.path.realpath(__file__))
        SCHEMA_FILE = '%s/schemas/schema_content_stats.json' % mypath

        try:
            the_schema = json.loads(open(SCHEMA_FILE).read())[tableid]
        except Exception as err:
            print "Oops!  Failed to load schema file for %s.  Error: %s" % (tableid, str(err))
            raise

        if 0:
            bqutil.load_data_to_table(dataset, tableid, gsfnp, the_schema, wait=True, verbose=False,
                                      format='csv', skiprows=1)

        table = 'course_metainfo'
        course_tables = ',\n'.join([('[%s.course_metainfo]' % bqutil.course_id2dataset(x)) for x in courses])
        sql = "select * from {course_tables}".format(course_tables=course_tables)
        print "--> Creating %s.%s using %s" % (dataset, table, sql)

        if 1:
            metainfo_dataset = bqutil.get_bq_table(dataset, table, sql=sql, 
                                          newer_than=datetime.datetime(2015, 1, 16, 3, 0),
                                          )
            # bqutil.create_bq_table(dataset, table, sql, overwrite=True)


        #-----------------------------------------------------------------------------
        # make course_summary_stats table
        #
        # This is a combination of the broad_stats_by_course table (if that exists), and course_metainfo.
        # Also use (and create if necessary) the nregistered_by_wrap table.

        # get the broad_stats_by_course data
        bsbc = bqutil.get_table_data(dataset, 'broad_stats_by_course')

        table_list = bqutil.get_list_of_table_ids(dataset)

        latest_person_course = max([ x for x in table_list if x.startswith('person_course_')])
        print "Latest person_course table in %s is %s" % (dataset, latest_person_course)
        
        sql = """
                SELECT pc.course_id as course_id, 
                    cminfo.wrap_date as wrap_date,
                    count(*) as nregistered,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) nregistered_by_wrap,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) / nregistered * 100 nregistered_by_wrap_pct,
                FROM
                    [{dataset}.{person_course}] as pc
                left join (
                 SELECT course_id,
                      TIMESTAMP(concat(wrap_year, "-", wrap_month, '-', wrap_day, ' 23:59:59')) as wrap_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as wrap_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as wrap_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as wrap_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Course Wrap'
                 )) as cminfo
                on pc.course_id = cminfo.course_id
                
                group by course_id, wrap_date
                order by course_id
        """.format(dataset=dataset, person_course=latest_person_course)

        nr_by_wrap = bqutil.get_bq_table(dataset, 'nregistered_by_wrap', sql=sql, key={'name': 'course_id'})

        # rates for registrants before and during course
        
        sql = """
                SELECT 
                    *,
                    ncertified / nregistered * 100 as pct_certified_of_reg,
                    ncertified_and_registered_before_launch / nregistered_before_launch * 100 as pct_certified_reg_before_launch,
                    ncertified_and_registered_during_course / nregistered_during_course * 100 as pct_certified_reg_during_course,
                    ncertified / nregistered_by_wrap * 100 as pct_certified_of_reg_by_wrap,
                    ncertified / nviewed * 100 as pct_certified_of_viewed,
                    ncertified / nviewed_by_wrap * 100 as pct_certified_of_viewed_by_wrap,
                    ncertified_by_ewrap / nviewed_by_ewrap * 100 as pct_certified_of_viewed_by_ewrap,
                FROM
                (
                # ------------------------
                # get aggregate data
                SELECT pc.course_id as course_id, 
                    cminfo.wrap_date as wrap_date,
                    count(*) as nregistered,
                    sum(case when pc.certified then 1 else 0 end) ncertified,
                    sum(case when (TIMESTAMP(pc.cert_created_date) < cminfo.ewrap_date) and (pc.certified and pc.viewed) then 1 else 0 end) ncertified_by_ewrap,
                    sum(case when pc.viewed then 1 else 0 end) nviewed,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) nregistered_by_wrap,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) / nregistered * 100 nregistered_by_wrap_pct,
                    sum(case when (pc.start_time < cminfo.wrap_date) and pc.viewed then 1 else 0 end) nviewed_by_wrap,
                    sum(case when (pc.start_time < cminfo.ewrap_date) and pc.viewed then 1 else 0 end) nviewed_by_ewrap,
                    sum(case when pc.start_time < cminfo.launch_date then 1 else 0 end) nregistered_before_launch,
                    sum(case when pc.start_time < cminfo.launch_date 
                              and pc.certified
                              then 1 else 0 end) ncertified_and_registered_before_launch,
                    sum(case when (pc.start_time >= cminfo.launch_date) 
                              and (pc.start_time < cminfo.wrap_date) then 1 else 0 end) nregistered_during_course,
                    sum(case when (pc.start_time >= cminfo.launch_date) 
                              and (pc.start_time < cminfo.wrap_date) 
                              and pc.certified
                              then 1 else 0 end) ncertified_and_registered_during_course,
                FROM
                    [{dataset}.{person_course}] as pc
                left join (
                
                # --------------------
                #  get course launch and wrap dates from course_metainfo

       SELECT AA.course_id as course_id, 
              AA.wrap_date as wrap_date,
              AA.launch_date as launch_date,
              BB.ewrap_date as ewrap_date,
       FROM (
               #  inner get course launch and wrap dates from course_metainfo
                SELECT A.course_id as course_id,
                  A.wrap_date as wrap_date,
                  B.launch_date as launch_date,
                from
                (
                 SELECT course_id,
                      TIMESTAMP(concat(wrap_year, "-", wrap_month, '-', wrap_day, ' 23:59:59')) as wrap_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as wrap_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as wrap_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as wrap_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Course Wrap'
                 )
                ) as A
                left outer join 
                (
                 SELECT course_id,
                      TIMESTAMP(concat(launch_year, "-", launch_month, '-', launch_day)) as launch_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as launch_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as launch_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as launch_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Course Launch'
                 )
                ) as B
                on A.course_id = B.course_id 
                # end inner course_metainfo subquery
            ) as AA
            left outer join
            (
                 SELECT course_id,
                      TIMESTAMP(concat(wrap_year, "-", wrap_month, '-', wrap_day, ' 23:59:59')) as ewrap_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as wrap_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as wrap_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as wrap_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Empirical Course Wrap'
                 )
            ) as BB
            on AA.course_id = BB.course_id

                # end course_metainfo subquery
                # --------------------
                
                ) as cminfo
                on pc.course_id = cminfo.course_id
                
                group by course_id, wrap_date
                order by course_id
                # ---- end get aggregate data
                )
                order by course_id
        """.format(dataset=dataset, person_course=latest_person_course)

        print "--> Assembling course_summary_stats from %s" % 'stats_cert_rates_by_registration'
        sys.stdout.flush()
        cert_by_reg = bqutil.get_bq_table(dataset, 'stats_cert_rates_by_registration', sql=sql, 
                                          newer_than=datetime.datetime(2015, 1, 16, 3, 0),
                                          key={'name': 'course_id'})

        # start assembling course_summary_stats

        c_sum_stats = defaultdict(OrderedDict)
        for entry in bsbc['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            cmci.update(entry)
            cnbw = nr_by_wrap['data_by_key'][course_id]
            nbw = int(cnbw['nregistered_by_wrap'])
            cmci['nbw_wrap_date'] = cnbw['wrap_date']
            cmci['nregistered_by_wrap'] = nbw
            cmci['nregistered_by_wrap_pct'] = cnbw['nregistered_by_wrap_pct']
            cmci['frac_female'] = float(entry['n_female_viewed']) / (float(entry['n_male_viewed']) + float(entry['n_female_viewed']))
            ncert = float(cmci['certified_sum'])
            if ncert:
                cmci['certified_of_nregistered_by_wrap_pct'] = nbw / ncert * 100.0
            else:
                cmci['certified_of_nregistered_by_wrap_pct'] = None
            cbr = cert_by_reg['data_by_key'][course_id]
            for field, value in cbr.items():
                cmci['cbr_%s' % field] = value

        # add medians for viewed, explored, and certified

        msbc_tables = {'msbc_viewed': "viewed_median_stats_by_course",
                       'msbc_explored': 'explored_median_stats_by_course',
                       'msbc_certified': 'certified_median_stats_by_course',
                       'msbc_verified': 'verified_median_stats_by_course',
                       }
        for prefix, mtab in msbc_tables.items():
            print "--> Merging median stats data from %s" % mtab
            sys.stdout.flush()
            bqdat = bqutil.get_table_data(dataset, mtab)
            for entry in bqdat['data']:
                course_id = entry['course_id']
                cmci = c_sum_stats[course_id]
                for field, value in entry.items():
                    cmci['%s_%s' % (prefix, field)] = value

        # add time on task data

        tot_table = "time_on_task_stats_by_course"
        prefix = "ToT"
        print "--> Merging time on task data from %s" % tot_table
        sys.stdout.flush()
        try:
            bqdat = bqutil.get_table_data(dataset, tot_table)
        except Exception as err:
            bqdat = {'data': {}}
        for entry in bqdat['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            for field, value in entry.items():
                if field=='course_id':
                    continue
                cmci['%s_%s' % (prefix, field)] = value

        # add serial time on task data

        tot_table = "time_on_task_serial_stats_by_course"
        prefix = "SToT"
        print "--> Merging serial time on task data from %s" % tot_table
        sys.stdout.flush()
        try:
            bqdat = bqutil.get_table_data(dataset, tot_table)
        except Exception as err:
            bqdat = {'data': {}}
        for entry in bqdat['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            for field, value in entry.items():
                if field=='course_id':
                    continue
                cmci['%s_%s' % (prefix, field)] = value

        # add show_answer stats

        tot_table = "show_answer_stats_by_course"
        prefix = "SAS"
        print "--> Merging show_answer stats data from %s" % tot_table
        sys.stdout.flush()
        try:
            bqdat = bqutil.get_table_data(dataset, tot_table)
        except Exception as err:
            bqdat = {'data': {}}
        for entry in bqdat['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            for field, value in entry.items():
                if field=='course_id':
                    continue
                cmci['%s_%s' % (prefix, field)] = value

        # setup list of keys, for CSV output

        css_keys = c_sum_stats.values()[0].keys()

        # retrieve course_metainfo table, pivot, add that to summary_stats

        print "--> Merging course_metainfo from %s" % table
        sys.stdout.flush()
        bqdat = bqutil.get_table_data(dataset, table)

        listings_keys = map(make_key, ["Institution", "Semester", "New or Rerun", "Andrew Recodes New/Rerun", 
                                       "Course Number", "Short Title", "Andrew's Short Titles", "Title", 
                                       "Instructors", "Registration Open", "Course Launch", "Course Wrap", "course_id",
                                       "Empirical Course Wrap", "Andrew's Order", "certifies", "MinPassGrade",
                                       '4-way Category by name', "4-way (CS, STEM, HSocSciGov, HumHistRel)"
                                       ])
        listings_keys.reverse()
        
        for lk in listings_keys:
            css_keys.insert(1, "listings_%s" % lk)

        COUNTS_TO_KEEP = ['discussion', 'problem', 'optionresponse', 'checkboxgroup', 'optioninput', 
                          'choiceresponse', 'video', 'choicegroup', 'vertical', 'choice', 'sequential', 
                          'multiplechoiceresponse', 'numericalresponse', 'chapter', 'solution', 'img', 
                          'formulaequationinput', 'responseparam', 'selfassessment', 'track', 'task', 'rubric', 
                          'stringresponse', 'combinedopenended', 'description', 'textline', 'prompt', 'category', 
                          'option', 'lti', 'annotationresponse', 
                          'annotatable', 'colgroup', 'tag_prompt', 'comment', 'annotationinput', 'image', 
                          'options', 'comment_prompt', 'conditional', 
                          'answer', 'poll_question', 'section', 'wrapper', 'map', 'area', 
                          'customtag', 'transcript', 
                          'split_test', 'word_cloud', 
                          'openended', 'openendedparam', 'answer_display', 'code', 
                          'drag_and_drop_input', 'customresponse', 'draggable', 'mentoring', 
                          'textannotation', 'imageannotation', 'videosequence', 
                          'feedbackprompt', 'assessments', 'openassessment', 'assessment', 'explanation', 'criterion']

        for entry in bqdat['data']:
            thekey = make_key(entry['key'])
            # if thekey.startswith('count_') and thekey[6:] not in COUNTS_TO_KEEP:
            #     continue
            if thekey.startswith('listings_') and thekey[9:] not in listings_keys:
                # print "dropping key=%s for course_id=%s" % (thekey, entry['course_id'])
                continue
            c_sum_stats[entry['course_id']][thekey] = entry['value']
            #if 'certifies' in thekey:
            #    print "course_id=%s, key=%s, value=%s" % (entry['course_id'], thekey, entry['value'])
            if thekey not in css_keys:
                css_keys.append(thekey)

        # compute forum_posts_per_week
        for course_id, entry in c_sum_stats.items():
            nfps = entry.get('nforum_posts_sum', 0)
            if nfps:
                fppw = int(nfps) / float(entry['nweeks'])
                entry['nforum_posts_per_week'] = fppw
                print "    course: %s, assessments_per_week=%s, forum_posts_per_week=%s" % (course_id, entry['total_assessments_per_week'], fppw)
            else:
                entry['nforum_posts_per_week'] = None
        css_keys.append('nforum_posts_per_week')

        # read in listings file and merge that in also
        if listings_file:
            if listings_file.endswith('.csv'):
                listings = csv.DictReader(open(listings_file))
            else:
                listings = [ json.loads(x) for x in open(listings_file) ]
            for entry in listings:
                course_id = entry['course_id']
                if course_id not in c_sum_stats:
                    continue
                cmci = c_sum_stats[course_id]
                for field, value in entry.items():
                    lkey = "listings_%s" % make_key(field)
                    if not (lkey in cmci) or (not cmci[lkey]):
                        cmci[lkey] = value

        print "Storing these fields: %s" % css_keys

        # get schema
        mypath = os.path.dirname(os.path.realpath(__file__))
        the_schema = json.loads(open('%s/schemas/schema_combined_course_summary_stats.json' % mypath).read())
        schema_dict = { x['name'] : x for x in the_schema }

        # write out CSV
        css_table = "course_summary_stats"
        ofn = "%s__%s.csv" % (dataset, css_table)
        ofn2 = "%s__%s.json" % (dataset, css_table)
        print "Writing data to %s and %s" % (ofn, ofn2)

        ofp = open(ofn, 'w')
        ofp2 = open(ofn2, 'w')
        dw = csv.DictWriter(ofp, fieldnames=css_keys)
        dw.writeheader()
        for cid, entry in c_sum_stats.items():
            for ek in entry:
                if ek not in schema_dict:
                    entry.pop(ek)
                # entry[ek] = str(entry[ek])	# coerce to be string
            ofp2.write(json.dumps(entry) + "\n")
            for key in css_keys:
                if key not in entry:
                    entry[key] = None
            dw.writerow(entry)
        ofp.close()
        ofp2.close()

        # upload to bigquery
        # the_schema = [ { 'type': 'STRING', 'name': x } for x in css_keys ]
        if 1:
            gsfnp = gspath / dataset / (css_table + ".json")
            gsutil.upload_file_to_gs(ofn2, gsfnp)
            # bqutil.load_data_to_table(dataset, css_table, gsfnp, the_schema, wait=True, verbose=False,
            #                           format='csv', skiprows=1)
            bqutil.load_data_to_table(dataset, css_table, gsfnp, the_schema, wait=True, verbose=False)

        return

    
    print "-"*60 + " %s" % course_id

    # get nweeks from listings
    lfn = path(listings_file)
    if not lfn.exists():
        print "[analyze_content] course listings file %s doesn't exist!" % lfn
        return

    data = None
    if listings_file.endswith('.json'):
        data_feed = map(json.loads, open(lfn))
    else:
        data_feed = csv.DictReader(open(lfn))
    for k in data_feed:
        if not 'course_id' in k:
            print "Strange course listings row, no course_id in %s" % k
            raise Exception("Missing course_id")
        if k['course_id']==course_id:
            data = k
            break

    if not data:
        print "[analyze_content] no entry for %s found in course listings file %s!" % (course_id, lfn)
        return

    def date_parse(field):
        (m, d, y) = map(int, data[field].split('/'))
        return datetime.datetime(y, m, d)

    launch = date_parse('Course Launch')
    wrap = date_parse('Course Wrap')
    ndays = (wrap - launch).days
    nweeks = ndays / 7.0

    print "Course length = %6.2f weeks (%d days)" % (nweeks, ndays)

    if pin_date:
        datedir = pin_date
    course_dir = find_course_sql_dir(course_id, basedir, datedir, use_dataset_latest and not pin_date)
    cfn = gsutil.path_from_course_id(course_id)

    xbfn = course_dir / ("xbundle_%s.xml" % cfn)
    
    if not xbfn.exists():
        print "[analyze_content] cannot find xbundle file %s for %s!" % (xbfn, course_id)

        if use_dataset_latest:
            # try looking in earlier directories for xbundle file
            import glob
            spath = course_dir / ("../*/xbundle_%s.xml" % cfn)
            files = list(glob.glob(spath))
            if files:
                xbfn = path(files[-1])
            if not xbfn.exists():
                print "   --> also cannot find any %s ; aborting!" % spath
            else:
                print "   --> Found and using instead: %s " % xbfn
        if not xbfn.exists():
            raise Exception("[analyze_content] missing xbundle file %s" % xbfn)

    # if there is an xbundle*.fixed file, use that instead of the normal one
    if os.path.exists(str(xbfn) + ".fixed"):
        xbfn = path(str(xbfn) + ".fixed")

    print "[analyze_content] For %s using %s" % (course_id, xbfn)
    
    # get module usage data
    mudata = get_stats_module_usage(course_id, basedir, datedir, use_dataset_latest)

    xml = etree.parse(open(xbfn)).getroot()
    
    counts = defaultdict(int)
    nexcluded = defaultdict(int)

    IGNORE = ['html', 'p', 'div', 'iframe', 'ol', 'li', 'ul', 'blockquote', 'h1', 'em', 'b', 'h2', 'h3', 'body', 'span', 'strong',
              'a', 'sub', 'strike', 'table', 'td', 'tr', 's', 'tbody', 'sup', 'sub', 'strike', 'i', 's', 'pre', 'policy', 'metadata',
              'grading_policy', 'br', 'center',  'wiki', 'course', 'font', 'tt', 'it', 'dl', 'startouttext', 'endouttext', 'h4', 
              'head', 'source', 'dt', 'hr', 'u', 'style', 'dd', 'script', 'th', 'p', 'P', 'TABLE', 'TD', 'small', 'text', 'title']

    problem_stats = defaultdict(int)

    def does_problem_have_random_script(problem):
        '''
        return 1 if problem has a script with "random." in it
        else return 0
        '''
        for elem in problem.findall('.//script'):
            if elem.text and ('random.' in elem.text):
                return 1
        return 0

    # walk through xbundle 
    def walk_tree(elem, policy=None):
        '''
        Walk XML tree recursively.
        elem = current element
        policy = dict of attributes for children to inherit, with fields like due, graded, showanswer
        '''
        policy = policy or {}
        if  type(elem.tag)==str and (elem.tag.lower() not in IGNORE):
            counts[elem.tag.lower()] += 1
        if elem.tag in ["sequential", "problem", "problemset", "course", "chapter"]:	# very old courses may use inheritance from course & chapter
            keys = ["due", "graded", "format", "showanswer", "start"]
            for k in keys:		# copy inheritable attributes, if they are specified
                val = elem.get(k)
                if val:
                    policy[k] = val
        if elem.tag=="problem":	# accumulate statistics about problems: how many have show_answer = [past_due, closed] ?  have random. in script?
            problem_stats['n_capa_problems'] += 1
            if policy.get('showanswer'):
                problem_stats["n_showanswer_%s" % policy.get('showanswer')] += 1
            else:
                problem_stats['n_shownanswer_finished'] += 1	# DEFAULT showanswer = finished  (make sure this remains true)
                # see https://github.com/edx/edx-platform/blob/master/common/lib/xmodule/xmodule/capa_base.py#L118
                # finished = Show the answer after the student has answered the problem correctly, the student has no attempts left, or the problem due date has passed.
            problem_stats['n_random_script'] += does_problem_have_random_script(elem)

            if policy.get('graded')=='true' or policy.get('graded')=='True':
                problem_stats['n_capa_problems_graded'] += 1
                problem_stats['n_graded_random_script'] += does_problem_have_random_script(elem)
                if policy.get('showanswer'):
                    problem_stats["n_graded_showanswer_%s" % policy.get('showanswer')] += 1
                else:
                    problem_stats['n_graded_shownanswer_finished'] += 1	# DEFAULT showanswer = finished  (make sure this remains true)
            
        for k in elem:
            midfrag = (k.tag, k.get('url_name_orig', None))
            if (midfrag in mudata) and int(mudata[midfrag]['ncount']) < 20:
                nexcluded[k.tag] += 1
                if verbose:
                    try:
                        print "    -> excluding %s (%s), ncount=%s" % (k.get('display_name', '<no_display_name>').encode('utf8'), 
                                                                       midfrag, 
                                                                       mudata.get(midfrag, {}).get('ncount'))
                    except Exception as err:
                        print "    -> excluding ", k
                continue
            walk_tree(k, policy.copy())

    walk_tree(xml)
    print "--> Count of individual element tags throughout XML: ", counts
    
    print "--> problem_stats:", json.dumps(problem_stats, indent=4)

    # combine some into "qual_axis" and others into "quant_axis"
    qual_axis = ['openassessment', 'optionresponse', 'multiplechoiceresponse', 
                 # 'discussion', 
                 'choiceresponse', 'word_cloud', 
                 'combinedopenended', 'choiceresponse', 'stringresponse', 'textannotation', 'openended', 'lti']
    quant_axis = ['formularesponse', 'numericalresponse', 'customresponse', 'symbolicresponse', 'coderesponse',
                  'imageresponse']

    nqual = 0
    nquant = 0
    for tag, count in counts.items():
        if tag in qual_axis:
            nqual += count
        if tag in quant_axis:
            nquant += count
    
    print "nqual=%d, nquant=%d" % (nqual, nquant)

    nqual_per_week = nqual / nweeks
    nquant_per_week = nquant / nweeks
    total_per_week = nqual_per_week + nquant_per_week

    print "per week: nqual=%6.2f, nquant=%6.2f total=%6.2f" % (nqual_per_week, nquant_per_week, total_per_week)

    # save this overall data in CCDATA
    lock_file(CCDATA)
    ccdfn = path(CCDATA)
    ccd = {}
    if ccdfn.exists():
        for k in csv.DictReader(open(ccdfn)):
            ccd[k['course_id']] = k
    
    ccd[course_id] = {'course_id': course_id,
                      'nweeks': nweeks,
                      'nqual_per_week': nqual_per_week,
                      'nquant_per_week': nquant_per_week,
                      'total_assessments_per_week' : total_per_week,
                      }

    # fields = ccd[ccd.keys()[0]].keys()
    fields = ['course_id', 'nquant_per_week', 'total_assessments_per_week', 'nqual_per_week', 'nweeks']
    cfp = open(ccdfn, 'w')
    dw = csv.DictWriter(cfp, fieldnames=fields)
    dw.writeheader()
    for cid, entry in ccd.items():
        dw.writerow(entry)
    cfp.close()
    lock_file(CCDATA, release=True)

    # store data in course_metainfo table, which has one (course_id, key, value) on each line
    # keys include nweeks, nqual, nquant, count_* for module types *

    cmfields = OrderedDict()
    cmfields['course_id'] = course_id
    cmfields['course_length_days'] = str(ndays)
    cmfields.update({ make_key('listings_%s' % key) : value for key, value in data.items() })	# from course listings
    cmfields.update(ccd[course_id].copy())

    # cmfields.update({ ('count_%s' % key) : str(value) for key, value in counts.items() })	# from content counts

    cmfields['filename_xbundle'] = xbfn
    cmfields['filename_listings'] = lfn

    for key in sorted(counts):	# store counts in sorted order, so that the later generated CSV file can have a predictable structure
        value = counts[key]
        cmfields['count_%s' % key] =  str(value) 	# from content counts

    for key in sorted(problem_stats):	# store problem stats
        value = problem_stats[key]
        cmfields['problem_stat_%s' % key] =  str(value)

    cmfields.update({ ('nexcluded_sub_20_%s' % key) : str(value) for key, value in nexcluded.items() })	# from content counts

    course_dir = find_course_sql_dir(course_id, basedir, datedir, use_dataset_latest)
    csvfn = course_dir / CMINFO

    # manual overriding of the automatically computed fields can be done by storing course_id,key,value data
    # in the CMINFO_OVERRIDES file

    csvfn_overrides = course_dir / CMINFO_OVERRIDES
    if csvfn_overrides.exists():
        print "--> Loading manual override information from %s" % csvfn_overrides
        for ovent in csv.DictReader(open(csvfn_overrides)):
            if not ovent['course_id']==course_id:
                print "===> ERROR! override file has entry with wrong course_id: %s" % ovent
                continue
            print "    overriding key=%s with value=%s" % (ovent['key'], ovent['value'])
            cmfields[ovent['key']] = ovent['value']

    print "--> Course metainfo writing to %s" % csvfn

    fp = open(csvfn, 'w')

    cdw = csv.DictWriter(fp, fieldnames=['course_id', 'key', 'value'])
    cdw.writeheader()

    for k, v in cmfields.items():
        cdw.writerow({'course_id': course_id, 'key': k, 'value': v})
        
    fp.close()

    # build and output course_listings_and_metainfo 

    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)

    mypath = os.path.dirname(os.path.realpath(__file__))
    clm_table = "course_listing_and_metainfo"
    clm_schema_file = '%s/schemas/schema_%s.json' % (mypath, clm_table)
    clm_schema = json.loads(open(clm_schema_file).read())

    clm = {}
    for finfo in clm_schema:
        field = finfo['name']
        clm[field] = cmfields.get(field)
    clm_fnb = clm_table + ".json"
    clm_fn = course_dir / clm_fnb
    open(clm_fn, 'w').write(json.dumps(clm))

    gsfnp = gsutil.gs_path_from_course_id(course_id, use_dataset_latest=use_dataset_latest) / clm_fnb
    print "--> Course listing + metainfo uploading to %s then to %s.%s" % (gsfnp, dataset, clm_table)
    sys.stdout.flush()
    gsutil.upload_file_to_gs(clm_fn, gsfnp)
    bqutil.load_data_to_table(dataset, clm_table, gsfnp, clm_schema, wait=True, verbose=False)

    # output course_metainfo

    table = 'course_metainfo'
    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)

    gsfnp = gsutil.gs_path_from_course_id(course_id, use_dataset_latest=use_dataset_latest) / CMINFO
    print "--> Course metainfo uploading to %s then to %s.%s" % (gsfnp, dataset, table)
    sys.stdout.flush()

    gsutil.upload_file_to_gs(csvfn, gsfnp)

    mypath = os.path.dirname(os.path.realpath(__file__))
    SCHEMA_FILE = '%s/schemas/schema_course_metainfo.json' % mypath
    the_schema = json.loads(open(SCHEMA_FILE).read())[table]

    bqutil.load_data_to_table(dataset, table, gsfnp, the_schema, wait=True, verbose=False, format='csv', skiprows=1)
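
The walk_tree routine above counts element tags while copying inheritable attributes (due, graded, showanswer, ...) from containers down to their children. The same inheritance pattern, isolated as a minimal standalone sketch (INHERITABLE and the toy XML below are illustrative, not from the original script):

import xml.etree.ElementTree as ET
from collections import defaultdict

INHERITABLE = ["due", "graded", "format", "showanswer", "start"]

def walk(elem, policy=None, counts=None):
    # Each element sees a copy of its parent's policy, updated with its own attributes.
    policy = dict(policy or {})
    counts = counts if counts is not None else defaultdict(int)
    counts[elem.tag.lower()] += 1
    for key in INHERITABLE:
        if elem.get(key):
            policy[key] = elem.get(key)
    for child in elem:
        walk(child, policy, counts)
    return counts

course = ET.fromstring('<course showanswer="finished"><chapter graded="true"><problem/></chapter></course>')
print(dict(walk(course)))   # {'course': 1, 'chapter': 1, 'problem': 1}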

Example 33

Project: mediadrop
Source File: websetup.py
View license
def add_default_data():
    log.info('Adding default data')

    settings = [
        (u'email_media_uploaded', None),
        (u'email_comment_posted', None),
        (u'email_support_requests', None),
        (u'email_send_from', u'[email protected]'),
        (u'wording_user_uploads', N_(u"Upload your media using the form below. We'll review it and get back to you.")),
        (u'wording_administrative_notes', None),
        (u'wording_display_administrative_notes', u''),
        (u'popularity_decay_exponent', u'4'),
        (u'popularity_decay_lifetime', u'36'),
        (u'rich_text_editor', u'tinymce'),
        (u'google_analytics_uacct', u''),
        (u'featured_category', u'1'),
        (u'max_upload_size', u'314572800'),
        (u'ftp_storage', u'false'),
        (u'ftp_server', u'ftp.someserver.com'),
        (u'ftp_user', u'username'),
        (u'ftp_password', u'password'),
        (u'ftp_upload_directory', u'media'),
        (u'ftp_download_url', u'http://www.someserver.com/web/accessible/media/'),
        (u'ftp_upload_integrity_retries', u'10'),
        (u'akismet_key', u''),
        (u'akismet_url', u''),
        (u'req_comment_approval', u''),
        (u'use_embed_thumbnails', u'true'),
        (u'api_secret_key_required', u'true'),
        (u'api_secret_key', random_string(20)),
        (u'api_media_max_results', u'50'),
        (u'api_tree_max_depth', u'10'),
        (u'general_site_name', u'MediaDrop'),
        (u'general_site_title_display_order', u'prepend'),
        (u'sitemaps_display', u'True'),
        (u'rss_display', u'True'),
        (u'vulgarity_filtered_words', u''),
        (u'primary_language', u'en'),
        (u'advertising_banner_html', u''),
        (u'advertising_sidebar_html', u''),
        (u'comments_engine', u'builtin'),
        (u'facebook_appid', u''),
        (u'youtube_apikey', u''),
    ]
    settings.extend(appearance_settings)

    for key, value in settings:
        s = Setting()
        s.key = key
        s.value = value
        DBSession.add(s)

    admin_user = User()
    admin_user.user_name = u'admin'
    admin_user.display_name = u'Admin'
    admin_user.email_address = u'[email protected]'
    admin_user.password = u'admin'
    DBSession.add(admin_user)

    admin_group = Group(name=u'admins', display_name=u'Admins')
    admin_group.users.append(admin_user)
    DBSession.add(admin_group)

    editor_group = Group(name=u'editors', display_name=u'Editors')
    DBSession.add(editor_group)

    anonymous_group = Group(name=u'anonymous', display_name=u'Everyone (including guests)')
    DBSession.add(anonymous_group)

    authenticated_group = Group(name=u'authenticated', display_name=u'Logged in users')
    DBSession.add(authenticated_group)

    admin_perm = Permission(name=u'admin', groups=[admin_group], 
        description=u'Grants access to the admin panel')
    DBSession.add(admin_perm)

    edit_perm = Permission(name=u'edit', groups=[admin_group, editor_group], 
        description=u'Grants access to edit site content')
    DBSession.add(edit_perm)
    
    view_perm = Permission(name=u'view', 
        groups=[admin_group, anonymous_group, editor_group], 
        description=u'View published media')
    DBSession.add(view_perm)

    upload_perm = Permission(name=u'upload', 
        groups=[admin_group, anonymous_group, editor_group], 
        description=u'Can upload new media')
    DBSession.add(upload_perm)
    media_upload_perm = Permission()
    media_upload_perm.permission_name = u'MEDIA_UPLOAD'
    media_upload_perm.description = u'Grants the ability to upload new media'
    media_upload_perm.groups.append(admin_group)
    media_upload_perm.groups.append(editor_group)
    media_upload_perm.groups.append(anonymous_group)
    DBSession.add(media_upload_perm)


    category = Category(name=u'Featured', slug=u'featured')
    DBSession.add(category)

    category2 = Category(name=u'Instructional', slug=u'instructional')
    DBSession.add(category2)

    podcast = Podcast()
    podcast.slug = u'hello-world'
    podcast.title = u'Hello World'
    podcast.subtitle = u'My very first podcast!'
    podcast.description = u"""<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>"""
    podcast.category = u'Technology'
    podcast.author = Author(admin_user.display_name, admin_user.email_address)
    podcast.explicit = None
    podcast.copyright = u'Copyright 2009 Xyz'
    podcast.itunes_url = None
    podcast.feedburner_url = None
    DBSession.add(podcast)

    comment = Comment()
    comment.subject = u'Re: New Media'
    comment.author = AuthorWithIP(name=u'John Doe', ip=2130706433)
    comment.body = u'<p>Hello to you too!</p>'
    DBSession.add(comment)

    media = Media()
    media.type = None
    media.slug = u'new-media'
    media.reviewed = True
    media.encoded = False
    media.publishable = False
    media.title = u'New Media'
    media.subtitle = None
    media.description = u"""<p>Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.</p>"""
    media.description_plain = u"""Lorem ipsum dolor sit amet, consectetur adipisicing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum."""
    media.author = Author(admin_user.display_name, admin_user.email_address)
    media.categories.append(category)
    media.comments.append(comment)
    DBSession.add(media)

    #XXX The list of default players is actually defined in model.players
    # and should at some point be moved here to avoid inconsistency
    # between the default storage engines and default players.
    remote_url_storage = RemoteURLStorage()
    default_engines = [
        LocalFileStorage(),
        remote_url_storage,
        YoutubeStorage(),
        VimeoStorage(),
        BlipTVStorage(),
        DailyMotionStorage(),
    ]
    for engine in default_engines:
        DBSession.add(engine)

    import datetime
    instructional_media = [
        (u'workflow-in-mediadrop',
        u'Workflow in MediaDrop',
        u'<p>This sceencast explains the publish status feature in MediaDrop.</p><p>Initially all videos uploaded through the front-end or admin panel are placed under &quot;awaiting review&quot; status. Once the administrator hits the &quot;review complete&quot; button, they can upload media. Videos can be added in any format, however, they can only be published if they are in a web-ready format such as FLV, M4V, MP3, or MP4. Alternatively, if they are published through Youtube or Vimeo the encoding step is skipped</p><p>Once uploaded and encoded the administrator can then publish the video.</p>',
        u'This sceencast explains the publish status feature in MediaDrop.\nInitially all videos uploaded through the front-end or admin panel are placed under \"awaiting review\" status. Once the administrator hits the \"review complete\" button, they can upload media. Videos can be added in any format, however, they can only be published if they are in a web-ready format such as FLV, M4V, MP3, or MP4. Alternatively, if they are published through Youtube or Vimeo the encoding step is skipped\nOnce uploaded and encoded the administrator can then publish the video.',
        datetime.datetime(2010, 5, 13, 2, 29, 40),
        218,
        u'http://static.mediadrop.net/files/videos/tutorial-workflow-in-mediadrop.mp4',
        u'video',
        u'mp4',
        ),
        (u'creating-a-podcast-in-mediadrop',
        u'Creating a Podcast in MediaDrop',
        u'<p>This describes the process an administrator goes through in creating a podcast in MediaDrop. An administrator can enter information that will automatically generate the iTunes/RSS feed information. Any episodes published to a podcast will automatically publish to iTunes/RSS.</p>',
        u'This describes the process an administrator goes through in creating a podcast in MediaDrop. An administrator can enter information that will automatically generate the iTunes/RSS feed information. Any episodes published to a podcast will automatically publish to iTunes/RSS.',
        datetime.datetime(2010, 5, 13, 2, 33, 44),
        100,
        u'http://static.mediadrop.net/files/videos/tutorial-create-podcast-in-mediadrop.mp4',
        u'video',
        u'mp4',
        ),
        (u'adding-a-video-in-mediadrop',
        u'Adding a Video in MediaDrop',
        u'<p>This screencast shows how video or audio can be added in MediaDrop.</p><p>MediaDrop supports a wide range of formats including (but not limited to): YouTube, Vimeo, Amazon S3, Bits on the Run, BrightCove, Kaltura, and either your own server or someone else\'s.</p><p>Videos can be uploaded in any format, but can only be published in web-ready formats such as FLV, MP3, M4V, MP4 etc.</p>',
        u'This screencast shows how video or audio can be added in MediaDrop.\nMediaDrop supports a wide range of formats including (but not limited to): YouTube, Vimeo, Amazon S3, Bits on the Run, BrightCove, Kaltura, and either your own server or someone else\'s.\nVideos can be uploaded in any format, but can only be published in web-ready formats such as FLV, MP3, M4V, MP4 etc.',
        datetime.datetime(2010, 5, 13, 2, 37, 36),
        169,
        u'http://static.mediadrop.net/files/videos/tutorial-add-video-in-mediadrop.mp4',
        u'video',
        u'mp4',
        ),
    ]

    name = u'MediaDrop Team'
    email = u'[email protected]'
    for slug, title, desc, desc_plain, publish_on, duration, url, type_, container in instructional_media:
        media = Media()
        media.author = Author(name, email)
        media.description = desc
        media.description_plain = desc_plain
        media.duration = duration
        media.publish_on = publish_on
        media.slug = slug
        media.title = title
        media.type = type_

        media_file = MediaFile()
        media_file.container = container
        media_file.created_on = publish_on
        media_file.display_name = os.path.basename(url)
        media_file.duration = duration
        media_file.type = type_
        media_file.storage = remote_url_storage
        media_file.unique_id = url

        DBSession.add(media)
        DBSession.add(media_file)

        media.files.append(media_file)
        media.categories.append(category2)

        media.encoded = True
        media.reviewed = True
        media.publishable = True
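
The instructional_media tuples above hard-code publish_on timestamps positionally as datetime.datetime(year, month, day, hour, minute, second). For comparison only (a sketch, not MediaDrop code), the same values can be parsed from strings with datetime.datetime.strptime:

import datetime

# Hypothetical string form of the first publish_on value above.
publish_on = datetime.datetime.strptime("2010-05-13 02:29:40", "%Y-%m-%d %H:%M:%S")
assert publish_on == datetime.datetime(2010, 5, 13, 2, 29, 40)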

Example 34

Project: nupic
Source File: clamodel_test.py
View license
  def testTemporalAnomalyModelFactory(self):
    """ Simple test to assert that ModelFactory.create() with a given specific
    Temporal Anomaly configuration will return a model that can return
    inferences
    """
    modelConfig = (
      {u'aggregationInfo': {u'days': 0,
                            u'fields': [],
                            u'hours': 0,
                            u'microseconds': 0,
                            u'milliseconds': 0,
                            u'minutes': 0,
                            u'months': 0,
                            u'seconds': 0,
                            u'weeks': 0,
                            u'years': 0},
       u'model': u'CLA',
       u'modelParams': {u'anomalyParams': {u'anomalyCacheRecords': None,
                                           u'autoDetectThreshold': None,
                                           u'autoDetectWaitRecords': 5030},
                        u'clEnable': False,
                        u'clParams': {u'alpha': 0.035828933612158,
                                      u'verbosity': 0,
                                      u'regionName': u'CLAClassifierRegion',
                                      u'steps': u'1'},
                        u'inferenceType': u'TemporalAnomaly',
                        u'sensorParams': {u'encoders': {u'c0_dayOfWeek': None,
                                                        u'c0_timeOfDay': {u'fieldname': u'c0',
                                                                          u'name': u'c0',
                                                                          u'timeOfDay': [21,
                                                                                         9.49122334747737],
                                                                          u'type': u'DateEncoder'},
                                                        u'c0_weekend': None,
                                                        u'c1': {u'fieldname': u'c1',
                                                                u'name': u'c1',
                                                                u'resolution': 0.8771929824561403,
                                                                u'seed': 42,
                                                                u'type': u'RandomDistributedScalarEncoder'}},
                                          u'sensorAutoReset': None,
                                          u'verbosity': 0},
                        u'spEnable': True,
                        u'spParams': {u'potentialPct': 0.8,
                                      u'columnCount': 2048,
                                      u'globalInhibition': 1,
                                      u'inputWidth': 0,
                                      u'maxBoost': 1.0,
                                      u'numActiveColumnsPerInhArea': 40,
                                      u'seed': 1956,
                                      u'spVerbosity': 0,
                                      u'spatialImp': u'cpp',
                                      u'synPermActiveInc': 0.0015,
                                      u'synPermConnected': 0.1,
                                      u'synPermInactiveDec': 0.0005,
                                      },
                        u'tpEnable': True,
                        u'tpParams': {u'activationThreshold': 13,
                                      u'cellsPerColumn': 32,
                                      u'columnCount': 2048,
                                      u'globalDecay': 0.0,
                                      u'initialPerm': 0.21,
                                      u'inputWidth': 2048,
                                      u'maxAge': 0,
                                      u'maxSegmentsPerCell': 128,
                                      u'maxSynapsesPerSegment': 32,
                                      u'minThreshold': 10,
                                      u'newSynapseCount': 20,
                                      u'outputType': u'normal',
                                      u'pamLength': 3,
                                      u'permanenceDec': 0.1,
                                      u'permanenceInc': 0.1,
                                      u'seed': 1960,
                                      u'temporalImp': u'cpp',
                                      u'verbosity': 0},
                        u'trainSPNetOnlyIfRequested': False},
       u'predictAheadTime': None,
       u'version': 1}
    )

    inferenceArgs = {u'inputPredictedField': u'auto',
                     u'predictedField': u'c1',
                     u'predictionSteps': [1]}

    data = [
      {'_category': [None],
       '_reset': 0,
       '_sequenceId': 0,
       '_timestamp': datetime.datetime(2013, 12, 5, 0, 0),
       '_timestampRecordIdx': None,
       u'c0': datetime.datetime(2013, 12, 5, 0, 0),
       u'c1': 5.0},
      {'_category': [None],
       '_reset': 0,
       '_sequenceId': 0,
       '_timestamp': datetime.datetime(2013, 12, 6, 0, 0),
       '_timestampRecordIdx': None,
       u'c0': datetime.datetime(2013, 12, 6, 0, 0),
       u'c1': 6.0},
      {'_category': [None],
       '_reset': 0,
       '_sequenceId': 0,
       '_timestamp': datetime.datetime(2013, 12, 7, 0, 0),
       '_timestampRecordIdx': None,
       u'c0': datetime.datetime(2013, 12, 7, 0, 0),
       u'c1': 7.0}
    ]

    model = ModelFactory.create(modelConfig=modelConfig)
    model.enableLearning()
    model.enableInference(inferenceArgs)

    for row in data:
      result = model.run(row)
      self.assertIsInstance(result, ModelResult)
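
The three input records above are spaced exactly one day apart. Rows of that shape can also be generated with datetime.timedelta (a sketch with an illustrative helper, not part of the NuPIC test):

import datetime

def make_rows(start, values):
    # Build one record per value, one day apart, mirroring the test data above.
    rows = []
    for offset, value in enumerate(values):
        ts = start + datetime.timedelta(days=offset)
        rows.append({'_timestamp': ts, 'c0': ts, 'c1': value})
    return rows

rows = make_rows(datetime.datetime(2013, 12, 5), [5.0, 6.0, 7.0])
assert rows[2]['c0'] == datetime.datetime(2013, 12, 7, 0, 0)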

Example 35

Project: calibre
Source File: reading.py
View license
    def test_read(self):  # {{{
        'Test the reading of data from the database'
        cache = self.init_cache(self.library_path)
        tests = {
                3  : {
                    'title': 'Unknown',
                    'sort': 'Unknown',
                    'authors': ('Unknown',),
                    'author_sort': 'Unknown',
                    'series' : None,
                    'series_index': 1.0,
                    'rating': None,
                    'tags': (),
                    'formats':(),
                    'identifiers': {},
                    'timestamp': datetime.datetime(2011, 9, 7, 19, 54, 41,
                        tzinfo=utc_tz),
                    'pubdate': datetime.datetime(2011, 9, 7, 19, 54, 41,
                        tzinfo=utc_tz),
                    'last_modified': datetime.datetime(2011, 9, 7, 19, 54, 41,
                        tzinfo=utc_tz),
                    'publisher': None,
                    'languages': (),
                    'comments': None,
                    '#enum': None,
                    '#authors':(),
                    '#date':None,
                    '#rating':None,
                    '#series':None,
                    '#series_index': None,
                    '#tags':(),
                    '#yesno':None,
                    '#comments': None,
                    'size':None,
                },

                2 : {
                    'title': 'Title One',
                    'sort': 'One',
                    'authors': ('Author One',),
                    'author_sort': 'One, Author',
                    'series' : 'A Series One',
                    'series_index': 1.0,
                    'tags':('Tag One', 'Tag Two'),
                    'formats': ('FMT1',),
                    'rating': 4.0,
                    'identifiers': {'test':'one'},
                    'timestamp': datetime.datetime(2011, 9, 5, 21, 6,
                        tzinfo=utc_tz),
                    'pubdate': datetime.datetime(2011, 9, 5, 21, 6,
                        tzinfo=utc_tz),
                    'publisher': 'Publisher One',
                    'languages': ('eng',),
                    'comments': '<p>Comments One</p>',
                    '#enum':'One',
                    '#authors':('Custom One', 'Custom Two'),
                    '#date':datetime.datetime(2011, 9, 5, 6, 0,
                        tzinfo=utc_tz),
                    '#rating':2.0,
                    '#series':'My Series One',
                    '#series_index': 1.0,
                    '#tags':('My Tag One', 'My Tag Two'),
                    '#yesno':True,
                    '#comments': '<div>My Comments One<p></p></div>',
                    'size':9,
                },
                1  : {
                    'title': 'Title Two',
                    'sort': 'Title Two',
                    'authors': ('Author Two', 'Author One'),
                    'author_sort': 'Two, Author & One, Author',
                    'series' : 'A Series One',
                    'series_index': 2.0,
                    'rating': 6.0,
                    'tags': ('Tag One', 'News'),
                    'formats':('FMT1', 'FMT2'),
                    'identifiers': {'test':'two'},
                    'timestamp': datetime.datetime(2011, 9, 6, 6, 0,
                        tzinfo=utc_tz),
                    'pubdate': datetime.datetime(2011, 8, 5, 6, 0,
                        tzinfo=utc_tz),
                    'publisher': 'Publisher Two',
                    'languages': ('deu',),
                    'comments': '<p>Comments Two</p>',
                    '#enum':'Two',
                    '#authors':('My Author Two',),
                    '#date':datetime.datetime(2011, 9, 1, 6, 0,
                        tzinfo=utc_tz),
                    '#rating':4.0,
                    '#series':'My Series Two',
                    '#series_index': 3.0,
                    '#tags':('My Tag Two',),
                    '#yesno':False,
                    '#comments': '<div>My Comments Two<p></p></div>',
                    'size':9,

                },
        }
        for book_id, test in tests.iteritems():
            for field, expected_val in test.iteritems():
                val = cache.field_for(field, book_id)
                if isinstance(val, tuple) and 'authors' not in field and 'languages' not in field:
                    val, expected_val = set(val), set(expected_val)
                self.assertEqual(expected_val, val,
                        'Book id: %d Field: %s failed: %r != %r'%(
                            book_id, field, expected_val, val))
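
The expected timestamp, pubdate, and last_modified values above are timezone-aware datetimes built with tzinfo=utc_tz, calibre's UTC timezone object. Using only the standard library, an equivalent aware value can be constructed like this (a sketch, not calibre code; on Python 3, datetime.timezone.utc can be passed directly):

import datetime

class UTC(datetime.tzinfo):
    # Minimal UTC tzinfo standing in for utc_tz in this sketch.
    def utcoffset(self, dt):
        return datetime.timedelta(0)
    def tzname(self, dt):
        return "UTC"
    def dst(self, dt):
        return datetime.timedelta(0)

aware = datetime.datetime(2011, 9, 7, 19, 54, 41, tzinfo=UTC())
assert aware.utcoffset() == datetime.timedelta(0)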

Example 36

Project: geraldo
Source File: query.py
View license
def query_class(QueryClass, Database):
    """
    Returns a custom django.db.models.sql.query.Query subclass that is
    appropriate for Oracle.

    The 'Database' module (cx_Oracle) is passed in here so that all the setup
    required to import it only needs to be done by the calling module.
    """
    global _classes
    try:
        return _classes[QueryClass]
    except KeyError:
        pass

    class OracleQuery(QueryClass):
        def resolve_columns(self, row, fields=()):
            index_start = len(self.extra_select.keys())
            values = [self.convert_values(v, None) for v in row[:index_start]]
            for value, field in map(None, row[index_start:], fields):
                values.append(self.convert_values(value, field))
            return values

        def convert_values(self, value, field):
            from django.db.models.fields import DateField, DateTimeField, \
                 TimeField, BooleanField, NullBooleanField, DecimalField, Field
            if isinstance(value, Database.LOB):
                value = value.read()
            # Oracle stores empty strings as null. We need to undo this in
            # order to adhere to the Django convention of using the empty
            # string instead of null, but only if the field accepts the
            # empty string.
            if value is None and isinstance(field, Field) and field.empty_strings_allowed:
                value = u''
            # Convert 1 or 0 to True or False
            elif value in (1, 0) and isinstance(field, (BooleanField, NullBooleanField)):
                value = bool(value)
            # Convert floats to decimals
            elif value is not None and isinstance(field, DecimalField):
                value = util.typecast_decimal(field.format_number(value))
            # cx_Oracle always returns datetime.datetime objects for
            # DATE and TIMESTAMP columns, but Django wants to see a
            # python datetime.date, .time, or .datetime.  We use the type
            # of the Field to determine which to cast to, but it's not
            # always available.
            # As a workaround, we cast to date if all the time-related
            # values are 0, or to time if the date is 1/1/1900.
            # This could be cleaned a bit by adding a method to the Field
            # classes to normalize values from the database (the to_python
            # method is used for validation and isn't what we want here).
            elif isinstance(value, Database.Timestamp):
                # In Python 2.3, the cx_Oracle driver returns its own
                # Timestamp object that we must convert to a datetime class.
                if not isinstance(value, datetime.datetime):
                    value = datetime.datetime(value.year, value.month,
                            value.day, value.hour, value.minute, value.second,
                            value.fsecond)
                if isinstance(field, DateTimeField):
                    # DateTimeField subclasses DateField so must be checked
                    # first.
                    pass
                elif isinstance(field, DateField):
                    value = value.date()
                elif isinstance(field, TimeField) or (value.year == 1900 and value.month == value.day == 1):
                    value = value.time()
                elif value.hour == value.minute == value.second == value.microsecond == 0:
                    value = value.date()
            return value

        def as_sql(self, with_limits=True, with_col_aliases=False):
            """
            Creates the SQL for this query. Returns the SQL string and list
            of parameters.  This is overridden from the original Query class
            to accommodate Oracle's limit/offset SQL.

            If 'with_limits' is False, any limit/offset information is not
            included in the query.
            """

            # The `do_offset` flag indicates whether we need to construct
            # the SQL needed to use limit/offset w/Oracle.
            do_offset = with_limits and (self.high_mark is not None
                                         or self.low_mark)

            # If no offsets, just return the result of the base class
            # `as_sql`.
            if not do_offset:
                return super(OracleQuery, self).as_sql(with_limits=False,
                        with_col_aliases=with_col_aliases)

            # `get_columns` needs to be called before `get_ordering` to
            # populate `_select_alias`.
            self.pre_sql_setup()
            out_cols = self.get_columns()
            ordering = self.get_ordering()

            # Getting the "ORDER BY" SQL for the ROW_NUMBER() result.
            if ordering:
                rn_orderby = ', '.join(ordering)
            else:
                # Oracle's ROW_NUMBER() function always requires an
                # order-by clause.  So we need to define a default
                # order-by, since none was provided.
                qn = self.quote_name_unless_alias
                opts = self.model._meta
                rn_orderby = '%s.%s' % (qn(opts.db_table), qn(opts.fields[0].db_column or opts.fields[0].column))

            # Getting the selection SQL and the params, which has the `rn`
            # extra selection SQL.
            self.extra_select['rn'] = 'ROW_NUMBER() OVER (ORDER BY %s )' % rn_orderby
            sql, params = super(OracleQuery, self).as_sql(with_limits=False,
                    with_col_aliases=True)

            # Constructing the result SQL, using the initial select SQL
            # obtained above.
            result = ['SELECT * FROM (%s)' % sql]

            # Place WHERE condition on `rn` for the desired range.
            result.append('WHERE rn > %d' % self.low_mark)
            if self.high_mark is not None:
                result.append('AND rn <= %d' % self.high_mark)

            # Returning the SQL w/params.
            return ' '.join(result), params

        def set_limits(self, low=None, high=None):
            super(OracleQuery, self).set_limits(low, high)

            # We need to select the row number for the LIMIT/OFFSET sql.
            # A placeholder is added to extra_select now, because as_sql is
            # too late to be modifying extra_select.  However, the actual sql
            # depends on the ordering, so that is generated in as_sql.
            self.extra_select['rn'] = '1'

        def clear_limits(self):
            super(OracleQuery, self).clear_limits()
            if 'rn' in self.extra_select:
                del self.extra_select['rn']

    _classes[QueryClass] = OracleQuery
    return OracleQuery
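
When no Field type is available, convert_values above falls back to a heuristic: a timestamp collapses to a time if its date part is the 1900-01-01 placeholder, and to a date if all time components are zero. The same heuristic on a plain datetime, isolated as a sketch (not Django or geraldo code):

import datetime

def collapse(value):
    # Mirror the fallback in convert_values: time-only for the 1900-01-01
    # placeholder date, date-only when the time part is entirely zero.
    if value.year == 1900 and value.month == value.day == 1:
        return value.time()
    if value.hour == value.minute == value.second == value.microsecond == 0:
        return value.date()
    return value

assert collapse(datetime.datetime(2009, 4, 1)) == datetime.date(2009, 4, 1)
assert collapse(datetime.datetime(1900, 1, 1, 10, 30)) == datetime.time(10, 30)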

Example 37

Project: PySAR
Source File: timeseries2velocity.py
View license
def main(argv):

  if len(sys.argv)>2:

    try:
      opts, args = getopt.getopt(argv,"f:d:m:M:h:o:")
       
    except getopt.GetoptError:
      Usage() ; sys.exit(1)
       
    for opt,arg in opts:
      if opt == '-f':
        timeSeriesFile = arg
      elif opt == '-d':
        datesNot2include = arg.split()
      elif opt == '-m':
        minDate = arg
      elif opt == '-M':
        maxDate = arg
      elif opt == '-o':
        outName = arg
      
  elif len(sys.argv)==2:
    if argv[0]=='-h':
       Usage(); sys.exit(1)
    elif os.path.isfile(argv[0]):
       timeSeriesFile = argv[0]
    else:
       Usage(); sys.exit(1)

  else:
    Usage(); sys.exit(1)    

##############################################################
  print "Loading time series file: " + timeSeriesFile
  import h5py
  h5timeseries = h5py.File(timeSeriesFile)
  dateList1 = h5timeseries['timeseries'].keys()

##############################################################
  print 'All existing dates:'
  print dateList1
  print '*******************'

  try:
    datesNot2include
    print 'exclude dates: '+str(datesNot2include)
  except:
    datesNot2include=[]

  try:
    minDate
    minDateyy=yyyymmdd2years(minDate)
    print 'minimum date: '+minDate
    for date in dateList1:
       yy=yyyymmdd2years(date)
       if yy < minDateyy:
           print '  remove date: '+date
           datesNot2include.append(date)
  except:
    print ''

 # maxDate='20100521'
  try:
    maxDate
    maxDateyy=yyyymmdd2years(maxDate) 
    print 'maximum date: '+maxDate
    for date in dateList1:
       yy=yyyymmdd2years(date)
       if yy > maxDateyy:
           print '  remove date: '+date
           datesNot2include.append(date)
  except:
    print ''

  try:
    # datesNot2include = '20100903 20100730 20100625 20100521 20100416'
     dateList=[]
     for date in dateList1:
        if date not in datesNot2include:
           dateList.append(date)
  except:
     dateList=dateList1
     print 'using all dates to calculate the velocity'
  print '--------------------------------------------'
  print 'dates used to estimate the velocity:'
  print dateList
  print '--------------------------------------------'
##############################################################
  dateIndex={}
  for ni in range(len(dateList)):
    dateIndex[dateList[ni]]=ni
  tbase=[]
  d1 = datetime.datetime(*time.strptime(dateList[0],"%Y%m%d")[0:5])
  
  for ni in range(len(dateList)):
    d2 = datetime.datetime(*time.strptime(dateList[ni],"%Y%m%d")[0:5])
    diff = d2-d1
    tbase.append(diff.days)

  dates=[]
  for ni in range(len(dateList)):
    d = datetime.datetime(*time.strptime(dateList[ni],"%Y%m%d")[0:5])
    dates.append(d)
#  print 'Index and dates from ' + timeSeriesFile
#  for ni in range(len(dates)):
#    print ni,dates[ni]

###########################################
  print 'Calculating Velocity'

  datevector=[]
  for i in range(len(dates)):
    datevector.append(np.float(dates[i].year) + np.float(dates[i].month-1)/12 + np.float(dates[i].day-1)/365)

  B=np.ones([len(datevector),2])
  B[:,0]=datevector
  #B1 = np.linalg.pinv(B)
  B1 = np.dot(np.linalg.inv(np.dot(B.T,B)),B.T)
  B1 = np.array(B1,np.float32)
#########################################
    

  dset = h5timeseries['timeseries'].get(h5timeseries['timeseries'].keys()[0])
#  timeseries = np.zeros((len(h5timeseries['timeseries'].keys()),np.shape(dset)[0],np.shape(dset)[1]),np.float32)
#  for date in h5timeseries['timeseries'].keys():
#    timeseries[dateIndex[date]] = h5timeseries['timeseries'].get(date)

  timeseries = np.zeros((len(dateList),np.shape(dset)[0],np.shape(dset)[1]),np.float32)
  for date in dateList:
    timeseries[dateIndex[date]] = h5timeseries['timeseries'].get(date)


  lt,rows,cols=np.shape(timeseries)
  numpixels=rows*cols
  
  Data=np.zeros([lt,numpixels])
  for i in range(lt):
     Data[i,:]=np.reshape(timeseries[i],[1,numpixels])

  x=np.dot(B1,Data)
  velocity=np.reshape(x[0,:],[rows,cols])
#  import matplotlib.pyplot as plt
#  plt.imshow(velocity,vmin=-0.02, vmax=.02)
#  plt.colorbar()
#  plt.show()
#####################################################
  print 'Calculating rmse'
  Data_linear=np.dot(B,x)
  rmse=np.reshape(np.sqrt((np.sum((Data_linear-Data)**2,0))/lt),[rows,cols])
 # se=np.reshape((np.sum(np.abs(Data_linear-Data),0)/lt),[rows,cols])
 # rmse=np.reshape((np.sum((Data_linear-Data)**2,0))/lt,[rows,cols])
######################################################
  print 'Calculating the standard deviation of the estimated velocities'
  residual=Data_linear-Data
  s1=np.sqrt(np.sum(residual**2,0)/(lt-2))
  s2=np.sqrt(np.sum((datevector-np.mean(datevector))**2))
  se=np.reshape(s1/s2,[rows,cols])
######################################################
   
 # SSt=np.sum((Data-np.mean(Data,0))**2,0)
 # SSres=np.sum(residual**2,0)
 # SS_REG=SSt-SSres
 # Rsquared=np.reshape(SS_REG/SSt,[rows,cols])
######################################################  
  # covariance of the velocities
  
######################################################
#  h5file = projectDir+'/velocity_'+projectName+'.h5'
 # print 'saving results to hdf5 file'


  try:
    outName
    outName_rmse='rmse_'+outName
    outName_se='std_'+outName
    outName_Rsquared='R2_'+outName
  except:
    outName='velocity.h5'
    outName_rmse='rmse_velocity.h5'
    outName_se='std_velocity.h5'
    outName_Rsquared='R2_velocity.h5'


 # try:
   # h5file = argv[1]
  #  print 'writing velocity to '+argv[1]
 # except:
   # h5file = 'velocity.h5'
   # print 'writing to velocity.h5'
  print '--------------------------------------'
  print 'writing to '+outName
  h5velocity = h5py.File(outName,'w')
  group=h5velocity.create_group('velocity')
  dset = group.create_dataset('velocity', data=velocity, compression='gzip')
  group.attrs['date1'] = datevector[0]
  group.attrs['date2'] = datevector[lt-1]
  
  for key , value in h5timeseries['timeseries'].attrs.iteritems():
     group.attrs[key]=value
  h5velocity.close()  
#  h5timeseries.close()

  print '--------------------------------------'
  print 'writing to '+outName_rmse
  h5file = outName_rmse
  h5rmse = h5py.File(h5file,'w')
  group=h5rmse.create_group('rmse')
  dset = group.create_dataset(os.path.basename('rmse'), data=rmse, compression='gzip')
  group.attrs['date1'] = datevector[0]
  group.attrs['date2'] = datevector[lt-1]


  for key , value in h5timeseries['timeseries'].attrs.iteritems():
     group.attrs[key]=value  

  print '--------------------------------------'
  print 'writing to '+outName_se
  h5se = h5py.File(outName_se,'w')
  group=h5se.create_group('rmse')
  dset = group.create_dataset('rmse', data=se, compression='gzip')
  group.attrs['date1'] = datevector[0]
  group.attrs['date2'] = datevector[lt-1]

  for key , value in h5timeseries['timeseries'].attrs.iteritems():
     group.attrs[key]=value

  print '--------------------------------------'
 # print 'writing to '+outName_Rsquared
 # h5rsquared = h5py.File(outName_Rsquared,'w')
 # group=h5rsquared.create_group('rmse')
 # dset = group.create_dataset('rmse', data=Rsquared, compression='gzip')
 # group.attrs['date1'] = datevector[0]
 # group.attrs['date2'] = datevector[lt-1]

#  for key , value in h5timeseries['timeseries'].attrs.iteritems():
#     group.attrs[key]=value

#  h5rsquared.close()
  h5se.close()
  h5rmse.close()
  h5timeseries.close()
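
The date handling above parses "YYYYMMDD" strings with time.strptime and rebuilds datetime.datetime objects from the first five tuple fields before taking day differences. A more direct equivalent (a sketch, not PySAR code) uses datetime.datetime.strptime and timedelta arithmetic:

import datetime

dateList = ["20100416", "20100521", "20100625"]   # sample YYYYMMDD strings
dates = [datetime.datetime.strptime(d, "%Y%m%d") for d in dateList]
tbase = [(d - dates[0]).days for d in dates]
assert tbase == [0, 35, 70]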

Example 39

View license
    def forwards(self, orm):

        # Removing unique constraint on 'AstakosUser', fields ['third_party_identifier', 'provider']
        try:
            db.delete_unique('im_astakosuser', ['third_party_identifier', 'provider'])
        except:
            pass

        # Adding model 'Chain'
        db.create_table('im_chain', (
            ('chain', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
        ))
        db.send_create_signal('im', ['Chain'])

        # Adding model 'Project'
        db.create_table('im_project', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('application', self.gf('django.db.models.fields.related.OneToOneField')(related_name='project', unique=True, to=orm['im.ProjectApplication'])),
            ('last_approval_date', self.gf('django.db.models.fields.DateTimeField')(null=True)),
            ('deactivation_reason', self.gf('django.db.models.fields.CharField')(max_length=255, null=True)),
            ('deactivation_date', self.gf('django.db.models.fields.DateTimeField')(null=True)),
            ('creation_date', self.gf('django.db.models.fields.DateTimeField')()),
            ('name', self.gf('django.db.models.fields.CharField')(unique=True, max_length=80, db_index=True)),
            ('is_modified', self.gf('django.db.models.fields.BooleanField')(default=False, db_index=True)),
            ('is_active', self.gf('django.db.models.fields.BooleanField')(default=True, db_index=True)),
            ('state', self.gf('django.db.models.fields.IntegerField')(default=1, db_index=True)),
        ))
        db.send_create_signal('im', ['Project'])

        # Adding model 'ProjectMembership'
        db.create_table('im_projectmembership', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('person', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['im.AstakosUser'])),
            ('request_date', self.gf('django.db.models.fields.DateField')(default=datetime.datetime(2013, 1, 11, 12, 54, 30, 986304))),
            ('project', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['im.Project'])),
            ('state', self.gf('django.db.models.fields.IntegerField')(default=0, db_index=True)),
            ('is_pending', self.gf('django.db.models.fields.BooleanField')(default=False, db_index=True)),
            ('is_active', self.gf('django.db.models.fields.BooleanField')(default=False, db_index=True)),
            ('application', self.gf('django.db.models.fields.related.ForeignKey')(related_name='memberships', null=True, to=orm['im.ProjectApplication'])),
            ('pending_application', self.gf('django.db.models.fields.related.ForeignKey')(related_name='pending_memebrships', null=True, to=orm['im.ProjectApplication'])),
            ('pending_serial', self.gf('django.db.models.fields.BigIntegerField')(null=True, db_index=True)),
            ('acceptance_date', self.gf('django.db.models.fields.DateField')(null=True, db_index=True)),
            ('leave_request_date', self.gf('django.db.models.fields.DateField')(null=True)),
        ))
        db.send_create_signal('im', ['ProjectMembership'])

        # Adding unique constraint on 'ProjectMembership', fields ['person', 'project']
        db.create_unique('im_projectmembership', ['person_id', 'project_id'])

        # Adding model 'ResourceMetadata'
        db.create_table('im_resourcemetadata', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('key', self.gf('django.db.models.fields.CharField')(unique=True, max_length=255, db_index=True)),
            ('value', self.gf('django.db.models.fields.CharField')(max_length=255)),
        ))
        db.send_create_signal('im', ['ResourceMetadata'])

        # Adding model 'AstakosUserAuthProvider'
        db.create_table('im_astakosuserauthprovider', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('affiliation', self.gf('django.db.models.fields.CharField')(default=None, max_length=255, null=True, blank=True)),
            ('user', self.gf('django.db.models.fields.related.ForeignKey')(related_name='auth_providers', to=orm['im.AstakosUser'])),
            ('module', self.gf('django.db.models.fields.CharField')(default='local', max_length=255)),
            ('identifier', self.gf('django.db.models.fields.CharField')(max_length=255, null=True, blank=True)),
            ('active', self.gf('django.db.models.fields.BooleanField')(default=True)),
            ('auth_backend', self.gf('django.db.models.fields.CharField')(default='astakos', max_length=255)),
            ('info_data', self.gf('django.db.models.fields.TextField')(default='', null=True, blank=True)),
            ('created', self.gf('django.db.models.fields.DateTimeField')(auto_now_add=True, blank=True)),
        ))
        db.send_create_signal('im', ['AstakosUserAuthProvider'])

        # Adding unique constraint on 'AstakosUserAuthProvider', fields ['identifier', 'module', 'user']
        db.create_unique('im_astakosuserauthprovider', ['identifier', 'module', 'user_id'])

        # Adding model 'Serial'
        db.create_table('im_serial', (
            ('serial', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
        ))
        db.send_create_signal('im', ['Serial'])

        # Adding model 'Resource'
        db.create_table('im_resource', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=255)),
            ('service', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['im.Service'])),
            ('desc', self.gf('django.db.models.fields.TextField')(null=True)),
            ('unit', self.gf('django.db.models.fields.CharField')(max_length=255, null=True)),
            ('group', self.gf('django.db.models.fields.CharField')(max_length=255, null=True)),
        ))
        db.send_create_signal('im', ['Resource'])

        # Adding M2M table for field meta on 'Resource'
        db.create_table('im_resource_meta', (
            ('id', models.AutoField(verbose_name='ID', primary_key=True, auto_created=True)),
            ('resource', models.ForeignKey(orm['im.resource'], null=False)),
            ('resourcemetadata', models.ForeignKey(orm['im.resourcemetadata'], null=False))
        ))
        db.create_unique('im_resource_meta', ['resource_id', 'resourcemetadata_id'])

        # Adding unique constraint on 'Resource', fields ['name', 'service']
        db.create_unique('im_resource', ['name', 'service_id'])

        # Adding model 'SessionCatalog'
        db.create_table('im_sessioncatalog', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('session_key', self.gf('django.db.models.fields.CharField')(max_length=40)),
            ('user', self.gf('django.db.models.fields.related.ForeignKey')(related_name='sessions', null=True, to=orm['im.AstakosUser'])),
        ))
        db.send_create_signal('im', ['SessionCatalog'])

        # Adding model 'ProjectMembershipHistory'
        db.create_table('im_projectmembershiphistory', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('person', self.gf('django.db.models.fields.BigIntegerField')()),
            ('project', self.gf('django.db.models.fields.BigIntegerField')()),
            ('date', self.gf('django.db.models.fields.DateField')(default=datetime.datetime.now)),
            ('reason', self.gf('django.db.models.fields.IntegerField')()),
            ('serial', self.gf('django.db.models.fields.BigIntegerField')()),
        ))
        db.send_create_signal('im', ['ProjectMembershipHistory'])

        # Adding model 'AstakosUserQuota'
        db.create_table('im_astakosuserquota', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('capacity', self.gf('django.db.models.fields.BigIntegerField')(null=True)),
            ('quantity', self.gf('django.db.models.fields.BigIntegerField')(null=True)),
            ('export_limit', self.gf('django.db.models.fields.BigIntegerField')(null=True)),
            ('import_limit', self.gf('django.db.models.fields.BigIntegerField')(null=True)),
            ('resource', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['im.Resource'])),
            ('user', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['im.AstakosUser'])),
        ))
        db.send_create_signal('im', ['AstakosUserQuota'])

        # Adding unique constraint on 'AstakosUserQuota', fields ['resource', 'user']
        db.create_unique('im_astakosuserquota', ['resource_id', 'user_id'])

        # Adding model 'ProjectResourceGrant'
        db.create_table('im_projectresourcegrant', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('resource', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['im.Resource'])),
            ('project_application', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['im.ProjectApplication'], null=True)),
            ('project_capacity', self.gf('snf_django.lib.db.fields.IntDecimalField')(default=100000000000000000000000000000000L, max_digits=38, decimal_places=0)),
            ('project_import_limit', self.gf('snf_django.lib.db.fields.IntDecimalField')(default=100000000000000000000000000000000L, max_digits=38, decimal_places=0)),
            ('project_export_limit', self.gf('snf_django.lib.db.fields.IntDecimalField')(default=100000000000000000000000000000000L, max_digits=38, decimal_places=0)),
            ('member_capacity', self.gf('snf_django.lib.db.fields.IntDecimalField')(default=100000000000000000000000000000000L, max_digits=38, decimal_places=0)),
            ('member_import_limit', self.gf('snf_django.lib.db.fields.IntDecimalField')(default=100000000000000000000000000000000L, max_digits=38, decimal_places=0)),
            ('member_export_limit', self.gf('snf_django.lib.db.fields.IntDecimalField')(default=100000000000000000000000000000000L, max_digits=38, decimal_places=0)),
        ))
        db.send_create_signal('im', ['ProjectResourceGrant'])

        # Adding unique constraint on 'ProjectResourceGrant', fields ['resource', 'project_application']
        db.create_unique('im_projectresourcegrant', ['resource_id', 'project_application_id'])

        # Adding model 'PendingThirdPartyUser'
        db.create_table('im_pendingthirdpartyuser', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('third_party_identifier', self.gf('django.db.models.fields.CharField')(max_length=255, null=True, blank=True)),
            ('provider', self.gf('django.db.models.fields.CharField')(max_length=255, blank=True)),
            ('email', self.gf('django.db.models.fields.EmailField')(max_length=75, null=True, blank=True)),
            ('first_name', self.gf('django.db.models.fields.CharField')(max_length=30, null=True, blank=True)),
            ('last_name', self.gf('django.db.models.fields.CharField')(max_length=30, null=True, blank=True)),
            ('affiliation', self.gf('django.db.models.fields.CharField')(max_length=255, null=True, blank=True)),
            ('username', self.gf('django.db.models.fields.CharField')(unique=True, max_length=30)),
            ('token', self.gf('django.db.models.fields.CharField')(max_length=255, null=True, blank=True)),
            ('created', self.gf('django.db.models.fields.DateTimeField')(auto_now_add=True, null=True, blank=True)),
            ('info', self.gf('django.db.models.fields.TextField')(default='', null=True, blank=True)),
        ))
        db.send_create_signal('im', ['PendingThirdPartyUser'])

        # Adding unique constraint on 'PendingThirdPartyUser', fields ['provider', 'third_party_identifier']
        db.create_unique('im_pendingthirdpartyuser', ['provider', 'third_party_identifier'])

        # Adding model 'ProjectApplication'
        db.create_table('im_projectapplication', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('applicant', self.gf('django.db.models.fields.related.ForeignKey')(related_name='projects_applied', to=orm['im.AstakosUser'])),
            ('state', self.gf('django.db.models.fields.IntegerField')(default=0)),
            ('owner', self.gf('django.db.models.fields.related.ForeignKey')(related_name='projects_owned', to=orm['im.AstakosUser'])),
            ('chain', self.gf('django.db.models.fields.IntegerField')()),
            ('precursor_application', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['im.ProjectApplication'], unique=True, null=True, blank=True)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=80)),
            ('homepage', self.gf('django.db.models.fields.URLField')(max_length=255, null=True)),
            ('description', self.gf('django.db.models.fields.TextField')(null=True, blank=True)),
            ('start_date', self.gf('django.db.models.fields.DateTimeField')(null=True, blank=True)),
            ('end_date', self.gf('django.db.models.fields.DateTimeField')()),
            ('member_join_policy', self.gf('django.db.models.fields.IntegerField')()),
            ('member_leave_policy', self.gf('django.db.models.fields.IntegerField')()),
            ('limit_on_members_number', self.gf('django.db.models.fields.PositiveIntegerField')(null=True)),
            ('comments', self.gf('django.db.models.fields.TextField')(null=True, blank=True)),
            ('issue_date', self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime.now)),
        ))
        db.send_create_signal('im', ['ProjectApplication'])

        # Adding unique constraint on 'ProjectApplication', fields ['chain', 'id']
        db.create_unique('im_projectapplication', ['chain', 'id'])

        # Adding field 'Service.order'
        db.add_column('im_service', 'order', self.gf('django.db.models.fields.PositiveIntegerField')(default=0), keep_default=False)

        # Adding index on 'Service', fields ['name']
        db.create_index('im_service', ['name'])

        # Adding field 'AstakosUser.uuid'
        db.add_column('im_astakosuser', 'uuid', self.gf('django.db.models.fields.CharField')(max_length=255, unique=True, null=True), keep_default=False)

        # Adding field 'AstakosUser.disturbed_quota'
        db.add_column('im_astakosuser', 'disturbed_quota', self.gf('django.db.models.fields.BooleanField')(default=False, db_index=True), keep_default=False)

        # Changing field 'AstakosUser.affiliation'
        db.alter_column('im_astakosuser', 'affiliation', self.gf('django.db.models.fields.CharField')(max_length=255, null=True))

        # Changing field 'AstakosUser.provider'
        db.alter_column('im_astakosuser', 'provider', self.gf('django.db.models.fields.CharField')(max_length=255, null=True))

        # Changing field 'auth_user.username'
        db.alter_column('auth_user', 'username', models.CharField(max_length=75))
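
The migration above passes datetime.datetime.now itself (no parentheses) as the column default, so the timestamp is computed per row at insert time rather than once at import time. Below is a minimal sketch of that pattern on an ordinary model, assuming a configured Django project; the model name is illustrative and not taken from the astakos code:

import datetime

from django.db import models


class MembershipHistory(models.Model):
    # Passing the callable (no parentheses) defers evaluation to row-creation
    # time; datetime.datetime.now() would freeze a single import-time value.
    date = models.DateTimeField(default=datetime.datetime.now)

    class Meta:
        app_label = 'im'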

Example 40

Project: montage
Source File: _run_dao_test.py
View license
def main():
    rdb_session = make_rdb_session()

    # TODO: startup should add maintainers as users
    mahm_user = lookup_user(rdb_session, 'MahmoudHashemi')  # maintainer

    maint_dao = MaintainerDAO(rdb_session, mahm_user)
    org_user = maint_dao.add_organizer('LilyOfTheWest')
    org_dao = OrganizerDAO(rdb_session, org_user)

    # should automatically add the creator as coordinator
    campaign = org_dao.create_campaign(name='Basic test campaign',
                                       open_date=datetime.datetime(2015, 9, 10),
                                       close_date=datetime.datetime(2015, 10, 1))

    mahm_user = org_dao.add_coordinator(campaign, username='MahmoudHashemi')
    slap_user = org_dao.add_coordinator(campaign, 'Slaporte')
    leila_user = org_dao.add_coordinator(campaign, 'LilyOfTheWest')
    # add 'Yarl' as a coordinator as well, so the yarl_user references below resolve
    yarl_user = org_dao.add_coordinator(campaign, 'Yarl')
    import pdb;pdb.set_trace()
    coord_dao = CoordinatorDAO(rdb_session, yarl_user)

    juror_usernames = ['Slaporte', 'MahmoudHashemi', 'Yarl', 'Erwmat']

    rnd = coord_dao.create_round(name='Test Round 1',
                                 quorum=3,
                                 vote_method='rating',
                                 deadline_date=datetime.datetime(2015, 10, 15),
                                 jurors=juror_usernames,
                                 campaign=campaign)
    # returns successful, disqualified, total counts
    # coord_dao.add_entries_from_cat(rnd, 'Wiki Loves Monuments France 2015')


    if config.get('labs_db'):
        entries = coord_dao.add_entries_from_cat(rnd, 'Images_from_Wiki_Loves_Monuments_2015_in_Pakistan')
    else:
        entries = coord_dao.add_entries_from_csv_gist(rnd, GIST_URL)

    coord_dao.add_round_entries(rnd, entries)

    coord_dao.autodisqualify_by_date(rnd)
    coord_dao.autodisqualify_by_resolution(rnd)
    coord_dao.autodisqualify_by_uploader(rnd)

    #coord_dao.disqualify_entry(entry)

    coord_dao.activate_round(rnd)

    try:
        cross_complete(rdb_session, rnd)
    except PermissionDenied:
        pass
    else:
        raise ValueError('expected permission denied on cross complete')

    rate_round_tasks(rdb_session, rnd, limit_per=20)

    coord_dao.cancel_round(rnd)

    # # should fail, quorum must be <= # of jurors
    # coord_dao.reassign(active_jurors=['Slaporte'])

    rnd = coord_dao.create_round(name='Test Round 1.1',
                                 quorum=2,
                                 vote_method='rating',
                                 deadline_date=datetime.datetime(2015, 10, 15),
                                 jurors=juror_usernames,
                                 campaign=campaign)
    entries = coord_dao.add_entries_from_csv_gist(rnd, GIST_URL)
    coord_dao.add_round_entries(rnd, entries)
    coord_dao.activate_round(rnd)

    rate_round_tasks(rdb_session, rnd, limit_per=50)

    coord_dao.modify_jurors(rnd, [slap_user, yarl_user])

    # some read tasks

    rate_round_tasks(rdb_session, rnd)

    avg_ratings_map = coord_dao.get_round_average_rating_map(rnd)
    threshold_map = get_threshold_map(avg_ratings_map)

    # let at least 100 through
    cur_thresh = [t for t, c in sorted(threshold_map.items()) if c >= 100][-1]

    adv_group = coord_dao.get_rating_advancing_group(rnd, cur_thresh)
    coord_dao.finalize_rating_round(rnd, cur_thresh)
    campaign = coord_dao.get_campaign(campaign.id)

    assert campaign.active_round is None

    #
    # Time for Round 2
    #

    rnd2 = coord_dao.create_round(campaign,
                                  name='Test Round 2',
                                  vote_method='rating',
                                  quorum=2,
                                  jurors=juror_usernames,
                                  deadline_date=datetime.datetime(2015, 11, 1))
    final_rnds = [r for r in campaign.rounds if r.status == 'finalized']
    last_successful_rnd = final_rnds[-1]  # TODO: these are ordered by date?
    advancing_group = coord_dao.get_rating_advancing_group(last_successful_rnd)

    source = 'round(#%s)' % last_successful_rnd.id
    coord_dao.add_round_entries(rnd2, advancing_group, source)
    coord_dao.activate_round(rnd2)

    rate_round_tasks(rdb_session, rnd2, limit_per=20)
    coord_dao.pause_round(rnd2)
    coord_dao.activate_round(rnd2)
    rate_round_tasks(rdb_session, rnd2)
    avg_ratings_map = coord_dao.get_round_average_rating_map(rnd2)
    threshold_map = get_threshold_map(avg_ratings_map)
    if config.get('labs_db'):
        # Assuming the category stays the same
        assert threshold_map == ROUND_2_CAT_THRESH
    else:
        assert threshold_map == ROUND_2_THRESH
    #
    #
    #

    # # close campaign
    # # download audit logs

    rdb_session.commit()
    pprint(threshold_map)

    import pdb;pdb.set_trace()
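
The campaign and round setup above builds plain, timezone-naive datetime.datetime values for the open, close and deadline dates. A small self-contained illustration of that usage, with the values copied from the create_campaign call:

import datetime

# Naive datetimes default to midnight when only year/month/day are given,
# matching the open_date/close_date passed to create_campaign above.
open_date = datetime.datetime(2015, 9, 10)
close_date = datetime.datetime(2015, 10, 1)

assert open_date < close_date
assert close_date - open_date == datetime.timedelta(days=21)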

Example 41

Project: AlephNull
Source File: test_slippage.py
View license
    def test_orders_stop_limit(self):

        events = self.gen_trades()
        slippage_model = VolumeShareSlippage()

        # long, does not trade

        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': 100,
                'filled': 0,
                'sid': 133,
                'stop': 4.0,
                'limit': 3.0})
        ]

        orders_txns = list(slippage_model.simulate(
            events[2],
            open_orders
        ))

        self.assertEquals(len(orders_txns), 0)

        orders_txns = list(slippage_model.simulate(
            events[3],
            open_orders
        ))

        self.assertEquals(len(orders_txns), 0)

        # long, does trade

        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': 100,
                'filled': 0,
                'sid': 133,
                'stop': 4.0,
                'limit': 3.5})
        ]

        orders_txns = list(slippage_model.simulate(
            events[2],
            open_orders
        ))

        self.assertEquals(len(orders_txns), 0)

        orders_txns = list(slippage_model.simulate(
            events[3],
            open_orders
        ))

        self.assertEquals(len(orders_txns), 1)
        _, txn = orders_txns[0]

        expected_txn = {
            'price': float(3.500875),
            'dt': datetime.datetime(
                2006, 1, 5, 14, 34, tzinfo=pytz.utc),
            'amount': int(100),
            'sid': int(133)
        }

        for key, value in expected_txn.items():
            self.assertEquals(value, txn[key])

        # short, does not trade

        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': -100,
                'filled': 0,
                'sid': 133,
                'stop': 3.0,
                'limit': 4.0})
        ]

        orders_txns = list(slippage_model.simulate(
            events[0],
            open_orders
        ))

        self.assertEquals(len(orders_txns), 0)

        orders_txns = list(slippage_model.simulate(
            events[1],
            open_orders
        ))

        self.assertEquals(len(orders_txns), 0)

        # short, does trade

        open_orders = [
            Order(**{
                'dt': datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc),
                'amount': -100,
                'filled': 0,
                'sid': 133,
                'stop': 3.0,
                'limit': 3.5})
        ]

        orders_txns = list(slippage_model.simulate(
            events[0],
            open_orders
        ))

        self.assertEquals(len(orders_txns), 0)

        orders_txns = list(slippage_model.simulate(
            events[1],
            open_orders
        ))

        self.assertEquals(len(orders_txns), 1)
        _, txn = orders_txns[0]

        expected_txn = {
            'price': float(3.499125),
            'dt': datetime.datetime(
                2006, 1, 5, 14, 32, tzinfo=pytz.utc),
            'amount': int(-100),
            'sid': int(133)
        }

        for key, value in expected_txn.items():
            self.assertEquals(value, txn[key])
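
The orders and expected transactions above build timezone-aware datetimes by passing tzinfo=pytz.utc directly to the constructor, which is safe for UTC; for other pytz zones, tz.localize() is generally preferred. A short sketch of the same construction and the arithmetic the assertions rely on:

import datetime
import pytz

order_dt = datetime.datetime(2006, 1, 5, 14, 30, tzinfo=pytz.utc)
fill_dt = order_dt + datetime.timedelta(minutes=4)

# Comparisons stay consistent because both values are UTC-aware.
assert fill_dt == datetime.datetime(2006, 1, 5, 14, 34, tzinfo=pytz.utc)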

Example 42

Project: nupic
Source File: clamodel_test.py
View license
  def testTemporalAnomalyModelFactory(self):
    """ Simple test to assert that ModelFactory.create() with a given specific
    Temporal Anomaly configuration will return a model that can return
    inferences
    """
    modelConfig = (
      {u'aggregationInfo': {u'days': 0,
                            u'fields': [],
                            u'hours': 0,
                            u'microseconds': 0,
                            u'milliseconds': 0,
                            u'minutes': 0,
                            u'months': 0,
                            u'seconds': 0,
                            u'weeks': 0,
                            u'years': 0},
       u'model': u'CLA',
       u'modelParams': {u'anomalyParams': {u'anomalyCacheRecords': None,
                                           u'autoDetectThreshold': None,
                                           u'autoDetectWaitRecords': 5030},
                        u'clEnable': False,
                        u'clParams': {u'alpha': 0.035828933612158,
                                      u'verbosity': 0,
                                      u'regionName': u'CLAClassifierRegion',
                                      u'steps': u'1'},
                        u'inferenceType': u'TemporalAnomaly',
                        u'sensorParams': {u'encoders': {u'c0_dayOfWeek': None,
                                                        u'c0_timeOfDay': {u'fieldname': u'c0',
                                                                          u'name': u'c0',
                                                                          u'timeOfDay': [21,
                                                                                         9.49122334747737],
                                                                          u'type': u'DateEncoder'},
                                                        u'c0_weekend': None,
                                                        u'c1': {u'fieldname': u'c1',
                                                                u'name': u'c1',
                                                                u'resolution': 0.8771929824561403,
                                                                u'seed': 42,
                                                                u'type': u'RandomDistributedScalarEncoder'}},
                                          u'sensorAutoReset': None,
                                          u'verbosity': 0},
                        u'spEnable': True,
                        u'spParams': {u'potentialPct': 0.8,
                                      u'columnCount': 2048,
                                      u'globalInhibition': 1,
                                      u'inputWidth': 0,
                                      u'maxBoost': 1.0,
                                      u'numActiveColumnsPerInhArea': 40,
                                      u'seed': 1956,
                                      u'spVerbosity': 0,
                                      u'spatialImp': u'cpp',
                                      u'synPermActiveInc': 0.0015,
                                      u'synPermConnected': 0.1,
                                      u'synPermInactiveDec': 0.0005,
                                      },
                        u'tpEnable': True,
                        u'tpParams': {u'activationThreshold': 13,
                                      u'cellsPerColumn': 32,
                                      u'columnCount': 2048,
                                      u'globalDecay': 0.0,
                                      u'initialPerm': 0.21,
                                      u'inputWidth': 2048,
                                      u'maxAge': 0,
                                      u'maxSegmentsPerCell': 128,
                                      u'maxSynapsesPerSegment': 32,
                                      u'minThreshold': 10,
                                      u'newSynapseCount': 20,
                                      u'outputType': u'normal',
                                      u'pamLength': 3,
                                      u'permanenceDec': 0.1,
                                      u'permanenceInc': 0.1,
                                      u'seed': 1960,
                                      u'temporalImp': u'cpp',
                                      u'verbosity': 0},
                        u'trainSPNetOnlyIfRequested': False},
       u'predictAheadTime': None,
       u'version': 1}
    )

    inferenceArgs = {u'inputPredictedField': u'auto',
                     u'predictedField': u'c1',
                     u'predictionSteps': [1]}

    data = [
      {'_category': [None],
       '_reset': 0,
       '_sequenceId': 0,
       '_timestamp': datetime.datetime(2013, 12, 5, 0, 0),
       '_timestampRecordIdx': None,
       u'c0': datetime.datetime(2013, 12, 5, 0, 0),
       u'c1': 5.0},
      {'_category': [None],
       '_reset': 0,
       '_sequenceId': 0,
       '_timestamp': datetime.datetime(2013, 12, 6, 0, 0),
       '_timestampRecordIdx': None,
       u'c0': datetime.datetime(2013, 12, 6, 0, 0),
       u'c1': 6.0},
      {'_category': [None],
       '_reset': 0,
       '_sequenceId': 0,
       '_timestamp': datetime.datetime(2013, 12, 7, 0, 0),
       '_timestampRecordIdx': None,
       u'c0': datetime.datetime(2013, 12, 7, 0, 0),
       u'c1': 7.0}
    ]

    model = ModelFactory.create(modelConfig=modelConfig)
    model.enableLearning()
    model.enableInference(inferenceArgs)

    for row in data:
      result = model.run(row)
      self.assertIsInstance(result, ModelResult)
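
Each input record above carries datetime.datetime values both as the _timestamp bookkeeping field and as the encoded c0 field. A tiny sketch that generates equivalent daily rows with a timedelta step (field names as in the test; the model itself is omitted):

import datetime

start = datetime.datetime(2013, 12, 5)
rows = [
    {'_timestamp': start + datetime.timedelta(days=i),
     'c0': start + datetime.timedelta(days=i),
     'c1': 5.0 + i}
    for i in range(3)
]
assert rows[-1]['c0'] == datetime.datetime(2013, 12, 7)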

Example 43

Project: django-recurrence
Source File: base.py
View license
def deserialize(text):
    """
    Deserialize a rfc2445 formatted string.

    This is a basic parser that is a partial implementation of rfc2445
    which pertains to specifying recurring date/times. Limitations
    include:

      - Only collects `DTSTART`, `DTEND`, `RRULE`, `EXRULE`, `RDATE`,
        and `EXDATE` properties.

      - Does not capture parameter options (i.e. RDATE;VALUE=PERIOD).
        `dateutil.rrule` does not support anything other than
        `DATE-TIME` parameter types.

      - `VTIMEZONE` and `TZID` can't be specified, so dates without
        the 'Z' marker will be localized to
        `settings.TIME_ZONE`. `datetime.datetime` objects in
        `Recurrence`/`Rrule` objects will be serialized as UTC.

      - The `DTSTART`, `DTEND`, `RDATE` and `EXDATE` properties also
        only support the `DATE-TIME` type.

    :Returns:
        A `Recurrence` instance.
    """
    def deserialize_dt(text):
        try:
            year, month, day = int(text[:4]), int(text[4:6]), int(text[6:8])
        except ValueError:
            raise exceptions.DeserializationError('malformed date-time: %r' % text)
        if u'T' in text:
            # time is also specified
            try:
                hour, minute, second = (
                    int(text[9:11]), int(text[11:13]), int(text[13:15]))
            except ValueError:
                raise exceptions.DeserializationError('malformed date-time: %r' % text)
        else:
            # only date is specified, use midnight
            hour, minute, second = (0, 0, 0)
        if u'Z' in text:
            # time is in utc
            tzinfo = pytz.utc
        else:
            # right now there is no support for VTIMEZONE/TZID since
            # this is a partial implementation of rfc2445 so we'll
            # just use the time zone specified in the Django settings.
            tzinfo = localtz()

        dt = datetime.datetime(
            year, month, day, hour, minute, second, tzinfo=tzinfo)
        dt = dt.astimezone(localtz())

        # set tz to settings.TIME_ZONE and return offset-naive datetime
        return datetime.datetime(
            dt.year, dt.month, dt.day, dt.hour, dt.minute, dt.second)

    dtstart, dtend, rrules, exrules, rdates, exdates = None, None, [], [], [], []

    tokens = re.compile(
        u'(DTSTART|DTEND|RRULE|EXRULE|RDATE|EXDATE)[^:]*:(.*)',
        re.MULTILINE).findall(text)

    if not tokens and text:
        raise exceptions.DeserializationError('malformed data')

    for label, param_text in tokens:
        if not param_text:
            raise exceptions.DeserializationError('empty property: %r' % label)
        if u'=' not in param_text:
            params = param_text
        else:
            params = {}
            param_tokens = filter(lambda p: p, param_text.split(u';'))
            for item in param_tokens:
                try:
                    param_name, param_value = map(
                        lambda i: i.strip(), item.split(u'=', 1))
                except ValueError:
                    raise exceptions.DeserializationError(
                        'missing parameter value: %r' % item)
                params[param_name] = list(map(
                    lambda i: i.strip(), param_value.split(u',')))

        if label in (u'RRULE', u'EXRULE'):
            kwargs = {}
            for key, value in params.items():
                if key == u'FREQ':
                    try:
                        kwargs[str(key.lower())] = list(
                            Rule.frequencies).index(value[0])
                    except ValueError:
                        raise exceptions.DeserializationError(
                            'bad frequency value: %r' % value[0])
                elif key == u'INTERVAL':
                    try:
                        kwargs[str(key.lower())] = int(value[0])
                    except ValueError:
                        raise exceptions.DeserializationError(
                            'bad interval value: %r' % value[0])
                elif key == u'WKST':
                    try:
                        kwargs[str(key.lower())] = to_weekday(value[0])
                    except ValueError:
                        raise exceptions.DeserializationError(
                            'bad weekday value: %r' % value[0])
                elif key == u'COUNT':
                    try:
                        kwargs[str(key.lower())] = int(value[0])
                    except ValueError:
                        raise exceptions.DeserializationError(
                            'bad count value: %r' % value[0])
                elif key == u'UNTIL':
                    kwargs[str(key.lower())] = deserialize_dt(value[0])
                elif key == u'BYDAY':
                    bydays = []
                    for v in value:
                        try:
                            bydays.append(to_weekday(v))
                        except ValueError:
                            raise exceptions.DeserializationError(
                                'bad weekday value: %r' % v)
                    kwargs[str(key.lower())] = bydays
                elif key.lower() in Rule.byparams:
                    numbers = []
                    for v in value:
                        try:
                            numbers.append(int(v))
                        except ValueError:
                            raise exceptions.DeserializationError(
                                'bad value: %r' % value)
                    kwargs[str(key.lower())] = numbers
                else:
                    raise exceptions.DeserializationError('bad parameter: %s' % key)
            if 'freq' not in kwargs:
                raise exceptions.DeserializationError(
                    'frequency parameter missing from rule')
            if label == u'RRULE':
                rrules.append(Rule(**kwargs))
            else:
                exrules.append(Rule(**kwargs))
        elif label == u'DTSTART':
            dtstart = deserialize_dt(params)
        elif label == u'DTEND':
            dtend = deserialize_dt(params)
        elif label == u'RDATE':
            rdates.append(deserialize_dt(params))
        elif label == u'EXDATE':
            exdates.append(deserialize_dt(params))

    return Recurrence(dtstart, dtend, rrules, exrules, rdates, exdates)
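
deserialize_dt above slices an iCalendar DATE-TIME string into integer components before handing them to datetime.datetime. Here is a stripped-down, standalone version of that slicing, without the pytz/Django-settings handling of the real helper; the function name is made up for illustration:

import datetime

def parse_ical_dt(text):
    # 'YYYYMMDD' or 'YYYYMMDDTHHMMSS[Z]' -> naive datetime.datetime
    year, month, day = int(text[:4]), int(text[4:6]), int(text[6:8])
    if 'T' in text:
        hour, minute, second = int(text[9:11]), int(text[11:13]), int(text[13:15])
    else:
        hour = minute = second = 0
    return datetime.datetime(year, month, day, hour, minute, second)

assert parse_ical_dt('20151001T120000Z') == datetime.datetime(2015, 10, 1, 12, 0)
assert parse_ical_dt('20151001') == datetime.datetime(2015, 10, 1)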

Example 44

View license
    def setUp(self):
        """set up the test
        """
        # we need a database
        db.setup(self.config)
        db.init()

        # replace datetime now function

        # create departments
        self.test_dep1 = Department(name='Dep1')
        self.test_dep2 = Department(name='Dep2')

        # create resources
        self.test_user1 = User(
            login='user1',
            name='User1',
            email='[email protected]',
            password='1234',
            departments=[self.test_dep1]
        )
        DBSession.add(self.test_user1)

        self.test_user2 = User(
            login='user2',
            name='User2',
            email='[email protected]',
            password='1234',
            departments=[self.test_dep1]
        )
        DBSession.add(self.test_user2)

        self.test_user3 = User(
            login='user3',
            name='User3',
            email='[email protected]',
            password='1234',
            departments=[self.test_dep2]
        )
        DBSession.add(self.test_user3)

        self.test_user4 = User(
            login='user4',
            name='User4',
            email='[email protected]',
            password='1234',
            departments=[self.test_dep2]
        )
        DBSession.add(self.test_user4)

        # user with two departments
        self.test_user5 = User(
            login='user5',
            name='User5',
            email='[email protected]',
            password='1234',
            departments=[self.test_dep1, self.test_dep2]
        )
        DBSession.add(self.test_user5)

        # user with no departments
        self.test_user6 = User(
            login='user6',
            name='User6',
            email='[email protected]',
            password='1234'
        )
        DBSession.add(self.test_user6)

        # repository
        self.test_repo = Repository(
            name='Test Repository',
            linux_path='/mnt/T/',
            windows_path='T:/',
            osx_path='/Volumes/T/'
        )
        DBSession.add(self.test_repo)

        # statuses
        self.test_status1 = Status(name='Status 1', code='STS1')
        self.test_status2 = Status(name='Status 2', code='STS2')
        self.test_status3 = Status(name='Status 3', code='STS3')
        self.test_status4 = Status(name='Status 4', code='STS4')
        self.test_status5 = Status(name='Status 5', code='STS5')
        DBSession.add_all([self.test_status1,
                           self.test_status2,
                           self.test_status3,
                           self.test_status4,
                           self.test_status5])

        # status lists
        self.test_proj_status_list = StatusList(
            name='Project Status List',
            statuses=[self.test_status1, self.test_status2, self.test_status3],
            target_entity_type='Project'
        )
        DBSession.add(self.test_proj_status_list)

        # create one project
        self.test_proj1 = Project(
            name='Test Project 1',
            code='TP1',
            repository=self.test_repo,
            status_list=self.test_proj_status_list,
            start=datetime.datetime(2013, 4, 4),
            end=datetime.datetime(2013, 5, 4)
        )
        DBSession.add(self.test_proj1)
        self.test_proj1.now = datetime.datetime(2013, 4, 4)

        # create task status list
        with DBSession.no_autoflush:
            self.test_task_status_list = StatusList.query\
                .filter_by(target_entity_type='Task').first()

        # create two tasks with the same resources
        self.test_task1 = Task(
            name='Task1',
            project=self.test_proj1,
            resources=[self.test_user1, self.test_user2],
            alternative_resources=[
                self.test_user3, self.test_user4, self.test_user5
            ],
            schedule_model=0,
            schedule_timing=50,
            schedule_unit='h',
            status_list=self.test_task_status_list
        )
        DBSession.add(self.test_task1)

        self.test_task2 = Task(
            name='Task2',
            project=self.test_proj1,
            resources=[self.test_user1, self.test_user2],
            alternative_resources=[
                self.test_user3, self.test_user4, self.test_user5
            ],
            depends=[self.test_task1],
            schedule_model=0,
            schedule_timing=60,
            schedule_unit='h',
            status_list=self.test_task_status_list
        )
        DBSession.add(self.test_task2)
        DBSession.commit()
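
The project above is given naive start/end datetimes and then has its now attribute pinned to the start date, which keeps any scheduling arithmetic deterministic in the test. A minimal illustration of that idea, independent of the Stalker API:

import datetime

start = datetime.datetime(2013, 4, 4)
end = datetime.datetime(2013, 5, 4)
now = start  # pinned "now", mirroring self.test_proj1.now above

assert end - now == datetime.timedelta(days=30)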

Example 45

View license
    def test_polled_interval_trigger(self):
        """
        test the trigger mechanism.
        """
        ###
        # Test all constructors and exceptions
        ###
        trigger = PolledIntervalTrigger(PolledScheduler.interval(seconds=1))
        self.assertEqual(trigger.min_interval_length, 1)
        self.assertIsNone(trigger.max_interval)
        self.assertIsNone(trigger.max_interval_length)
        self.assertIsInstance(trigger.next_min_date, datetime.datetime)
        self.assertIsNone(trigger.next_max_date)

        trigger = PolledIntervalTrigger(
            PolledScheduler.interval(seconds=1),
            PolledScheduler.interval(seconds=3)
        )
        self.assertEqual(trigger.min_interval_length, 1)
        self.assertEqual(trigger.max_interval_length, 3)

        trigger = PolledIntervalTrigger(
            PolledScheduler.interval(seconds=1),
            PolledScheduler.interval(seconds=3),
            datetime.datetime.now()
        )
        self.assertEqual(trigger.min_interval_length, 1)
        self.assertEqual(trigger.max_interval_length, 3)

        # Test Type Error Exception
        with self.assertRaises(TypeError):
            trigger = PolledIntervalTrigger('boom')

        with self.assertRaises(TypeError):
            trigger = PolledIntervalTrigger(
                PolledScheduler.interval(seconds=3),
                'boom'
            )

        # Test Value Error Exception
        with self.assertRaises(ValueError):
            trigger = PolledIntervalTrigger(
                PolledScheduler.interval(seconds=3),
                PolledScheduler.interval(seconds=1)
            )

        ###
        # Verify min and max dates are incremented correctly.
        ###
        now = datetime.datetime.now()
        log.debug("Now: %s" % now)
        min_interval = PolledScheduler.interval(seconds=1)
        max_interval = PolledScheduler.interval(seconds=3)

        trigger = PolledIntervalTrigger(min_interval, max_interval, now)

        # Initialized correctly?
        self.assert_datetime_close(trigger.next_min_date, now)
        self.assert_datetime_close(trigger.next_max_date, now + max_interval)
        self.assert_datetime_close(trigger.get_next_fire_time(now), now + max_interval)

        # First call should be successful, but second should not.
        self.assertTrue(trigger.pull_trigger())
        self.assertFalse(trigger.pull_trigger())

        self.assert_datetime_close(trigger.next_min_date, now + min_interval)
        self.assert_datetime_close(trigger.next_max_date, now + max_interval)
        self.assert_datetime_close(trigger.get_next_fire_time(now), now + max_interval)

        # Wait for the minimum interval and it should succeed again!
        time.sleep(2)
        now = datetime.datetime.now()
        self.assertTrue(trigger.pull_trigger())
        self.assertFalse(trigger.pull_trigger())

        ###
        # Now do the same sequence, but with no max_interval
        ###
        now = datetime.datetime.now()
        log.debug("Now: %s" % now)
        min_interval = PolledScheduler.interval(seconds=1)
        max_interval = None

        trigger = PolledIntervalTrigger(min_interval, max_interval, now)

        # Initialized correctly?
        self.assert_datetime_close(trigger.next_min_date, now)
        self.assertIsNone(trigger.next_max_date)
        self.assertIsNone(trigger.get_next_fire_time(now))

        # First call should be successful, but second should not.
        self.assertTrue(trigger.pull_trigger())
        self.assertFalse(trigger.pull_trigger())

        self.assert_datetime_close(trigger.next_min_date, now + min_interval)
        self.assertIsNone(trigger.next_max_date)
        self.assertIsNone(trigger.get_next_fire_time(now))

        # Wait for the minimum interval and it should succeed again!
        time.sleep(2)
        now = datetime.datetime.now()
        self.assertTrue(trigger.pull_trigger())
        self.assertFalse(trigger.pull_trigger())
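
The trigger assertions above boil down to comparing datetime.datetime.now() against minimum and maximum fire dates built with timedelta intervals. A compact sketch of that bookkeeping; the variable names are illustrative, not the PolledIntervalTrigger API:

import datetime

min_interval = datetime.timedelta(seconds=1)
max_interval = datetime.timedelta(seconds=3)

now = datetime.datetime.now()
next_min_date = now + min_interval   # earliest time the trigger may fire
next_max_date = now + max_interval   # latest time before it must fire

assert next_min_date < next_max_date
assert next_max_date - now == max_interval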

Example 46

View license
def main():
    variable1='Teste'
    if len(sys.argv) == 1:
	    prog = os.path.basename(sys.argv[0])
	    print '      '+sys.argv[0]+' [options]'
	    print "     Aide : ", prog, " --help"
	    print "        ou : ", prog, " -h"
	    print "example (scene): python %s -o scene -d 20151001 -f 20151231 -s 199030 -u usgs.txt"%sys.argv[0]
	    print "example (scene): python %s -z unzip -b LT5 -o scene -d 20151001 -f 20151231 -s 203034 -u usgs.txt --output /outputdir/"%sys.argv[0]
	    print "example (scene): python %s -z unzip -b LT5 -o scene -d 20151001 -f 20151231 -s 203034 -u usgs.txt --output /outputdir/ -k update --outputcatalogs /outputcatalogsdir/"%sys.argv[0]		
	    print "example (scene): python %s -b LE7 -o scene -d 20151201 -f 20151231 -s 191025 -u usgs.txt --output . --dir=3373 --station SG1"%sys.argv[0]
	    print "example (liste): python %s -o liste -l /home/hagolle/LANDSAT/liste_landsat8_site.txt -u usgs.txt"%sys.argv[0]	
	    sys.exit(-1)
    else:
        usage = "usage: %prog [options] "
        parser = OptionParser(usage=usage)
        parser.add_option("-o", "--option", dest="option", action="store", type="choice", \
			    help="scene or liste", choices=['scene','liste','catalog'],default=None)
        parser.add_option("-l", "--liste", dest="fic_liste", action="store", type="string", \
			    help="list filename",default=None)
        parser.add_option("-s", "--scene", dest="scene", action="store", type="string", \
			    help="WRS2 coordinates of scene (ex 198030)", default=None)
        parser.add_option("-d", "--start_date", dest="start_date", action="store", type="string", \
			    help="start date, fmt('20131223')")
        parser.add_option("-f","--end_date", dest="end_date", action="store", type="string", \
			    help="end date, fmt('20131223')")
        parser.add_option("-c","--cloudcover", dest="clouds", action="store", type="float", \
			    help="Set a limit to the cloud cover of the image", default=None)			
        parser.add_option("-u","--usgs_passwd", dest="usgs", action="store", type="string", \
			    help="USGS earthexplorer account and password file")
        parser.add_option("-p","--proxy_passwd", dest="proxy", action="store", type="string", \
                help="Proxy account and password file")
        parser.add_option("-z","--unzip", dest="unzip", action="store", type="string", \
			    help="Unzip downloaded tgz file", default=None)			
        parser.add_option("-b","--sat", dest="bird", action="store", type="choice", \
			    help="Which satellite are you looking for", choices=['LT5','LE7', 'LC8'], default='LC8')	
        parser.add_option("--output", dest="output", action="store", type="string", \
			    help="Where to download files",default='/tmp/LANDSAT')
        parser.add_option("--outputcatalogs", dest="outputcatalogs", action="store", type="string", \
			    help="Where to download metadata catalog files",default='/tmp/LANDSAT')					
        parser.add_option("--dir", dest="dir", action="store", type="string", \
			    help="Dir number where files  are stored at USGS",default=None)
        parser.add_option("--station", dest="station", action="store", type="string", \
			    help="Station acronym (3 letters) of the receiving station where the file is downloaded",default=None)	
        parser.add_option("-k", "--updatecatalogfiles", dest="updatecatalogfiles", action="store", type="choice", \
			    help="Update catalog metadata files", choices=['update','noupdate'],default='noupdate')			



        (options, args) = parser.parse_args()
        parser.check_required("-o")
        if options.option=='scene':
	        parser.check_required("-d")
	        parser.check_required("-s")
	        parser.check_required("-u")
	    
        elif options.option=='liste' :
	        parser.check_required("-l")
    	        parser.check_required("-u")

    print options.station, options.dir
    rep=options.output
    if not os.path.exists(rep):
        os.mkdir(rep)
        if options.option=='liste':
            if not os.path.exists(rep+'/LISTE'):
                os.mkdir(rep+'/LISTE')
 
    # read password files
    try:
        f=file(options.usgs)
        (account,passwd)=f.readline().split(' ')
        if passwd.endswith('\n'):
            passwd=passwd[:-1]
        usgs={'account':account,'passwd':passwd}
        f.close()
    except :
        print "error with usgs password file"
        sys.exit(-2)

			

    if options.proxy != None :
        try:
            f=file(options.proxy)
            (user,passwd)=f.readline().split(' ')
            if passwd.endswith('\n'):
                passwd=passwd[:-1]
            host=f.readline()
            if host.endswith('\n'):
                host=host[:-1]
            port=f.readline()
            if port.endswith('\n'):
                port=port[:-1]
            proxy={'user':user,'pass':passwd,'host':host,'port':port}
            f.close()
        except :
            print "error with proxy password file"
            sys.exit(-3)


			
##########Downloading products by scene
    if options.option=='scene':
        produit=options.bird
        path=options.scene[0:3]
        row=options.scene[3:6]
    
        year_start =int(options.start_date[0:4])
        month_start=int(options.start_date[4:6])
        day_start  =int(options.start_date[6:8])
        date_start=datetime.datetime(year_start,month_start, day_start)
        global downloaded_ids		
        downloaded_ids=[]

        if options.end_date!= None:
	        year_end =int(options.end_date[0:4])
	        month_end=int(options.end_date[4:6])
	        day_end  =int(options.end_date[6:8])
	        date_end =datetime.datetime(year_end,month_end, day_end)
        else:
	    date_end=datetime.datetime.now()
	
        if options.proxy!=None:
            connect_earthexplorer_proxy(proxy,usgs)
        else:
            connect_earthexplorer_no_proxy(usgs)	

        # rep_scene="%s/SCENES/%s_%s/GZ"%(rep,path,row)   #Original
        rep_scene="%s"%(rep)	#Modified vbnunes
        if not(os.path.exists(rep_scene)):
            os.makedirs(rep_scene)
			
        if produit.startswith('LC8'):
            repert='4923'
            stations=['LGN']
        if produit.startswith('LE7'):
            repert='3373'
            #repert='3372"
            stations=['EDC','SGS','AGS','ASN','SG1','CUB','COA']
        if produit.startswith('LT5'):
            repert='3119'
            stations=['GLC','ASA','KIR','MOR','KHC', 'PAC', 'KIS', 'CHM', 'LGS', 'MGR', 'COA', 'MPS', 'CUB']		
        
        if options.station !=None:
            stations=[options.station]
        if options.dir !=None:
            repert=options.dir
            
        check=1
		
        curr_date=next_overpass(date_start,int(path),produit)
 
        while (curr_date < date_end) and check==1:
            date_asc=curr_date.strftime("%Y%j")
            notfound = False		
            print 'Searching for images on (julian date): ' + date_asc + '...'
            curr_date=curr_date+datetime.timedelta(16)
            for station in stations:
                for version in ['00','01','02']:			
					nom_prod=produit+options.scene+date_asc+station+version
					tgzfile=os.path.join(rep_scene,nom_prod+'.tgz')
					lsdestdir=os.path.join(rep_scene,nom_prod)				
					url="http://earthexplorer.usgs.gov/download/%s/%s/STANDARD/EE"%(repert,nom_prod)
					print url
					if os.path.exists(lsdestdir):
						print '   product %s already downloaded and unzipped'%nom_prod
						downloaded_ids.append(nom_prod)
						check = 0						
					elif os.path.isfile(tgzfile):
						print '   product %s already downloaded'%nom_prod
						if options.unzip!= None:
							p=unzipimage(nom_prod,rep_scene)
							if p==1 and options.clouds!= None:					
								check=check_cloud_limit(lsdestdir,options.clouds)
								if check==0:
									downloaded_ids.append(nom_prod)							
					else:
						try:
							downloadChunks(url,"%s"%rep_scene,nom_prod+'.tgz')
						except:
							print '   product %s not found'%nom_prod
							notfound = True
						if notfound != True and options.unzip!= None:
							p=unzipimage(nom_prod,rep_scene)
							if p==1 and options.clouds!= None:					
								check=check_cloud_limit(lsdestdir,options.clouds)
								if check==0:
									downloaded_ids.append(nom_prod)								
        log(rep,downloaded_ids)

##########Downloading products by catalog metadata search
    if options.option=='catalog':
        produit=options.bird
        path=options.scene[0:3]
        row=options.scene[3:6]
    
        year_start =int(options.start_date[0:4])
        month_start=int(options.start_date[4:6])
        day_start  =int(options.start_date[6:8])
        date_start=datetime.datetime(year_start,month_start, day_start)
        downloaded_ids=[]

        if options.end_date!= None:
	        year_end =int(options.end_date[0:4])
	        month_end=int(options.end_date[4:6])
	        day_end  =int(options.end_date[6:8])
	        date_end =datetime.datetime(year_end,month_end, day_end)
        else:
	        date_end=datetime.datetime.now()
	
        if options.proxy!=None:
            connect_earthexplorer_proxy(proxy,usgs)
        else:
            connect_earthexplorer_no_proxy(usgs)	

        # rep_scene="%s/SCENES/%s_%s/GZ"%(rep,path,row)   #Original
        rep_scene="%s"%(rep)	#Modified vbnunes
        if not(os.path.exists(rep_scene)):
            os.makedirs(rep_scene)

        getmetadatafiles(options.outputcatalogs, options.updatecatalogfiles)			
			
        if produit.startswith('LC8'):
            repert=['4923']
            collection_file=os.path.join(options.outputcatalogs,'LANDSAT_8.csv')
        if produit.startswith('LE7'):
            repert=['3372','3373']
            collection_file=os.path.join(options.outputcatalogs,'LANDSAT_ETM.csv')			
        if produit.startswith('LT5'):
            repert=['3119','4345']
            if 2000<=int(year_start)<=2009:
                collection_file=os.path.join(options.outputcatalogs,'LANDSAT_TM-2000-2009.csv')
            if 2010<=int(year_start)<=2012:
                collection_file=os.path.join(options.outputcatalogs,'LANDSAT_TM-2010-2012.csv')				
            
        check=1

        notfound = False		
        			
        nom_prod=find_in_collection_metadata(collection_file,options.clouds,date_start,date_end,path,row)
        if nom_prod=='':
            sys.exit('No image was found in the catalog with the given specifications! Exiting...')
        else:				
            tgzfile=os.path.join(rep_scene,nom_prod+'.tgz')
            lsdestdir=os.path.join(rep_scene,nom_prod)

        if os.path.exists(lsdestdir):
            print '   product %s already downloaded and unzipped'%nom_prod
            downloaded_ids.append(nom_prod)
            check = 0						
        elif os.path.isfile(tgzfile):
            print '   product %s already downloaded'%nom_prod
            if options.unzip!= None:
                p=unzipimage(nom_prod,rep_scene)
                if p==1:
                    downloaded_ids.append(nom_prod)	
                    check = 0						
        else:
            while check == 1:
                for collectionid in repert:
                    url="http://earthexplorer.usgs.gov/download/%s/%s/STANDARD/EE"%(collectionid,nom_prod)				
                    try:
                        downloadChunks(url,"%s"%rep_scene,nom_prod+'.tgz')
                    except:
                        print '   product %s not found'%nom_prod
                        notfound = True
                    if notfound != True and options.unzip!= None:
                        p=unzipimage(nom_prod,rep_scene)
                        if p==1 and options.clouds!= None:					
                            check=check_cloud_limit(lsdestdir,options.clouds)
                            if check==0:
                                downloaded_ids.append(nom_prod)			
        log(rep,downloaded_ids)		
		
##########Downloading by list
    if options.option=='liste':
        with file(options.fic_liste) as f:
	    lignes=f.readlines()
        for ligne in lignes:
            (site,nom_prod)=ligne.split(' ')
            produit=nom_prod.strip()
            print produit
            if produit.startswith('LC8'):
                repert='4923'
                stations=['LGN']
            if produit.startswith('LE7'):
                repert='3373'
                #repert='3372"
                stations=['EDC','SGS','AGS','ASN','SG1']
            if produit.startswith('LT5'):
                repert='3119'
                stations=['GLC','ASA','KIR','MOR','KHC', 'PAC', 'KIS', 'CHM', 'LGS', 'MGR', 'COA', 'MPS']	
            if not os.path.exists(rep+'/'+site):
                os.mkdir(rep+'/'+site)
            url="http://earthexplorer.usgs.gov/download/%s/%s/STANDARD/EE"%(repert,produit)
            print 'url=',url
            try:
                if options.proxy!=None :
                    connect_earthexplorer_proxy(proxy,usgs)
                else:
                    connect_earthexplorer_no_proxy(usgs)

                downloadChunks(url,rep+'/'+site,produit+'.tgz')
            except TypeError:
                print 'product %s not found'%produit
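
The download script above turns 'YYYYMMDD' command-line strings into datetime.datetime objects by slicing, then walks forward in 16-day steps (the Landsat revisit cycle) and formats each candidate date as %Y%j. A self-contained sketch of that date handling; the helper name is made up for illustration:

import datetime

def parse_yyyymmdd(s):
    return datetime.datetime(int(s[0:4]), int(s[4:6]), int(s[6:8]))

date_start = parse_yyyymmdd('20151001')
date_end = parse_yyyymmdd('20151231')

julian_dates = []
current = date_start
while current < date_end:
    julian_dates.append(current.strftime('%Y%j'))  # e.g. '2015274'
    current += datetime.timedelta(16)

assert julian_dates[0] == '2015274'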

Example 47

Project: courtlistener
Source File: import_f2.py
View license
def scrape_and_parse():
    """Traverses the bulk data from public.resource.org, and puts them in the
    DB.

    Probably lots of ways to go about this, but I think the easiest will be the following:
     - look at the index page of all volumes, and follow all the links it has.
     - for each volume, look at its index page, and follow the link to all cases
     - for each case, collect information wisely.
     - put it all in the DB
    """

    # begin by loading up the fix files into memory
    court_fix_dict, date_fix_dict, case_name_short_dict = load_fix_files()

    results = []
    DEBUG = 4
    # Set to False to disable automatic browser usage. Else, set to the
    # command you want to run, e.g. 'firefox'
    BROWSER = False
    court_fix_file = open('../logs/f2_court_fix_file.txt', 'a')
    date_fix_file = open('../logs/f2_date_fix_file.txt', 'a')
    case_name_short_fix_file = open('../logs/f2_short_case_name_fix_file.txt', 'a')
    vol_file = open('../logs/vol_file.txt', 'r+')
    case_file = open('../logs/case_file.txt', 'r+')

    url = "file://%s/Resource.org/F2/index.html" % INSTALL_ROOT
    openedURL = urllib2.urlopen(url)
    content = openedURL.read()
    openedURL.close()
    tree = fromstring(content)

    volumeLinks = tree.xpath('//table/tbody/tr/td[1]/a')

    try:
        i = int(vol_file.readline())
    except ValueError:
        # the volume file is empty or otherwise failing.
        i = 0
    vol_file.close()

    if DEBUG >= 1:
        print "Number of remaining volumes is: %d" % (len(volumeLinks) - i)

    # used later, needs a default value.
    saved_caseDate = None
    saved_court = None
    while i < len(volumeLinks):
        # we iterate over every case in the volume
        volumeURL = volumeLinks[i].text + "/index.html"
        volumeURL = urljoin(url, volumeURL)
        if DEBUG >= 1:
            print "Current volumeURL is: %s" % volumeURL

        openedVolumeURL = urllib2.urlopen(volumeURL)
        content = openedVolumeURL.read()
        volumeTree = fromstring(content)
        openedVolumeURL.close()
        caseLinks = volumeTree.xpath('//table/tbody/tr/td[1]/a')
        caseDates = volumeTree.xpath('//table/tbody/tr/td[2]')
        sha1Hashes = volumeTree.xpath('//table/tbody/tr/td[3]/a')

        # The following loads a serialized placeholder from disk.
        try:
            j = int(case_file.readline())
        except ValueError:
            j = 0
        case_file.close()
        while j < len(caseLinks):
            # iterate over each case, throwing it in the DB
            if DEBUG >= 1:
                print ''
            # like the scraper, we begin with the caseLink field (relative for
            # now, not absolute)
            caseLink = caseLinks[j].get('href')

            # sha1 is easy
            sha1Hash = sha1Hashes[j].text
            if DEBUG >= 4:
                print "SHA1 is: %s" % sha1Hash

            # using the caselink from above, and the volumeURL, we can get the
            # html
            absCaseLink = urljoin(volumeURL, caseLink)
            html = urllib2.urlopen(absCaseLink).read()
            htmlTree = fromstring(html)
            bodyContents = htmlTree.xpath('//body/*[not(@id="footer")]')

            body = ""
            bodyText = ""
            for element in bodyContents:
                body += tostring(element)
                try:
                    bodyText += tostring(element, method='text')
                except UnicodeEncodeError:
                    # Happens with odd characters. Simply pass this iteration.
                    pass
            if DEBUG >= 5:
                print body
                print bodyText

            # need to figure out the court ID
            try:
                courtPs = htmlTree.xpath('//p[@class = "court"]')
                # Often the court ends up in the parties field.
                partiesPs = htmlTree.xpath("//p[@class= 'parties']")
                court = ""
                for courtP in courtPs:
                    court += tostring(courtP).lower()
                for party in partiesPs:
                    court += tostring(party).lower()
            except IndexError:
                court = check_fix_list(sha1Hash, court_fix_dict)
                if not court:
                    print absCaseLink
                    if BROWSER:
                        subprocess.Popen([BROWSER, absCaseLink], shell=False).communicate()
                    court = raw_input("Please input court name (e.g. \"First Circuit of Appeals\"): ").lower()
                    court_fix_file.write("%s|%s\n" % (sha1Hash, court))
            if ('first' in court) or ('ca1' == court):
                court = 'ca1'
            elif ('second' in court) or ('ca2' == court):
                court = 'ca2'
            elif ('third' in court) or ('ca3' == court):
                court = 'ca3'
            elif ('fourth' in court) or ('ca4' == court):
                court = 'ca4'
            elif ('fifth' in court) or ('ca5' == court):
                court = 'ca5'
            elif ('sixth' in court) or ('ca6' == court):
                court = 'ca6'
            elif ('seventh' in court) or ('ca7' == court):
                court = 'ca7'
            elif ('eighth' in court) or ('ca8' == court):
                court = 'ca8'
            elif ('ninth' in court) or ('ca9' == court):
                court = 'ca9'
            elif ("tenth" in court) or ('ca10' == court):
                court = 'ca10'
            elif ("eleventh" in court) or ('ca11' == court):
                court = 'ca11'
            elif ('columbia' in court) or ('cadc' == court):
                court = 'cadc'
            elif ('federal' in court) or ('cafc' == court):
                court = 'cafc'
            elif ('patent' in court) or ('ccpa' == court):
                court = 'ccpa'
            elif (('emergency' in court) and ('temporary' not in court)) or ('eca' == court):
                court = 'eca'
            elif ('claims' in court) or ('uscfc' == court):
                court = 'uscfc'
            else:
                # No luck extracting the court name. Try the fix file.
                court = check_fix_list(sha1Hash, court_fix_dict)
                if not court:
                    # Not yet in the fix file. Check if it's a crazy ca5 case
                    court = ''
                    ca5courtPs = htmlTree.xpath('//p[@class = "center"]')
                    for ca5courtP in ca5courtPs:
                        court += tostring(ca5courtP).lower()
                    if 'fifth circuit' in court:
                        court = 'ca5'
                    else:
                        court = False

                    if not court:
                        # Still no luck. Ask for input, then append it to
                        # the fix file.
                        print absCaseLink
                        if BROWSER:
                            subprocess.Popen([BROWSER, absCaseLink], shell=False).communicate()
                        court = raw_input("Unknown court. Input the court code to proceed successfully [%s]: " % saved_court)
                        court = court or saved_court
                    court_fix_file.write("%s|%s\n" % (sha1Hash, court))

            saved_court = court
            court = Court.objects.get(pk=court)
            if DEBUG >= 4:
                print "Court is: %s" % court

            # next: west_cite, docket_number and caseName. Full casename is gotten later.
            west_cite = caseLinks[j].text
            docket_number = absCaseLink.split('.')[-2]
            caseName = caseLinks[j].get('title')

            caseName, precedential_status = exceptional_cleaner(caseName)
            cite, new = hasDuplicate(caseName, west_cite, docket_number)
            if cite.caseNameShort == '':
                # No luck getting the case name
                savedCaseNameShort = check_fix_list(sha1Hash, case_name_short_dict)
                if not savedCaseNameShort:
                    print absCaseLink
                    if BROWSER:
                        subprocess.Popen([BROWSER, absCaseLink], shell=False).communicate()
                    caseName = raw_input("Short casename: ")
                    cite.caseNameShort = trunc(caseName, 100)
                    cite.caseNameFull = caseName
                    case_name_short_fix_file.write("%s|%s\n" % (sha1Hash, caseName))
                else:
                    # We got both the values from the save files. Use 'em.
                    cite.caseNameShort = trunc(savedCaseNameShort, 100)
                    cite.caseNameFull = savedCaseNameShort

                # The slug needs to be done here, b/c it is only done automatically
                # the first time the citation is saved, and this will be
                # at least the second.
                cite.slug = slugify(trunc(cite.caseNameShort, 75))
                cite.save()

            if DEBUG >= 4:
                print "precedential_status: " + precedential_status
                print "west_cite: " + cite.west_cite
                print "caseName: " + cite.caseNameFull

            # date is kinda tricky...details here:
            # http://pleac.sourceforge.net/pleac_python/datesandtimes.html
            rawDate = caseDates[j].find('a')
            try:
                if rawDate is not None:
                    # Special cases
                    if sha1Hash == 'f0da421f117ef16223d7e61d1e4e5526036776e6':
                        date_text = 'August 28, 1980'
                    elif sha1Hash == '8cc192eaacd1c544b5e8ffbd751d9be84c311932':
                        date_text = 'August 16, 1985'
                    elif sha1Hash == 'd19bce155f72a9f981a12efabd760a35e1e7dbe7':
                        date_text = 'October 12, 1979'
                    elif sha1Hash == '9f7583cf0d46ddc9cad4e7943dd775f9e9ea99ff':
                        date_text = 'July 30, 1980'
                    elif sha1Hash == '211ea81a4ab4132483c483698d2a40f4366f5640':
                        date_text = 'November 3, 1981'
                    elif sha1Hash == 'eefb344034461e9c6912689677a32cd18381d5c2':
                        date_text = 'July 28, 1983'
                    else:
                        date_text = rawDate.text
                    try:
                        caseDate = datetime.datetime(*time.strptime(date_text, "%B, %Y")[0:5])
                    except (ValueError, TypeError):
                        caseDate = datetime.datetime(*time.strptime(date_text, "%B %d, %Y")[0:5])
                else:
                    # No value was found. Throw an exception.
                    raise ValueError
            except:
                # No date provided.
                try:
                    # Try to get it from the saved list
                    caseDate = datetime.datetime(*time.strptime(check_fix_list(sha1Hash, date_fix_dict), "%B %d, %Y")[0:5])
                except:
                    caseDate = False
                if not caseDate:
                    # Parse out the dates with debug set to false.
                    try:
                        dates = parse_dates(bodyText, False)
                    except OverflowError:
                        # Happens when we try to make a date from a very large number
                        dates = []
                    try:
                        first_date_found = dates[0]
                    except IndexError:
                        # No dates found.
                        first_date_found = False
                    if first_date_found == saved_caseDate:
                        # High likelihood of date being correct. Use it.
                        caseDate = saved_caseDate
                    else:
                        print absCaseLink
                        if BROWSER:
                            subprocess.Popen([BROWSER, absCaseLink], shell=False).communicate()
                        print "Unknown date. Possible options are:"
                        try:
                            print "  1) %s" % saved_caseDate.strftime("%B %d, %Y")
                        except AttributeError:
                            # Happens on first iteration when saved_caseDate has no strftime attribute.
                            try:
                                saved_caseDate = dates[0]
                                print "  1) %s" % saved_caseDate.strftime(
                                    "%B %d, %Y")
                            except IndexError:
                                # Happens when dates has no values.
                                print "  No options available."
                        for k, date in enumerate(dates[0:4]):
                            if date.year >= 1900:
                                # strftime can't handle dates before 1900.
                                print "  %s) %s" % (k + 2,
                                                    date.strftime("%B %d, %Y"))
                        choice = raw_input("Enter the date or an option to proceed [1]: ")
                        choice = choice or 1
                        if str(choice) == '1':
                            # The user chose the default. Use the saved value from the last case
                            caseDate = saved_caseDate
                        elif choice in ['2', '3', '4', '5']:
                            # The user chose an option between 2 and 5. Use it.
                            caseDate = dates[int(choice) - 2]
                        else:
                            # The user typed a new date. Use it.
                            caseDate = datetime.datetime(*time.strptime(choice, "%B %d, %Y")[0:5])
                    date_fix_file.write("%s|%s\n" % (sha1Hash, caseDate.strftime("%B %d, %Y")))

            # Used during the next iteration as the default value
            saved_caseDate = caseDate

            if DEBUG >= 3:
                print "caseDate is: %s" % caseDate

            try:
                doc, created = Document.objects.get_or_create(
                    sha1=sha1Hash, court=court)
            except MultipleObjectsReturned:
                # this shouldn't happen now that we're using SHA1 as the dup
                # check, but the old data is problematic, so we must catch this.
                created = False

            if created:
                # we only do this if it's new
                doc.html = body
                doc.sha1 = sha1Hash
                doc.download_url = "http://bulk.resource.org/courts.gov/c/F2/"\
                    + str(i + 178) + "/" + caseLink
                doc.date_filed = caseDate
                doc.source = "R"

                doc.precedential_status = precedential_status
                doc.citation = cite
                doc.save()

            if not created:
                # something is afoot. Throw a big error.
                print "Duplicate found at volume " + str(i + 1) + \
                    " and row " + str(j + 1) + "!!!!"
                print "Found document %s in the database with doc id of %d!" % (doc, doc.pk)
                exit(1)

            # save our location within the volume.
            j += 1
            case_file = open('../logs/case_file.txt', 'w')
            case_file.write(str(j))
            case_file.close()

        # save the last volume completed.
        i += 1
        vol_file = open('../logs/vol_file.txt', 'w')
        vol_file.write(str(i))
        vol_file.close()

        # Clear query cache, as it presents a memory leak
        db.reset_queries()

    return 0
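
The date handling in this example leans on the `time.strptime` → `datetime.datetime` round trip: parse the text into a struct_time, then pass its first five fields (year, month, day, hour, minute) to the constructor. A minimal sketch of that idiom, with an illustrative date string rather than anything taken from the scraper:

import datetime
import time

date_text = "August 28, 1980"   # illustrative value, not from the scraper

# Build a datetime from the first five strptime fields; seconds and
# microseconds default to zero.
case_date = datetime.datetime(*time.strptime(date_text, "%B %d, %Y")[0:5])

# datetime.datetime.strptime performs the same parse in a single call.
assert case_date == datetime.datetime.strptime(date_text, "%B %d, %Y")
print(case_date)   # 1980-08-28 00:00:00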

Example 48

Project: crate.web
Source File: 0001_initial.py
View license
    def forwards(self, orm):
        # Adding model 'TroveClassifier'
        db.create_table('packages_troveclassifier', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('trove', self.gf('django.db.models.fields.CharField')(unique=True, max_length=350)),
        ))
        db.send_create_signal('packages', ['TroveClassifier'])

        # Adding model 'Package'
        db.create_table('packages_package', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('created', self.gf('model_utils.fields.AutoCreatedField')(default=datetime.datetime(2012, 1, 28, 13, 38, 31, 227535))),
            ('modified', self.gf('model_utils.fields.AutoLastModifiedField')(default=datetime.datetime(2012, 1, 28, 13, 38, 31, 227680))),
            ('name', self.gf('django.db.models.fields.SlugField')(unique=True, max_length=150)),
        ))
        db.send_create_signal('packages', ['Package'])

        # Adding model 'PackageURI'
        db.create_table('packages_packageuri', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('package', self.gf('django.db.models.fields.related.ForeignKey')(related_name='package_links', to=orm['packages.Package'])),
            ('uri', self.gf('django.db.models.fields.URLField')(max_length=400)),
        ))
        db.send_create_signal('packages', ['PackageURI'])

        # Adding unique constraint on 'PackageURI', fields ['package', 'uri']
        db.create_unique('packages_packageuri', ['package_id', 'uri'])

        # Adding model 'Release'
        db.create_table('packages_release', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('created', self.gf('model_utils.fields.AutoCreatedField')(default=datetime.datetime(2012, 1, 28, 13, 38, 31, 229663), db_index=True)),
            ('modified', self.gf('model_utils.fields.AutoLastModifiedField')(default=datetime.datetime(2012, 1, 28, 13, 38, 31, 229762))),
            ('package', self.gf('django.db.models.fields.related.ForeignKey')(related_name='releases', to=orm['packages.Package'])),
            ('version', self.gf('django.db.models.fields.CharField')(max_length=512)),
            ('hidden', self.gf('django.db.models.fields.BooleanField')(default=False)),
            ('order', self.gf('django.db.models.fields.IntegerField')(default=0)),
            ('platform', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('summary', self.gf('django.db.models.fields.TextField')()),
            ('description', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('keywords', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('license', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('author', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('author_email', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('maintainer', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('maintainer_email', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('requires_python', self.gf('django.db.models.fields.CharField')(max_length=25, blank=True)),
            ('download_uri', self.gf('django.db.models.fields.URLField')(max_length=1024, blank=True)),
            ('raw_data', self.gf('django.db.models.fields.TextField')(null=True, blank=True)),
        ))
        db.send_create_signal('packages', ['Release'])

        # Adding unique constraint on 'Release', fields ['package', 'version']
        db.create_unique('packages_release', ['package_id', 'version'])

        # Adding M2M table for field classifiers on 'Release'
        db.create_table('packages_release_classifiers', (
            ('id', models.AutoField(verbose_name='ID', primary_key=True, auto_created=True)),
            ('release', models.ForeignKey(orm['packages.release'], null=False)),
            ('troveclassifier', models.ForeignKey(orm['packages.troveclassifier'], null=False))
        ))
        db.create_unique('packages_release_classifiers', ['release_id', 'troveclassifier_id'])

        # Adding model 'ReleaseFile'
        db.create_table('packages_releasefile', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('created', self.gf('model_utils.fields.AutoCreatedField')(default=datetime.datetime(2012, 1, 28, 13, 38, 31, 228759), db_index=True)),
            ('modified', self.gf('model_utils.fields.AutoLastModifiedField')(default=datetime.datetime(2012, 1, 28, 13, 38, 31, 228860))),
            ('release', self.gf('django.db.models.fields.related.ForeignKey')(related_name='files', to=orm['packages.Release'])),
            ('type', self.gf('django.db.models.fields.CharField')(max_length=25)),
            ('file', self.gf('django.db.models.fields.files.FileField')(max_length=512)),
            ('filename', self.gf('django.db.models.fields.CharField')(default=None, max_length=200, null=True, blank=True)),
            ('digest', self.gf('django.db.models.fields.CharField')(max_length=512)),
            ('python_version', self.gf('django.db.models.fields.CharField')(max_length=25)),
            ('downloads', self.gf('django.db.models.fields.PositiveIntegerField')(default=0)),
            ('comment', self.gf('django.db.models.fields.TextField')(blank=True)),
        ))
        db.send_create_signal('packages', ['ReleaseFile'])

        # Adding unique constraint on 'ReleaseFile', fields ['release', 'type', 'python_version', 'filename']
        db.create_unique('packages_releasefile', ['release_id', 'type', 'python_version', 'filename'])

        # Adding model 'ReleaseURI'
        db.create_table('packages_releaseuri', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('release', self.gf('django.db.models.fields.related.ForeignKey')(related_name='uris', to=orm['packages.Release'])),
            ('label', self.gf('django.db.models.fields.CharField')(max_length=64)),
            ('uri', self.gf('django.db.models.fields.URLField')(max_length=500)),
        ))
        db.send_create_signal('packages', ['ReleaseURI'])

        # Adding model 'ReleaseRequire'
        db.create_table('packages_releaserequire', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('release', self.gf('django.db.models.fields.related.ForeignKey')(related_name='requires', to=orm['packages.Release'])),
            ('kind', self.gf('django.db.models.fields.CharField')(max_length=50)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=150)),
            ('version', self.gf('django.db.models.fields.CharField')(max_length=50)),
            ('environment', self.gf('django.db.models.fields.TextField')(blank=True)),
        ))
        db.send_create_signal('packages', ['ReleaseRequire'])

        # Adding model 'ReleaseProvide'
        db.create_table('packages_releaseprovide', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('release', self.gf('django.db.models.fields.related.ForeignKey')(related_name='provides', to=orm['packages.Release'])),
            ('kind', self.gf('django.db.models.fields.CharField')(max_length=50)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=150)),
            ('version', self.gf('django.db.models.fields.CharField')(max_length=50)),
            ('environment', self.gf('django.db.models.fields.TextField')(blank=True)),
        ))
        db.send_create_signal('packages', ['ReleaseProvide'])

        # Adding model 'ReleaseObsolete'
        db.create_table('packages_releaseobsolete', (
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('release', self.gf('django.db.models.fields.related.ForeignKey')(related_name='obsoletes', to=orm['packages.Release'])),
            ('kind', self.gf('django.db.models.fields.CharField')(max_length=50)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=150)),
            ('version', self.gf('django.db.models.fields.CharField')(max_length=50)),
            ('environment', self.gf('django.db.models.fields.TextField')(blank=True)),
        ))
        db.send_create_signal('packages', ['ReleaseObsolete'])
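
The microsecond-precision `datetime.datetime(...)` literals in the `created` and `modified` defaults above are snapshots: South evaluates the fields' callable defaults when it freezes a migration, so (presumably) these timestamps simply record when `0001_initial.py` was generated. A minimal sketch of reading such a literal back, under that assumption:

import datetime

# Frozen value copied from the migration above; South presumably produced it
# by evaluating datetime.datetime.now() at generation time.
frozen_default = datetime.datetime(2012, 1, 28, 13, 38, 31, 227535)

print(frozen_default.isoformat())                  # 2012-01-28T13:38:31.227535
print(datetime.datetime.now() > frozen_default)    # True on any current machine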

Example 49

Project: hue
Source File: 0001_initial.py
View license
    def forwards(self, orm):

        # Adding model 'Job'
        db.create_table('oozie_job', (
            ('is_shared', self.gf('django.db.models.fields.BooleanField')(default=False, db_index=True, blank=True)),
            ('description', self.gf('django.db.models.fields.CharField')(max_length=1024, blank=True)),
            ('parameters', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('deployment_dir', self.gf('django.db.models.fields.CharField')(max_length=1024, blank=True)),
            ('schema_version', self.gf('django.db.models.fields.CharField')(max_length=128)),
            ('last_modified', self.gf('django.db.models.fields.DateTimeField')(auto_now=True, db_index=True, blank=True)),
            ('owner', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['auth.User'])),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=40)),
        ))
        db.send_create_signal('oozie', ['Job'])

        # Adding model 'Workflow'
        db.create_table('oozie_workflow', (
            ('job_xml', self.gf('django.db.models.fields.CharField')(default='', max_length=512, blank=True)),
            ('end', self.gf('django.db.models.fields.related.ForeignKey')(blank=True, related_name='end_workflow', null=True, to=orm['oozie.Node'])),
            ('is_single', self.gf('django.db.models.fields.BooleanField')(default=False, blank=True)),
            ('job_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Job'], unique=True, primary_key=True)),
            ('job_properties', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('start', self.gf('django.db.models.fields.related.ForeignKey')(blank=True, related_name='start_workflow', null=True, to=orm['oozie.Node'])),
        ))
        db.send_create_signal('oozie', ['Workflow'])

        # Adding model 'Link'
        db.create_table('oozie_link', (
            ('comment', self.gf('django.db.models.fields.CharField')(default='', max_length=1024, blank=True)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=40)),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('parent', self.gf('django.db.models.fields.related.ForeignKey')(related_name='child_node', to=orm['oozie.Node'])),
            ('child', self.gf('django.db.models.fields.related.ForeignKey')(related_name='parent_node', to=orm['oozie.Node'])),
        ))
        db.send_create_signal('oozie', ['Link'])

        # Adding model 'Node'
        db.create_table('oozie_node', (
            ('description', self.gf('django.db.models.fields.CharField')(default='', max_length=1024, blank=True)),
            ('workflow', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['oozie.Workflow'])),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('node_type', self.gf('django.db.models.fields.CharField')(max_length=64)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=40)),
        ))
        db.send_create_signal('oozie', ['Node'])

        # Adding model 'Mapreduce'
        db.create_table('oozie_mapreduce', (
            ('files', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('job_xml', self.gf('django.db.models.fields.CharField')(default='', max_length=512, blank=True)),
            ('jar_path', self.gf('django.db.models.fields.CharField')(max_length=512)),
            ('job_properties', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('archives', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True)),
            ('prepares', self.gf('django.db.models.fields.TextField')(default='[]')),
        ))
        db.send_create_signal('oozie', ['Mapreduce'])

        # Adding model 'Streaming'
        db.create_table('oozie_streaming', (
            ('files', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('mapper', self.gf('django.db.models.fields.CharField')(max_length=512)),
            ('reducer', self.gf('django.db.models.fields.CharField')(max_length=512)),
            ('job_properties', self.gf('django.db.models.fields.TextField')(default='[{"name":"oozie.use.system.libpath","value":"true"}]')),
            ('archives', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
        ))
        db.send_create_signal('oozie', ['Streaming'])

        # Adding model 'Java'
        db.create_table('oozie_java', (
            ('files', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('job_xml', self.gf('django.db.models.fields.CharField')(default='', max_length=512, blank=True)),
            ('jar_path', self.gf('django.db.models.fields.CharField')(max_length=512)),
            ('java_opts', self.gf('django.db.models.fields.CharField')(max_length=256, blank=True)),
            ('args', self.gf('django.db.models.fields.TextField')(blank=True)),
            ('job_properties', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('prepares', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('archives', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
            ('main_class', self.gf('django.db.models.fields.CharField')(max_length=256)),
        ))
        db.send_create_signal('oozie', ['Java'])

        # Adding model 'Pig'
        db.create_table('oozie_pig', (
            ('files', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('job_xml', self.gf('django.db.models.fields.CharField')(default='', max_length=512, blank=True)),
            ('job_properties', self.gf('django.db.models.fields.TextField')(default='[{"name":"oozie.use.system.libpath","value":"true"}]')),
            ('params', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('archives', self.gf('django.db.models.fields.CharField')(default='[]', max_length=512)),
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
            ('prepares', self.gf('django.db.models.fields.TextField')(default='[]')),
            ('script_path', self.gf('django.db.models.fields.CharField')(max_length=256)),
        ))
        db.send_create_signal('oozie', ['Pig'])

        # Adding model 'Start'
        db.create_table('oozie_start', (
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True)),
        ))
        db.send_create_signal('oozie', ['Start'])

        # Adding model 'End'
        db.create_table('oozie_end', (
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
        ))
        db.send_create_signal('oozie', ['End'])

        # Adding model 'Kill'
        db.create_table('oozie_kill', (
            ('message', self.gf('django.db.models.fields.CharField')(default='Action failed, error message[${wf:errorMessage(wf:lastErrorNode())}]', max_length=256)),
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
        ))
        db.send_create_signal('oozie', ['Kill'])

        # Adding model 'Fork'
        db.create_table('oozie_fork', (
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
        ))
        db.send_create_signal('oozie', ['Fork'])

        # Adding model 'Join'
        db.create_table('oozie_join', (
            ('node_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Node'], unique=True, primary_key=True)),
        ))
        db.send_create_signal('oozie', ['Join'])

        # Adding model 'Coordinator'
        db.create_table('oozie_coordinator', (
            ('end', self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime(2012, 9, 7, 15, 12, 23, 992784))),
            ('concurrency', self.gf('django.db.models.fields.PositiveSmallIntegerField')(null=True, blank=True)),
            ('frequency_number', self.gf('django.db.models.fields.SmallIntegerField')(default=1)),
            ('workflow', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['oozie.Workflow'], null=True)),
            ('job_ptr', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Job'], unique=True, primary_key=True)),
            ('frequency_unit', self.gf('django.db.models.fields.CharField')(default='days', max_length=20)),
            ('start', self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime(2012, 9, 4, 15, 12, 23, 992735))),
            ('timeout', self.gf('django.db.models.fields.SmallIntegerField')(null=True, blank=True)),
            ('timezone', self.gf('django.db.models.fields.CharField')(default='America/Los_Angeles', max_length=24)),
            ('throttle', self.gf('django.db.models.fields.PositiveSmallIntegerField')(null=True, blank=True)),
            ('execution', self.gf('django.db.models.fields.CharField')(max_length=10, null=True, blank=True)),
        ))
        db.send_create_signal('oozie', ['Coordinator'])

        # Adding model 'Dataset'
        db.create_table('oozie_dataset', (
            ('description', self.gf('django.db.models.fields.CharField')(default='', max_length=1024, blank=True)),
            ('frequency_number', self.gf('django.db.models.fields.SmallIntegerField')(default=1)),
            ('coordinator', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['oozie.Coordinator'])),
            ('frequency_unit', self.gf('django.db.models.fields.CharField')(default='days', max_length=20)),
            ('uri', self.gf('django.db.models.fields.CharField')(default='/data/${YEAR}${MONTH}${DAY}', max_length=1024)),
            ('start', self.gf('django.db.models.fields.DateTimeField')(default=datetime.datetime(2012, 9, 4, 15, 12, 23, 993608))),
            ('timezone', self.gf('django.db.models.fields.CharField')(default='America/Los_Angeles', max_length=24)),
            ('done_flag', self.gf('django.db.models.fields.CharField')(default='', max_length=64, blank=True)),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=40)),
        ))
        db.send_create_signal('oozie', ['Dataset'])

        # Adding model 'DataInput'
        db.create_table('oozie_datainput', (
            ('coordinator', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['oozie.Coordinator'])),
            ('dataset', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Dataset'], unique=True)),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=40)),
        ))
        db.send_create_signal('oozie', ['DataInput'])

        # Adding model 'DataOutput'
        db.create_table('oozie_dataoutput', (
            ('coordinator', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['oozie.Coordinator'])),
            ('dataset', self.gf('django.db.models.fields.related.OneToOneField')(to=orm['oozie.Dataset'], unique=True)),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
            ('name', self.gf('django.db.models.fields.CharField')(max_length=40)),
        ))
        db.send_create_signal('oozie', ['DataOutput'])

        # Adding model 'History'
        db.create_table('oozie_history', (
            ('submission_date', self.gf('django.db.models.fields.DateTimeField')(auto_now=True, db_index=True, blank=True)),
            ('job', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['oozie.Job'])),
            ('properties', self.gf('django.db.models.fields.TextField')()),
            ('oozie_job_id', self.gf('django.db.models.fields.CharField')(max_length=128)),
            ('submitter', self.gf('django.db.models.fields.related.ForeignKey')(to=orm['auth.User'])),
            ('id', self.gf('django.db.models.fields.AutoField')(primary_key=True)),
        ))
        db.send_create_signal('oozie', ['History'])
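
The `Coordinator` defaults above encode a three-day window: `start` is frozen at 2012-09-04 15:12:23 and `end` at 2012-09-07 15:12:23. That spacing suggests model defaults along the lines of "now" and "now plus three days", evaluated when the migration was generated; the sketch below is an assumption about the original model, not code from the migration:

import datetime

# Hypothetical defaults mirroring the frozen start/end values above.
start_default = datetime.datetime.today()
end_default = start_default + datetime.timedelta(days=3)

print(end_default - start_default)   # 3 days, 0:00:00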

Example 50

Project: talk.org
Source File: query.py
View license
def query_class(QueryClass, Database):
    """
    Returns a custom django.db.models.sql.query.Query subclass that is
    appropriate for Oracle.

    The 'Database' module (cx_Oracle) is passed in here so that all the setup
    required to import it only needs to be done by the calling module.
    """
    global _classes
    try:
        return _classes[QueryClass]
    except KeyError:
        pass

    class OracleQuery(QueryClass):
        def resolve_columns(self, row, fields=()):
            index_start = len(self.extra_select.keys())
            values = [self.convert_values(v, None) for v in row[:index_start]]
            for value, field in map(None, row[index_start:], fields):
                values.append(self.convert_values(value, field))
            return values

        def convert_values(self, value, field):
            from django.db.models.fields import DateField, DateTimeField, \
                 TimeField, BooleanField, NullBooleanField, DecimalField, Field
            if isinstance(value, Database.LOB):
                value = value.read()
            # Oracle stores empty strings as null. We need to undo this in
            # order to adhere to the Django convention of using the empty
            # string instead of null, but only if the field accepts the
            # empty string.
            if value is None and isinstance(field, Field) and field.empty_strings_allowed:
                value = u''
            # Convert 1 or 0 to True or False
            elif value in (1, 0) and isinstance(field, (BooleanField, NullBooleanField)):
                value = bool(value)
            # Convert floats to decimals
            elif value is not None and isinstance(field, DecimalField):
                value = util.typecast_decimal(field.format_number(value))
            # cx_Oracle always returns datetime.datetime objects for
            # DATE and TIMESTAMP columns, but Django wants to see a
            # python datetime.date, .time, or .datetime.  We use the type
            # of the Field to determine which to cast to, but it's not
            # always available.
            # As a workaround, we cast to date if all the time-related
            # values are 0, or to time if the date is 1/1/1900.
            # This could be cleaned a bit by adding a method to the Field
            # classes to normalize values from the database (the to_python
            # method is used for validation and isn't what we want here).
            elif isinstance(value, Database.Timestamp):
                # In Python 2.3, the cx_Oracle driver returns its own
                # Timestamp object that we must convert to a datetime class.
                if not isinstance(value, datetime.datetime):
                    value = datetime.datetime(value.year, value.month,
                            value.day, value.hour, value.minute, value.second,
                            value.fsecond)
                if isinstance(field, DateTimeField):
                    # DateTimeField subclasses DateField so must be checked
                    # first.
                    pass
                elif isinstance(field, DateField):
                    value = value.date()
                elif isinstance(field, TimeField) or (value.year == 1900 and value.month == value.day == 1):
                    value = value.time()
                elif value.hour == value.minute == value.second == value.microsecond == 0:
                    value = value.date()
            return value

        def as_sql(self, with_limits=True, with_col_aliases=False):
            """
            Creates the SQL for this query. Returns the SQL string and list
            of parameters.  This is overridden from the original Query class
            to accommodate Oracle's limit/offset SQL.

            If 'with_limits' is False, any limit/offset information is not
            included in the query.
            """
            # The `do_offset` flag indicates whether we need to construct
            # the SQL needed to use limit/offset w/Oracle.
            do_offset = with_limits and (self.high_mark or self.low_mark)

            # If no offsets, just return the result of the base class
            # `as_sql`.
            if not do_offset:
                return super(OracleQuery, self).as_sql(with_limits=False,
                        with_col_aliases=with_col_aliases)

            # `get_columns` needs to be called before `get_ordering` to
            # populate `_select_alias`.
            self.pre_sql_setup()
            out_cols = self.get_columns()
            ordering = self.get_ordering()

            # Getting the "ORDER BY" SQL for the ROW_NUMBER() result.
            if ordering:
                rn_orderby = ', '.join(ordering)
            else:
                # Oracle's ROW_NUMBER() function always requires an
                # order-by clause.  So we need to define a default
                # order-by, since none was provided.
                qn = self.quote_name_unless_alias
                opts = self.model._meta
                rn_orderby = '%s.%s' % (qn(opts.db_table), qn(opts.fields[0].db_column or opts.fields[0].column))

            # Getting the selection SQL and the params, which has the `rn`
            # extra selection SQL.
            self.extra_select['rn'] = 'ROW_NUMBER() OVER (ORDER BY %s )' % rn_orderby
            sql, params = super(OracleQuery, self).as_sql(with_limits=False,
                    with_col_aliases=True)

            # Constructing the result SQL, using the initial select SQL
            # obtained above.
            result = ['SELECT * FROM (%s)' % sql]

            # Place WHERE condition on `rn` for the desired range.
            result.append('WHERE rn > %d' % self.low_mark)
            if self.high_mark:
                result.append('AND rn <= %d' % self.high_mark)

            # Returning the SQL w/params.
            return ' '.join(result), params

        def set_limits(self, low=None, high=None):
            super(OracleQuery, self).set_limits(low, high)

            # We need to select the row number for the LIMIT/OFFSET sql.
            # A placeholder is added to extra_select now, because as_sql is
            # too late to be modifying extra_select.  However, the actual sql
            # depends on the ordering, so that is generated in as_sql.
            self.extra_select['rn'] = '1'

        def clear_limits(self):
            super(OracleQuery, self).clear_limits()
            if 'rn' in self.extra_select:
                del self.extra_select['rn']

    _classes[QueryClass] = OracleQuery
    return OracleQuery
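
The `convert_values` method above first coerces the driver's `Timestamp` into a real `datetime.datetime`, then narrows it to a `date` or `time` using either the field type or the sentinels (a 1900-01-01 date for time-only columns, all-zero time parts for date-only columns). A minimal sketch of that narrowing, using a plain datetime in place of a real `Database.Timestamp` and ignoring the field-type checks:

import datetime

raw = datetime.datetime(2008, 5, 17, 0, 0, 0, 0)   # illustrative column value

if raw.year == 1900 and raw.month == raw.day == 1:
    value = raw.time()    # 1900-01-01 sentinel -> the column held only a time
elif raw.hour == raw.minute == raw.second == raw.microsecond == 0:
    value = raw.date()    # all time parts zero -> the column held only a date
else:
    value = raw           # keep the full datetime

print(value)              # 2008-05-17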