sys.stdout.flush

Here are examples of the Python API sys.stdout.flush taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
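
Before the project examples, here is a minimal, self-contained sketch of the typical pattern (it is illustrative only and not taken from any of the projects below; the show_progress helper is a made-up name). stdout is usually line-buffered when attached to a terminal but block-buffered when redirected to a file or pipe, so partial progress output written without a newline may not appear until the buffer fills unless sys.stdout.flush() is called explicitly.

import sys
import time

def show_progress(steps=5, delay=0.2):
    """Print one dot per step, flushing so progress is visible immediately."""
    for _ in range(steps):
        sys.stdout.write('.')
        sys.stdout.flush()  # push the partial line through any buffering now
        time.sleep(delay)
    sys.stdout.write(' done\n')

if __name__ == '__main__':
    show_progress()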

200 Examples

Example 1

Project: CumulusCI
Source File: package_upload_ss.py
View license
    def build_package(self, build_name):
        """ Builds a managed package by calling SauceLabs via Selenium to click the Upload button """ 
        # Update Status
        print 'Starting browser'
        sys.stdout.flush()

        try:
            self.driver = self.get_selenium()
        except:
            print "Sleeping 5 more seconds to try again.  Last attempt to connect to Selenium failed"
            sleep(5)
            self.driver = self.get_selenium()

        self.driver.implicitly_wait(90) # seconds

        # Load the packages list page
        self.driver.get('%s/0A2' % self.instance_url)

        # Update Status
        print 'Loaded package listing page'
        sys.stdout.flush()

        # Click the link to the package
        self.driver.find_element_by_xpath("//th[contains(@class,'dataCell')]/a[text()='%s']" % self.package).click()

        # Update Status
        print 'Loaded package page'
        sys.stdout.flush()

        # Click the Upload button to open the upload form
        self.driver.find_element_by_xpath("//input[@class='btn' and @value='Upload']").click()

        # Update Status
        print 'Loaded Upload form'
        sys.stdout.flush()

        # Populate and submit the upload form to create a beta managed package
        name_input = self.driver.find_element_by_id('ExportPackagePage:UploadPackageForm:PackageDetailsPageBlock:PackageDetailsBlockSection:VersionInfoSectionItem:VersionText')
        name_input.clear()
        name_input.send_keys(build_name)
        self.driver.find_element_by_id('ExportPackagePage:UploadPackageForm:PackageDetailsPageBlock:PackageDetailsPageBlockButtons:bottom:upload').click()

        # Update Status
        print 'Upload Submitted'
        sys.stdout.flush()

        # Monitor the package upload progress
        retry_count = 0
        last_status = None
        while True:
            try:
                status_message = self.driver.find_element_by_css_selector('.messageText').text
            except selenium.common.exceptions.StaleElementReferenceException:
                # These come up, possibly if you catch the page in the middle of updating the text via javascript
                sleep(1)
                continue
            except selenium.common.exceptions.NoSuchElementException:
                # These come up, possibly if you catch the page in the middle of updating the text via javascript
                if retry_count > 15:
                    print ".messageText not found after 15 retries"
                    break
                sleep(1)
                retry_count += 1
                continue

            retry_count = 0

            if status_message.startswith('Upload Complete'):
                # Update Status
                print status_message
                sys.stdout.flush()
    
                # Get the version number and install url
                version = self.driver.find_element_by_xpath("//th[text()='Version Number']/following-sibling::td/span").text
                install_url = self.driver.find_element_by_xpath("//a[contains(@name, ':pkgInstallUrl')]").get_attribute('href')
            
                self.version = version
                self.install_url = install_url
    
                break

            if status_message.startswith('Upload Failed'):
                print status_message
                sys.stdout.flush()
                break 

            # Update Status
            if status_message != last_status:
                print status_message
                sys.stdout.flush()
            last_status = status_message

            sleep(1)

        self.driver.quit()    

Example 2

View license
def setup():
    # Intro
    print ''
    print 'Welcome to the LittleBits Hue Controller Setup!'
    print 'This program is open source, so feel free to hack it.'
    print '(c) 2014 Jeremy Blum, Blum Idea Labs (www.jeremyblum.com)'

    print ''
    print 'Follow the prompts. If you need to change your setup in the future, just run this script in setup mode again.'
    print 'You can also manually edit the config file that this setup script will generate.'

    # Hue Hub IP Address
    print ''
    print 'We need to be able to communicate with your Hue lighting hub.'
    print 'If it\'s not already, consider setting your hub to a static IP, or a reserved DHCP IP address.'
    valid_IP = False
    hub_found = False
    while not valid_IP or not hub_found:
        ip = raw_input('Enter the IPv4 Address of your hub (ie. 192.168.0.150): ')
        if is_valid_ipv4(ip):
            valid_IP = True
            print 'Now, go press the "connect" button on the top of your hub'
            raw_input('Once you\'ve done that, hit enter.')
            print 'Searching for Hub at ' + ip + '...',
            sys.stdout.flush()
            try:
                bridge = Bridge(ip)
                bridge.connect()
            except:
                print 'Failed!'
                print 'A Hue Bridge could not be found at that address. Try again.'
            else:
                print 'Found!'
                hub_found = True
        else:
            print 'IP Address is invalid.'

    # Light Choice
    print ''
    print 'Now, we need to choose what lights this will control.'
    print 'Go apply power only to the lights you want this to control.'
    print 'Switch off, unplug, or unscrew Hue lights that you DON\'T want to control.'
    raw_input('Press enter once you\'ve done that...')
    print 'Allowing 10 seconds for the hue api to refresh...',
    sys.stdout.flush()
    time.sleep(10)
    print 'Done.'
    light_ids = hue_get_active_light_ids(bridge)
    light_names = hue_get_light_names(bridge, light_ids)
    group_id = hue_get_group_id(bridge, light_ids)
    print 'Great, lighting group ' + str(group_id) + ' has been added.'
    print 'We\'ll be controlling lights with these IDs/Names:'
    for light_id, light_name in zip(light_ids, light_names):
            print 'Light ID: ' + str(light_id) + ' - ' + light_name
    # TODO: Add some error checking (list length zero, for example)

    # Saving Config
    print ''
    print 'Writing setup info to config file...',
    sys.stdout.flush()
    with open(os.path.dirname(os.path.abspath(__file__)) + "/config.ini", 'w') as f:
            write_config_header(f)
            if not config.has_section('LittleBits'): config.add_section('LittleBits')
            if not config.has_option('LittleBits', 'baud_rate'): config.set('LittleBits', 'baud_rate', baud_rate)

            if not config.has_section('PhilipsHue'): config.add_section('PhilipsHue')
            config.set('PhilipsHue', 'bridge_ip', ip)
            config.set('PhilipsHue', 'group_id',  group_id)
            if not config.has_option('PhilipsHue', 'default_bri'):    config.set('PhilipsHue', 'default_bri',    default_bri)
            if not config.has_option('PhilipsHue', 'default_mood'):   config.set('PhilipsHue', 'default_mood',   default_mood)
            if not config.has_option('PhilipsHue', 'default_tt_sec'): config.set('PhilipsHue', 'default_tt_sec', default_tt_sec)

            if not config.has_section('HueMoods'): config.add_section('HueMoods')
            if not config.has_option('HueMoods', '0'): config.set('HueMoods', '0', mood0)
            if not config.has_option('HueMoods', '1'): config.set('HueMoods', '1', mood1)
            if not config.has_option('HueMoods', '2'): config.set('HueMoods', '2', mood2)
            if not config.has_option('HueMoods', '3'): config.set('HueMoods', '3', mood3)
            if not config.has_option('HueMoods', '4'): config.set('HueMoods', '4', mood4)
            if not config.has_option('HueMoods', '5'): config.set('HueMoods', '5', mood5)
            if not config.has_option('HueMoods', '6'): config.set('HueMoods', '6', mood6)
            if not config.has_option('HueMoods', '7'): config.set('HueMoods', '7', mood7)
            if not config.has_option('HueMoods', '8'): config.set('HueMoods', '8', mood8)
            if not config.has_option('HueMoods', '9'): config.set('HueMoods', '9', mood9)

            config.write(f)
    print 'Done!'

    # Make script run at system boot in background
    print ''
    print 'Setting up cron service to launch the service at boot...',
    sys.stdout.flush()
    cron = CronTab(user=True)
    cron.remove_all(comment='littlebits')
    cron_command = os.path.abspath(__file__)
    job = cron.new(command=cron_command,comment='littlebits')
    job.enable()
    job.every_reboot()
    cron.write()
    print 'Done!'

    print ''
    print 'Setup is now complete. The listening service will launch automatically at system boot.'
    print 'You can test it interactively now by running this script without the -s argument.'

Example 3

Project: crunch.io-dashboard
Source File: models.py
View license
    def launch(self,msg=''):
        """
        In order to launch a ClusterInstance the following requirements must be
        satisfied:
            * A snapshot of the home volume must exist and be shared with the
              launching user (this should have been created at the
              ClusterTemplate creation time)
            * The user has AWS credentials including an ssh keypair.
            * A valid ClusterTemplate has been created.
        """
        from boto.ec2.connection import EC2Connection
        import scwrapper
        import random
        import time
        import datetime
        # FIXME: Add timestamps to logs.

        # Assigning Cluster Parameters
        aws_key_id     = self.cluster_template.user_profile.awscredential.aws_key_id
        aws_secret_key = self.cluster_template.user_profile.awscredential.aws_secret_key
        is_demo        = self.cluster_template.is_demo

        if ( not aws_key_id ) or ( not aws_secret_key ):
            raise Exception('AwsCredentialError')

        # Randomly selecting a us-east-1{a,b,c,d} availability zone
        # Maybe someday Amazon will give us capacity ideas
        availability_zone = 'us-east-1' + random.choice(('a','b','c','d'))

        # The availability zone needs to be saved for later actions on this
        # ClusterInstance
        self.availability_zone = availability_zone
        self.save()

        # Get latest_snapshot_id
        # TODO: When we support multiple disks, do this for all snapshots
        home_disk = self.cluster_template.disk_set.filter(name='Home')[0]
        latest_snapshot_id = home_disk.latest_snapshot_id
        size = int(home_disk.size)

        if is_demo:
            time.sleep(10)
            home_disk.home_volume_id = 'vol-aaaa1111'
            home_disk.save()
            self.cluster_template.status = 'running'
            self.cluster_template.save()
            print "DEMO: Launching cluster %s in availability zone %s" % \
                    ('demo-cluster', self.availability_zone)
            sys.stdout.flush()
        else:
            # create volume from snapshot in availability_zone
            print "Creating EBS volume from snapshot: %s" % latest_snapshot_id
            sys.stdout.flush()
            conn = EC2Connection(str(aws_key_id), str(aws_secret_key))
            volume = conn.create_volume( size, availability_zone, latest_snapshot_id)
            home_volume_id = volume.id

            # The home_volume_id needs to be saved for later.
            home_disk.home_volume_id = home_volume_id
            home_disk.save()

            star_cluster = scwrapper.Cluster(
                    self.cluster_template,
                    self.availability_zone,
                    )
            print "Launching cluster %s in availability zone %s" % \
                    (star_cluster.cluster_name, self.availability_zone)
            sys.stdout.flush()
            star_cluster.launch()
            sys.stdout.flush()
            self.cluster_template.status = 'running'
            self.cluster_template.save()

            print "Cluster started, saving nodes"
            sys.stdout.flush()
            
            # A new object must be created to get the updated node information.
            running_cluster = scwrapper.Cluster(
                    self.cluster_template,
                    self.availability_zone,
                    )

            # Create the Ec2Instance (Node) objects
            for node in running_cluster.sc.nodes:
                print "Saving node: %s, %s, %s" % ( 
                        node.alias,
                        node.ip_address,
                        node.id
                        )
                sys.stdout.flush()
                instance = Ec2Instance(
                    cluster_instance = self,
                    instance_type    = Ec2InstanceType.objects.filter(api_name = node.instance_type)[0],
                    alias            = node.alias,
                    arch             = node.arch,
                    instance_id      = node.id,
                    image_id         = node.image_id,
                    launch_time = datetime.datetime.strptime(
                        node.launch_time,
                        "%Y-%m-%dT%H:%M:%S.000Z"
                        ),
                    placement          = node.placement,
                    ip_address         = node.ip_address,
                    dns_name           = node.dns_name,
                    private_ip_address = node.private_ip_address,
                    public_dns_name    = node.public_dns_name,
                    state              = node.state
                )
                instance.save()
                print "Saved node: %s" % node.alias
                sys.stdout.flush()

            print "Launching of cluster completed."
            sys.stdout.flush()
 
        return "finished"

Example 4

Project: pyNastran
Source File: run_mapping.py
View license
def run_mapping():
    required_inputs = load_inputs()
    structural_call = required_inputs['structural_call']
    isubcase = required_inputs['isubcase']

    configpath = required_inputs['configpath']
    workpath = required_inputs['workpath']

    print("structural_call = %r" % structural_call)

    # load mapping
    cart3dLoads = os.path.join(workpath, 'Cart3d_35000_0.825_10_0_0_0_0.i.triq')
    bdfModel = os.path.join(configpath, 'aeroModel_mod.bdf')
    bdfModelOut = os.path.join(workpath, 'fem_loads_3.bdf')
    # mappingMatrix.new.out - stored in workpath

    # deflection mapping
    cart3dGeom = os.path.join(configpath, 'Cart3d_bwb.i.tri')
    cart3dGeom2 = os.path.join(workpath, 'Components.i.tri')
    bdf = os.path.join(workpath, 'fem3.bdf')
    #op2 = os.path.join(workpath, 'fem3.op2')
    f06 = os.path.join(workpath, 'fem3.f06')

    assert os.path.exists(bdf), '%r doesnt exist' % bdf
    assert os.path.exists(bdfModel), '%r doesnt exist' % bdfModel
    assert os.path.exists(cart3dGeom), '%r doesnt exist' % cart3dGeom

    os.chdir(workpath)
    copy_file(cart3dGeom, 'Components.i.tri')

    node_list = [
        20037, 21140, 21787, 21028, 1151, 1886, 2018, 1477, 1023, 1116, 1201,
        1116, 1201, 1828, 2589, 1373, 1315, 1571, 1507, 1532, 1317, 1327, 2011,
        1445, 2352, 1564, 1878, 1402, 1196, 1234, 1252, 1679, 1926, 1274, 2060,
        2365, 21486, 20018, 20890, 20035, 1393, 2350, 1487, 1530, 1698, 1782
    ]
    with open('convergeDeflections.out', 'ab') as outfile:
        max_aero_deflection_old = 0.
        niterations = 30
        #icart = 1
        for i in range(1, niterations):
            strI = '_' + str(i)
            assert os.path.exists('Components.i.tri')
            #if i==iCart:
            if 0:
                # run cart3d
                log.info("---running Cart3d #%s---" % i)
                sys.stdout.flush()

                # runs cart3d.i.tri, makes Components.i.triq
                fail_flag = os.system('./COMMAND > command.out')
                assert fail_flag == 0, 'Cart3d ./COMMAND failed on iteration #%s' % i
                move_file('Components.i.triq', cart3dLoads)
                copy_file(cart3dLoads, cart3dLoads + strI)
                copy_file('forces.dat', 'forces.dat' + strI)
                copy_file('moments.dat', 'moments.dat' + strI)
                copy_file('loadsCC.dat', 'loadsCC.dat' + strI)
                copy_file('history.dat', 'history.dat' + strI)
                os.remove('Components.i.tri') # verifies new Components.i.tri gets created
                sys.stdout.flush()

            # map loads
            run_map_loads(required_inputs, cart3dLoads, bdfModel, bdfModelOut)  # maps loads
            copy_file(bdfModelOut, bdfModelOut + strI)

            # run nastran
            log.info("---running Nastran #%s---" % i)
            sys.stdout.flush()
            # runs fem3.bdf with fem_loads_3.bdf
            #fail_flag = os.system('nastran scr=yes bat=no fem3.bdf')
            #assert fail_flag == 0,'nastran failed on iteration #%s' % i
            #copy_file('fem3.op2', 'fem3.op2' + strI)
            copy_file('fem3.f06', 'fem3.f06' + strI)

            # map deflections
            (wA, wS) = run_map_deflections(node_list, bdf, f06, cart3dGeom, cart3dGeom2, log=log)
            #(wA, wS) = run_map_deflections(nodeList, bdf, op2, cart3dGeom, cart3dGeom2, log=log)
            assert os.path.exists('Components.i.tri')

            # cleans up fem_loads.bdf
            os.remove(bdfModelOut)
            #if 0:
                # disabled b/c nastran isn't on this computer
                #os.remove(op2) # verifies new fem3.op2 was created
                #os.remove(f06) # verifies new fem3.f06 was created

            # post-processing
            (max_aero_nid, max_aero_deflection) = max_dict(wA)
            max_structural_nid = '???'
            max_aero_deflection = wA[max_aero_nid]
            max_structural_deflection = max(wS)[0, 0]
            log.info("AERO      - i=%s max_aero_nid=%s max_aero_deflection=%s"   % (
                i, max_aero_nid, max_aero_deflection))
            log.info("STRUCTURE - i=%s max_structural_nid=%s max_structural_deflection=%s"   % (
                i, max_structural_nid, max_structural_deflection))
            outfile.write("AERO      - i=%s max_aero_nid=%s max_aero_deflection=%s\n" % (
                i, max_aero_nid, max_aero_deflection))
            outfile.write("STRUCTURE - i=%s max_structural_nid=%s max_structural_deflection=%s\n" % (
                i, max_structural_nid, max_structural_deflection))

            msg = '\n'+'*' * 80 + '\n'
            msg += 'finished iteration #%s\n' % (i)
            msg += '*' * 80 + '\n'
            log.info(msg)

            if allclose(max_aero_deflection, max_aero_deflection_old, atol=0.001):
                break
            max_aero_deflection_old = copy.deepcopy(max_aero_deflection)
            #icart += 1
            sys.stdout.flush()

    log.info('---finished runMapping.py---')

Example 5

Project: oh-brother
Source File: oh-brother.py
View license
def update_firmware(cat, version):
  global password

  print 'Updating %s version %s' % (cat, version)

  # Build XML request info
  xml = ET.ElementTree(ET.fromstring(reqInfo))

  # At least for MFC-J4510DW M1405200717:EFAC (see Internet dumps)
  # and MFC-J4625DW,
  # this element's value is *not* equal to per-firmware cat[egory] value
  # (a "MAIN"-deviating "FIRM" in these cases!),
  # but rather a *fixed* "MAIN" value which is a completely unrelated item,
  # thus I assume this to model-unconditionally have been a BUG
  # (which causes a failure response of the web service request).
  #xml.find('FIRMUPDATETOOLINFO/FIRMCATEGORY').text = cat
  xml.find('FIRMUPDATETOOLINFO/FIRMCATEGORY').text = 'MAIN'

  modelInfo = xml.find('FIRMUPDATEINFO/MODELINFO')
  modelInfo.find('SELIALNO').text = serial
  modelInfo.find('NAME').text = model
  modelInfo.find('SPEC').text = spec

  firm = modelInfo.find('FIRMINFO/FIRM')
  ET.SubElement(firm, 'ID').text = cat
  ET.SubElement(firm, 'VERSION').text = version

  requestInfo = ET.tostring(xml.getroot(), encoding = 'utf8')

  if debug_dump_web_service_request_content:
    print 'request: %s' % requestInfo


  # Request firmware data
  url = 'https://firmverup.brother.co.jp/kne_bh7_update_nt_ssl/ifax2.asmx/' + \
      'fileUpdate'
  hdrs = {'Content-Type': 'text/xml'}

  print 'Looking up printer firmware info at vendor server...'
  sys.stdout.flush()

  import urllib2
  req = urllib2.Request(url, requestInfo, hdrs)
  response = urllib2.urlopen(req)
  response = response.read()

  print 'done'

  if debug_dump_web_service_response_content:
    print 'response: %s' % response

  # Parse response
  xml = ET.fromstring(response)

  if verbose: print_pretty(xml)

  # Check version
  versionCheck = xml.find('FIRMUPDATEINFO/VERSIONCHECK')
  if versionCheck is not None and versionCheck.text == '1':
    print 'Firmware already up to date'
    return


  # Get firmware URL
  firmwareURL = xml.find('FIRMUPDATEINFO/PATH')
  if firmwareURL is None:
    print 'No firmware update info path found'
    sys.exit(1)
  firmwareURL = firmwareURL.text
  filename = firmwareURL.split('/')[-1]


  # Download firmware
  f = open(filename, 'w')

  print 'Downloading firmware file %s from vendor server...' % filename
  sys.stdout.flush()

  req = urllib2.Request(firmwareURL)
  response = urllib2.urlopen(req)

  while True:
      block = response.read(102400)
      if not block: break
      f.write(block)
      sys.stdout.write('.')
      sys.stdout.flush()

  print 'done'
  f.close()

  if show_firmware_upgrade_safety_prompt:
    print 'About to upload the firmware to printer.'
    print 'This is a dangerous action since it is potentially destructive.'
    print 'Thus please double-check / review to ensure that:'
    print '- firmware file version is compatible with your hardware'
    print '- network connection is maximally reliable (strongly prefer wired connection to WLAN)'
    print '- power supply is maximally reliable (may be achieved by using a UPS)'
    raw_input("Press Ctrl-C to prevent firmware upgrade, or possibly Enter to continue...")

  # Get printer password
  if password is None:
    import getpass
    print
    password = getpass.getpass('Enter printer admin password: ')


  # Upload firmware to printer
  from ftplib import FTP

  print 'Now uploading firmware to printer (DO NOT REMOVE POWER!)...'
  sys.stdout.flush()

  ftp = FTP(ip, user = password) # Yes send password as user
  ftp.storbinary('STOR ' + filename, open(filename, 'r'))
  ftp.quit()

  print 'done'

  print
  print 'Wait for printer to finish updating and reboot before continuing.'
  raw_input("Press Enter to continue...")

Example 6

Project: AI_Reader
Source File: build_image_data.py
View license
def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
                               texts, labels, num_shards):
  """Processes and saves list of images as TFRecord in 1 thread.

  Args:
    coder: instance of ImageCoder to provide TensorFlow image coding utils.
    thread_index: integer, unique batch to run index is within [0, len(ranges)).
    ranges: list of pairs of integers specifying ranges of each batches to
      analyze in parallel.
    name: string, unique identifier specifying the data set
    filenames: list of strings; each string is a path to an image file
    texts: list of strings; each string is human readable, e.g. 'dog'
    labels: list of integer; each integer identifies the ground truth
    num_shards: integer number of shards for this data set.
  """
  # Each thread produces N shards where N = int(num_shards / num_threads).
  # For instance, if num_shards = 128, and the num_threads = 2, then the first
  # thread would produce shards [0, 64).
  num_threads = len(ranges)
  assert not num_shards % num_threads
  num_shards_per_batch = int(num_shards / num_threads)

  shard_ranges = np.linspace(ranges[thread_index][0],
                             ranges[thread_index][1],
                             num_shards_per_batch + 1).astype(int)
  num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0]

  counter = 0
  for s in xrange(num_shards_per_batch):
    # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
    shard = thread_index * num_shards_per_batch + s
    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
    output_file = os.path.join(FLAGS.output_directory, output_filename)
    writer = tf.python_io.TFRecordWriter(output_file)

    shard_counter = 0
    files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int)
    for i in files_in_shard:
      filename = filenames[i]
      label = labels[i]
      text = texts[i]

      image_buffer, height, width = _process_image(filename, coder)

      example = _convert_to_example(filename, image_buffer, label,
                                    text, height, width)
      writer.write(example.SerializeToString())
      shard_counter += 1
      counter += 1

      if not counter % 1000:
        print('%s [thread %d]: Processed %d of %d images in thread batch.' %
              (datetime.now(), thread_index, counter, num_files_in_thread))
        sys.stdout.flush()

    print('%s [thread %d]: Wrote %d images to %s' %
          (datetime.now(), thread_index, shard_counter, output_file))
    sys.stdout.flush()
    shard_counter = 0
  print('%s [thread %d]: Wrote %d images to %d shards.' %
        (datetime.now(), thread_index, counter, num_files_in_thread))
  sys.stdout.flush()

Example 7

Project: models
Source File: swivel.py
View license
  def __init__(self, config):
    """Construct graph for dmc."""
    self._config = config

    # Create paths to input data files
    print 'Reading model from:', config.input_base_path
    sys.stdout.flush()
    count_matrix_files = glob.glob(config.input_base_path + '/shard-*.pb')
    row_sums_path = config.input_base_path + '/row_sums.txt'
    col_sums_path = config.input_base_path + '/col_sums.txt'

    # Read marginals
    row_sums = read_marginals_file(row_sums_path)
    col_sums = read_marginals_file(col_sums_path)

    self.n_rows = len(row_sums)
    self.n_cols = len(col_sums)
    print 'Matrix dim: (%d,%d) SubMatrix dim: (%d,%d) ' % (
        self.n_rows, self.n_cols, config.submatrix_rows, config.submatrix_cols)
    sys.stdout.flush()
    self.n_submatrices = (self.n_rows * self.n_cols /
                          (config.submatrix_rows * config.submatrix_cols))
    print 'n_submatrices: %d' % (self.n_submatrices)
    sys.stdout.flush()

    # ===== CREATE VARIABLES ======

    with tf.device('/cpu:0'):
      # embeddings
      self.row_embedding = embeddings_with_init(
          embedding_dim=config.embedding_size,
          vocab_size=self.n_rows,
          name='row_embedding')
      self.col_embedding = embeddings_with_init(
          embedding_dim=config.embedding_size,
          vocab_size=self.n_cols,
          name='col_embedding')
      tf.histogram_summary('row_emb', self.row_embedding)
      tf.histogram_summary('col_emb', self.col_embedding)

      matrix_log_sum = math.log(np.sum(row_sums) + 1)
      row_bias_init = [math.log(x + 1) for x in row_sums]
      col_bias_init = [math.log(x + 1) for x in col_sums]
      self.row_bias = tf.Variable(row_bias_init,
                                  trainable=config.trainable_bias)
      self.col_bias = tf.Variable(col_bias_init,
                                  trainable=config.trainable_bias)
      tf.histogram_summary('row_bias', self.row_bias)
      tf.histogram_summary('col_bias', self.col_bias)

    # ===== CREATE GRAPH =====

    # Get input
    with tf.device('/cpu:0'):
      global_row, global_col, count = count_matrix_input(
          count_matrix_files, config.submatrix_rows, config.submatrix_cols)

      # Fetch embeddings.
      selected_row_embedding = tf.nn.embedding_lookup(self.row_embedding,
                                                      global_row)
      selected_col_embedding = tf.nn.embedding_lookup(self.col_embedding,
                                                      global_col)

      # Fetch biases.
      selected_row_bias = tf.nn.embedding_lookup([self.row_bias], global_row)
      selected_col_bias = tf.nn.embedding_lookup([self.col_bias], global_col)

    # Multiply the row and column embeddings to generate predictions.
    predictions = tf.matmul(
        selected_row_embedding, selected_col_embedding, transpose_b=True)

    # These binary masks separate zero from non-zero values.
    count_is_nonzero = tf.to_float(tf.cast(count, tf.bool))
    count_is_zero = 1 - tf.to_float(tf.cast(count, tf.bool))

    objectives = count_is_nonzero * tf.log(count + 1e-30)
    objectives -= tf.reshape(selected_row_bias, [config.submatrix_rows, 1])
    objectives -= selected_col_bias
    objectives += matrix_log_sum

    err = predictions - objectives

    # The confidence function scales the L2 loss based on the raw co-occurrence
    # count.
    l2_confidence = (config.confidence_base + config.confidence_scale * tf.pow(
        count, config.confidence_exponent))

    l2_loss = config.loss_multiplier * tf.reduce_sum(
        0.5 * l2_confidence * err * err * count_is_nonzero)

    sigmoid_loss = config.loss_multiplier * tf.reduce_sum(
        tf.nn.softplus(err) * count_is_zero)

    self.loss = l2_loss + sigmoid_loss

    tf.scalar_summary("l2_loss", l2_loss)
    tf.scalar_summary("sigmoid_loss", sigmoid_loss)
    tf.scalar_summary("loss", self.loss)

    # Add optimizer.
    self.global_step = tf.Variable(0, name='global_step')
    opt = tf.train.AdagradOptimizer(config.learning_rate)
    self.train_op = opt.minimize(self.loss, global_step=self.global_step)
    self.saver = tf.train.Saver(sharded=True)

Example 8

Project: roboto
Source File: mecsolve.py
View license
def mecrange(figtype):
    scale = 130
    eps_prologue(50, 110, 570, 630)
    print -50, 0, 'translate'
    print '0.5 setlinewidth'
    thlmin, thlmax = -pi/2, 2.4
    thrmin, thrmax = -2.2, pi / 2 + .2
    print 306 + scale * thlmin, 396, 'moveto', 306 + scale * thlmax, 396, 'lineto stroke'
    print 306, 396 + scale * thrmin, 'moveto', 306, 396 + scale * thrmax, 'lineto stroke'

    print 'gsave [2] 0 setdash'
    print 306, 396 + scale * pi / 2, 'moveto'
    print 306 + scale * thlmax, 396 + scale * pi / 2, 'lineto stroke'
    print 306 + scale * thlmin, 396 - scale * pi / 2, 'moveto'
    print 306 + scale * thlmax, 396 - scale * pi / 2, 'lineto stroke'
    print 306 + scale * pi / 2, 396 + scale * thrmin, 'moveto'
    print 306 + scale * pi / 2, 396 + scale * thrmax, 'lineto stroke'
    print 'grestore'

    print 306 + 3, 396 + scale * thrmax - 10, 'moveto'
    print '/Symbol 12 selectfont (q) show'
    print 0, -2, 'rmoveto'
    print '/Times-Italic 9 selectfont (right) show'

    print 306 - 18, 396 + scale * pi / 2 - 4, 'moveto'
    print '/Symbol 12 selectfont (p/2) show'
    print 306 + scale * 2.2, 396 - scale * pi / 2 + 2, 'moveto'
    print '/Symbol 12 selectfont (-p/2) show'

    print 306 + scale * pi/2 + 2, 396 + scale * thrmax - 10, 'moveto'
    print '/Symbol 12 selectfont (p/2) show'

    print 306 + scale * 2.2, 396 + 6, 'moveto'
    print '/Symbol 12 selectfont (q) show'
    print 0, -2, 'rmoveto'
    print '/Times-Italic 9 selectfont (left) show'

    print '/ss 0.8 def'
    print '/circle { ss 0 moveto currentpoint exch ss sub exch ss 0 360 arc } bind def'
    cmd = 'moveto'
    for i in range(0, 201):
        th = (i * .005 - .75 )* pi
        rmin = 1.5
        rmax = 2.5
        for j in range(20):
            r = (rmin + rmax) * .5
            th0 = r * cos(th)
            th1 = r * sin(th)
            if findmec(th0, th1) == None:
                rmax = r
            else:
                rmin = r
        r = (rmin + rmax) * .5
        th0 = r * cos(th)
        th1 = r * sin(th)
        print '%', r, th, th0, th1
        print 306 + scale * th0, 396 + scale * th1, cmd
        cmd = 'lineto'
        sys.stdout.flush()
    print 'stroke'
    sys.stdout.flush()
        
    for i in range(-11, 12):
        for j in range(-11, i + 1):
            th0, th1 = i * .196, j * .196
            print '%', th0, th1
            params = findmec(th0, th1)
            if params != None:
                sm, sp = params
                print 'gsave'
                print 306 + scale * th0, 396 + scale * th1, 'translate'
                uscale = 22
                k0, lam1, lam2 = justify_mec(sm, sp)
                xys, cost, x, y, th = run_elastica(-.5, .5, k0, lam1, lam2)
                cmdm = 'moveto'
                dx = xys[-1][0] - xys[0][0]
                dy = xys[-1][1] - xys[0][1]
                ch = hypot(dx, dy)
                chth = atan2(dy, dx)
                if figtype == 'mecrange':
                    print 'circle fill'
                    s = uscale * sin(chth) / ch
                    c = uscale * cos(chth) / ch
                    h = -xys[0][0] * s + xys[0][1] * c
                    for xy in xys:
                        print xy[0] * c + xy[1] * s, h + xy[0] * s - xy[1] * c, cmdm
                        cmdm = 'lineto'
                elif figtype == 'mecrangek':
                    ds = 1. / (len(xys) - 1)
                    sscale = 13. / ch
                    kscale = 3 * ch
                    print 'gsave .25 setlinewidth'
                    print sscale * -.5, 0, 'moveto', sscale, 0, 'rlineto stroke'
                    print 'grestore'
                    for l in range(len(xys)):
                        print sscale * (ds * l - 0.5), kscale * xys[l][2], cmdm
                        cmdm = 'lineto'
                print 'stroke'
                print 'grestore'
            sys.stdout.flush()
    print 'showpage'
    eps_trailer()

Example 9

Project: COMMIT
Source File: core.py
View license
    def save_results( self, path_suffix = None ) :
        """Save the output (coefficients, errors, maps etc).

        Parameters
        ----------
        path_suffix : string
            Text to be appended to "Results" to create the output path (default : None)
        """
        if self.x is None :
            raise RuntimeError( 'Model not fitted to the data; call "fit()" first.' )

        RESULTS_path = 'Results_' + self.model.id
        if path_suffix :
            self.set_config('path_suffix', path_suffix)
            RESULTS_path = RESULTS_path +'_'+ path_suffix

        print '\n-> Saving results to "%s/*":' % RESULTS_path
        tic = time.time()

        # create folder or delete existing files (if any)
        RESULTS_path = pjoin( self.get_config('TRACKING_path'), RESULTS_path )
        if not exists( RESULTS_path ) :
            makedirs( RESULTS_path )
        else :
            for f in glob.glob( pjoin(RESULTS_path,'*') ) :
                remove( f )
        self.set_config('RESULTS_path', RESULTS_path)

        # Configuration and results
        print '\t* configuration and results...',
        sys.stdout.flush()
        nF = self.DICTIONARY['IC']['nF']
        nE = self.DICTIONARY['EC']['nE']
        nV = self.DICTIONARY['nV']
        # x is the x of the original problem
        # self.x is the x preconditioned
        # x_map is the x used to generate the intra-cellular, extra-cellular and isotropic maps (not divided by norm of the fiber)
        if self.get_config('doNormalizeKernels') :
            # renormalize the coefficients
            norm1 = np.repeat(self.KERNELS['wmr_norm'],nF)
            norm2 = np.repeat(self.KERNELS['wmh_norm'],nE)
            norm3 = np.repeat(self.KERNELS['iso_norm'],nV)
            norm_fib = np.kron(np.ones(self.KERNELS['wmr'].shape[0]), self.DICTIONARY['TRK']['norm'])
            x_map = self.x / np.hstack( (norm1,norm2,norm3) )
            x = self.x / np.hstack( (norm1*norm_fib,norm2,norm3) )
        else :
            x_map = self.x
            x = self.x
        with open( pjoin(RESULTS_path,'results.pickle'), 'wb+' ) as fid :
            cPickle.dump( [self.CONFIG, self.x, x], fid, protocol=2 )
        print '[ OK ]'

        # Map of voxelwise errors
        print '\t* fitting errors:'

        niiMAP_img = np.zeros( self.get_config('dim'), dtype=np.float32 )
        affine = self.niiDWI.affine if nibabel.__version__ >= '2.0.0' else self.niiDWI.get_affine()
        niiMAP     = nibabel.Nifti1Image( niiMAP_img, affine )
        niiMAP_hdr = niiMAP.header if nibabel.__version__ >= '2.0.0' else niiMAP.get_header()

        y_mea = np.reshape( self.niiDWI_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'], : ].flatten().astype(np.float32), (nV,-1) )
        y_est = np.reshape( self.A.dot(self.x), (nV,-1) ).astype(np.float32)

        print '\t\t- RMSE...',
        sys.stdout.flush()
        tmp = np.sqrt( np.mean((y_mea-y_est)**2,axis=1) )
        niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = tmp
        niiMAP_hdr['cal_min'] = 0
        niiMAP_hdr['cal_max'] = tmp.max()
        nibabel.save( niiMAP, pjoin(RESULTS_path,'fit_RMSE.nii.gz') )
        print ' [ %.3f +/- %.3f ]' % ( tmp.mean(), tmp.std() )

        print '\t\t- NRMSE...',
        sys.stdout.flush()
        tmp = np.sum(y_mea**2,axis=1)
        idx = np.where( tmp < 1E-12 )
        tmp[ idx ] = 1
        tmp = np.sqrt( np.sum((y_mea-y_est)**2,axis=1) / tmp )
        tmp[ idx ] = 0
        niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = tmp
        niiMAP_hdr['cal_min'] = 0
        niiMAP_hdr['cal_max'] = 1
        nibabel.save( niiMAP, pjoin(RESULTS_path,'fit_NRMSE.nii.gz') )
        print '[ %.3f +/- %.3f ]' % ( tmp.mean(), tmp.std() )

        # Map of compartment contributions
        print '\t* voxelwise contributions:'

        print '\t\t- intra-axonal',
        sys.stdout.flush()
        niiMAP_img[:] = 0
        if len(self.KERNELS['wmr']) > 0 :
            offset = nF * self.KERNELS['wmr'].shape[0]
            tmp = x_map[:offset].reshape( (-1,nF) ).sum( axis=0 )
            xv = np.bincount( self.DICTIONARY['IC']['v'], minlength=nV,
                weights=tmp[ self.DICTIONARY['IC']['fiber'] ] * self.DICTIONARY['IC']['len']
            ).astype(np.float32)
            niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv
        nibabel.save( niiMAP, pjoin(RESULTS_path,'compartment_IC.nii.gz') )
        print '[ OK ]'

        print '\t\t- extra-axonal',
        sys.stdout.flush()
        niiMAP_img[:] = 0
        if len(self.KERNELS['wmh']) > 0 :
            offset = nF * self.KERNELS['wmr'].shape[0]
            tmp = x_map[offset:offset+nE*len(self.KERNELS['wmh'])].reshape( (-1,nE) ).sum( axis=0 )
            xv = np.bincount( self.DICTIONARY['EC']['v'], weights=tmp, minlength=nV ).astype(np.float32)
            niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv
        nibabel.save( niiMAP, pjoin(RESULTS_path,'compartment_EC.nii.gz') )
        print '[ OK ]'

        print '\t\t- isotropic',
        sys.stdout.flush()
        niiMAP_img[:] = 0
        if len(self.KERNELS['iso']) > 0 :
            offset = nF * self.KERNELS['wmr'].shape[0] + nE * self.KERNELS['wmh'].shape[0]
            xv = x_map[offset:].reshape( (-1,nV) ).sum( axis=0 )
            niiMAP_img[ self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] ] = xv
        nibabel.save( niiMAP, pjoin(RESULTS_path,'compartment_ISO.nii.gz') )
        print '   [ OK ]'

        print '   [ %.1f seconds ]' % ( time.time() - tic )

Example 10

Project: COMMIT
Source File: core.py
View license
    def load_dictionary( self, path, use_mask = False ) :
        """Load the sparse structure previously created with "trk2dictionary" script.

        Parameters
        ----------
        path : string
            Folder containing the output of the trk2dictionary script (relative to subject path)
        use_mask : boolean
            If False (default) the optimization will be conducted only on the voxels actually
            traversed by tracts. If True, the mask specified in trk2dictionary
            (i.e. "filename_mask" paramater) will be used instead.
            NB: if no mask was specified in trk2dictionary, the "tdi" and
            "mask" masks are equivalent and this parameter is not influent.
        """
        if self.niiDWI is None :
            raise RuntimeError( 'Data not loaded; call "load_data()" first.' )

        tic = time.time()
        print '\n-> Loading the dictionary:'
        self.DICTIONARY = {}
        self.set_config('TRACKING_path', pjoin(self.get_config('DATA_path'),path))

        # load mask
        self.set_config('dictionary_mask', 'mask' if use_mask else 'tdi' )
        mask_filename = pjoin(self.get_config('TRACKING_path'),'dictionary_%s.nii'%self.get_config('dictionary_mask'))
        if not exists( mask_filename ) :
            mask_filename += '.gz'
            if not exists( mask_filename ) :
                raise RuntimeError( 'Dictionary not found. Execute ''trk2dictionary'' script first.' );
        niiMASK = nibabel.load( mask_filename )
        self.DICTIONARY['MASK'] = (niiMASK.get_data() > 0).astype(np.uint8)

        # segments from the tracts
        # ------------------------
        print '\t* segments from the tracts...',
        sys.stdout.flush()

        self.DICTIONARY['IC'] = {}

        self.DICTIONARY['TRK'] = {}

        self.DICTIONARY['TRK']['norm'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_TRK_norm.dict'), dtype=np.float32 )

        self.DICTIONARY['IC']['nF'] = self.DICTIONARY['TRK']['norm'].size

        self.DICTIONARY['IC']['fiber'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_f.dict'), dtype=np.uint32 )

        self.DICTIONARY['IC']['n'] = self.DICTIONARY['IC']['fiber'].size

        vx = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_vx.dict'), dtype=np.uint8 ).astype(np.uint32)
        vy = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_vy.dict'), dtype=np.uint8 ).astype(np.uint32)
        vz = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_vz.dict'), dtype=np.uint8 ).astype(np.uint32)
        self.DICTIONARY['IC']['v'] = vx + self.get_config('dim')[0] * ( vy + self.get_config('dim')[1] * vz )
        del vx, vy, vz

        ox = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_ox.dict'), dtype=np.uint8 ).astype(np.uint16)
        oy = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_oy.dict'), dtype=np.uint8 ).astype(np.uint16)
        self.DICTIONARY['IC']['o'] = oy + 181*ox
        del ox, oy

        self.DICTIONARY['IC']['len'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_IC_len.dict'), dtype=np.float32 )

        self.DICTIONARY['TRK']['len'] = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_TRK_len.dict'), dtype=np.float32 )

        if self.get_config('doNormalizeKernels') :
            # divide the length of each segment by the fiber length so that all the columns of the linear operator will have the same length
            # NB: it works in conjunction with the normalization of the kernels
            sl = self.DICTIONARY['IC']['len']
            tl = self.DICTIONARY['TRK']['norm']
            f  = self.DICTIONARY['IC']['fiber']
            for s in xrange(self.DICTIONARY['IC']['n']) :
                sl[s] /= tl[ f[s] ]

        # reorder the segments based on the "v" field
        idx = np.argsort( self.DICTIONARY['IC']['v'], kind='mergesort' )
        self.DICTIONARY['IC']['v']     = self.DICTIONARY['IC']['v'][ idx ]
        self.DICTIONARY['IC']['o']     = self.DICTIONARY['IC']['o'][ idx ]
        self.DICTIONARY['IC']['fiber'] = self.DICTIONARY['IC']['fiber'][ idx ]
        self.DICTIONARY['IC']['len']   = self.DICTIONARY['IC']['len'][ idx ]
        del idx

        print '[ %d fibers and %d segments ]' % ( self.DICTIONARY['IC']['nF'], self.DICTIONARY['IC']['n'] )

        # segments from the peaks
        # -----------------------
        print '\t* segments from the peaks...',
        sys.stdout.flush()

        self.DICTIONARY['EC'] = {}

        vx = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_vx.dict'), dtype=np.uint8 ).astype(np.uint32)
        vy = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_vy.dict'), dtype=np.uint8 ).astype(np.uint32)
        vz = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_vz.dict'), dtype=np.uint8 ).astype(np.uint32)
        self.DICTIONARY['EC']['v'] = vx + self.get_config('dim')[0] * ( vy + self.get_config('dim')[1] * vz )
        del vx, vy, vz

        self.DICTIONARY['EC']['nE'] = self.DICTIONARY['EC']['v'].size

        ox = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_ox.dict'), dtype=np.uint8 ).astype(np.uint16)
        oy = np.fromfile( pjoin(self.get_config('TRACKING_path'),'dictionary_EC_oy.dict'), dtype=np.uint8 ).astype(np.uint16)
        self.DICTIONARY['EC']['o'] = oy + 181*ox
        del ox, oy

        # reorder the segments based on the "v" field
        idx = np.argsort( self.DICTIONARY['EC']['v'], kind='mergesort' )
        self.DICTIONARY['EC']['v'] = self.DICTIONARY['EC']['v'][ idx ]
        self.DICTIONARY['EC']['o'] = self.DICTIONARY['EC']['o'][ idx ]
        del idx

        print ' [ %d segments ]' % self.DICTIONARY['EC']['nE']

        # isotropic compartments
        # ----------------------
        print '\t* isotropic contributions...',
        sys.stdout.flush()

        self.DICTIONARY['ISO'] = {}

        self.DICTIONARY['nV'] = self.DICTIONARY['MASK'].sum()

        vx, vy, vz = ( self.DICTIONARY['MASK'] > 0 ).nonzero() # [TODO] find a way to avoid using int64 (not necessary and waste of memory)
        vx = vx.astype(np.int32)
        vy = vy.astype(np.int32)
        vz = vz.astype(np.int32)
        self.DICTIONARY['ISO']['v'] = vx + self.get_config('dim')[0] * ( vy + self.get_config('dim')[1] * vz )
        del vx, vy, vz

        # reorder the segments based on the "v" field
        idx = np.argsort( self.DICTIONARY['ISO']['v'], kind='mergesort' )
        self.DICTIONARY['ISO']['v'] = self.DICTIONARY['ISO']['v'][ idx ]
        del idx

        print ' [ %d voxels ]' % self.DICTIONARY['nV']

        # post-processing
        # ---------------
        print '\t* post-processing...',
        sys.stdout.flush()

        # get the indices to extract the VOI as in MATLAB (in place of DICTIONARY.MASKidx)
        idx = self.DICTIONARY['MASK'].ravel(order='F').nonzero()[0]
        self.DICTIONARY['MASK_ix'], self.DICTIONARY['MASK_iy'], self.DICTIONARY['MASK_iz'] = np.unravel_index( idx, self.DICTIONARY['MASK'].shape, order='F' )

        lut = np.zeros( self.get_config('dim'), dtype=np.uint32 ).ravel()
        for i in xrange(idx.size) :
            lut[ idx[i] ] = i
        self.DICTIONARY['IC'][ 'v'] = lut[ self.DICTIONARY['IC'][ 'v'] ]
        self.DICTIONARY['EC'][ 'v'] = lut[ self.DICTIONARY['EC'][ 'v'] ]
        self.DICTIONARY['ISO']['v'] = lut[ self.DICTIONARY['ISO']['v'] ]

        print '         [ OK ]'

        print '   [ %.1f seconds ]' % ( time.time() - tic )

Example 11

Project: nrvr-commander
Source File: javaw.py
View license
    @classmethod
    def now(cls,
            force=False,
            dontDownload=False,
            ticker=True):
        """Download file or use previously downloaded file.
        
        As implemented uses wget.
        That has been a choice of convenience, could be written in Python instead.
        
        force
            whether to force downloading even if apparently downloaded already.
            
            May be useful for programmatically updating at times.
        
        dontDownload
            whether you don't want to start a download, for some reason.
        
        Return file path."""
        simpleFilename = "jre-version-windows-arch.exe"
        downloadDir = ScriptUser.loggedIn.userHomeRelative("Downloads")
        downloadPath = os.path.join(downloadDir, simpleFilename)
        semaphorePath = downloadPath + cls.semaphoreExtenstion
        #
        if os.path.exists(downloadPath) and not force:
            if not os.path.exists(semaphorePath):
                # file exists and not download in progress,
                # assume it is good
                return downloadPath
            else:
                # file exists and download in progress,
                # presumably from another script running in another process or thread,
                # wait for it to complete
                printed = False
                ticked = False
                # check the essential condition, initially and then repeatedly
                while os.path.exists(semaphorePath):
                    if not printed:
                        # first time only printing
                        print "waiting for " + semaphorePath + " to go away on completion"
                        sys.stdout.flush()
                        printed = True
                    if ticker:
                        if not ticked:
                            # first time only printing
                            sys.stdout.write("[")
                        sys.stdout.write(".")
                        sys.stdout.flush()
                        ticked = True
                    time.sleep(5)
                if ticked:
                    # final printing
                    sys.stdout.write("]\n")
                    sys.stdout.flush()
        elif not dontDownload: # it is normal to download
            if not os.path.exists(downloadDir):
                try:
                    os.makedirs(downloadDir)
                except OSError:
                    if os.path.exists(downloadDir): # concurrently made
                        pass
                    else: # failure
                        raise
            #
            # try downloading
            pid = os.getpid()
            try:
                with open(semaphorePath, "w") as semaphoreFile:
                    # create semaphore file
                    semaphoreFile.write("pid=" + str(pid))
                #
                offlineInstallerUrl = cls._currentOfflineInstallerUrl()
                print "starting to download " + offlineInstallerUrl
                if ticker:
                    sys.stdout.write("[.")
                    sys.stdout.flush()
                try:
                    wget = CommandCapture(
                        ["wget",
                         "--quiet",
                         "-O", downloadPath,
                         offlineInstallerUrl],
                        forgoPty=True)
                    if ticker:
                        sys.stdout.write("]")
                        sys.stdout.flush()
                finally:
                    if ticker:
                        sys.stdout.write("\n")
                        sys.stdout.flush()
            except: # apparently a problem
                print "problem downloading " + downloadPath + " from " + offlineInstallerUrl
                raise
            else:
                print "done downloading " + downloadPath
            finally:
                try:
                    # delete semaphore file
                    os.remove(semaphorePath)
                except:
                    pass
        if os.path.exists(downloadPath):
            # file exists now, assume it is good
            return downloadPath
        else:
            # apparently download has failed
            raise IOError("file not found " + downloadPath)

Example 12

Project: udacity-driving-reader
Source File: bagdump.py
View license
def main():
    parser = argparse.ArgumentParser(description='Convert rosbag to images and csv.')
    parser.add_argument('-o', '--outdir', type=str, nargs='?', default='/output',
        help='Output folder')
    parser.add_argument('-i', '--indir', type=str, nargs='?', default='/data',
        help='Input folder where bagfiles are located')
    parser.add_argument('-f', '--img_format', type=str, nargs='?', default='jpg',
        help='Image encode format, png or jpg')
    parser.add_argument('-d', dest='debug', action='store_true', help='Debug print enable')
    parser.set_defaults(debug=False)
    args = parser.parse_args()

    img_format = args.img_format
    base_outdir = args.outdir
    indir = args.indir
    debug_print = args.debug

    bridge = CvBridge()

    include_images = True
    filter_topics = [STEERING_TOPIC, GPS_FIX_TOPIC]
    if include_images:
        filter_topics += CAMERA_TOPICS

    bagsets = find_bagsets(indir, "*.bag", filter_topics)
    for bs in bagsets:
        print("Processing set %s" % bs.name)
        sys.stdout.flush()

        dataset_outdir = os.path.join(base_outdir, "%s" % bs.name)
        left_outdir = get_outdir(dataset_outdir, "left")
        center_outdir = get_outdir(dataset_outdir, "center")
        right_outdir = get_outdir(dataset_outdir, "right")

        camera_cols = ["seq", "timestamp", "width", "height", "frame_id", "filename"]
        camera_dict = defaultdict(list)

        steering_cols = ["seq", "timestamp", "angle", "torque", "speed"]
        steering_dict = defaultdict(list)

        gps_cols = ["seq", "timestamp", "status", "service", "lat", "long", "alt"]
        gps_dict = defaultdict(list)

        bs.write_infos(dataset_outdir)
        readers = bs.get_readers()
        stats_acc = defaultdict(int)

        def _process_msg(topic, msg, stats):
            timestamp = msg.header.stamp.to_nsec()
            if topic in CAMERA_TOPICS:
                outdir = camera_select(topic, (left_outdir, center_outdir, right_outdir))
                if debug_print:
                    print("%s_camera %d" % (topic[1], timestamp))

                results = write_image(bridge, outdir, msg, fmt=img_format)
                results['filename'] = os.path.relpath(results['filename'], dataset_outdir)
                camera2dict(msg, results, camera_dict)
                stats['img_count'] += 1
                stats['msg_count'] += 1

            elif topic == STEERING_TOPIC:
                if debug_print:
                    print("steering %d %f" % (timestamp, msg.steering_wheel_angle))

                steering2dict(msg, steering_dict)
                stats['msg_count'] += 1

            elif topic == GPS_FIX_TOPIC:
                if debug_print:
                    print("gps      %d %d, %d" % (timestamp, msg.latitude, msg.longitude))

                gps2dict(msg, gps_dict)
                stats['msg_count'] += 1

        # no need to cycle through readers in any order for dumping, rip through each one in sequence
        for reader in readers:
            for result in reader.read_messages():
                _process_msg(*result, stats=stats_acc)
                if stats_acc['img_count'] % 1000 == 0 or stats_acc['msg_count'] % 5000 == 0:
                    print("%d images, %d messages processed..." %
                          (stats_acc['img_count'], stats_acc['msg_count']))
                    sys.stdout.flush()

        print("Writing done. %d images, %d messages processed." %
              (stats_acc['img_count'], stats_acc['msg_count']))
        sys.stdout.flush()

        camera_csv_path = os.path.join(dataset_outdir, 'camera.csv')
        camera_df = pd.DataFrame(data=camera_dict, columns=camera_cols)
        camera_df.to_csv(camera_csv_path, index=False)

        steering_csv_path = os.path.join(dataset_outdir, 'steering.csv')
        steering_df = pd.DataFrame(data=steering_dict, columns=steering_cols)
        steering_df.to_csv(steering_csv_path, index=False)

        gps_csv_path = os.path.join(dataset_outdir, 'gps.csv')
        gps_df = pd.DataFrame(data=gps_dict, columns=gps_cols)
        gps_df.to_csv(gps_csv_path, index=False)

        gen_interpolated = True
        if gen_interpolated:
            # A little pandas magic to interpolate steering/gps samples to camera frames
            camera_df['timestamp'] = pd.to_datetime(camera_df['timestamp'])
            camera_df.set_index(['timestamp'], inplace=True)
            camera_df.index.rename('index', inplace=True)
            steering_df['timestamp'] = pd.to_datetime(steering_df['timestamp'])
            steering_df.set_index(['timestamp'], inplace=True)
            steering_df.index.rename('index', inplace=True)
            gps_df['timestamp'] = pd.to_datetime(gps_df['timestamp'])
            gps_df.set_index(['timestamp'], inplace=True)
            gps_df.index.rename('index', inplace=True)

            merged = functools.reduce(lambda left, right: pd.merge(
                left, right, how='outer', left_index=True, right_index=True), [camera_df, steering_df, gps_df])
            merged.interpolate(method='time', inplace=True)

            filtered_cols = ['timestamp', 'width', 'height', 'frame_id', 'filename',
                             'angle', 'torque', 'speed',
                             'lat', 'long', 'alt']
            filtered = merged.loc[camera_df.index]  # back to only camera rows
            filtered.fillna(0.0, inplace=True)
            filtered['timestamp'] = filtered.index.astype('int')  # add back original timestamp integer col
            filtered['width'] = filtered['width'].astype('int')  # cast back to int
            filtered['height'] = filtered['height'].astype('int')  # cast back to int
            filtered = filtered[filtered_cols]  # filter and reorder columns for final output

            interpolated_csv_path = os.path.join(dataset_outdir, 'interpolated.csv')
            filtered.to_csv(interpolated_csv_path, header=True)
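
The loop above prints a progress line every few thousand messages and calls sys.stdout.flush() so the output appears immediately even when stdout is redirected to a file or pipe. A minimal standalone sketch of that periodic-progress pattern (the names here are illustrative, not taken from the project above):

import sys

def process_stream(messages, report_every=5000):
    """Consume an iterable of messages and report progress as we go."""
    count = 0
    for _msg in messages:
        # ... handle the message here ...
        count += 1
        if count % report_every == 0:
            print("%d messages processed..." % count)
            sys.stdout.flush()  # make progress visible even when stdout is block-buffered
    print("Done. %d messages processed." % count)
    sys.stdout.flush()

process_stream(range(20000))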

Example 13

Project: CumulusCI
Source File: package_upload_jenkins.py
View license
    def build_package(self, build_name):
        """ Builds a managed package by calling SauceLabs via Selenium to click the Upload button """ 
        # Update Status
        print 'Starting browser'
        sys.stdout.flush()

        try:
            driver = self.get_selenium()
        except:
            print "Sleeping 5 more seconds to try again.  Last attempt to connect to Selenium failed"
            sleep(5)
            driver = self.get_selenium()

        driver.implicitly_wait(90) # seconds

        # Load the packages list page
        driver.get('%s/0A2' % self.instance_url)

        # Update Status
        print 'Loaded package listing page'
        sys.stdout.flush()

        # Click the link to the package
        driver.find_element_by_xpath("//th[contains(@class,'dataCell')]/a[text()='%s']" % self.package).click()

        # Update Status
        print 'Loaded package page'
        sys.stdout.flush()

        # Click the Upload button to open the upload form
        driver.find_element_by_xpath("//input[@class='btn' and @value='Upload']").click()

        # Update Status
        print 'Loaded Upload form'
        sys.stdout.flush()

        # Populate and submit the upload form to create a beta managed package
        name_input = driver.find_element_by_id('ExportPackagePage:UploadPackageForm:PackageDetailsPageBlock:PackageDetailsBlockSection:VersionInfoSectionItem:VersionText')
        name_input.clear()
        name_input.send_keys(build_name)
        driver.find_element_by_id('ExportPackagePage:UploadPackageForm:PackageDetailsPageBlock:PackageDetailsPageBlockButtons:bottom:upload').click()

        # Update Status
        print 'Upload Submitted'
        sys.stdout.flush()

        # Monitor the package upload progress
        retry_count = 0
        last_status = None
        while True:
            try:
                status_message = driver.find_element_by_css_selector('.messageText').text
            except selenium.common.exceptions.StaleElementReferenceException:
                # These come up, possibly if you catch the page in the middle of updating the text via javascript
                sleep(1)
                continue
            except selenium.common.exceptions.NoSuchElementException:
                # These come up, possibly if you catch the page in the middle of updating the text via javascript
                if retry_count > 15:
                    print ".messageText not found after 15 retries"
                    break
                sleep(1)
                retry_count += 1
                continue

            retry_count = 0

            if status_message.startswith('Upload Complete'):
                # Update Status
                print status_message
                sys.stdout.flush()
    
                # Get the version number and install url
                version = driver.find_element_by_xpath("//th[text()='Version Number']/following-sibling::td/span").text
                install_url = driver.find_element_by_xpath("//a[contains(@name, ':pkgInstallUrl')]").get_attribute('href')
            
                self.version = version
                self.install_url = install_url
    
                break

            if status_message.startswith('Upload Failed'):
                print status_message
                sys.stdout.flush()
                break 

            # Update Status
            if status_message != last_status:
                print status_message
                sys.stdout.flush()
            last_status = status_message

            sleep(1)

        driver.quit()    
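
Example 13 (and the nearly identical Example 14 below) prints a one-line status message and flushes right away so a CI console such as Jenkins shows progress while the Selenium steps block. A small sketch of that print-then-flush convention (the status() helper is illustrative, not part of CumulusCI):

import sys
import time

def status(msg):
    """Print a status line and flush so log viewers see it immediately."""
    print(msg)
    sys.stdout.flush()

status('Starting browser')
time.sleep(1)   # stand-in for a slow Selenium step
status('Loaded package listing page')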

Example 14

Project: CumulusCI
Source File: package_upload.py
View license
    def build_package(self, build_name):
        """ Builds a managed package by calling SauceLabs via Selenium to click the Upload button """ 
        # Update Status
        print 'Starting browser'
        sys.stdout.flush()

        try:
            driver = self.get_selenium()
        except:
            print "Sleeping 5 more seconds to try again.  Last attempt to connect to Selenium failed"
            sleep(5)
            driver = self.get_selenium()

        driver.implicitly_wait(90) # seconds

        # Load the packages list page
        sleep(5) # Not sure why this sleep is necessary, but it seems to be
        driver.get('%s/0A2' % self.instance_url)

        # Update Status
        print 'Loaded package listing page'
        sys.stdout.flush()

        # Click the link to the package
        driver.find_element_by_xpath("//th[contains(@class,'dataCell')]/a[text()='%s']" % self.package).click()

        # Update Status
        print 'Loaded package page'
        sys.stdout.flush()

        # Click the Upload button to open the upload form
        driver.find_element_by_xpath("//input[@class='btn' and @value='Upload']").click()

        # Update Status
        print 'Loaded Upload form'
        sys.stdout.flush()

        # Populate and submit the upload form to create a beta managed package
        name_input = driver.find_element_by_id('ExportPackagePage:UploadPackageForm:PackageDetailsPageBlock:PackageDetailsBlockSection:VersionInfoSectionItem:VersionText')
        name_input.clear()
        name_input.send_keys(build_name)
        driver.find_element_by_id('ExportPackagePage:UploadPackageForm:PackageDetailsPageBlock:PackageDetailsPageBlockButtons:bottom:upload').click()

        # Update Status
        print 'Upload Submitted'
        sys.stdout.flush()

        # Monitor the package upload progress
        retry_count = 0
        last_status = None
        while True:
            try:
                status_message = driver.find_element_by_css_selector('.messageText').text
            except selenium.common.exceptions.StaleElementReferenceException:
                # These come up, possibly if you catch the page in the middle of updating the text via javascript
                sleep(1)
                continue
            except selenium.common.exceptions.NoSuchElementException:
                # These come up, possibly if you catch the page in the middle of updating the text via javascript
                if retry_count > 15:
                    print ".messageText not found after 15 retries"
                    break
                sleep(1)
                retry_count += 1
                continue

            retry_count = 0

            if status_message.startswith('Upload Complete'):
                # Update Status
                print status_message
                sys.stdout.flush()
    
                # Get the version number and install url
                version = driver.find_element_by_xpath("//th[text()='Version Number']/following-sibling::td/span").text
                install_url = driver.find_element_by_xpath("//a[contains(@name, ':pkgInstallUrl')]").get_attribute('href')
            
                self.version = version
                self.install_url = install_url
    
                break

            if status_message.startswith('Upload Failed'):
                print status_message
                sys.stdout.flush()
                break 

            # Update Status
            if status_message != last_status:
                print status_message
                sys.stdout.flush()
            last_status = status_message

            sleep(1)

        driver.quit()    

Example 15

Project: acousticbrainz-server
Source File: hl_calc.py
View license
def main(num_threads):
    print("High-level extractor daemon starting with %d threads" % num_threads)
    sys.stdout.flush()
    build_sha1 = get_build_sha1(HIGH_LEVEL_EXTRACTOR_BINARY)
    create_profile(PROFILE_CONF_TEMPLATE, PROFILE_CONF, build_sha1)
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)

    num_processed = 0

    pool = {}
    docs = []
    while True:
        # Check to see if we need more database rows
        if len(docs) == 0:
            # Fetch more rows from the DB
            docs = db.data.get_unprocessed_highlevel_documents()

            # We will fetch some rows that are already in progress. Remove those.
            in_progress = pool.keys()
            filtered = []
            for mbid, doc, id in docs:
                if mbid not in in_progress:
                    filtered.append((mbid, doc, id))
            docs = filtered

        if len(docs):
            # Start one document
            mbid, doc, id = docs.pop()
            th = HighLevel(mbid, doc, id)
            th.start()
            print("start %s" % mbid)
            sys.stdout.flush()
            pool[mbid] = th

        # If we're at max threads, wait for one to complete
        while True:
            if len(pool) == 0 and len(docs) == 0:
                if num_processed > 0:
                    print("processed %s documents, none remain. Sleeping." % num_processed)
                    sys.stdout.flush()
                num_processed = 0
                # Let's be nice and not keep any connections to the DB open while we nap
                # TODO: Close connections when we're sleeping
                sleep(SLEEP_DURATION)

            for mbid in pool.keys():
                if not pool[mbid].is_alive():

                    # Fetch the data and clean up the thread object
                    hl_data = pool[mbid].get_data()
                    ll_id = pool[mbid].get_ll_id()
                    pool[mbid].join()
                    del pool[mbid]

                    try:
                        jdata = json.loads(hl_data)
                    except ValueError:
                        print("error %s: Cannot parse result document" % mbid)
                        print(hl_data)
                        sys.stdout.flush()
                        jdata = {}

                    db.data.write_high_level(mbid, ll_id, jdata, build_sha1)

                    print("done  %s" % mbid)
                    sys.stdout.flush()
                    num_processed += 1

            if len(pool) == num_threads:
                # tranquilo!
                sleep(.1)
            else:
                break
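
The daemon above mixes output from a pool of worker threads and flushes after every line so the log stays live while the main loop sleeps between batches. A minimal sketch of that flush-per-line logging around a thread pool (the worker and log names are assumptions, not the acousticbrainz API):

import sys
import threading
import time

def log(msg):
    print(msg)
    sys.stdout.flush()   # daemon output is often redirected; flushing keeps it live

def worker(job_id):
    time.sleep(0.1)      # stand-in for the real extractor work
    log("done  %s" % job_id)

pool = []
for job_id in range(4):
    th = threading.Thread(target=worker, args=(job_id,))
    th.start()
    log("start %s" % job_id)
    pool.append(th)
for th in pool:
    th.join()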

Example 16

Project: vbo-convert
Source File: vbo_to_ply.py
View license
def convert(filename):

	# todo: get zoom from filename
	zoom=15# current zoom level - sets x & y scale relative to z values
	maximum_range = 4096 # tile-space coordinate maximum

	# convert from tile-space coords to meters, depending on zoom
	def tile_to_meters(zoom):
		return 40075016.68557849 / pow(2, zoom)

	conversion_factor = tile_to_meters(zoom) / maximum_range
	lines = []

	# get lines from input file
	with open(filename, 'r') as f:
		lines = [line.strip() for line in f]
	f.close()

	vertex_count = 0
	newlines = []

	# add vertex definitions
	for i, line in enumerate(lines):
		index = 0

		if len(line) == 0: # skip the occasional empty line
			continue

		newlines.append(line+"\n")
		vertex_count += 1
		# print('vertex_count', vertex_count)
		if (i % 1000 == 0): # print progress
			sys.stdout.flush()
			sys.stdout.write("\r"+(str(round(i / len(lines) * 100, 2))+"%"))

	sys.stdout.flush()
	sys.stdout.write("\r100%")
	sys.stdout.flush()

	# add simple face definitions - every three vertices make a face
	face_count = int(vertex_count / 3)
	for i in range(face_count):
		j = i*3
		newline = "3 "+str(j)+" "+str(j+1)+" "+str(j+2)+"\n"
		newlines.append(newline)

	name, extension = os.path.splitext(filename)
	OUTFILE = name + ".ply"
	open(OUTFILE, 'w').close() # clear existing OUTFILE, if any
	newfile = open(OUTFILE, "w")
	for line in newlines:
		newfile.write("%s" % line)
	newfile.close()

	def line_prepend(filename,line):
	    with open(filename,'r+') as f:
	        content = f.read()
	        f.seek(0,0)
	        f.write(line.rstrip('\r\n') + '\n' + content)

	# generate PLY header
	header = '''ply
	format ascii 1.0
	element vertex '''+str(vertex_count)+'''
	property float x
	property float y
	property float z
	element face '''+str(face_count)+'''
	property list uchar int vertex_indices
	end_header'''

	##
	## a header with vertex colors
	##

	# header = '''ply
	# format ascii 1.0
	# element vertex '''+str(vertex_count)+'''
	# property float x
	# property float y
	# property float z
	# property uchar red
	# property uchar green
	# property uchar blue
	# element face '''+str(face_count)+'''
	# property list uchar int vertex_indices
	# end_header
	# '''


	line_prepend(OUTFILE, header)
	print("Wrote "+OUTFILE)

Example 17

Project: nototools
Source File: mecsolve.py
View license
def mecrange(figtype):
    scale = 130
    eps_prologue(50, 110, 570, 630)
    print -50, 0, 'translate'
    print '0.5 setlinewidth'
    thlmin, thlmax = -pi/2, 2.4
    thrmin, thrmax = -2.2, pi / 2 + .2
    print 306 + scale * thlmin, 396, 'moveto', 306 + scale * thlmax, 396, 'lineto stroke'
    print 306, 396 + scale * thrmin, 'moveto', 306, 396 + scale * thrmax, 'lineto stroke'

    print 'gsave [2] 0 setdash'
    print 306, 396 + scale * pi / 2, 'moveto'
    print 306 + scale * thlmax, 396 + scale * pi / 2, 'lineto stroke'
    print 306 + scale * thlmin, 396 - scale * pi / 2, 'moveto'
    print 306 + scale * thlmax, 396 - scale * pi / 2, 'lineto stroke'
    print 306 + scale * pi / 2, 396 + scale * thrmin, 'moveto'
    print 306 + scale * pi / 2, 396 + scale * thrmax, 'lineto stroke'
    print 'grestore'

    print 306 + 3, 396 + scale * thrmax - 10, 'moveto'
    print '/Symbol 12 selectfont (q) show'
    print 0, -2, 'rmoveto'
    print '/Times-Italic 9 selectfont (right) show'

    print 306 - 18, 396 + scale * pi / 2 - 4, 'moveto'
    print '/Symbol 12 selectfont (p/2) show'
    print 306 + scale * 2.2, 396 - scale * pi / 2 + 2, 'moveto'
    print '/Symbol 12 selectfont (-p/2) show'

    print 306 + scale * pi/2 + 2, 396 + scale * thrmax - 10, 'moveto'
    print '/Symbol 12 selectfont (p/2) show'

    print 306 + scale * 2.2, 396 + 6, 'moveto'
    print '/Symbol 12 selectfont (q) show'
    print 0, -2, 'rmoveto'
    print '/Times-Italic 9 selectfont (left) show'

    print '/ss 0.8 def'
    print '/circle { ss 0 moveto currentpoint exch ss sub exch ss 0 360 arc } bind def'
    cmd = 'moveto'
    for i in range(0, 201):
        th = (i * .005 - .75 )* pi
        rmin = 1.5
        rmax = 2.5
        for j in range(20):
            r = (rmin + rmax) * .5
            th0 = r * cos(th)
            th1 = r * sin(th)
            if findmec(th0, th1) == None:
                rmax = r
            else:
                rmin = r
        r = (rmin + rmax) * .5
        th0 = r * cos(th)
        th1 = r * sin(th)
        print '%', r, th, th0, th1
        print 306 + scale * th0, 396 + scale * th1, cmd
        cmd = 'lineto'
        sys.stdout.flush()
    print 'stroke'
    sys.stdout.flush()
        
    for i in range(-11, 12):
        for j in range(-11, i + 1):
            th0, th1 = i * .196, j * .196
            print '%', th0, th1
            params = findmec(th0, th1)
            if params != None:
                sm, sp = params
                print 'gsave'
                print 306 + scale * th0, 396 + scale * th1, 'translate'
                uscale = 22
                k0, lam1, lam2 = justify_mec(sm, sp)
                xys, cost, x, y, th = run_elastica(-.5, .5, k0, lam1, lam2)
                cmdm = 'moveto'
                dx = xys[-1][0] - xys[0][0]
                dy = xys[-1][1] - xys[0][1]
                ch = hypot(dx, dy)
                chth = atan2(dy, dx)
                if figtype == 'mecrange':
                    print 'circle fill'
                    s = uscale * sin(chth) / ch
                    c = uscale * cos(chth) / ch
                    h = -xys[0][0] * s + xys[0][1] * c
                    for xy in xys:
                        print xy[0] * c + xy[1] * s, h + xy[0] * s - xy[1] * c, cmdm
                        cmdm = 'lineto'
                elif figtype == 'mecrangek':
                    ds = 1. / (len(xys) - 1)
                    sscale = 13. / ch
                    kscale = 3 * ch
                    print 'gsave .25 setlinewidth'
                    print sscale * -.5, 0, 'moveto', sscale, 0, 'rlineto stroke'
                    print 'grestore'
                    for l in range(len(xys)):
                        print sscale * (ds * l - 0.5), kscale * xys[l][2], cmdm
                        cmdm = 'lineto'
                print 'stroke'
                print 'grestore'
            sys.stdout.flush()
    print 'showpage'
    eps_trailer()

Example 18

View license
    def GetAll(self):

        def progress(received, blockSize, fileSize):
            if fileSize < 0: return
            wDots = (100 * received * blockSize) / fileSize / 10
            if wDots > self.dots:
                for i in range(wDots - self.dots):
                    print '.',
                    sys.stdout.flush()
                    self.dots += 1

        maxRetries = 1
        for (fname, fdata) in self.source_files.items():
            for retries in range(maxRetries):
                try:
                    self.dots = 0
                    local_file = os.path.join(self.config.src_dir, fdata['filename'])
                    url = fdata['url']
                    print 'Downloading %s:' % fname, url
                    if retries > 0:
                        print '(retry)',
                    sys.stdout.flush()

                    completed = False
                    if os.path.exists(local_file):
                        md5_pass = self.checkHash(fdata)
                        if md5_pass:
                            print '[md5 match]',
                        else:
                            print '[md5 mismatch]',
                        sys.stdout.flush()
                        completed = md5_pass

                    if not completed:
                        urllib.urlretrieve(url, local_file, progress)

                    #
                    # BUGBUG: Suggest proxy to user if download fails.
                    #
                    # export http_proxy=http://proxyservername.mycompany.com:911
                    # export ftp_proxy=http://proxyservername.mycompany.com:911

                    if not completed and os.path.exists(local_file):
                        md5_pass = self.checkHash(fdata)
                        if md5_pass:
                            print '[md5 match]',
                        else:
                            print '[md5 mismatch]',
                        sys.stdout.flush()
                        completed = md5_pass

                    if completed:
                        print '[done]'
                        break
                    else:
                        print '[failed]'
                        print '  Tried to retrieve', url
                        print '  to', local_file
                        print 'Possible fixes:'
                        print '* If you are behind a web-proxy, try setting the',
                        print 'http_proxy environment variable'
                        print '* You can try to download this file separately',
                        print 'and rerun this script'
                        raise Exception()
                
                except KeyboardInterrupt:
                    print '[KeyboardInterrupt]'
                    return False

                except Exception, e:
                    print e

            if not completed: return False

        return True
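
The download hook above prints progress dots without newlines, so each dot has to be flushed explicitly to show up while the transfer is still running. A small Python 3 sketch of the same reporthook idea (the simulated loop stands in for a real urllib.request.urlretrieve call):

import sys

def progress(received, block_size, file_size):
    """reporthook-style callback: draw up to ten dots of progress."""
    if file_size <= 0:
        return
    done = received * block_size
    dots = int(10 * done / file_size)
    sys.stdout.write("\r" + "." * min(dots, 10))
    sys.stdout.flush()   # the dots have no newline, so flush to show them immediately

# With a real download this would be passed as the reporthook:
#   urllib.request.urlretrieve(url, local_file, progress)
# Simulated here so the sketch runs offline:
for block in range(1, 101):
    progress(block, 1024, 100 * 1024)
sys.stdout.write("\n")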

Example 19

Project: AI_Reader
Source File: build_imagenet_data.py
View license
def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
                               synsets, labels, humans, bboxes, num_shards):
  """Processes and saves list of images as TFRecord in 1 thread.

  Args:
    coder: instance of ImageCoder to provide TensorFlow image coding utils.
    thread_index: integer, unique batch to run index is within [0, len(ranges)).
    ranges: list of pairs of integers specifying ranges of each batches to
      analyze in parallel.
    name: string, unique identifier specifying the data set
    filenames: list of strings; each string is a path to an image file
    synsets: list of strings; each string is a unique WordNet ID
    labels: list of integer; each integer identifies the ground truth
    humans: list of strings; each string is a human-readable label
    bboxes: list of bounding boxes for each image. Note that each entry in this
      list might contain from 0+ entries corresponding to the number of bounding
      box annotations for the image.
    num_shards: integer number of shards for this data set.
  """
  # Each thread produces N shards where N = int(num_shards / num_threads).
  # For instance, if num_shards = 128, and the num_threads = 2, then the first
  # thread would produce shards [0, 64).
  num_threads = len(ranges)
  assert not num_shards % num_threads
  num_shards_per_batch = int(num_shards / num_threads)

  shard_ranges = np.linspace(ranges[thread_index][0],
                             ranges[thread_index][1],
                             num_shards_per_batch + 1).astype(int)
  num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0]

  counter = 0
  for s in xrange(num_shards_per_batch):
    # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
    shard = thread_index * num_shards_per_batch + s
    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
    output_file = os.path.join(FLAGS.output_directory, output_filename)
    writer = tf.python_io.TFRecordWriter(output_file)

    shard_counter = 0
    files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int)
    for i in files_in_shard:
      filename = filenames[i]
      label = labels[i]
      synset = synsets[i]
      human = humans[i]
      bbox = bboxes[i]

      image_buffer, height, width = _process_image(filename, coder)

      example = _convert_to_example(filename, image_buffer, label,
                                    synset, human, bbox,
                                    height, width)
      writer.write(example.SerializeToString())
      shard_counter += 1
      counter += 1

      if not counter % 1000:
        print('%s [thread %d]: Processed %d of %d images in thread batch.' %
              (datetime.now(), thread_index, counter, num_files_in_thread))
        sys.stdout.flush()

    print('%s [thread %d]: Wrote %d images to %s' %
          (datetime.now(), thread_index, shard_counter, output_file))
    sys.stdout.flush()
    shard_counter = 0
  print('%s [thread %d]: Wrote %d images to %d shards.' %
        (datetime.now(), thread_index, counter, num_files_in_thread))
  sys.stdout.flush()

Example 20

Project: scikit-learn
Source File: bench_plot_omp_lars.py
View license
def compute_bench(samples_range, features_range):

    it = 0

    results = dict()
    lars = np.empty((len(features_range), len(samples_range)))
    lars_gram = lars.copy()
    omp = lars.copy()
    omp_gram = lars.copy()

    max_it = len(samples_range) * len(features_range)
    for i_s, n_samples in enumerate(samples_range):
        for i_f, n_features in enumerate(features_range):
            it += 1
            n_informative = n_features / 10
            print('====================')
            print('Iteration %03d of %03d' % (it, max_it))
            print('====================')
            # dataset_kwargs = {
            #     'n_train_samples': n_samples,
            #     'n_test_samples': 2,
            #     'n_features': n_features,
            #     'n_informative': n_informative,
            #     'effective_rank': min(n_samples, n_features) / 10,
            #     #'effective_rank': None,
            #     'bias': 0.0,
            # }
            dataset_kwargs = {
                'n_samples': 1,
                'n_components': n_features,
                'n_features': n_samples,
                'n_nonzero_coefs': n_informative,
                'random_state': 0
            }
            print("n_samples: %d" % n_samples)
            print("n_features: %d" % n_features)
            y, X, _ = make_sparse_coded_signal(**dataset_kwargs)
            X = np.asfortranarray(X)

            gc.collect()
            print("benchmarking lars_path (with Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            G = np.dot(X.T, X)  # precomputed Gram matrix
            Xy = np.dot(X.T, y)
            lars_path(X, y, Xy=Xy, Gram=G, max_iter=n_informative)
            delta = time() - tstart
            print("%0.3fs" % delta)
            lars_gram[i_f, i_s] = delta

            gc.collect()
            print("benchmarking lars_path (without Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            lars_path(X, y, Gram=None, max_iter=n_informative)
            delta = time() - tstart
            print("%0.3fs" % delta)
            lars[i_f, i_s] = delta

            gc.collect()
            print("benchmarking orthogonal_mp (with Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            orthogonal_mp(X, y, precompute=True,
                          n_nonzero_coefs=n_informative)
            delta = time() - tstart
            print("%0.3fs" % delta)
            omp_gram[i_f, i_s] = delta

            gc.collect()
            print("benchmarking orthogonal_mp (without Gram):", end='')
            sys.stdout.flush()
            tstart = time()
            orthogonal_mp(X, y, precompute=False,
                          n_nonzero_coefs=n_informative)
            delta = time() - tstart
            print("%0.3fs" % delta)
            omp[i_f, i_s] = delta

    results['time(LARS) / time(OMP)\n (w/ Gram)'] = (lars_gram / omp_gram)
    results['time(LARS) / time(OMP)\n (w/o Gram)'] = (lars / omp)
    return results
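
The benchmark prints each label with end='' and flushes before starting the timed call, so the reader can see which step is running while it blocks; the elapsed time then completes the same line. A minimal sketch of that label-then-flush pattern (the timed function here is just time.sleep):

import sys
import time

def timed(label, fn, *args, **kwargs):
    print("%s: " % label, end='')
    sys.stdout.flush()          # show the label before the long call starts
    t0 = time.time()
    result = fn(*args, **kwargs)
    print("%0.3fs" % (time.time() - t0))   # completes the same output line
    return result

timed("benchmarking sleep", time.sleep, 0.2)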

Example 21

Project: DIRAC
Source File: dirac-fix-ld-library-path.py
View license
def fixLDPath( root, ldpath, directory ):
  """
      This is a utility to fix the LD_LIBRARY_PATH on Grid WNs. The
      shared libraries from the original LD_LIBRARY_PATH are linked to
      the locally specified directory.  For Windows (and in general)
      this needs some refurbishment.
  """

  if os.path.exists( directory ):
    shutil.rmtree( directory )

  start = os.getcwd()
  os.mkdir( directory )
  os.chdir( directory )
  uniqueLD = uniquePath( ldpath )

  if DEBUG:
    print 'Unique LD LIBRARY PATH is:'
    print uniqueLD
    sys.stdout.flush()

  ldlist = uniqueLD.split( ':' )
  if DEBUG:
    print ''
    print 'LD List is:'
    print ldlist
    print ''
    sys.stdout.flush()

  for path in ldlist:
    if os.path.exists( path ):

      if DEBUG:
        print 'Searching for shared libraries in:'
        print path
        print '-----------------------------------------------'
        res = shellCall( 0, 'ls ' + path + '/*.so*' )
        if res['OK']:
          print res['Value']
        else:
          print res
        print '-----------------------------------------------'

      output = shellCall( 0, 'ls ' + path + '/*.so*' )
      #must be tidied for Windows (same below)

      if DEBUG:
        if not output['OK']:
          print '**************************'
          print 'Warning, problem with ls:'
          print output
          print '**************************'

      if not output['Value'][0]:
        ldlibs = output['Value'][1].split( '\n' )
        for lib in ldlibs:
          if os.path.exists( lib ):
            filename = os.path.basename( lib )
            output = shellCall( 0, 'ln -s ' + str( lib ) + ' ' + str( filename ) )
            #N.B. for Windows this should be a copy...
            if DEBUG:
              if not output['OK']:
                print '********************************'
                print 'Warning, problem creating link:'
                print 'File: ', filename
                print 'Path: ', lib
                print output
                print '********************************'

      if DEBUG:
        print 'Searching for rootmap file in:'
        print path
        print '-----------------------------------------------'
        res = shellCall( 0, 'ls ' + path + '/*rootmap*' )
        if res['OK']:
          print res['Value']
        else:
          print res
        print '-----------------------------------------------'

      output = shellCall( 0, 'ls ' + path + '/*rootmap*' )

      if DEBUG:
        if not output['OK']:
          print '**************************'
          print 'Warning, problem with rootmap:'
          print output
          print '**************************'

      if not output['Value'][0]:
        ldlibs = output['Value'][1].split( '\n' )
        for lib in ldlibs:
          if os.path.exists( lib ):
            if re.search( 'RELAX', lib ) is not None:
              filename = os.path.basename( lib )
              output = shellCall( 0, 'ln -s ' + str( lib ) + ' ' + str( filename ) )
              if DEBUG:
                if not output['OK']:
                  print '********************************'
                  print 'Warning, problem creating link:'
                  print 'File: ', filename
                  print 'Path: ', lib
                  print output
                  print '********************************'

  os.chdir( start )
  sys.stdout.flush()

Example 22

Project: cardoon
Source File: tran.py
View license
    def run(self, circuit):
        """
        Calculates transient analysis by solving nodal equations
        """
        # for now just print some fixed stuff
        print('******************************************************')
        print('                 Transient analysis')
        print('******************************************************')
        if hasattr(circuit, 'title'):
            print('\n', circuit.title, '\n')

        if glVar.sparse:
            nd = nodalSP
        else:
            nd = nodal
            print('Using dense matrices\n')

        # Only works with flattened circuits
        if not circuit._flattened:
            circuit.flatten()
            circuit.init()

        # Select integration method
        if self.im == 'BE':
            imo = BEuler()
        elif self.im == 'trap':
            imo = Trapezoidal()
        else:
            raise analysis.AnalysisError(
                'Unknown integration method: {0}'.format(self.im))

        # Create nodal objects and solve for initial state
        nd.make_nodal_circuit(circuit)
        dc = nd.DCNodal(circuit)
        tran = nd.TransientNodal(circuit, imo)
        x = dc.get_guess()
        # Use sources including transient values for t == 0
        sV = tran.get_source(0.)
        # solve DC equations
        try: 
            print('Calculating DC operating point ... ', end='')
            sys.stdout.flush()
            (x, res, iterations) = solve(x, sV, dc.convergence_helpers)
            print('Succeeded.\n')
        except NoConvergenceError as ce:
            print('Failed.\n')
            print(ce)
            return
        dc.save_OP(x)
        tran.set_IC(self.tstep)
        # Release memory in dc object?
        del(dc)

        # Create time vector
        timeVec = np.arange(start=0., stop = self.tstop, step = self.tstep, 
                            dtype=float)
        nsamples = len(timeVec)
        circuit.tran_timevec = timeVec

        # Get terminals to plot/save from circuit. 
        termSet = circuit.get_requested_terms('tran')

        # Special treatment for ground terminal
        termSet1 = set(termSet)
        if circuit.nD_ref in termSet1:
            termSet1.remove(circuit.nD_ref)
            circuit.nD_ref.tran_v = np.zeros(nsamples)

        # Allocate vectors for results
        if self.saveall:
            for term in circuit.nD_termList:
                term.tran_v = np.empty(nsamples)
                term.tran_v[0] = x[term.nD_namRC]                
            circuit.nD_ref.tran_v = np.zeros(nsamples)
        else:
            # Only save requested nodes
            for term in termSet1:
                term.tran_v = np.empty(nsamples)
                term.tran_v[0] = x[term.nD_namRC]

        # Save initial values
        xOld = x
        tIter = 0
        tRes = 0.
        dots = 50
        print('System dimension: {0}'.format(circuit.nD_dimension))
        print('Number of samples: {0}'.format(nsamples))
        print('Integration method: {0}'.format(self.im))
        if self.verbose:
            print('-------------------------------------------------')
            print(' Step    | Time (s)     | Iter.    | Residual    ')
            print('-------------------------------------------------')
        else:
            print('Printing one dot every {0} samples:'.format(dots))
            sys.stdout.flush()

        for i in xrange(1, nsamples):
            tran.accept(xOld)
            sV = tran.get_rhs(timeVec[i])
            # solve equations: use previous time-step solution as an
            # initial guess
            if i > 1:
                # Re-use factorized Jacobian: This saves the time to
                # evaluate the function and Jacobian plus the time for
                # factorization. Only sparse implementation stores
                # factorized Jacobian
                xOld += tran.get_chord_deltax(sV)
            try: 
                (x, res, iterations) = solve(xOld, sV, 
                                             tran.convergence_helpers)
            except NoConvergenceError as ce:
                print(ce)
                return

            # Save results
            xOld[:] = x
            if self.saveall:
                for term in circuit.nD_termList:
                    term.tran_v[i] = x[term.nD_namRC]                
            else:
                # Only save requested nodes
                for term in termSet1:
                    term.tran_v[i] = x[term.nD_namRC]
            # Keep some info about iterations
            tIter += iterations
            tRes += res
            if self.verbose:
                print('{0:8} | {1:12} | {2:8} | {3:12}'.format(
                        i, timeVec[i], iterations, res))
            elif not i%dots:
                print('.', end='')
                sys.stdout.flush()

        # Calculate average residual and iterations
        avei = float(tIter) / nsamples
        aver = tRes / nsamples
        print('\nAverage iterations: {0}'.format(avei))
        print('Average residual: {0}\n'.format(aver))

        # Process output requests.  
        analysis.process_requests(circuit, 'tran', 
                                  timeVec, 'Time [s]', 'tran_v')

        def getvec(termname):
            return circuit.find_term(termname).tran_v

        if self.shell:
            analysis.ipython_drop("""
Available commands:
    timeVec: time vector
    getvec(<terminal>) to retrieve results (if result saved)
""", globals(), locals())

Example 23

Project: couchdb-python
Source File: replicate.py
View license
def main():

    usage = '%prog [options] <source> <target>'
    parser = optparse.OptionParser(usage=usage)
    parser.add_option('--continuous',
        action='store_true',
        dest='continuous',
        help='trigger continuous replication in couchdb')
    parser.add_option('--compact',
        action='store_true',
        dest='compact',
        help='compact target database after replication')

    options, args = parser.parse_args()
    if len(args) != 2:
        raise parser.error('need source and target arguments')

    # set up server objects

    src, tgt = args
    sbase, spath = findpath(parser, src)
    source = client.Server(sbase)
    tbase, tpath = findpath(parser, tgt)
    target = client.Server(tbase)

    # check database name specs

    if '*' in tpath:
        raise parser.error('invalid target path: must be single db or empty')

    all = sorted(i for i in source if i[0] != '_') # Skip reserved names.
    if not spath:
        raise parser.error('source database must be specified')

    sources = [i for i in all if fnmatch.fnmatchcase(i, spath)]
    if not sources:
        raise parser.error("no source databases match glob '%s'" % spath)

    if len(sources) > 1 and tpath:
        raise parser.error('target path must be empty with multiple sources')
    elif len(sources) == 1:
        databases = [(sources[0], tpath)]
    else:
        databases = [(i, i) for i in sources]

    # do the actual replication

    for sdb, tdb in databases:

        start = time.time()
        print(sdb, '->', tdb)
        sys.stdout.flush()

        if tdb not in target:
            target.create(tdb)
            sys.stdout.write("created")
            sys.stdout.flush()

        sdb = '%s%s' % (sbase, util.urlquote(sdb, ''))
        if options.continuous:
            target.replicate(sdb, tdb, continuous=options.continuous)
        else:
            target.replicate(sdb, tdb)
        print('%.1fs' % (time.time() - start))
        sys.stdout.flush()

    if options.compact:
        for (sdb, tdb) in databases:
            print('compact', tdb)
            target[tdb].compact()

Example 24

Project: vispy
Source File: _runners.py
View license
def _examples(fnames_str):
    """Run examples and make sure they work.

    Parameters
    ----------
    fnames_str : str
        Can be a space-separated list of paths to test, or an empty string to
        auto-detect and run all examples.
    """
    import_dir, dev = _get_import_dir()
    reason = None
    if not dev:
        reason = 'Cannot test examples unless in vispy git directory'
    else:
        with use_log_level('warning', print_msg=False):
            good, backend = has_application(capable=('multi_window',))
        if not good:
            reason = 'Must have suitable app backend'
    if reason is not None:
        msg = 'Skipping example test: %s' % reason
        print(msg)
        raise SkipTest(msg)

    # if we're given individual file paths as a string in fnames_str,
    # then just use them as the fnames
    # otherwise, use the full example paths that have been
    # passed to us
    if fnames_str:
        fnames = fnames_str.split(' ')

    else:
        fnames = [op.join(d[0], fname)
                  for d in os.walk(op.join(import_dir, '..', 'examples'))
                  for fname in d[2] if fname.endswith('.py')]

    fnames = sorted(fnames, key=lambda x: x.lower())
    print(_line_sep + '\nRunning %s examples using %s backend'
          % (len(fnames), backend))
    # op.join('tutorial', 'app', 'shared_context.py'),  # non-standard (stray no-op fragment in this excerpt)

    fails = []
    n_ran = n_skipped = 0
    t0 = time()
    for fname in fnames:
        n_ran += 1
        root_name = op.split(fname)
        root_name = op.join(op.split(op.split(root_name[0])[0])[1],
                            op.split(root_name[0])[1], root_name[1])
        good = True
        with open(fname, 'r') as fid:
            for _ in range(10):  # just check the first 10 lines
                line = fid.readline()
                if line == '':
                    break
                elif line.startswith('# vispy: ') and 'testskip' in line:
                    good = False
                    break
        if not good:
            n_ran -= 1
            n_skipped += 1
            continue
        sys.stdout.flush()
        cwd = op.dirname(fname)
        cmd = [sys.executable, '-c', _script.format(op.split(fname)[1][:-3])]
        sys.stdout.flush()
        stdout, stderr, retcode = run_subprocess(cmd, return_code=True,
                                                 cwd=cwd, env=os.environ)
        if retcode or len(stderr.strip()) > 0:
            # Skipping due to missing dependency is okay
            if "ImportError: " in stderr:
                print('S', end='')
            else:
                ext = '\n' + _line_sep + '\n'
                fails.append('%sExample %s failed (%s):%s%s%s'
                             % (ext, root_name, retcode, ext, stderr, ext))
                print(fails[-1])
        else:
            print('.', end='')
        sys.stdout.flush()
    print('')
    t = (': %s failed, %s succeeded, %s skipped in %s seconds'
         % (len(fails), n_ran - len(fails), n_skipped, round(time()-t0)))
    if len(fails) > 0:
        raise RuntimeError('Failed%s' % t)
    print('Success%s' % t)
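
The test runner flushes stdout around each run_subprocess call; flushing before spawning a child keeps the parent's buffered progress characters from appearing after, or interleaved with, the child's output. A hedged sketch of the same ordering concern using the standard library, with subprocess.run standing in for vispy's run_subprocess helper:

import subprocess
import sys

for name in ("example_a.py", "example_b.py"):   # illustrative script names
    print('.', end='')
    sys.stdout.flush()   # emit our progress dot before the child writes anything
    subprocess.run([sys.executable, "-c", "print('ran %s')" % name])
print('')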

Example 25

Project: beacon-ml
Source File: deep-learning.py
View license
def main():
  if not os.path.exists(RESULTS_DIR):
    os.makedirs(RESULTS_DIR)

  # then run to to generate the vectorized data from the raw dump (already done)
  if REGEN_DATA or \
      not os.path.exists(VECTOR_DATA_PATH) or \
      not os.path.exists(VECTOR_LABELS_PATH) or \
      not os.path.exists(VALUE_RANGES_PATH):
    print 'Re-generating data'
    csv_fname = os.path.join(DATA_DIR, CSV_FNAME)
    sys.stdout.flush()
    data, labels, ranges = subsample_and_vectorize_data(csv_fname, LABEL, PRETTY_PRINT_LABEL)
    with open(VECTOR_DATA_PATH, 'wb') as file:
      np.save(file, data)
    data = None
    with open(VECTOR_LABELS_PATH, 'wb') as file:
      np.save(file, labels)
    labels = None
    with open(VALUE_RANGES_PATH, 'wb') as file:
      json.dump(ranges, file, indent=4)
    ranges = None

  features = load_feature_names()
  (x_train_full, y_train), (x_val_full, y_val) = prepare_data()
  train_rows, train_cols = x_train_full.shape
  val_rows, val_cols = x_val_full.shape

  # Figure out how many columns we need for the known starting features
  fname = os.path.join(RESULTS_DIR, PRETTY_PRINT_LABEL + '_accuracy_test')
  base_columns = 0;
  for name in starting_features:
    if name in features:
      fname += "." + name
      base_columns += features[name]['end'] - features[name]['start'] + 1
  fname += ".csv"

  # Try training each feature against the data set individually
  feature_count = len(features)
  feature_num = 0
  with open(fname, 'wb', 1) as out:
    for name, feature in features.iteritems():
      if not len(test_features) or name in test_features:
        feature_num += 1

        #Build an input data set with just the columns we care about
        count = feature['end'] - feature['start'] + 1
        x_train = np.zeros((train_rows, base_columns + count))
        x_val = np.zeros((val_rows, base_columns + count))
        col = 0
        # Populate the starting features
        for n in starting_features:
          if n == name:
            continue
          if n in features:
            for column in xrange(features[n]['start'], features[n]['end'] + 1):
              x_train[:, col] = x_train_full[:, column]
              x_val[:, col] = x_val_full[:, column]
              col += 1
        # Populate the features we are testing
        for column in xrange(feature['start'], feature['end'] + 1):
          x_train[:,col] = x_train_full[:,column]
          x_val[:, col] = x_val_full[:, column]
          col += 1

        # normalize the data
        scaler = StandardScaler()
        x_train = scaler.fit_transform(x_train)
        x_val = scaler.transform(x_val)

        # Run the actual training
        print '[{0:d}/{1:d}] Training deep model on {2} ({3:d} columns)'.format(feature_num, feature_count, name, col)
        sys.stdout.flush()
        acc, model = train_deep_model(x_train, y_train, x_val, y_val)
        print '{0} Accuracy: {1:0.4f}'.format(name, acc)
        sys.stdout.flush()
        out.write('{0},{1:0.4f}\n'.format(name,acc))

        # Test the various values for the feature
        if len(test_features):
          max_val = 100000
          min_val = 100
          step_size = 100
          count = (max_val - min_val) / step_size
          original_values = np.array([[0.0]] * count)
          row = 0
          for value in xrange(100, 100000, 100):
            original_values[row] = value
            row += 1
          data = scaler.transform(original_values)
          prob = model.predict_proba(data, verbose=0)
          with open(os.path.join(RESULTS_DIR, PRETTY_PRINT_LABEL + '_values_' + name), 'wb') as v:
            for row in xrange(0, count):
              value = original_values[row][0]
              probability = prob[row][0]
              v.write('{0:d},{1:f}\n'.format(int(value), probability))

Example 26

Project: nrvr-commander
Source File: cygwin.py
View license
    @classmethod
    def forArch(cls, arch, packageDirs,
                force=False, mirror="http://mirrors.kernel.org/sourceware/cygwin/",
                noWait=False,
                dontDownload=False,
                ticker=True):
        """Download files or use previously downloaded files.
        
        As implemented uses wget.
        That has been a choice of convenience, could be written in Python instead.
        
        arch
            32 or 64.
        
        packageDirs
            a list of directories needed.
            
            You don't want to download all of Cygwin, only what is needed.
        
        force
            whether to force downloading even if apparently downloaded already.
            
            May be useful for programmatically updating at times.
        
        mirror
            URL of mirror to download from.
        
        noWait
            whether to forgo short waits between files.
            
            Be warned that frequent high use of bandwidth may be penalized by a server
            by refusal to serve anything at all to a specific client address or range of
            addresses.
        
        dontDownload
            whether you don't want to start a download, for some reason.
        
        Return directory path."""
        arch = Arch(arch)
        installerName = cls.installerName(arch)
        if arch == Arch(32):
            archPath = "x86"
        elif arch == Arch(64):
            archPath = "x86_64"
        else:
            raise Exception("unknown architecture arch=%s" % (arch))
        downloadDir = ScriptUser.loggedIn.userHomeRelative("Downloads")
        archDir = cls.basename(arch)
        downloadDir = os.path.join(downloadDir, archDir)
        semaphorePath = downloadDir + cls.semaphoreExtenstion
        #
        if os.path.exists(downloadDir) and not force:
            if not os.path.exists(semaphorePath):
                # directory exists and not download in progress,
                # assume it is good
                return downloadDir
            else:
                # directory exists and download in progress,
                # presumably from another script running in another process or thread,
                # wait for it to complete
                printed = False
                ticked = False
                # check the essential condition, initially and then repeatedly
                while os.path.exists(semaphorePath):
                    if not printed:
                        # first time only printing
                        print "waiting for " + semaphorePath + " to go away on completion"
                        sys.stdout.flush()
                        printed = True
                    if ticker:
                        if not ticked:
                            # first time only printing
                            sys.stdout.write("[")
                        sys.stdout.write(".")
                        sys.stdout.flush()
                        ticked = True
                    time.sleep(5)
                if ticked:
                    # final printing
                    sys.stdout.write("]\n")
                    sys.stdout.flush()
        elif not dontDownload: # it is normal to download
            if not os.path.exists(downloadDir):
                try:
                    os.makedirs(downloadDir)
                except OSError:
                    if os.path.exists(downloadDir): # concurrently made
                        pass
                    else: # failure
                        raise
            #
            # try downloading
            pid = os.getpid()
            try:
                with open(semaphorePath, "w") as semaphoreFile:
                    # create semaphore file
                    semaphoreFile.write("pid=" + str(pid))
                #
                print "starting to download " + archDir
                if ticker:
                    sys.stdout.write("[.")
                    sys.stdout.flush()
                try:
                    installerUrl = "http://cygwin.com/" + installerName
                    wget = CommandCapture(
                        ["wget",
                         "--quiet",
                         "--timestamping",
                         "-P", downloadDir,
                         installerUrl],
                        forgoPty=True)
                    #
                    if ticker:
                        sys.stdout.write(".")
                        sys.stdout.flush()
                    downloadPathRoot = posixpath.join(mirror, archPath) + "/"
                    downloadPathRootDepth = cls._directoryDepth(downloadPathRoot)
                    rejectList = "index.html*,*-src.tar.bz2,*-devel,*-devel-*,*-debuginfo,*-debuginfo-*"
                    wgetArgs = [
                        "wget",
                        "--quiet",
                        "--timestamping",
                        "--recursive",
                        "--no-host-directories",
                        "--cut-dirs", str(downloadPathRootDepth),
                        "--ignore-case",
                        "--reject", rejectList,
                        "-P", downloadDir,
                        "--no-parent",
                        "--level=1",
                        "-e", "robots=off",
                    ]
                    if not noWait:
                        wgetArgs.extend(["--wait=1", "--random-wait"])
                    wgetArgs.extend([downloadPathRoot])
                    wget = CommandCapture(wgetArgs, forgoPty=True)
                    #
                    downloadPackagesPath = posixpath.join(downloadPathRoot, "release") + "/"
                    #wildcardRegex = re.compile(r"^(.*)/([^/]*\*)$")
                    for packageDir in packageDirs:
                        if ticker:
                            sys.stdout.write(".")
                            sys.stdout.flush()
                        if not isinstance(packageDir, (tuple, list)): # e.g. "bash"
                            level = 1
                        else: # e.g. ("openssl", 2)
                            level = packageDir[1]
                            packageDir = packageDir[0]
                        downloadPath = posixpath.join(downloadPackagesPath, packageDir) + "/"
                        wgetArgs = [
                            "wget",
                            "--quiet",
                            "--timestamping",
                            "--recursive",
                            "--no-host-directories",
                            "--cut-dirs", str(downloadPathRootDepth),
                            "--ignore-case",
                            "--reject", rejectList,
                            "-P", downloadDir,
                            "--no-parent",
                            "--level", str(level),
                            "-e", "robots=off",
                        ]
                        if not noWait:
                            wgetArgs.extend(["--wait=1", "--random-wait"])
                        wgetArgs.extend([downloadPath])
                        wget = CommandCapture(wgetArgs, forgoPty=True)
                    if ticker:
                        sys.stdout.write("]")
                        sys.stdout.flush()
                finally:
                    if ticker:
                        sys.stdout.write("\n")
                        sys.stdout.flush()
            except: # apparently a problem
                print "problem downloading " + archDir
                raise
            else:
                print "done downloading " + archDir
            finally:
                try:
                    # delete semaphore file
                    os.remove(semaphorePath)
                except:
                    pass
        if os.path.exists(downloadDir):
            # directory exists now, assume it is good
            return downloadDir
        else:
            # apparently download has failed
            raise IOError("directory not found " + downloadDir)
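
The downloader renders a simple ticker: an opening bracket, one dot per step, and a closing bracket, each written without a newline and flushed so the ticker grows on screen while wget runs. A standalone sketch of the ticker (the per-step work is simulated with sleep):

import sys
import time

steps = 8
sys.stdout.write("[")
sys.stdout.flush()
for _ in range(steps):
    time.sleep(0.05)          # stand-in for one wget invocation
    sys.stdout.write(".")
    sys.stdout.flush()        # each dot is partial output, so flush it explicitly
sys.stdout.write("]\n")
sys.stdout.flush()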

Example 27

Project: NIPAP
Source File: bulk-string-replace.py
View license
def replace(pattern, replacement):

    # Fetch prefixes matching the string to replace
    print "Fetching prefixes from NIPAP... ",
    sys.stdout.flush()
    n = 1
    prefix_list = []
    t0 = time.time()
    query = {
        'operator': 'or',
        'val1': {
            'operator': 'regex_match',
            'val1': 'description',
            'val2': pattern
        },
        'val2': {
            'operator': 'regex_match',
            'val1': 'node',
            'val2': pattern
        }
    }
    full_result = Prefix.search(query, { 'parents_depth': -1, 'max_result': BATCH_SIZE })
    prefix_result = full_result['result']
    prefix_list += prefix_result
    print len(prefix_list), 
    sys.stdout.flush()
    while len(prefix_result) == 100:
        full_result = Prefix.smart_search(pattern, { 'parents_depth': -1, 'max_result': BATCH_SIZE, 'offset': n * BATCH_SIZE })
        prefix_result = full_result['result']
        prefix_list += prefix_result
        print len(prefix_list), 
        sys.stdout.flush()
        n += 1

    t1 = time.time()
    print " done in %.1f seconds" % (t1 - t0)

    # Display list
    print_pattern = "%-2s%-14s%-2s%-30s%-20s%s"
    print "\n\nPrefixes to change:"
    print print_pattern % ("", "VRF", "", "Prefix", "Node", "Description")
    i_match = 0
    for i, prefix in enumerate(prefix_list):
        if prefix.match:
            print COLOR_RESET,
            print " -- %d --" % i
            color = COLOR_RED
        else:
            color = COLOR_RESET
            
        print (color + print_pattern) % (
            "-" if prefix.match else "",
            prefix.vrf.rt,
            prefix.type[0].upper(),
            (("  " * prefix.indent) + prefix.display_prefix)[:min([ len(prefix.display_prefix) + 2*prefix.indent, 30 ])],
            (prefix.node or '')[:min([ len(prefix.node or ''), 20 ])],
            (prefix.description or '')[:min([ len(prefix.description or ''), 900 ])]
        )
        if prefix.match:
            new_prefix_node = re.sub(pattern, replacement, (prefix.node or ''), flags=re.IGNORECASE)
            new_prefix_desc = re.sub(pattern, replacement, (prefix.description or ''), flags=re.IGNORECASE)
            print (COLOR_GREEN + print_pattern) % (
                "+",
                prefix.vrf.rt,
                prefix.type[0].upper(),
                ("  " * prefix.indent + prefix.display_prefix)[:min([ len(prefix.display_prefix) + 2*prefix.indent, 30 ])],
                new_prefix_node[:min([ len(new_prefix_node), 20 ])],
                new_prefix_desc[:min([ len(new_prefix_desc), 90 ])]
            )


    # reset colors
    print COLOR_RESET,

    # Perform action?
    print "Select replacements to perform"
    print "Enter comma-separated selection (eg. 5,7,10) or \"all\" for all prefixes."
    print "Prefix list with ! to invert selection (eg !5,7,10 to perform operation on all except the entered prefixes)"
    inp = raw_input("Selection: ").strip()

    if len(inp) == 0:
        print "Empty selection, quitting."
        sys.exit(0)

    invert = False
    if inp[0] == "!":
        inp = inp[1:]
        invert = True

    rename_all = False
    if inp == 'all':
        rename_all = True
        selection = []
    else:
        selection = inp.split(",")
        try:
            selection = map(lambda x: int(x.strip()), selection)
        except ValueError as e:
            print >> sys.stderr, "Could not parse selection: %s" % str(e)
            sys.exit(1)

    for i, prefix in enumerate(prefix_list):

        if prefix.match and ((invert and i not in selection) or (not invert and i in selection) or rename_all):
            if prefix.node is not None:
                prefix.node = re.sub(pattern, replacement, prefix.node, flags=re.IGNORECASE)
            if prefix.description is not None:
                prefix.description = re.sub(pattern, replacement, prefix.description, flags=re.IGNORECASE)

            print "Saving prefix %s..." % prefix.display_prefix
            prefix.save()
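
The loop above keeps a running count on a single line by printing with a trailing comma and calling sys.stdout.flush() after every batch, so progress stays visible even when stdout is block-buffered (piped or redirected). A minimal Python 3 sketch of the same batched-fetch-with-progress idea, where fetch_batch is a hypothetical callable standing in for the NIPAP search calls:

import sys
import time

BATCH_SIZE = 100

def fetch_all(fetch_batch):
    """Fetch results in batches, printing a running count on one line."""
    print("Fetching prefixes... ", end="")
    sys.stdout.flush()
    results = []
    offset = 0
    t0 = time.time()
    while True:
        batch = fetch_batch(offset=offset, limit=BATCH_SIZE)
        results += batch
        print(len(results), end=" ")
        sys.stdout.flush()           # show the running count immediately
        if len(batch) < BATCH_SIZE:  # a short batch means there is no more data
            break
        offset += BATCH_SIZE
    print("done in %.1f seconds" % (time.time() - t0))
    return results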

Example 28

Project: acousticbrainz-server
Source File: job_calc.py
View license
def main(num_threads, profile, dataset_job_id):
    print("High-level extractor daemon starting with %d threads" % num_threads)
    sys.stdout.flush()
    build_sha1 = get_build_sha1(HIGH_LEVEL_EXTRACTOR_BINARY)
    create_profile(profile, PROFILE_CONF, build_sha1)
    db.init_db_engine(config.SQLALCHEMY_DATABASE_URI)

    model_id = get_model_from_eval(dataset_job_id)
    includes = load_includes_from_eval(dataset_job_id)

    num_processed = 0

    pool = {}
    docs = []
    while True:
        # Check to see if we need more database rows
        if len(docs) == 0:
            # Fetch more rows from the DB
            docs = db.data.get_unprocessed_highlevel_documents_for_model(model_id, includes)

            # We will fetch some rows that are already in progress. Remove those.
            in_progress = pool.keys()
            filtered = []
            for mbid, doc, id in docs:
                if id not in in_progress:
                    filtered.append((mbid, doc, id))
            docs = filtered

        if len(docs):
            # Start one document
            mbid, doc, id = docs.pop()
            th = HighLevel(mbid, doc, id)
            th.start()
            print("start %s" % id)
            sys.stdout.flush()
            pool[id] = th

        # If we're at max threads, wait for one to complete
        while True:
            if len(pool) == 0 and len(docs) == 0:
                if num_processed > 0:
                    print("processed %s documents, none remain. Sleeping." % num_processed)
                    sys.stdout.flush()
                num_processed = 0
                # Let's be nice and not keep any connections to the DB open while we nap
                # TODO: Close connections when we're sleeping
                sleep(SLEEP_DURATION)

            for id in pool.keys():
                if not pool[id].is_alive():

                    # Fetch the data and clean up the thread object
                    hl_data = pool[id].get_data()
                    ll_id = pool[id].get_ll_id()
                    pool[id].join()
                    del pool[id]

                    try:
                        jdata = json.loads(hl_data)
                    except ValueError:
                        print("error %s: Cannot parse result document" % mbid)
                        print(hl_data)
                        sys.stdout.flush()
                        jdata = {}

                    db.data.write_high_level(mbid, ll_id, jdata, build_sha1)

                    print("done  %s" % id)
                    sys.stdout.flush()
                    num_processed += 1

            if len(pool) == num_threads:
                # take it easy; the pool is full, so wait a moment before checking again
                sleep(.1)
            else:
                break
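
This daemon prints a line and flushes on every state change (start, done, parse error), which matters for a long-running process whose stdout goes to a log file: without the flush, nothing would appear until a buffer fills. A stripped-down sketch of the bounded worker-pool polling loop, with make_thread as a hypothetical factory that returns a thread object per job:

import sys
import time

MAX_THREADS = 4

def run_pool(job_ids, make_thread):
    """Run jobs through a bounded thread pool, flushing status on every state change."""
    pool = {}
    while job_ids or pool:
        # Start new work while there is room in the pool.
        while job_ids and len(pool) < MAX_THREADS:
            job_id = job_ids.pop()
            thread = make_thread(job_id)
            thread.start()
            print("start %s" % job_id)
            sys.stdout.flush()
            pool[job_id] = thread
        # Reap any finished threads.
        for job_id in list(pool):
            if not pool[job_id].is_alive():
                pool[job_id].join()
                del pool[job_id]
                print("done  %s" % job_id)
                sys.stdout.flush()
        time.sleep(0.1)  # the pool is full or draining; wait briefly before checking again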

Example 29

Project: pyNastran
Source File: map_loads.py
View license
    def build_mapping_matrix(self, debug=False):
        """
        Skips building the matrix if it already exists
        A mapping matrix translates element ID to loads on the nearby
        structural nodes.

        eid,distribution
        """
        if self.mapping_matrix != {}:
            return self.mapping_matrix

        log.info("---starting build_mapping_matrix---")
        #print("self.mapping_matrix = ",self.mapping_matrix)
        if os.path.exists('mappingMatrix.new.out'):
            self.mapping_matrix = self.parseMapFile('mappingMatrix.new.out')
            log.info("---finished build_mapping_matrix based on mappingMatrix.new.out---")
            sys.stdout.flush()
            return self.mapping_matrix
        log.info("...couldn't find 'mappingMatrix.new.out' in %r, so going to make it..." % os.getcwd())

        # this is the else...
        log.info("creating...")
        aero_model = self.aero_model
        structural_model = self.structural_model

        #aNodes = aero_model.getNodes()
        #sNodes = structural_model.getNodes()
        #treeObj = Tree(nClose=5)
        #tree    = treeObj.buildTree(aNodes,sNodes) # fromNodes,toNodes

        aElementIDs = aero_model.ElementIDs() # list
        sElementIDs = structural_model.getElementIDsWithPIDs() # list
        sElementIDs2 = structural_model.ElementIDs() # list

        msg = "there are no internal elements in the structural model?\n   ...len(sElementIDs)=%s len(sElementIDs2)=%s" % (
            len(sElementIDs), len(sElementIDs2))
        assert sElementIDs != sElementIDs2, msg
        log.info("maxAeroID=%s maxStructuralID=%s sElements=%s" % (max(aElementIDs), max(sElementIDs), len(sElementIDs2)))

        log.info("build_centroids - structural")
        sCentroids = self.build_centroids(structural_model, sElementIDs)
        self.build_centroid_tree(sCentroids)
        #self.buildNodalTree(sNodes)

        log.info("build_centroids - aero")
        aero_centroids = self.build_centroids(aero_model)

        with open('mappingMatrix.out', 'wb') as map_file:
            map_file.write('# aEID distribution (sEID:  weight)\n')

            t0 = time()
            nAeroElements = float(len(aElementIDs))
            log.info("---start piercing---")
            if debug:
                log.info("nAeroElements = %s" % nAeroElements)
            tEst = 1.
            tLeft = 1.
            percent_done = 0.

            if 1:
                num_cpus = 4
                pool = mp.Pool(num_cpus)
                result = pool.imap(self.map_loads_mp_func,
                                   [(aEID, aero_model) for aEID in aElementIDs])

                for j, return_values in enumerate(result):
                    aEID, distribution = return_values
                    #self.mappingMatrix[aEID] = distribution
                    map_file.write('%s %s\n' % (aEID, distribution))
                pool.close()
                pool.join()
            else:
                for (i, aero_eid) in enumerate(aElementIDs):
                    if i % 1000 == 0 and debug:
                        log.debug('  piercing %sth element' % i)
                        log.debug("tEst=%g minutes; tLeft=%g minutes; %.3f%% done" % (
                            tEst, tLeft, percent_done))
                        sys.stdout.flush()

                    aElement = aero_model.Element(aero_eid)
                    (aArea, aCentroid, aNormal) = aero_model.get_element_properties(aero_eid)
                    percentDone = i / nAeroElements * 100
                    if debug:
                        log.info('aEID=%s percentDone=%.2f aElement=%s aArea=%s aCentroid=%s aNormal=%s' %(
                            aero_eid, percentDone, aElement, aArea, aCentroid, aNormal))
                    pSource = aCentroid
                    (distribution) = self.pierce_elements(aCentroid, aero_eid, pSource, aNormal)
                    #(distribution)  = self.poorMansMapping(aCentroid, aero_eid, pSource, aNormal)
                    self.mapping_matrix[aero_eid] = distribution
                    map_file.write('%s %s\n' % (aero_eid, distribution))

                    dt = (time() - t0) / 60.
                    tEst = dt * nAeroElements / (i + 1)  # dtPerElement*nElements
                    tLeft = tEst - dt
                    percent_done = dt / tEst * 100.

        log.info("---finish piercing---")
        self.run_map_test(self.mapping_matrix)
        #print("mapping_matrix = ", self.mapping_matrix)
        log.info("---finished build_mapping_matrix---")
        sys.stdout.flush()
        return self.mapping_matrix
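
In the serial branch above, the flush accompanies periodic progress logging with a crude time estimate: elapsed time scaled by the fraction of elements processed. A compact sketch of that ETA bookkeeping on its own, where process_one is a hypothetical stand-in for the per-element piercing work:

import sys
from time import time

def run_with_eta(items, process_one, report_every=1000):
    """Process items one by one, periodically printing an elapsed/remaining estimate."""
    t0 = time()
    total = float(len(items))
    for i, item in enumerate(items):
        if i and i % report_every == 0:
            elapsed_min = (time() - t0) / 60.
            est_total = elapsed_min * total / i   # projected minutes for the whole run
            print("est=%.2f min; left=%.2f min; %.1f%% done"
                  % (est_total, est_total - elapsed_min, i / total * 100.))
            sys.stdout.flush()
        process_one(item)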

Example 30

View license
def main():
    # #############################
    print("Creating data...", end=' ')
    sys.stdout.flush()

    data_list = []
    random.seed(0)
    for d_id in range(500000):
        x = random.randint(0, 1000)
        y = random.randint(0, 1000)
        temp = random.randint(-10, 50)
        quality = random.random()
        data_list.append(DataPoint(d_id, x, y, temp, quality))

    print("done.")
    sys.stdout.flush()

    # #############################
    print("Simulating randomized data ...", end=' ')
    sys.stdout.flush()

    data_list.sort(key=lambda d: d.quality)

    print("done.")

    # Create a list of random IDs to locate without duplication
    interesting_ids = list({random.randint(0, len(data_list) - 1) for _ in range(0, 100)})
    print("Creating {} interesting IDs to seek.".format(len(interesting_ids)))

    # #############################
    print("Locating data in list...", end=' ')
    sys.stdout.flush()

    t0 = datetime.datetime.now()
    interesting_points = []
    for i in interesting_ids:
        pt = find_point_by_id_in_list(data_list, i)
        interesting_points.append(pt)

    t1 = datetime.datetime.now()
    dt_list = (t1 - t0).total_seconds()

    print("done.")
    sys.stdout.flush()

    print("DT: {} sec".format(dt_list))
    print(interesting_points)

    # #############################
    t0 = datetime.datetime.now()

    print("Creating dictionary...", end='')
    data_lookup = {d.id: d for d in data_list}

    print("done.")
    sys.stdout.flush()

    print("Locating data in dictionary...", end=' ')
    sys.stdout.flush()

    #    t0 = datetime.datetime.now()
    interesting_points = []
    for i in interesting_ids:
        item = data_lookup[i]
        interesting_points.append(item)

    t1 = datetime.datetime.now()
    dt_dict = (t1 - t0).total_seconds()

    print("done.")
    sys.stdout.flush()

    print("DT: {} sec".format(dt_dict))
    print(interesting_points)
    print()
    print("Speedup from dict: {:,.0f}x".format(round(dt_list / dt_dict)))

Example 31

View license
def main():
  parser = argparse.ArgumentParser(description='Evaluate some explanations')
  parser.add_argument('--dataset', '-d', type=str, required=True,help='dataset name')
  parser.add_argument('--output_folder', '-o', type=str, required=True, help='output folder')
  parser.add_argument('--num_features', '-k', type=int, required=True, help='num features')
  parser.add_argument('--num_rounds', '-r', type=int, required=True, help='num rounds')
  parser.add_argument('--start_id',  '-i', type=int, default=0,required=False, help='output start id')
  args = parser.parse_args()
  dataset = args.dataset
  train_data, train_labels, test_data, test_labels, class_names = LoadDataset(dataset)
  rho = 25
  kernel = lambda d: np.sqrt(np.exp(-(d**2) / rho ** 2))
  local = explainers.GeneralizedLocalExplainer(kernel, explainers.data_labels_distances_mapping_text, num_samples=15000, return_mean=True, verbose=False, return_mapped=True)
  # Found through cross validation
  sigmas = {
      'multi_polarity_electronics': {'neighbors': 0.75, 'svm': 10.0, 'tree': 0.5,
                                     'logreg': 0.5, 'random_forest': 0.5, 'embforest': 0.75},
      'multi_polarity_kitchen': {'neighbors': 1.0, 'svm': 6.0, 'tree': 0.75,
                                 'logreg': 0.25, 'random_forest': 6.0, 'embforest': 1.0},
      'multi_polarity_dvd': {'neighbors': 0.5, 'svm': 0.75, 'tree': 8.0,
                             'logreg': 0.75, 'random_forest': 0.5, 'embforest': 5.0},
      'multi_polarity_books': {'neighbors': 0.5, 'svm': 7.0, 'tree': 2.0,
                               'logreg': 1.0, 'random_forest': 1.0, 'embforest': 3.0}}
  parzen1 = parzen_windows.ParzenWindowClassifier()
  parzen1.sigma = sigmas[dataset]['random_forest']
  parzen2 = parzen_windows.ParzenWindowClassifier()
  parzen2.sigma = sigmas[dataset]['random_forest']
  random = explainers.RandomExplainer()

  for Z in range(args.num_rounds):
    exps1 = {}
    exps2 = {}
    explainer_names = ['lime', 'parzen', 'random', 'greedy', 'mutual']
    for expl in explainer_names:
      exps1[expl] = []
      exps2[expl] = []
    print 'Round', Z
    sys.stdout.flush()
    fake_features_z = [([.1, .2], [.1,.1], 10)]#, ([.2, .1], [.1,.1], 10)]
    clean_train, dirty_train, clean_test = corrupt_dataset(fake_features_z, train_data, train_labels, test_data, test_labels)
    vectorizer = CountVectorizer(lowercase=False, binary=True) 
    dirty_train_vectors = vectorizer.fit_transform(dirty_train)
    clean_train_vectors = vectorizer.transform(clean_train)
    test_vectors = vectorizer.transform(clean_test)
    terms = np.array(list(vectorizer.vocabulary_.keys()))
    indices = np.array(list(vectorizer.vocabulary_.values()))
    inverse_vocabulary = terms[np.argsort(indices)]
    tokenizer = vectorizer.build_tokenizer()  
    c1 = ensemble.RandomForestClassifier(n_estimators=30, max_depth=5)
    c2 = ensemble.RandomForestClassifier(n_estimators=30, max_depth=5)
    untrustworthy = [i for i, x in enumerate(inverse_vocabulary) if x.startswith('FAKE')]
    train_idx, test_idx = tuple(cross_validation.ShuffleSplit(dirty_train_vectors.shape[0], 1, 0.2))[0]
    train_acc1 = train_acc2 = test_acc1 = test_acc2 = 0
    print 'Trying to find trees:'
    sys.stdout.flush()
    iteration = 0
    found_tree = True
    while np.abs(train_acc1 - train_acc2) > 0.001 or np.abs(test_acc1 - test_acc2) < 0.05: 
      iteration += 1
      c1.fit(dirty_train_vectors[train_idx], train_labels[train_idx])
      c2.fit(dirty_train_vectors[train_idx], train_labels[train_idx])
      train_acc1 = accuracy_score(train_labels[test_idx], c1.predict(dirty_train_vectors[test_idx]))
      train_acc2 = accuracy_score(train_labels[test_idx], c2.predict(dirty_train_vectors[test_idx]))
      test_acc1 = accuracy_score(test_labels, c1.predict(test_vectors))
      test_acc2 = accuracy_score(test_labels, c2.predict(test_vectors))
      if iteration == 3000:
        found_tree = False
        break
    if not found_tree:
      print 'skipping iteration', Z
      continue
    print 'done'
    print 'Train acc1:', train_acc1, 'Train acc2:', train_acc2
    print 'Test acc1:', test_acc1, 'Test acc2:', test_acc2
    sys.stdout.flush()
    predictions = c1.predict(dirty_train_vectors)
    predictions2 = c2.predict(dirty_train_vectors)
    predict_probas = c1.predict_proba(dirty_train_vectors)[:,1]
    predict_probas2 = c2.predict_proba(dirty_train_vectors)[:,1]
    cv_preds1 = cross_validation.cross_val_predict(c1, dirty_train_vectors[train_idx], train_labels[train_idx], cv=5)
    cv_preds2 = cross_validation.cross_val_predict(c2, dirty_train_vectors[train_idx], train_labels[train_idx], cv=5)
    parzen1.fit(dirty_train_vectors[train_idx], cv_preds1)
    parzen2.fit(dirty_train_vectors[train_idx], cv_preds2)
    pp = []
    pp2 = []
    true_labels = []
    iteration = 0
    for i in test_idx:
      if iteration % 50 == 0:
        print iteration
        sys.stdout.flush()
      iteration += 1
      pp.append(predict_probas[i])
      pp2.append(predict_probas2[i])
      true_labels.append(train_labels[i])
      exp, mean = local.explain_instance(dirty_train_vectors[i], 1, c1.predict_proba, args.num_features)
      exps1['lime'].append((exp, mean))

      exp = parzen1.explain_instance(dirty_train_vectors[i], 1, c1.predict_proba, args.num_features, None) 
      mean = parzen1.predict_proba(dirty_train_vectors[i])[1]
      exps1['parzen'].append((exp, mean))

      exp = random.explain_instance(dirty_train_vectors[i], 1, None, args.num_features, None)
      exps1['random'].append(exp)

      exp = explainers.explain_greedy_martens(dirty_train_vectors[i], predictions[i], c1.predict_proba, args.num_features)
      exps1['greedy'].append(exp)


      # Classifier 2
      exp, mean = local.explain_instance(dirty_train_vectors[i], 1, c2.predict_proba, args.num_features)
      exps2['lime'].append((exp, mean))

      exp = parzen2.explain_instance(dirty_train_vectors[i], 1, c2.predict_proba, args.num_features, None) 
      mean = parzen2.predict_proba(dirty_train_vectors[i])[1]
      exps2['parzen'].append((exp, mean))

      exp = random.explain_instance(dirty_train_vectors[i], 1, None, args.num_features, None)
      exps2['random'].append(exp)

      exp = explainers.explain_greedy_martens(dirty_train_vectors[i], predictions2[i], c2.predict_proba, args.num_features)
      exps2['greedy'].append(exp)

    out = {'true_labels' : true_labels, 'untrustworthy' : untrustworthy, 'train_acc1' :  train_acc1, 'train_acc2' : train_acc2, 'test_acc1' : test_acc1, 'test_acc2' : test_acc2, 'exps1' : exps1, 'exps2': exps2, 'predict_probas1': pp, 'predict_probas2': pp2}
    pickle.dump(out, open(os.path.join(args.output_folder, 'comparing_%s_%s_%d.pickle' % (dataset, args.num_features, Z + args.start_id)), 'w'))
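
The flushes here mark round boundaries and per-50-instance progress inside nested loops that can run for a long time; with output redirected to a file, they are what keeps the log current. A reduced Python 3 sketch of that reporting pattern, with explain_instance as a hypothetical stand-in for the per-instance work:

import sys

def evaluate(num_rounds, instances, explain_instance, report_every=50):
    """Run several experiment rounds, flushing progress so redirected logs stay live."""
    all_results = []
    for z in range(num_rounds):
        print("Round", z)
        sys.stdout.flush()
        round_results = []
        for i, instance in enumerate(instances):
            if i % report_every == 0:
                print(i)
                sys.stdout.flush()   # otherwise nothing shows up until a buffer fills
            round_results.append(explain_instance(instance))
        print("done")
        sys.stdout.flush()
        all_results.append(round_results)
    return all_results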

Example 33

Project: attention-lvcsr
Source File: test_rng_mrg.py
View license
@attr('slow')
def test_normal0():

    steps = 50
    std = 2.
    if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
            mode == 'Mode' and config.linker in ['py']):
        sample_size = (25, 30)
        default_rtol = .02
    else:
        sample_size = (999, 50)
        default_rtol = .01
    sample_size_odd = (sample_size[0], sample_size[1] - 1)
    x = tensor.matrix()

    for size, const_size, var_input, input, avg, rtol, std_tol in [
        (sample_size, sample_size, [], [], -5., default_rtol, default_rtol),
        (x.shape, sample_size, [x],
         [numpy.zeros(sample_size, dtype=config.floatX)],
         -5., default_rtol, default_rtol),
        ((x.shape[0], sample_size[1]), sample_size, [x],
         [numpy.zeros(sample_size, dtype=config.floatX)],
         -5., default_rtol, default_rtol),
        # test odd value
        (sample_size_odd, sample_size_odd, [], [], -5.,
         default_rtol, default_rtol),
        # test odd value
        (x.shape, sample_size_odd, [x],
         [numpy.zeros(sample_size_odd, dtype=config.floatX)],
         -5., default_rtol, default_rtol),
        (sample_size, sample_size, [], [],
         numpy.arange(numpy.prod(sample_size),
                      dtype='float32').reshape(sample_size),
         10. * std / numpy.sqrt(steps), default_rtol),
        # test empty size (scalar)
        ((), (), [], [], -5., default_rtol, 0.02),
        # test with few samples at the same time
        ((1,), (1,), [], [], -5., default_rtol, 0.02),
        ((2,), (2,), [], [], -5., default_rtol, 0.02),
        ((3,), (3,), [], [], -5., default_rtol, 0.02),
            ]:
        # print ''
        # print 'ON CPU:'

        R = MRG_RandomStreams(234, use_cuda=False)
        # Note: we specify `nstreams` to avoid a warning.
        n = R.normal(size=size, avg=avg, std=std,
                     nstreams=rng_mrg.guess_n_streams(size, warn=False))
        f = theano.function(var_input, n, mode=mode)
        # theano.printing.debugprint(f)
        out = f(*input)
        # print 'random?[:10]\n', out[0, 0:10]

        # Increase the number of steps if size implies only a few samples
        if numpy.prod(const_size) < 10:
            steps_ = steps * 50
        else:
            steps_ = steps
        basictest(f, steps_, const_size, target_avg=avg, target_std=std,
                  prefix='mrg ', allow_01=True, inputs=input,
                  mean_rtol=rtol, std_tol=std_tol)

        sys.stdout.flush()

        if mode != 'FAST_COMPILE' and cuda_available:
            # print ''
            # print 'ON GPU:'
            R = MRG_RandomStreams(234, use_cuda=True)
            n = R.normal(size=size, avg=avg, std=std, dtype='float32',
                         nstreams=rng_mrg.guess_n_streams(size, warn=False))
            # well, it's really that this test w GPU doesn't make sense otw
            assert n.dtype == 'float32'
            f = theano.function(var_input, theano.Out(
                theano.sandbox.cuda.basic_ops.gpu_from_host(n),
                borrow=True), mode=mode_with_gpu)

            # theano.printing.debugprint(f)
            sys.stdout.flush()
            gpu_out = numpy.asarray(f(*input))
            # print 'random?[:10]\n', gpu_out[0, 0:10]
            # print '----'
            sys.stdout.flush()
            basictest(f, steps_, const_size, target_avg=avg, target_std=std,
                      prefix='gpu mrg ', allow_01=True, inputs=input,
                      mean_rtol=rtol, std_tol=std_tol)
            # Need to allow some rounding error, as there are float
            # computations that are done on the GPU vs the CPU
            assert numpy.allclose(out, gpu_out, rtol=5e-6, atol=5e-6)

        # print ''
        # print 'ON CPU w NUMPY:'
        RR = theano.tensor.shared_randomstreams.RandomStreams(234)

        nn = RR.normal(size=size, avg=avg, std=std)
        ff = theano.function(var_input, nn)

        basictest(ff, steps_, const_size, target_avg=avg, target_std=std,
                  prefix='numpy ', allow_01=True, inputs=input, mean_rtol=rtol)
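
In this test the flushes bracket the slow compilation and sampling steps, so a partially completed parameter sweep is still visible if the run hangs. The statistical core is simply "announce the case, draw samples, compare the mean against its target within a tolerance"; a small NumPy-only sketch of that sweep follows, with numpy's own generator standing in for MRG_RandomStreams and a made-up ten-standard-error tolerance:

import sys
import numpy

def sweep_normal_tests(configs, sample_fn):
    """For each (size, avg, std), draw samples and check the mean against its target."""
    for size, avg, std in configs:
        print("size=%s avg=%s std=%s ..." % (size, avg, std), end=" ")
        sys.stdout.flush()   # show which case is running before the slow sampling step
        out = sample_fn(size=size, avg=avg, std=std)
        # Allow a generous ten-standard-error tolerance on the sample mean.
        assert numpy.allclose(out.mean(), avg,
                              atol=10. * std / numpy.sqrt(out.size) + 1e-8)
        print("ok")

# numpy's generator stands in for MRG_RandomStreams in this sketch.
rng = numpy.random.RandomState(234)
sweep_normal_tests([((999, 50), -5., 2.), ((25, 30), 0., 1.)],
                   lambda size, avg, std: rng.normal(avg, std, size=size))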

Example 35

Project: pyscf
Source File: icmpspt.py
View license
def icmpspt(mc, pttype="NEVPT2", energyE0=0.0, rdmM=0, frozen=0, PTM=1000, PTincore=False, fciExtraLine=[], have3RDM=False, root=0, nroots=1, verbose=None, AAAVsplit=1):

    #remove the -1 state
    import os
    os.system("rm %s/node0/Rotation*.state-1.tmp"%(mc.fcisolver.scratchDirectory))
    os.system("rm %s/node0/wave*.-1.tmp"%(mc.fcisolver.scratchDirectory))

#    if type(mc.fcisolver) is not dmrgci.DMRGCI:
#        if (mc.fcisolver.fcibase_class is not dmrgci.DMRGCI):
#            print "this works with dmrgscf and not regular mcscf"
#            exit(0)

    if (pttype != "NEVPT2" and AAAVsplit != 1):
        print "AAAVsplit only works with CASSCF natural orbitals and NEVPT2"
        exit(0)

    mc.fcisolver.startM = 100
    mc.fcisolver.maxM = max(rdmM,501)
    mc.fcisolver.clearSchedule()
    mc.fcisolver.restart = False

    if (not have3RDM):
        mc.fcisolver.has_threepdm = False

        # we will redo the calculations, so let's get rid of the -1 states
        import os
        os.system("rm %s/node0/Rotation-*.state-1.tmp"%(mc.fcisolver.scratchDirectory))
        os.system("rm %s/node0/wave-*.-1.tmp"%(mc.fcisolver.scratchDirectory))
        os.system("rm %s/node0/RestartReorder.dat_1"%(mc.fcisolver.scratchDirectory))
    else:
        mc.fcisolver.has_threepdm = True

    mc.fcisolver.generate_schedule()
    mc.fcisolver.extraline = []
    if (PTincore):
        mc.fcisolver.extraline.append('do_npdm_in_core')
    mc.fcisolver.extraline += fciExtraLine


    if (len(mc.fcisolver.orbsym) == 0 and mc.fcisolver.mol.symmetry):
        mcscf.casci_symm.label_symmetry_(mc, mc.mo_coeff)
    ericas = mc.get_h2cas()
    h1e = reduce(numpy.dot, (mc.mo_coeff.T, mc.get_hcore(), mc.mo_coeff))
    dmcore = numpy.dot(mc.mo_coeff[:,:mc.ncore], mc.mo_coeff[:,:mc.ncore].T)*2
    vj, vk = mc._scf.get_jk(mc.mol, dmcore)
    vhfcore = reduce(numpy.dot, (mc.mo_coeff.T, vj-vk*0.5, mc.mo_coeff))
    h1effcas = h1e+vhfcore

    dmrgci.writeIntegralFile(mc.fcisolver, h1effcas[mc.ncore:mc.ncore+mc.ncas, mc.ncore:mc.ncore+mc.ncas], ericas, mc.ncas, mc.nelecas)

    dm1eff = numpy.zeros(shape=(mc.ncas, mc.ncas)) #this is the state average density which is needed in NEVPT2
 
    #loop over all states besides the current root
    if (pttype == "NEVPT2" and nroots>1):
        stateIter = range(nroots)
        stateIter.remove(root)
        for istate in stateIter:
            dm3 = mc.fcisolver.make_rdm3(state=istate, norb=mc.ncas, nelec=mc.nelecas, dt=float_precision)    
            nelec = mc.nelecas[0]+mc.nelecas[1]
            dm2 = numpy.einsum('ijklmk', dm3)/(nelec-2)
            dm1 = numpy.einsum('ijkj', dm2)/(nelec-1)
            dm1eff += dm1

    # now add the contribution due to the current root
    dm3 = mc.fcisolver.make_rdm3(state=root, norb=mc.ncas, nelec=mc.nelecas, dt=float_precision)    
    nelec = mc.nelecas[0]+mc.nelecas[1]
    dm2 = numpy.einsum('ijklmk', dm3)/(nelec-2)
    dm1 = numpy.einsum('ijkj', dm2)/(nelec-1)
    dm1eff += dm1
    dm1eff = dm1eff/(1.0*nroots)
    import os
    os.system("mkdir int")    
    numpy.save("int/E3",dm3)
    numpy.save("int/E3B.npy", dm3.transpose(0,3,1,4,2,5))
    numpy.save("int/E3C.npy", dm3.transpose(5,0,2,4,1,3))
    del dm3

    # back up the RestartReorder file to RestartReorder.dat_1, because responseaaav and responseaaac both overwrite it.
    # This means that if we want to restart a calculation after, say, responseaaav did not finish, the new calculations
    # would otherwise use the RestartReorder file written by the incomplete responseaaav run instead of the one from
    # the original dmrg run.
    reorderf1 = "%s/node0/RestartReorder.dat_1"%(mc.fcisolver.scratchDirectory)
    reorderf = "%s/node0/RestartReorder.dat"%(mc.fcisolver.scratchDirectory)
    import os.path
    reorder1present = os.path.isfile(reorderf1) 
    if (reorder1present):
        from subprocess import check_call
        output = check_call("cp %s %s"%(reorderf1, reorderf), shell=True)
    else :
        from subprocess import check_call
        check_call("cp %s %s"%(reorderf, reorderf1), shell=True)
    reorder = numpy.loadtxt("%s/node0/RestartReorder.dat"%(mc.fcisolver.scratchDirectory))


    if (pttype == "NEVPT2") :
        norbs, energyE0 = writeNevpt2Integrals(mc, dm1, dm2, dm1eff, AAAVsplit, frozen)
        sys.stdout.flush()
        print "wrote the integrals to disk"

        for k in range(AAAVsplit):
            writeDMRGConfFile(mc.nelecas[0], mc.nelecas[1], mc.ncore, mc.ncas,  norbs,
                              mc.fcisolver, PTM, "AAAV", mc.fcisolver.memory, mc.fcisolver.num_thrds, reorder, fciExtraLine, aaavsplit=AAAVsplit, aaavIter=k, root=root, name = "NEVPT2")
        writeDMRGConfFile(mc.nelecas[0], mc.nelecas[1], mc.ncore-frozen, mc.ncas,  norbs-frozen,
                          mc.fcisolver, PTM, "AAAC", mc.fcisolver.memory, mc.fcisolver.num_thrds, reorder, fciExtraLine,root=root, name = "NEVPT2")
        sys.stdout.flush()

        totalE = 0.0;
        totalE += executeNEVPT(nelec, mc.ncore, mc.ncas, frozen, mc.fcisolver.memory)# executeMRLCC(nelec, mc.ncore, mc.ncas)

        try:
            for k in range(AAAVsplit):
                outfile, infile = "responseNEVPT2_aaav%d.out"%(k), "responseNEVPT2_aaav%d.conf"%(k)
                output = check_call("%s  %s  %s > %s"%(mc.fcisolver.mpiprefix, mc.fcisolver.executable, infile, outfile), shell=True)
                file1 = open("%s/node0/dmrg.e"%(mc.fcisolver.scratchDirectory),"rb")
                import struct
                energy = struct.unpack('d', file1.read(8))[0]
                file1.close()
                totalE += energy
                print "perturber AAAV%i --  %18.9e"%(k, energy)
                sys.stdout.flush()

            if (mc.ncore-frozen != 0):
                outfile, infile = "responseNEVPT2_aaac.out", "responseNEVPT2_aaac.conf"
                output = check_call("%s  %s  %s > %s"%(mc.fcisolver.mpiprefix, mc.fcisolver.executable, infile, outfile), shell=True)
                file1 = open("%s/node0/dmrg.e"%(mc.fcisolver.scratchDirectory),"rb")
                energy = struct.unpack('d', file1.read(8))[0]
                file1.close()
                totalE += energy
                print "perturber AAAC --  %18.9e"%(energy)

        except ValueError:
            print(output)

        from subprocess import check_call
        return totalE
    else :
        #this is a bad way to do it, the problem is
        #that pyscf works with double precision and
        #
        #energyE0 = writeMRLCCIntegrals(mc, dm1, dm2)
        #sys.stdout.flush()
        energyE0, norbs = writeNumpyforMRLCC(mc, dm1, dm2, frozen) 
        sys.stdout.flush()
        writeDMRGConfFile(mc.nelecas[0], mc.nelecas[1], mc.ncore, mc.ncas,  norbs,
                          mc.fcisolver, PTM, "AAAV", mc.fcisolver.memory, mc.fcisolver.num_thrds, reorder, fciExtraLine, root=root, name="MRLCC")
        writeDMRGConfFile(mc.nelecas[0], mc.nelecas[1], mc.ncore-frozen, mc.ncas,  norbs-frozen,
                          mc.fcisolver, PTM, "AAAC", mc.fcisolver.memory, mc.fcisolver.num_thrds, reorder, fciExtraLine, root=root, name="MRLCC")
        totalE = 0.0
        totalE +=  executeMRLCC(nelec, mc.ncore, mc.ncas, frozen, mc.fcisolver.memory)
        from subprocess import check_call
        try:
            outfile, infile = "responseMRLCC_aaav0.out", "responseMRLCC_aaav0.conf"
            output = check_call("%s  %s  %s > %s"%(mc.fcisolver.mpiprefix, mc.fcisolver.executable, infile, outfile), shell=True)
            file1 = open("%s/node0/dmrg.e"%(mc.fcisolver.scratchDirectory),"rb")
            import struct
            energy = struct.unpack('d', file1.read(8))[0]
            file1.close()
            totalE += energy
            print "perturber AAAV --  %18.9e"%(energy)
        except ValueError:
            print "perturber AAAV -- NA"
            #exit()

        try:
            if (mc.ncore-frozen != 0):
                outfile, infile = "responseMRLCC_aaac.out", "responseMRLCC_aaac.conf"
                output = check_call("%s  %s  %s > %s"%(mc.fcisolver.mpiprefix, mc.fcisolver.executable, infile, outfile), shell=True)
                file1 = open("%s/node0/dmrg.e"%(mc.fcisolver.scratchDirectory),"rb")
                energy = struct.unpack('d', file1.read(8))[0]
                file1.close()
                totalE += energy
                print "perturber AAAC --  %18.9e"%(energy)
        except ValueError:
            print "perturber AAAC -- NA"

        print "total PT  -- %18.9e"%(totalE)
        return totalE
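
Throughout this driver, sys.stdout.flush() is called right before external solver runs launched with check_call, so Python's buffered status lines ("wrote the integrals to disk", per-perturber energies) appear in the log ahead of the subprocess output they introduce. A minimal sketch of that ordering discipline, with a placeholder command:

import sys
from subprocess import check_call

def run_step(description, command):
    """Print a status line, flush it, then hand output over to a subprocess."""
    print(description)
    sys.stdout.flush()   # keep the parent's status line ahead of the child's output
    check_call(command, shell=True)

run_step("wrote the integrals to disk, launching solver", "echo solver-output")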

Example 36

Project: internetarchive
Source File: files.py
View license
    def download(self, file_path=None, verbose=None, silent=None, ignore_existing=None,
                 checksum=None, destdir=None, retries=None, ignore_errors=None):
        """Download the file into the current working directory.

        :type file_path: str
        :param file_path: Download file to the given file_path.

        :type verbose: bool
        :param verbose: (optional) Turn on verbose output.

        :type silent: bool
        :param silent: (optional) Suppress all output.

        :type ignore_existing: bool
        :param ignore_existing: Overwrite local files if they already
                                exist.

        :type checksum: bool
        :param checksum: (optional) Skip downloading file based on checksum.

        :type destdir: str
        :param destdir: (optional) The directory to download files to.

        :type retries: int
        :param retries: (optional) The number of times to retry on failed
                        requests.

        :type ignore_errors: bool
        :param ignore_errors: (optional) Don't fail if a single file fails to
                              download, continue to download other files.

        :rtype: bool
        :returns: True if file was successfully downloaded.
        """
        verbose = False if verbose is None else verbose
        silent = False if silent is None else silent
        ignore_existing = False if ignore_existing is None else ignore_existing
        checksum = False if checksum is None else checksum
        retries = 2 if not retries else retries
        ignore_errors = False if not ignore_errors else ignore_errors

        self.item.session._mount_http_adapter(max_retries=retries)
        file_path = self.name if not file_path else file_path

        if destdir:
            if not os.path.exists(destdir):
                os.mkdir(destdir)
            if os.path.isfile(destdir):
                raise IOError('{} is not a directory!'.format(destdir))
            file_path = os.path.join(destdir, file_path)

        if os.path.exists(file_path):
            if ignore_existing:
                msg = 'skipping {0}, file already exists.'.format(file_path)
                log.info(msg)
                if verbose:
                    print(' ' + msg)
                elif silent is False:
                    print('.', end='')
                    sys.stdout.flush()
                return
            elif checksum:
                md5_sum = utils.get_md5(open(file_path, 'rb'))
                if md5_sum == self.md5:
                    msg = ('skipping {0}, '
                           'file already exists based on checksum.'.format(file_path))
                    log.info(msg)
                    if verbose:
                        print(' ' + msg)
                    elif silent is False:
                        print('.', end='')
                        sys.stdout.flush()
                    return
            else:
                st = os.stat(file_path)
                if (st.st_mtime == self.mtime) and (st.st_size == self.size) \
                        or self.name.endswith('_files.xml') and st.st_size != 0:
                    msg = ('skipping {0}, file already exists '
                           'based on length and date.'.format(file_path))
                    log.info(msg)
                    if verbose:
                        print(' ' + msg)
                    elif silent is False:
                        print('.', end='')
                        sys.stdout.flush()
                    return

        parent_dir = os.path.dirname(file_path)
        if parent_dir != '' and not os.path.exists(parent_dir):
            os.makedirs(parent_dir)

        try:
            response = self.item.session.get(self.url, stream=True, timeout=12)
            response.raise_for_status()

            chunk_size = 2048
            with open(file_path, 'wb') as f:
                for chunk in response.iter_content(chunk_size=chunk_size):
                    if chunk:
                        f.write(chunk)
                        f.flush()
        except (RetryError, HTTPError, ConnectTimeout,
                ConnectionError, socket.error, ReadTimeout) as exc:
            msg = ('error downloading file {0}, '
                   'exception raised: {1}'.format(file_path, exc))
            log.error(msg)
            if os.path.exists(file_path):
                os.remove(file_path)
            if verbose:
                print(' ' + msg)
            elif silent is False:
                print('e', end='')
                sys.stdout.flush()
            if ignore_errors is True:
                return False
            else:
                raise exc

        # Set mtime with mtime from files.xml.
        os.utime(file_path, (0, self.mtime))

        msg = 'downloaded {0}/{1} to {2}'.format(self.identifier,
                                                 self.name,
                                                 file_path)
        log.info(msg)
        if verbose:
            print(' ' + msg)
        elif silent is False:
            print('d', end='')
            sys.stdout.flush()
        return True
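
In non-verbose mode the method prints a single character per file ('.' skipped, 'd' downloaded, 'e' error) with end='' and a flush, producing a live one-line progress strip. A small sketch of that reporting helper, reusing the status markers from the example (the item and file names are made up):

import sys

def report(status, message, verbose=False, silent=False):
    """Emit either a full message or a one-character progress marker."""
    markers = {"skip": ".", "download": "d", "error": "e"}
    if verbose:
        print(" " + message)
    elif not silent:
        print(markers[status], end="")
        sys.stdout.flush()   # without the flush, the markers appear only at exit

report("download", "downloaded some-item/file.txt to file.txt")
report("skip", "skipping file.txt, file already exists.")
print()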

Example 37

Project: models
Source File: build_imagenet_data.py
View license
def _process_image_files_batch(coder, thread_index, ranges, name, filenames,
                               synsets, labels, humans, bboxes, num_shards):
  """Processes and saves list of images as TFRecord in 1 thread.

  Args:
    coder: instance of ImageCoder to provide TensorFlow image coding utils.
    thread_index: integer, unique batch index to run; within [0, len(ranges)).
    ranges: list of pairs of integers specifying the range of each batch to
      analyze in parallel.
    name: string, unique identifier specifying the data set
    filenames: list of strings; each string is a path to an image file
    synsets: list of strings; each string is a unique WordNet ID
    labels: list of integer; each integer identifies the ground truth
    humans: list of strings; each string is a human-readable label
    bboxes: list of bounding boxes for each image. Note that each entry in this
      list might contain zero or more entries, corresponding to the number of
      bounding box annotations for the image.
    num_shards: integer number of shards for this data set.
  """
  # Each thread produces N shards where N = int(num_shards / num_threads).
  # For instance, if num_shards = 128, and the num_threads = 2, then the first
  # thread would produce shards [0, 64).
  num_threads = len(ranges)
  assert not num_shards % num_threads
  num_shards_per_batch = int(num_shards / num_threads)

  shard_ranges = np.linspace(ranges[thread_index][0],
                             ranges[thread_index][1],
                             num_shards_per_batch + 1).astype(int)
  num_files_in_thread = ranges[thread_index][1] - ranges[thread_index][0]

  counter = 0
  for s in xrange(num_shards_per_batch):
    # Generate a sharded version of the file name, e.g. 'train-00002-of-00010'
    shard = thread_index * num_shards_per_batch + s
    output_filename = '%s-%.5d-of-%.5d' % (name, shard, num_shards)
    output_file = os.path.join(FLAGS.output_directory, output_filename)
    writer = tf.python_io.TFRecordWriter(output_file)

    shard_counter = 0
    files_in_shard = np.arange(shard_ranges[s], shard_ranges[s + 1], dtype=int)
    for i in files_in_shard:
      filename = filenames[i]
      label = labels[i]
      synset = synsets[i]
      human = humans[i]
      bbox = bboxes[i]

      image_buffer, height, width = _process_image(filename, coder)

      example = _convert_to_example(filename, image_buffer, label,
                                    synset, human, bbox,
                                    height, width)
      writer.write(example.SerializeToString())
      shard_counter += 1
      counter += 1

      if not counter % 1000:
        print('%s [thread %d]: Processed %d of %d images in thread batch.' %
              (datetime.now(), thread_index, counter, num_files_in_thread))
        sys.stdout.flush()

    writer.close()
    print('%s [thread %d]: Wrote %d images to %s' %
          (datetime.now(), thread_index, shard_counter, output_file))
    sys.stdout.flush()
    shard_counter = 0
  print('%s [thread %d]: Wrote %d images to %d shards.' %
        (datetime.now(), thread_index, counter, num_files_in_thread))
  sys.stdout.flush()
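
Because several threads emit progress lines concurrently, each print is followed by a flush so output from the different threads reaches the log promptly rather than in large buffered bursts. A sketch of the per-thread counter logic without the TensorFlow specifics, where process_item is a hypothetical stand-in for decoding and writing one image:

import sys
from datetime import datetime

def process_range(thread_index, items, process_item, report_every=1000):
    """Process one thread's slice of the work, reporting progress as it goes."""
    counter = 0
    for item in items:
        process_item(item)
        counter += 1
        if not counter % report_every:
            print("%s [thread %d]: Processed %d of %d items." %
                  (datetime.now(), thread_index, counter, len(items)))
            sys.stdout.flush()
    print("%s [thread %d]: Finished %d items." % (datetime.now(), thread_index, counter))
    sys.stdout.flush()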

Example 39

Project: SchoolIdolAPI
Source File: migrate_activities.py
View license
    def handle(self, *args, **options):

        print 'Delete activities max bonded/max leveled...'
        while models.Activity.objects.filter(Q(message='Max Leveled a card') | Q(message='Max Bonded a card')).count():
            ids = list(models.Activity.objects.filter(Q(message='Max Leveled a card') | Q(message='Max Bonded a card')).values_list('pk', flat=True)[:100])
            total_this = models.Activity.objects.filter(pk__in=ids).delete()
        print 'Done.'

        print 'Delete activities rank in event without ranking...'
        while get_rankevent_withoutranking_queryset().count():
            ids = list(get_rankevent_withoutranking_queryset().values_list('pk', flat=True)[:100])
            total_this = get_rankevent_withoutranking_queryset().filter(pk__in=ids).delete()
        print 'Done.'

        print 'Cache for owned cards activities'
        while get_ownedcardqueryset().count():
            activities_ownedcards = get_ownedcardqueryset()[:500]
            for activity in activities_ownedcards:
                account = activity.ownedcard.owner_account
                # Fix account
                activity.account = account
                # Cache
                activity = activity_cache_account(activity, account)
                activity.message_data = concat_args(unicode(activity.ownedcard.card), activity.ownedcard.stored)
                activity.right_picture_link = singlecardurl(activity.ownedcard.card)
                activity.right_picture = ownedcardimageurl({}, activity.ownedcard)
                activity.save()
            print '.',
            sys.stdout.flush()
        print 'Done.'

        print 'Cache for rank up activities'
        while get_rankupqueryset().count():
            activities_rankup = get_rankupqueryset()[:500]
            for activity in activities_rankup:
                # Cache
                activity = activity_cache_account(activity, activity.account)
                activity.message_data = concat_args(activity.number)
                activity.save()
            print '.',
            sys.stdout.flush()
        print 'Done.'

        print 'Cache for Rank in event activities'
        while get_rankeventqueryset().count():
            activities_rankevent = get_rankeventqueryset()[:500]
            for activity in activities_rankevent:
                # Cache
                activity = activity_cache_account(activity, activity.account)
                activity.message_data = concat_args(activity.eventparticipation.ranking,
                                                    unicode(activity.eventparticipation.event))
                activity.right_picture = eventimageurl({}, activity.eventparticipation.event, english=(activity.account.language != 'JP'))
                activity.right_picture_link = '/events/' + activity.eventparticipation.event.japanese_name + '/'
                activity.save()
            print '.',
            sys.stdout.flush()
        print 'Done.'

        print 'Remove duplicate activities with ownedcards'
        lastSeenId = float('-Inf')
        i = 0
        total_deleted = 0
        while get_duplicateownedcard_queryset()[i:i+500].count():
            activities_ownedcards = get_duplicateownedcard_queryset()[i:i+500]
            for activity in activities_ownedcards:
                if activity.ownedcard_id == lastSeenId:
                    print 'delete', activity
                    activity.delete()
                    total_deleted += 1
                else:
                    lastSeenId = activity.ownedcard_id
                pass
            i += 500
            print '.',
            sys.stdout.flush()
        print 'Done.'
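
This management command prints a dot (via Python 2's trailing-comma print) and flushes after each batch of 500 rows, so the operator can see that a long data migration is still moving. Rendered in Python 3, the batching-with-dots loop looks roughly like this, with get_queryset and fix as hypothetical stand-ins for the Django queryset and the per-object fix-up:

import sys

def migrate_in_batches(get_queryset, fix, batch_size=500):
    """Re-process a queryset in fixed-size batches, printing a dot per batch."""
    while get_queryset().count():
        for obj in get_queryset()[:batch_size]:
            fix(obj)
            obj.save()       # saving must drop obj from the queryset, or this never ends
        print(".", end=" ")
        sys.stdout.flush()   # lets the operator see the migration is still moving
    print("Done.")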

Example 40

Project: prettytensor
Source File: local_trainer.py
View license
  def run_model(self,
                op_list,
                num_steps,
                feed_vars=(),
                feed_data=None,
                print_every=100,
                allow_initialize=True):
    """Runs `op_list` for `num_steps`.

    Args:
      op_list: A list of ops to run.
      num_steps: Number of steps to run this for.  If feeds are used, this is a
        maximum.
      feed_vars: The variables to feed.
      feed_data: An iterator that feeds data tuples.
      print_every: Print a log line and checkpoint every so many steps.
      allow_initialize: If True, the model will be initialized if any variable
        is uninitialized; if False, the model will not be initialized.
    Returns:
      The final run result as a list.
    Raises:
      ValueError: If feed_data doesn't match feed_vars.
    """
    feed_data = feed_data or itertools.repeat(())

    ops = [bookkeeper.global_step()]
    ops.extend(op_list)

    sess = tf.get_default_session()
    self.prepare_model(sess, allow_initialize=allow_initialize)
    results = []
    try:
      for i, data in zip(xrange(num_steps), feed_data):
        log_this_time = print_every and i % print_every == 0
        if len(data) != len(feed_vars):
          raise ValueError(
              'feed_data and feed_vars must be the same length: %d vs %d' % (
                  len(data), len(feed_vars)))
        if self._coord.should_stop():
          print('Coordinator stopped')
          sys.stdout.flush()
          self.stop_queues()
          break
        if len(feed_vars) != len(data):
          raise ValueError('Feed vars must be the same length as data.')

        if log_this_time and self._summary_writer:
          results = sess.run(ops + [self._summaries],
                             dict(zip(feed_vars, data)))
          self._summary_writer.add_summary(results[-1], results[0])
          results = results[:-1]
        else:
          results = sess.run(ops, dict(zip(feed_vars, data)))
        if log_this_time:
          self._log_and_save(sess, results)

      # Print the last line if it wasn't just printed
      if print_every and not log_this_time:
        self._log_and_save(sess, results)
    except tf.errors.OutOfRangeError as ex:
      print('Done training -- epoch limit reached %s' % ex)
      sys.stdout.flush()
      self.stop_queues()
    except BaseException as ex:
      print('Exception -- stopping threads: %s' % ex, file=sys.stderr)
      sys.stdout.flush()
      self.stop_queues()
      raise
    return results
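
The run_model loop above flushes right after printing 'Coordinator stopped' or an exception message, because the call that follows (stopping queue threads, re-raising) can be slow or can end the process before buffered output is ever written. A minimal sketch of that ordering, using invented names rather than the prettytensor/TensorFlow API:

import sys
import time

def run_steps(num_steps, should_stop, stop_queues):
    for _ in range(num_steps):
        if should_stop():
            print('Coordinator stopped')
            sys.stdout.flush()     # flush before the potentially slow shutdown
            stop_queues()          # stand-in for joining queue runner threads
            break
        time.sleep(0.05)           # stand-in for one training step

if __name__ == '__main__':
    deadline = time.time() + 1.0
    run_steps(100,
              should_stop=lambda: time.time() > deadline,
              stop_queues=lambda: time.sleep(0.5))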

Example 41

Project: reseg
Source File: helper_dataset.py
View license
def preprocess_dataset(train, valid, test,
                       input_to_float,
                       preprocess_type,
                       patch_size, max_patches):

    if input_to_float and preprocess_type is None:
        train_norm = train[0].astype(floatX) / 255.
        train = (train_norm, train[1])
        valid_norm = valid[0].astype(floatX) / 255.
        valid = (valid_norm, valid[1])
        test_norm = test[0].astype(floatX) / 255.
        test = (test_norm, test[1])

    if preprocess_type is None:
        return train, valid, test

    # whiten, LCN, GCN, Local Mean Subtract, or normalize
    if len(train[0]) > 0:
        train_pre = []
        print ""
        print "Preprocessing {} images of the train set with {} {} ".format(
            len(train[0]), preprocess_type, patch_size),
        print ""
        i = 0
        print "Progress: {0:.3g} %".format(i * 100 / len(train[0])),
        for i, x in enumerate(train[0]):
            img = np.expand_dims(x, axis=0)
            x_pre = preprocess(img, preprocess_type,
                               patch_size,
                               max_patches)
            train_pre.append(x_pre[0])
            print "\rProgress: {0:.3g} %".format(i * 100 / len(train[0])),
            sys.stdout.flush()

        if input_to_float:
            train_pre = np.array(train_pre).astype(floatX) / 255.
        train = (np.array(train_pre), np.array(train[1]))

    if len(valid[0]) > 0:
        valid_pre = []
        print ""
        print "Preprocessing {} images of the valid set with {} {} ".format(
            len(valid[0]), preprocess_type, patch_size),
        print ""
        i = 0
        print "Progress: {0:.3g} %".format(i * 100 / len(valid[0])),
        for i, x in enumerate(valid[0]):
            img = np.expand_dims(x, axis=0)
            x_pre = preprocess(img, preprocess_type,
                               patch_size,
                               max_patches)
            valid_pre.append(x_pre[0])
            print "\rProgress: {0:.3g} %".format(i * 100 / len(valid[0])),
            sys.stdout.flush()

        if input_to_float:
            valid_pre = np.array(valid_pre).astype(floatX) / 255.
        valid = (np.array(valid_pre), np.array(valid[1]))

    if len(test[0]) > 0:
        test_pre = []
        print ""
        print "Preprocessing {} images of the test set with {} {} ".format(
            len(test[0]), preprocess_type, patch_size),
        print ""
        i = 0
        print "Progress: {0:.3g} %".format(i * 100 / len(test[0])),
        for i, x in enumerate(test[0]):
            img = np.expand_dims(x, axis=0)
            x_pre = preprocess(img, preprocess_type,
                               patch_size,
                               max_patches)
            test_pre.append(x_pre[0])
            print "\rProgress: {0:.3g} %".format(i * 100 / len(test[0])),
            sys.stdout.flush()

        if input_to_float:
            test_pre = np.array(test_pre).astype(floatX) / 255.
        test = (np.array(test_pre), np.array(test[1]))

    return train, valid, test
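
The progress line above relies on '\r' to rewrite the same terminal row; because nothing ends in a newline, the update only becomes visible if stdout is flushed after each print. A stripped-down sketch of that carriage-return pattern (hypothetical names, Python 3 syntax):

import sys
import time

def show_progress(n_items):
    for i in range(n_items):
        time.sleep(0.01)   # stand-in for preprocessing one image
        # '\r' returns to column 0; end='' keeps the line open, so flush()
        # is what actually pushes the updated percentage to the terminal.
        print('\rProgress: {0:.3g} %'.format(i * 100 / n_items), end='')
        sys.stdout.flush()
    print('\rProgress: 100 %   ')

if __name__ == '__main__':
    show_progress(200)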

Example 42

View license
def main():
    # #############################
    print("Creating data...", end=' ')
    sys.stdout.flush()

    data_list = []  # 500,000 DataPoint items
    random.seed(0)
    for d_id in range(500000):
        x = random.randint(0, 1000)
        y = random.randint(0, 1000)
        temp = random.randint(-10, 50)
        quality = random.random()
        data_list.append(DataPoint(d_id, x, y, temp, quality))

    print("done.")
    sys.stdout.flush()

    # Reordering data for random access
    print("Reordering data for random access ...", end=' ')
    sys.stdout.flush()

    data_list.sort(key=lambda d: d.quality)

    print("done.")

    # Create a set of random IDs to locate without duplication
    interesting_ids = {random.randint(0, len(data_list)) for _ in range(0, 100)}
    print("Creating {} interesting IDs to seek.".format(len(interesting_ids)))

    # Locating data in list
    print("Locating data in list...", end=' ')
    sys.stdout.flush()

    t0 = datetime.datetime.now()
    interesting_points = []
    for i in interesting_ids:
        pt = find_point_by_id_in_list(data_list, i)
        interesting_points.append(pt)

    t1 = datetime.datetime.now()
    dt_list = (t1 - t0).total_seconds()

    print("done.")
    sys.stdout.flush()

    print("dt: {} sec".format(dt_list))
    print(interesting_points)

    # #############################

    # let's try this with a dictionary...
    # 1. Create dictionary via comprehension, key = id

    t0 = datetime.datetime.now()
    data_dict = {d.id: d for d in data_list}

    # 2. locate the data in dictionary
    interesting_points.clear()
    for d_id in interesting_ids:
        d = data_dict[d_id]
        interesting_points.append(d)

    t1 = datetime.datetime.now()
    dt_dict = (t1 - t0).total_seconds()

    print("done.")
    sys.stdout.flush()

    print("dt: {} sec".format(dt_dict))
    print(interesting_points)
    print()
    print("Speedup from dict: {:,.0f}x".format(round(dt_list / dt_dict)))

Example 43

Project: mrepo
Source File: wrapperUtils.py
View license
    def callback(self, what, amount, total, hdr, path):
#        print "what: %s amount: %s total: %s hdr: %s path: %s" % (
#          what, amount, total, hdr, path)

        if what == rpm.RPMCALLBACK_INST_OPEN_FILE:
            fileName = "%s/%s-%s-%s.%s.rpm" % (path,
                                               hdr['name'],
                                               hdr['version'],
                                               hdr['release'],
                                               hdr['arch'])
            try:
                self.fd = os.open(fileName, os.O_RDONLY)
            except OSError:
                raise up2dateErrors.RpmError("Error opening %s" % fileName)

            return self.fd
        elif what == rpm.RPMCALLBACK_INST_CLOSE_FILE:
            os.close(self.fd)
            self.fd = 0

        elif what == rpm.RPMCALLBACK_INST_START:
            self.hashesPrinted = 0
            self.lastPercent = 0
            if type(hdr) == type(""):
                print "     %-23.23s" % ( hdr),
                sys.stdout.flush()

            else:
                fileName = "%s/%s-%s-%s.%s.rpm" % (path,
                                                   hdr['name'],
                                                   hdr['version'],
                                                   hdr['release'],
                                                   hdr['arch'])
                if self.cfg["isatty"]:
                    if self.progressCurrent == 0:
                        printit("Installing") 
                    print "%4d:%-23.23s" % (self.progressCurrent + 1,
                                            hdr['name']),
                    sys.stdout.flush()
                else:
                    printit("Installing %s" % fileName)


        # gets called at the start of each repackage, with a count of
        # which package and a total of the number of packages aka:
        # amount: 2 total: 7 for the second package being repackaged
        # out of 7. That sounds obvious, doesn't it?
        elif what == rpm.RPMCALLBACK_REPACKAGE_PROGRESS:
            pass
#            print "what: %s amount: %s total: %s hdr: %s path: %s" % (
#            what, amount, total, hdr, path)
#            self.printRpmHash(amount, total, noInc=1)
            
        elif what == rpm.RPMCALLBACK_REPACKAGE_START:
            printit( "Repackaging")
            #sys.stdout.flush()
            #print "what: %s amount: %s total: %s hdr: %s path: %s" % (
            # what, amount, total, hdr, path)
            
        elif what == rpm.RPMCALLBACK_INST_PROGRESS:
            if type(hdr) == type(""):
                # repackage...
                self.printRpmHash(amount,total, noInc=1)
            else:
                self.printRpmHash(amount,total)


        elif what == rpm.RPMCALLBACK_TRANS_PROGRESS:
            self.printRpmHash(amount, total, noInc=1)

            
        elif what == rpm.RPMCALLBACK_TRANS_START:
            self.hashesPrinted = 0
            self.lastPercent = 0
            self.progressTotal = 1
            self.progressCurrent = 0
            print "%-23.23s" % "Preparing",
            sys.stdout.flush()

        elif what == rpm.RPMCALLBACK_TRANS_STOP:
            self.printRpmHash(1, 1)
            self.progressTotal = self.packagesTotal
            self.progressCurrent = 0
            
        elif (what == rpm.RPMCALLBACK_UNINST_PROGRESS or
              what == rpm.RPMCALLBACK_UNINST_START or
              what == rpm.RPMCALLBACK_UNINST_STOP):
            pass
        
        if hasattr(rpm, "RPMCALLBACK_UNPACK_ERROR"):
            if ((what == rpm.RPMCALLBACK_UNPACK_ERROR) or
                (what == rpm.RPMCALLBACK_CPIO_ERROR)):
                pkg = "%s-%s-%s" % (hdr[rpm.RPMTAG_NAME],
                                    hdr[rpm.RPMTAG_VERSION],
                                    hdr[rpm.RPMTAG_RELEASE])

                if what == rpm.RPMCALLBACK_UNPACK_ERROR:
                    raise up2dateErrors.RpmInstallError, (
                        "There was a rpm unpack error "\
                        "installing the package: %s" % pkg, pkg)
                elif what == rpm.RPMCALLBACK_CPIO_ERROR:
                    raise up2dateErrors.RpmInstallError, (
                        "There was a cpio error "\
                        "installing the package: %s" % pkg, pkg)
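
The callback above prints a fixed-width package label with a trailing comma and flushes it, so the hash marks emitted later by printRpmHash land on the same, already visible line. A self-contained sketch of that label-then-hashes pattern (the names below are made up, not the rpm or up2date API):

import sys
import time

def install_with_hashes(index, name, total_hashes=20):
    print('%4d:%-23.23s' % (index, name), end=' ')
    sys.stdout.flush()            # the label must be visible before any hashes
    for _ in range(total_hashes):
        time.sleep(0.05)          # stand-in for unpacking part of the payload
        sys.stdout.write('#')
        sys.stdout.flush()
    print()

if __name__ == '__main__':
    for i, pkg in enumerate(['foo-1.0-1.noarch', 'bar-2.3-4.x86_64'], start=1):
        install_with_hashes(i, pkg)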

Example 44

Project: pyzui
Source File: benchmark.py
View license
def benchmark(filename, ppmfile):
    print "Benchmarking %s ..." % os.path.basename(filename)

    base_mem = mem()

    ## conversion
    c = MagickConverter(filename, ppmfile)
    start_time = time.time()
    print "Converting to PPM...",
    sys.stdout.flush()
    c.run()
    end_time = time.time()
    print "Done: took %.2fs" % (end_time - start_time)
    del c

    ## metadata
    f = open(ppmfile, 'rb')
    w,h = read_ppm_header(f)
    f.close()
    print "Dimensions: %dx%d, %.2f megapixels" % (w, h, w * h * 1e-6)
    del f, w, h

    ## tiling
    t = PPMTiler(ppmfile)
    start_time = time.time()
    print "Tiling...",
    sys.stdout.flush()
    t.run()
    end_time = time.time()

    ## in general, python doesn't necessarily return allocated memory to the OS
    ## (see <http://effbot.org/pyfaq/
    ## why-doesnt-python-release-the-memory-when-i-delete-a-large-object.htm>)
    ## so the current memory usage is likely to be approximately equal to the
    ## peak memory usage during tiling
    ## however, it would probably be better to periodically check memory usage
    ## while the tiler is running and maintain a max value
    end_mem = mem()

    print "Done: took %.2fs consuming %.2fMB RAM" % \
        ((end_time - start_time), (end_mem - base_mem) * 1e-3)
    del t

    ## zooming
    viewport_w = 800
    viewport_h = 600
    print "Viewport: %dx%d" % (viewport_w, viewport_h)
    zoom_amount = 5.0
    print "Zoom amount: %.1f" % zoom_amount

    qzui = QZUI()
    qzui.framerate = None
    qzui.resize(viewport_w, viewport_h)
    qzui.show()

    scene = Scene.new()
    qzui.scene = scene
    obj = TiledMediaObject(ppmfile, scene, True)
    scene.add(obj)
    obj.fit((0, 0, viewport_w, viewport_h))

    start_time = time.time()
    print "Zooming (cold)...",
    sys.stdout.flush()
    num_frames = 100
    for i in xrange(num_frames):
        qzui.repaint()
        scene.centre = (viewport_w/2, viewport_h/2)
        scene.zoom(zoom_amount/num_frames)
    end_time = time.time()
    print "Done: %d frames took %.2fs, mean framerate %.2f FPS" % \
        (num_frames, (end_time - start_time),
        num_frames / (end_time - start_time))

    scene.zoom(-zoom_amount)
    start_time = time.time()
    print "Zooming (warm)...",
    sys.stdout.flush()
    num_frames = 100
    for i in xrange(num_frames):
        qzui.repaint()
        scene.centre = (viewport_w/2, viewport_h/2)
        scene.zoom(zoom_amount/num_frames)
    end_time = time.time()
    print "Done: %d frames took %.2fs, mean framerate %.2f FPS" % \
        (num_frames, (end_time - start_time),
        num_frames / (end_time - start_time))
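
Every stage of the benchmark prints "Doing X...", flushes, runs the stage, then reports the elapsed time on the same line. That timing idiom can be factored into a tiny helper (a sketch with invented names, not part of pyzui):

import sys
import time

def timed_step(label, fn):
    print('%s...' % label, end=' ')
    sys.stdout.flush()        # show the label while fn() is still running
    start = time.time()
    result = fn()
    print('Done: took %.2fs' % (time.time() - start))
    return result

if __name__ == '__main__':
    timed_step('Converting to PPM', lambda: time.sleep(1.5))
    timed_step('Tiling', lambda: time.sleep(0.8))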

Example 45

Project: edx2bigquery
Source File: analyze_content.py
View license
def analyze_course_content(course_id, 
                           listings_file=None,
                           basedir="X-Year-2-data-sql", 
                           datedir="2013-09-21", 
                           use_dataset_latest=False,
                           do_upload=False,
                           courses=None,
                           verbose=True,
                           pin_date=None,
                           ):
    '''
    Compute course_content table, which quantifies:

    - number of chapter, sequential, vertical modules
    - number of video modules
    - number of problem, *openended, mentoring modules
    - number of discussion, annotatable, word_cloud modules

    Do this using the course "xbundle" file, produced when the course axis is computed.

    Include only modules which had nontrivial use, to rule out the staff and un-shown content. 
    Do the exclusion based on count of module appearing in the studentmodule table, based on 
    stats_module_usage for each course.

    Also, from the course listings file, compute the number of weeks the course was open.

    If do_upload (triggered by --force-recompute) then upload all accumulated data to the course report dataset 
    as the "stats_course_content" table.  Also generate a "course_summary_stats" table, stored in the
    course_report_ORG or course_report_latest dataset.  The course_summary_stats table combines
    data from many reports, including stats_course_content, the medians report, the listings file,
    broad_stats_by_course, and time_on_task_stats_by_course.
    
    '''

    if do_upload:
        if use_dataset_latest:
            org = "latest"
        else:
            org = courses[0].split('/',1)[0]	# extract org from first course_id in courses

        crname = 'course_report_%s' % org

        gspath = gsutil.gs_path_from_course_id(crname)
        gsfnp = gspath / CCDATA
        gsutil.upload_file_to_gs(CCDATA, gsfnp)
        tableid = "stats_course_content"
        dataset = crname

        mypath = os.path.dirname(os.path.realpath(__file__))
        SCHEMA_FILE = '%s/schemas/schema_content_stats.json' % mypath

        try:
            the_schema = json.loads(open(SCHEMA_FILE).read())[tableid]
        except Exception as err:
            print "Oops!  Failed to load schema file for %s.  Error: %s" % (tableid, str(err))
            raise

        if 0:
            bqutil.load_data_to_table(dataset, tableid, gsfnp, the_schema, wait=True, verbose=False,
                                      format='csv', skiprows=1)

        table = 'course_metainfo'
        course_tables = ',\n'.join([('[%s.course_metainfo]' % bqutil.course_id2dataset(x)) for x in courses])
        sql = "select * from {course_tables}".format(course_tables=course_tables)
        print "--> Creating %s.%s using %s" % (dataset, table, sql)

        if 1:
            metainfo_dataset = bqutil.get_bq_table(dataset, table, sql=sql, 
                                          newer_than=datetime.datetime(2015, 1, 16, 3, 0),
                                          )
            # bqutil.create_bq_table(dataset, table, sql, overwrite=True)


        #-----------------------------------------------------------------------------
        # make course_summary_stats table
        #
        # This is a combination of the broad_stats_by_course table (if that exists), and course_metainfo.
        # Also use (and create if necessary) the nregistered_by_wrap table.

        # get the broad_stats_by_course data
        bsbc = bqutil.get_table_data(dataset, 'broad_stats_by_course')

        table_list = bqutil.get_list_of_table_ids(dataset)

        latest_person_course = max([ x for x in table_list if x.startswith('person_course_')])
        print "Latest person_course table in %s is %s" % (dataset, latest_person_course)
        
        sql = """
                SELECT pc.course_id as course_id, 
                    cminfo.wrap_date as wrap_date,
                    count(*) as nregistered,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) nregistered_by_wrap,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) / nregistered * 100 nregistered_by_wrap_pct,
                FROM
                    [{dataset}.{person_course}] as pc
                left join (
                 SELECT course_id,
                      TIMESTAMP(concat(wrap_year, "-", wrap_month, '-', wrap_day, ' 23:59:59')) as wrap_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as wrap_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as wrap_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as wrap_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Course Wrap'
                 )) as cminfo
                on pc.course_id = cminfo.course_id
                
                group by course_id, wrap_date
                order by course_id
        """.format(dataset=dataset, person_course=latest_person_course)

        nr_by_wrap = bqutil.get_bq_table(dataset, 'nregistered_by_wrap', sql=sql, key={'name': 'course_id'})

        # rates for registrants before and during course
        
        sql = """
                SELECT 
                    *,
                    ncertified / nregistered * 100 as pct_certified_of_reg,
                    ncertified_and_registered_before_launch / nregistered_before_launch * 100 as pct_certified_reg_before_launch,
                    ncertified_and_registered_during_course / nregistered_during_course * 100 as pct_certified_reg_during_course,
                    ncertified / nregistered_by_wrap * 100 as pct_certified_of_reg_by_wrap,
                    ncertified / nviewed * 100 as pct_certified_of_viewed,
                    ncertified / nviewed_by_wrap * 100 as pct_certified_of_viewed_by_wrap,
                    ncertified_by_ewrap / nviewed_by_ewrap * 100 as pct_certified_of_viewed_by_ewrap,
                FROM
                (
                # ------------------------
                # get aggregate data
                SELECT pc.course_id as course_id, 
                    cminfo.wrap_date as wrap_date,
                    count(*) as nregistered,
                    sum(case when pc.certified then 1 else 0 end) ncertified,
                    sum(case when (TIMESTAMP(pc.cert_created_date) < cminfo.ewrap_date) and (pc.certified and pc.viewed) then 1 else 0 end) ncertified_by_ewrap,
                    sum(case when pc.viewed then 1 else 0 end) nviewed,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) nregistered_by_wrap,
                    sum(case when pc.start_time < cminfo.wrap_date then 1 else 0 end) / nregistered * 100 nregistered_by_wrap_pct,
                    sum(case when (pc.start_time < cminfo.wrap_date) and pc.viewed then 1 else 0 end) nviewed_by_wrap,
                    sum(case when (pc.start_time < cminfo.ewrap_date) and pc.viewed then 1 else 0 end) nviewed_by_ewrap,
                    sum(case when pc.start_time < cminfo.launch_date then 1 else 0 end) nregistered_before_launch,
                    sum(case when pc.start_time < cminfo.launch_date 
                              and pc.certified
                              then 1 else 0 end) ncertified_and_registered_before_launch,
                    sum(case when (pc.start_time >= cminfo.launch_date) 
                              and (pc.start_time < cminfo.wrap_date) then 1 else 0 end) nregistered_during_course,
                    sum(case when (pc.start_time >= cminfo.launch_date) 
                              and (pc.start_time < cminfo.wrap_date) 
                              and pc.certified
                              then 1 else 0 end) ncertified_and_registered_during_course,
                FROM
                    [{dataset}.{person_course}] as pc
                left join (
                
                # --------------------
                #  get course launch and wrap dates from course_metainfo

       SELECT AA.course_id as course_id, 
              AA.wrap_date as wrap_date,
              AA.launch_date as launch_date,
              BB.ewrap_date as ewrap_date,
       FROM (
               #  inner get course launch and wrap dates from course_metainfo
                SELECT A.course_id as course_id,
                  A.wrap_date as wrap_date,
                  B.launch_date as launch_date,
                from
                (
                 SELECT course_id,
                      TIMESTAMP(concat(wrap_year, "-", wrap_month, '-', wrap_day, ' 23:59:59')) as wrap_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as wrap_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as wrap_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as wrap_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Course Wrap'
                 )
                ) as A
                left outer join 
                (
                 SELECT course_id,
                      TIMESTAMP(concat(launch_year, "-", launch_month, '-', launch_day)) as launch_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as launch_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as launch_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as launch_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Course Launch'
                 )
                ) as B
                on A.course_id = B.course_id 
                # end inner course_metainfo subquery
            ) as AA
            left outer join
            (
                 SELECT course_id,
                      TIMESTAMP(concat(wrap_year, "-", wrap_month, '-', wrap_day, ' 23:59:59')) as ewrap_date,
                 FROM (
                  SELECT course_id, 
                    regexp_extract(value, r'(\d+)/\d+/\d+') as wrap_month,
                    regexp_extract(value, r'\d+/(\d+)/\d+') as wrap_day,
                    regexp_extract(value, r'\d+/\d+/(\d+)') as wrap_year,
                  FROM [{dataset}.course_metainfo]
                  where key='listings_Empirical Course Wrap'
                 )
            ) as BB
            on AA.course_id = BB.course_id

                # end course_metainfo subquery
                # --------------------
                
                ) as cminfo
                on pc.course_id = cminfo.course_id
                
                group by course_id, wrap_date
                order by course_id
                # ---- end get aggregate data
                )
                order by course_id
        """.format(dataset=dataset, person_course=latest_person_course)

        print "--> Assembling course_summary_stats from %s" % 'stats_cert_rates_by_registration'
        sys.stdout.flush()
        cert_by_reg = bqutil.get_bq_table(dataset, 'stats_cert_rates_by_registration', sql=sql, 
                                          newer_than=datetime.datetime(2015, 1, 16, 3, 0),
                                          key={'name': 'course_id'})

        # start assembling course_summary_stats

        c_sum_stats = defaultdict(OrderedDict)
        for entry in bsbc['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            cmci.update(entry)
            cnbw = nr_by_wrap['data_by_key'][course_id]
            nbw = int(cnbw['nregistered_by_wrap'])
            cmci['nbw_wrap_date'] = cnbw['wrap_date']
            cmci['nregistered_by_wrap'] = nbw
            cmci['nregistered_by_wrap_pct'] = cnbw['nregistered_by_wrap_pct']
            cmci['frac_female'] = float(entry['n_female_viewed']) / (float(entry['n_male_viewed']) + float(entry['n_female_viewed']))
            ncert = float(cmci['certified_sum'])
            if ncert:
                cmci['certified_of_nregistered_by_wrap_pct'] = nbw / ncert * 100.0
            else:
                cmci['certified_of_nregistered_by_wrap_pct'] = None
            cbr = cert_by_reg['data_by_key'][course_id]
            for field, value in cbr.items():
                cmci['cbr_%s' % field] = value

        # add medians for viewed, explored, and certified

        msbc_tables = {'msbc_viewed': "viewed_median_stats_by_course",
                       'msbc_explored': 'explored_median_stats_by_course',
                       'msbc_certified': 'certified_median_stats_by_course',
                       'msbc_verified': 'verified_median_stats_by_course',
                       }
        for prefix, mtab in msbc_tables.items():
            print "--> Merging median stats data from %s" % mtab
            sys.stdout.flush()
            bqdat = bqutil.get_table_data(dataset, mtab)
            for entry in bqdat['data']:
                course_id = entry['course_id']
                cmci = c_sum_stats[course_id]
                for field, value in entry.items():
                    cmci['%s_%s' % (prefix, field)] = value

        # add time on task data

        tot_table = "time_on_task_stats_by_course"
        prefix = "ToT"
        print "--> Merging time on task data from %s" % tot_table
        sys.stdout.flush()
        try:
            bqdat = bqutil.get_table_data(dataset, tot_table)
        except Exception as err:
            bqdat = {'data': {}}
        for entry in bqdat['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            for field, value in entry.items():
                if field=='course_id':
                    continue
                cmci['%s_%s' % (prefix, field)] = value

        # add serial time on task data

        tot_table = "time_on_task_serial_stats_by_course"
        prefix = "SToT"
        print "--> Merging serial time on task data from %s" % tot_table
        sys.stdout.flush()
        try:
            bqdat = bqutil.get_table_data(dataset, tot_table)
        except Exception as err:
            bqdat = {'data': {}}
        for entry in bqdat['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            for field, value in entry.items():
                if field=='course_id':
                    continue
                cmci['%s_%s' % (prefix, field)] = value

        # add show_answer stats

        tot_table = "show_answer_stats_by_course"
        prefix = "SAS"
        print "--> Merging show_answer stats data from %s" % tot_table
        sys.stdout.flush()
        try:
            bqdat = bqutil.get_table_data(dataset, tot_table)
        except Exception as err:
            bqdat = {'data': {}}
        for entry in bqdat['data']:
            course_id = entry['course_id']
            cmci = c_sum_stats[course_id]
            for field, value in entry.items():
                if field=='course_id':
                    continue
                cmci['%s_%s' % (prefix, field)] = value

        # setup list of keys, for CSV output

        css_keys = c_sum_stats.values()[0].keys()

        # retrieve course_metainfo table, pivot, add that to summary_stats

        print "--> Merging course_metainfo from %s" % table
        sys.stdout.flush()
        bqdat = bqutil.get_table_data(dataset, table)

        listings_keys = map(make_key, ["Institution", "Semester", "New or Rerun", "Andrew Recodes New/Rerun", 
                                       "Course Number", "Short Title", "Andrew's Short Titles", "Title", 
                                       "Instructors", "Registration Open", "Course Launch", "Course Wrap", "course_id",
                                       "Empirical Course Wrap", "Andrew's Order", "certifies", "MinPassGrade",
                                       '4-way Category by name', "4-way (CS, STEM, HSocSciGov, HumHistRel)"
                                       ])
        listings_keys.reverse()
        
        for lk in listings_keys:
            css_keys.insert(1, "listings_%s" % lk)

        COUNTS_TO_KEEP = ['discussion', 'problem', 'optionresponse', 'checkboxgroup', 'optioninput', 
                          'choiceresponse', 'video', 'choicegroup', 'vertical', 'choice', 'sequential', 
                          'multiplechoiceresponse', 'numericalresponse', 'chapter', 'solution', 'img', 
                          'formulaequationinput', 'responseparam', 'selfassessment', 'track', 'task', 'rubric', 
                          'stringresponse', 'combinedopenended', 'description', 'textline', 'prompt', 'category', 
                          'option', 'lti', 'annotationresponse', 
                          'annotatable', 'colgroup', 'tag_prompt', 'comment', 'annotationinput', 'image', 
                          'options', 'comment_prompt', 'conditional', 
                          'answer', 'poll_question', 'section', 'wrapper', 'map', 'area', 
                          'customtag', 'transcript', 
                          'split_test', 'word_cloud', 
                          'openended', 'openendedparam', 'answer_display', 'code', 
                          'drag_and_drop_input', 'customresponse', 'draggable', 'mentoring', 
                          'textannotation', 'imageannotation', 'videosequence', 
                          'feedbackprompt', 'assessments', 'openassessment', 'assessment', 'explanation', 'criterion']

        for entry in bqdat['data']:
            thekey = make_key(entry['key'])
            # if thekey.startswith('count_') and thekey[6:] not in COUNTS_TO_KEEP:
            #     continue
            if thekey.startswith('listings_') and thekey[9:] not in listings_keys:
                # print "dropping key=%s for course_id=%s" % (thekey, entry['course_id'])
                continue
            c_sum_stats[entry['course_id']][thekey] = entry['value']
            #if 'certifies' in thekey:
            #    print "course_id=%s, key=%s, value=%s" % (entry['course_id'], thekey, entry['value'])
            if thekey not in css_keys:
                css_keys.append(thekey)

        # compute forum_posts_per_week
        for course_id, entry in c_sum_stats.items():
            nfps = entry.get('nforum_posts_sum', 0)
            if nfps:
                fppw = int(nfps) / float(entry['nweeks'])
                entry['nforum_posts_per_week'] = fppw
                print "    course: %s, assessments_per_week=%s, forum_posts_per_week=%s" % (course_id, entry['total_assessments_per_week'], fppw)
            else:
                entry['nforum_posts_per_week'] = None
        css_keys.append('nforum_posts_per_week')

        # read in listings file and merge that in also
        if listings_file:
            if listings_file.endswith('.csv'):
                listings = csv.DictReader(open(listings_file))
            else:
                listings = [ json.loads(x) for x in open(listings_file) ]
            for entry in listings:
                course_id = entry['course_id']
                if course_id not in c_sum_stats:
                    continue
                cmci = c_sum_stats[course_id]
                for field, value in entry.items():
                    lkey = "listings_%s" % make_key(field)
                    if not (lkey in cmci) or (not cmci[lkey]):
                        cmci[lkey] = value

        print "Storing these fields: %s" % css_keys

        # get schema
        mypath = os.path.dirname(os.path.realpath(__file__))
        the_schema = json.loads(open('%s/schemas/schema_combined_course_summary_stats.json' % mypath).read())
        schema_dict = { x['name'] : x for x in the_schema }

        # write out CSV
        css_table = "course_summary_stats"
        ofn = "%s__%s.csv" % (dataset, css_table)
        ofn2 = "%s__%s.json" % (dataset, css_table)
        print "Writing data to %s and %s" % (ofn, ofn2)

        ofp = open(ofn, 'w')
        ofp2 = open(ofn2, 'w')
        dw = csv.DictWriter(ofp, fieldnames=css_keys)
        dw.writeheader()
        for cid, entry in c_sum_stats.items():
            for ek in entry:
                if ek not in schema_dict:
                    entry.pop(ek)
                # entry[ek] = str(entry[ek])	# coerce to be string
            ofp2.write(json.dumps(entry) + "\n")
            for key in css_keys:
                if key not in entry:
                    entry[key] = None
            dw.writerow(entry)
        ofp.close()
        ofp2.close()

        # upload to bigquery
        # the_schema = [ { 'type': 'STRING', 'name': x } for x in css_keys ]
        if 1:
            gsfnp = gspath / dataset / (css_table + ".json")
            gsutil.upload_file_to_gs(ofn2, gsfnp)
            # bqutil.load_data_to_table(dataset, css_table, gsfnp, the_schema, wait=True, verbose=False,
            #                           format='csv', skiprows=1)
            bqutil.load_data_to_table(dataset, css_table, gsfnp, the_schema, wait=True, verbose=False)

        return

    
    print "-"*60 + " %s" % course_id

    # get nweeks from listings
    lfn = path(listings_file)
    if not lfn.exists():
        print "[analyze_content] course listings file %s doesn't exist!" % lfn
        return

    data = None
    if listings_file.endswith('.json'):
        data_feed = map(json.loads, open(lfn))
    else:
        data_feed = csv.DictReader(open(lfn))
    for k in data_feed:
        if not 'course_id' in k:
            print "Strange course listings row, no course_id in %s" % k
            raise Exception("Missing course_id")
        if k['course_id']==course_id:
            data = k
            break

    if not data:
        print "[analyze_content] no entry for %s found in course listings file %s!" % (course_id, lfn)
        return

    def date_parse(field):
        (m, d, y) = map(int, data[field].split('/'))
        return datetime.datetime(y, m, d)

    launch = date_parse('Course Launch')
    wrap = date_parse('Course Wrap')
    ndays = (wrap - launch).days
    nweeks = ndays / 7.0

    print "Course length = %6.2f weeks (%d days)" % (nweeks, ndays)

    if pin_date:
        datedir = pin_date
    course_dir = find_course_sql_dir(course_id, basedir, datedir, use_dataset_latest and not pin_date)
    cfn = gsutil.path_from_course_id(course_id)

    xbfn = course_dir / ("xbundle_%s.xml" % cfn)
    
    if not xbfn.exists():
        print "[analyze_content] cannot find xbundle file %s for %s!" % (xbfn, course_id)

        if use_dataset_latest:
            # try looking in earlier directories for xbundle file
            import glob
            spath = course_dir / ("../*/xbundle_%s.xml" % cfn)
            files = list(glob.glob(spath))
            if files:
                xbfn = path(files[-1])
            if not xbfn.exists():
                print "   --> also cannot find any %s ; aborting!" % spath
            else:
                print "   --> Found and using instead: %s " % xbfn
        if not xbfn.exists():
            raise Exception("[analyze_content] missing xbundle file %s" % xbfn)

    # if there is an xbundle*.fixed file, use that instead of the normal one
    if os.path.exists(str(xbfn) + ".fixed"):
        xbfn = path(str(xbfn) + ".fixed")

    print "[analyze_content] For %s using %s" % (course_id, xbfn)
    
    # get module usage data
    mudata = get_stats_module_usage(course_id, basedir, datedir, use_dataset_latest)

    xml = etree.parse(open(xbfn)).getroot()
    
    counts = defaultdict(int)
    nexcluded = defaultdict(int)

    IGNORE = ['html', 'p', 'div', 'iframe', 'ol', 'li', 'ul', 'blockquote', 'h1', 'em', 'b', 'h2', 'h3', 'body', 'span', 'strong',
              'a', 'sub', 'strike', 'table', 'td', 'tr', 's', 'tbody', 'sup', 'sub', 'strike', 'i', 's', 'pre', 'policy', 'metadata',
              'grading_policy', 'br', 'center',  'wiki', 'course', 'font', 'tt', 'it', 'dl', 'startouttext', 'endouttext', 'h4', 
              'head', 'source', 'dt', 'hr', 'u', 'style', 'dd', 'script', 'th', 'p', 'P', 'TABLE', 'TD', 'small', 'text', 'title']

    problem_stats = defaultdict(int)

    def does_problem_have_random_script(problem):
        '''
        return 1 if problem has a script with "random." in it
        else return 0
        '''
        for elem in problem.findall('.//script'):
            if elem.text and ('random.' in elem.text):
                return 1
        return 0

    # walk through xbundle 
    def walk_tree(elem, policy=None):
        '''
        Walk XML tree recursively.
        elem = current element
        policy = dict of attributes for children to inherit, with fields like due, graded, showanswer
        '''
        policy = policy or {}
        if  type(elem.tag)==str and (elem.tag.lower() not in IGNORE):
            counts[elem.tag.lower()] += 1
        if elem.tag in ["sequential", "problem", "problemset", "course", "chapter"]:	# very old courses may use inheritance from course & chapter
            keys = ["due", "graded", "format", "showanswer", "start"]
            for k in keys:		# copy inheritable attributes, if they are specified
                val = elem.get(k)
                if val:
                    policy[k] = val
        if elem.tag=="problem":	# accumulate statistics about problems: how many have show_answer = [past_due, closed] ?  have random. in script?
            problem_stats['n_capa_problems'] += 1
            if policy.get('showanswer'):
                problem_stats["n_showanswer_%s" % policy.get('showanswer')] += 1
            else:
                problem_stats['n_shownanswer_finished'] += 1	# DEFAULT showanswer = finished  (make sure this remains true)
                # see https://github.com/edx/edx-platform/blob/master/common/lib/xmodule/xmodule/capa_base.py#L118
                # finished = Show the answer after the student has answered the problem correctly, the student has no attempts left, or the problem due date has passed.
            problem_stats['n_random_script'] += does_problem_have_random_script(elem)

            if policy.get('graded')=='true' or policy.get('graded')=='True':
                problem_stats['n_capa_problems_graded'] += 1
                problem_stats['n_graded_random_script'] += does_problem_have_random_script(elem)
                if policy.get('showanswer'):
                    problem_stats["n_graded_showanswer_%s" % policy.get('showanswer')] += 1
                else:
                    problem_stats['n_graded_shownanswer_finished'] += 1	# DEFAULT showanswer = finished  (make sure this remains true)
            
        for k in elem:
            midfrag = (k.tag, k.get('url_name_orig', None))
            if (midfrag in mudata) and int(mudata[midfrag]['ncount']) < 20:
                nexcluded[k.tag] += 1
                if verbose:
                    try:
                        print "    -> excluding %s (%s), ncount=%s" % (k.get('display_name', '<no_display_name>').encode('utf8'), 
                                                                       midfrag, 
                                                                       mudata.get(midfrag, {}).get('ncount'))
                    except Exception as err:
                        print "    -> excluding ", k
                continue
            walk_tree(k, policy.copy())

    walk_tree(xml)
    print "--> Count of individual element tags throughout XML: ", counts
    
    print "--> problem_stats:", json.dumps(problem_stats, indent=4)

    # combine some into "qual_axis" and others into "quant_axis"
    qual_axis = ['openassessment', 'optionresponse', 'multiplechoiceresponse', 
                 # 'discussion', 
                 'choiceresponse', 'word_cloud', 
                 'combinedopenended', 'choiceresponse', 'stringresponse', 'textannotation', 'openended', 'lti']
    quant_axis = ['formularesponse', 'numericalresponse', 'customresponse', 'symbolicresponse', 'coderesponse',
                  'imageresponse']

    nqual = 0
    nquant = 0
    for tag, count in counts.items():
        if tag in qual_axis:
            nqual += count
        if tag in quant_axis:
            nquant += count
    
    print "nqual=%d, nquant=%d" % (nqual, nquant)

    nqual_per_week = nqual / nweeks
    nquant_per_week = nquant / nweeks
    total_per_week = nqual_per_week + nquant_per_week

    print "per week: nqual=%6.2f, nquant=%6.2f total=%6.2f" % (nqual_per_week, nquant_per_week, total_per_week)

    # save this overall data in CCDATA
    lock_file(CCDATA)
    ccdfn = path(CCDATA)
    ccd = {}
    if ccdfn.exists():
        for k in csv.DictReader(open(ccdfn)):
            ccd[k['course_id']] = k
    
    ccd[course_id] = {'course_id': course_id,
                      'nweeks': nweeks,
                      'nqual_per_week': nqual_per_week,
                      'nquant_per_week': nquant_per_week,
                      'total_assessments_per_week' : total_per_week,
                      }

    # fields = ccd[ccd.keys()[0]].keys()
    fields = ['course_id', 'nquant_per_week', 'total_assessments_per_week', 'nqual_per_week', 'nweeks']
    cfp = open(ccdfn, 'w')
    dw = csv.DictWriter(cfp, fieldnames=fields)
    dw.writeheader()
    for cid, entry in ccd.items():
        dw.writerow(entry)
    cfp.close()
    lock_file(CCDATA, release=True)

    # store data in course_metainfo table, which has one (course_id, key, value) on each line
    # keys include nweeks, nqual, nquant, count_* for module types *

    cmfields = OrderedDict()
    cmfields['course_id'] = course_id
    cmfields['course_length_days'] = str(ndays)
    cmfields.update({ make_key('listings_%s' % key) : value for key, value in data.items() })	# from course listings
    cmfields.update(ccd[course_id].copy())

    # cmfields.update({ ('count_%s' % key) : str(value) for key, value in counts.items() })	# from content counts

    cmfields['filename_xbundle'] = xbfn
    cmfields['filename_listings'] = lfn

    for key in sorted(counts):	# store counts in sorted order, so that the later generated CSV file can have a predictable structure
        value = counts[key]
        cmfields['count_%s' % key] =  str(value) 	# from content counts

    for key in sorted(problem_stats):	# store problem stats
        value = problem_stats[key]
        cmfields['problem_stat_%s' % key] =  str(value)

    cmfields.update({ ('nexcluded_sub_20_%s' % key) : str(value) for key, value in nexcluded.items() })	# from content counts

    course_dir = find_course_sql_dir(course_id, basedir, datedir, use_dataset_latest)
    csvfn = course_dir / CMINFO

    # manual overriding of the automatically computed fields can be done by storing course_id,key,value data
    # in the CMINFO_OVERRIDES file

    csvfn_overrides = course_dir / CMINFO_OVERRIDES
    if csvfn_overrides.exists():
        print "--> Loading manual override information from %s" % csvfn_overrides
        for ovent in csv.DictReader(open(csvfn_overrides)):
            if not ovent['course_id']==course_id:
                print "===> ERROR! override file has entry with wrong course_id: %s" % ovent
                continue
            print "    overriding key=%s with value=%s" % (ovent['key'], ovent['value'])
            cmfields[ovent['key']] = ovent['value']

    print "--> Course metainfo writing to %s" % csvfn

    fp = open(csvfn, 'w')

    cdw = csv.DictWriter(fp, fieldnames=['course_id', 'key', 'value'])
    cdw.writeheader()

    for k, v in cmfields.items():
        cdw.writerow({'course_id': course_id, 'key': k, 'value': v})
        
    fp.close()

    # build and output course_listings_and_metainfo 

    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)

    mypath = os.path.dirname(os.path.realpath(__file__))
    clm_table = "course_listing_and_metainfo"
    clm_schema_file = '%s/schemas/schema_%s.json' % (mypath, clm_table)
    clm_schema = json.loads(open(clm_schema_file).read())

    clm = {}
    for finfo in clm_schema:
        field = finfo['name']
        clm[field] = cmfields.get(field)
    clm_fnb = clm_table + ".json"
    clm_fn = course_dir / clm_fnb
    open(clm_fn, 'w').write(json.dumps(clm))

    gsfnp = gsutil.gs_path_from_course_id(course_id, use_dataset_latest=use_dataset_latest) / clm_fnb
    print "--> Course listing + metainfo uploading to %s then to %s.%s" % (gsfnp, dataset, clm_table)
    sys.stdout.flush()
    gsutil.upload_file_to_gs(clm_fn, gsfnp)
    bqutil.load_data_to_table(dataset, clm_table, gsfnp, clm_schema, wait=True, verbose=False)

    # output course_metainfo

    table = 'course_metainfo'
    dataset = bqutil.course_id2dataset(course_id, use_dataset_latest=use_dataset_latest)

    gsfnp = gsutil.gs_path_from_course_id(course_id, use_dataset_latest=use_dataset_latest) / CMINFO
    print "--> Course metainfo uploading to %s then to %s.%s" % (gsfnp, dataset, table)
    sys.stdout.flush()

    gsutil.upload_file_to_gs(csvfn, gsfnp)

    mypath = os.path.dirname(os.path.realpath(__file__))
    SCHEMA_FILE = '%s/schemas/schema_course_metainfo.json' % mypath
    the_schema = json.loads(open(SCHEMA_FILE).read())[table]

    bqutil.load_data_to_table(dataset, table, gsfnp, the_schema, wait=True, verbose=False, format='csv', skiprows=1)
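
Throughout the function above, each "-->" status line is flushed before the BigQuery or Google Storage call that follows it. Jobs like this usually run with stdout redirected to a log file, which is block-buffered, so the flush is what keeps the log in step with the long-running remote calls. A reduced sketch of that idea, with a hypothetical fetch function standing in for bqutil:

import sys
import time

def merge_tables(table_names, fetch):
    merged = {}
    for name in table_names:
        print('--> Merging data from %s' % name)
        sys.stdout.flush()           # keep a redirected log in step with progress
        merged[name] = fetch(name)   # stand-in for a slow table download
    return merged

def fake_fetch(name):
    time.sleep(0.5)                  # stand-in for a slow BigQuery call
    return {'data': []}

if __name__ == '__main__':
    merge_tables(['time_on_task_stats_by_course',
                  'show_answer_stats_by_course'], fake_fetch)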

Example 47

Project: text2image
Source File: alignDraw.py
View license
    def train(self, lr, epochs, save=False, validateAfter=0):
        self._build_train_function()
        sys.stdout.flush()

        if save == True:
            curr_time = datetime.datetime.now()
            weights_f_name = ("./attention-vae-%s-%s-%s-%s-%s-%s.h5" % (curr_time.year, curr_time.month, curr_time.day, curr_time.hour, curr_time.minute, curr_time.second))
            print weights_f_name

        all_outputs = np.array([0.0,0.0,0.0])
        iter_outputs = np.array([0.0,0.0,0.0])
        curr_iter = 0
        print_after = 100
        seen_examples = 0
        total_seen_examples = 0
        prev_outputs = np.array([float("inf"),float("inf"),float("inf")])
        prev_val_results = np.array([float("inf"),float("inf"),float("inf")])

        for epoch in xrange(0, epochs):
            a = datetime.datetime.now()
            
            self.train_iter.reset()
            while True:
                index_cap, index_im, cap_len = self.train_iter.next()
                if type(index_cap) == int:
                    break
                [kl, logpxz, log_likelihood, c_ts, read_attent_params, write_attent_params] = self._train_function(index_im, index_cap, cap_len, lr, self.runSteps)
                kl_total = kl * index_im.shape[0]
                logpxz_total = logpxz * index_im.shape[0]
                log_likelihood_total = log_likelihood * index_im.shape[0]
                all_outputs[0] = all_outputs[0] + kl_total
                all_outputs[1] = all_outputs[1] + logpxz_total
                all_outputs[2] = all_outputs[2] + log_likelihood_total
                iter_outputs[0] = iter_outputs[0] + kl_total
                iter_outputs[1] = iter_outputs[1] + logpxz_total
                iter_outputs[2] = iter_outputs[2] + log_likelihood_total
                seen_examples = seen_examples + index_im.shape[0]
                total_seen_examples = total_seen_examples + index_im.shape[0]

                if curr_iter % print_after == 0 and curr_iter != 0:
                    print 'Iteration %d ; Processed %d entries' % (curr_iter, total_seen_examples)
                    iter_outputs = iter_outputs / seen_examples
                    print float(iter_outputs[0]), float(iter_outputs[1]), float(iter_outputs[2])
                    print '\n'
                    iter_outputs = np.array([0.0,0.0,0.0])
                    seen_examples = 0
                    sys.stdout.flush()

                if curr_iter % (print_after*10) == 0 and curr_iter != 0:
                    self.save_weights(weights_f_name, c_ts, read_attent_params, write_attent_params)
                    print 'Done Saving Weights'
                    print '\n'
                    sys.stdout.flush()
                
                curr_iter = curr_iter + 1
            b = datetime.datetime.now()
            print("Epoch %d took %s" % (epoch, (b-a)))

            if save == True:
                self.save_weights(weights_f_name, c_ts, read_attent_params, write_attent_params)
                print 'Done Saving Weights'

            all_outputs = all_outputs / (self.input_shape[0] * 5) # 5 captions per image
            print 'Train Results'
            print float(all_outputs[0]), float(all_outputs[1]), float(all_outputs[2])

            if validateAfter != 0:
                if epoch % validateAfter == 0:
                    print 'Validation Results'
                    val_results = self.validate()
                    print float(val_results[0]), float(val_results[1]), float(val_results[2])
                    print '\n'

            if float(val_results[-1]) > float(prev_val_results[-1]):
                print("Learning Rate Decreased")
                lr = lr * 0.1
            elif self.reduceLRAfter != 0:
                if epoch == self.reduceLRAfter:
                    print ("Learning Rate Manually Decreased")
                    lr = lr * 0.1
            else:
                prev_val_results = np.copy(val_results)

            print '\n'
            all_outputs = np.array([0.0,0.0,0.0])
            sys.stdout.flush()
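
The training loop above logs accumulated averages every print_after iterations and flushes each time; since runs like this are typically launched with output redirected to a file, the flush keeps the log usable for monitoring mid-epoch. The logging skeleton, stripped of the model (names invented):

import sys
import random

def train_epoch(num_batches, print_after=100):
    running_loss, seen = 0.0, 0
    for it in range(1, num_batches + 1):
        loss = random.random()        # stand-in for one training step
        running_loss += loss
        seen += 1
        if it % print_after == 0:
            print('Iteration %d ; mean loss %.4f' % (it, running_loss / seen))
            sys.stdout.flush()        # make the log visible during the epoch
            running_loss, seen = 0.0, 0

if __name__ == '__main__':
    train_epoch(1000)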

Example 48

Project: theano-hf
Source File: hf.py
View license
  def train(self, gradient_dataset, cg_dataset, initial_lambda=0.1, mu=0.03, global_backtracking=False, preconditioner=False, max_cg_iterations=250, num_updates=100, validation=None, validation_frequency=1, patience=numpy.inf, save_progress=None):
    '''Performs HF training.

  gradient_dataset : SequenceDataset-like object
      Defines batches used to compute the gradient.
      The `iterate(update=True)` method should yield shuffled training examples
      (tuples of variables matching your graph inputs).
      The same examples MUST be returned between multiple calls to iterator(),
      unless update is True, in which case the next batch should be different.
  cg_dataset : SequenceDataset-like object
      Defines batches used to compute CG iterations.
  initial_lambda : float
      Initial value of the Tikhonov damping coefficient.
  mu : float
      Coefficient for structural damping.
  global_backtracking : Boolean
      If True, backtracks as much as necessary to find the global minimum among
      all CG iterates. Else, Martens' heuristic is used.
  preconditioner : Boolean
      Whether to use Martens' preconditioner.
  max_cg_iterations : int
      CG stops after this many iterations regardless of the stopping criterion.
  num_updates : int
      Training stops after this many parameter updates regardless of `patience`.
  validation: SequenceDataset object, (lambda : tuple) callback, or None
      If a SequenceDataset object is provided, the training monitoring costs
      will be evaluated on that validation dataset.
      If a callback is provided, it should return a list of validation costs
      for monitoring, the first of which is also used for early stopping.
      If None, no early stopping nor validation monitoring is performed.
  validation_frequency: int
      Validation is performed every `validation_frequency` updates.
  patience: int
      Training stops after `patience` updates without improvement in validation
      cost.
  save_progress: string or None
      A checkpoint is automatically saved at this location after each update.
      Call the `train` function again with the same parameters to resume
      training.'''

    self.lambda_ = initial_lambda
    self.mu = mu
    self.global_backtracking = global_backtracking
    self.cg_dataset = cg_dataset
    self.preconditioner = preconditioner
    self.max_cg_iterations = max_cg_iterations
    best = [0, numpy.inf, None]  # iteration, cost, params
    first_iteration = 1

    if isinstance(save_progress, str) and os.path.isfile(save_progress):
      save = cPickle.load(file(save_progress))
      self.cg_last_x, best, self.lambda_, first_iteration, init_p = save
      first_iteration += 1
      for i, j in zip(self.p, init_p): i.set_value(j)
      print '* recovered saved model'
    
    try:
      for u in xrange(first_iteration, 1 + num_updates):
        print 'update %i/%i,' % (u, num_updates),
        sys.stdout.flush()

        gradient = numpy.zeros(sum(self.sizes), dtype=theano.config.floatX)
        costs = []
        for inputs in gradient_dataset.iterate(update=True):
          result = self.f_gc(*inputs)
          gradient += self.list_to_flat(result[:len(self.p)]) / gradient_dataset.number_batches
          costs.append(result[len(self.p):])

        print 'cost=', numpy.mean(costs, axis=0),
        print 'lambda=%.5f,' % self.lambda_,
        sys.stdout.flush()

        after_cost, flat_delta, backtracking, num_cg_iterations = self.cg(-gradient)
        delta_cost = numpy.dot(flat_delta, gradient + 0.5*self.batch_Gv(flat_delta, lambda_=0))  # disable damping
        before_cost = self.quick_cost()
        for i, delta in zip(self.p, self.flat_to_list(flat_delta)):
          i.set_value(i.get_value() + delta)
        cg_dataset.update()

        rho = (after_cost - before_cost) / delta_cost  # Levenberg-Marquardt
        #print 'rho=%f' %rho,
        if rho < 0.25:
          self.lambda_ *= 1.5
        elif rho > 0.75:
          self.lambda_ /= 1.5
        
        if validation is not None and u % validation_frequency == 0:
          if hasattr(validation, 'iterate'):
            costs = numpy.mean([self.f_cost(*i) for i in validation.iterate()], axis=0)
          elif callable(validation):
            costs = validation()
          print 'validation=', costs,
          if costs[0] < best[1]:
            best = u, costs[0], [i.get_value().copy() for i in self.p]
            print '*NEW BEST',

        if isinstance(save_progress, str):
          # do not save dataset states
          save = self.cg_last_x, best, self.lambda_, u, [i.get_value().copy() for i in self.p]
          cPickle.dump(save, file(save_progress, 'wb'), cPickle.HIGHEST_PROTOCOL)
        
        if u - best[0] > patience:
          print 'PATIENCE ELAPSED, BAILING OUT'
          break
        
        print
        sys.stdout.flush()
    except KeyboardInterrupt:
      print 'Interrupted by user.'
    
    if best[2] is None:
      best[2] = [i.get_value().copy() for i in self.p]
    return best[2]
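
Example 48 builds each update's status line piece by piece: every Python 2 print statement with a trailing comma emits a fragment without a newline and is immediately followed by sys.stdout.flush(), so the slow gradient and CG steps show partial progress on a single line; the bare print near the end of the loop terminates it. A rough standalone sketch of the same pattern (the compute_gradient and run_cg stubs are placeholders, not the theano-hf API):

import sys
import time

def compute_gradient(u):
    time.sleep(0.1)          # stand-in for the gradient pass
    return 1.0 / u           # fake cost

def run_cg(u):
    time.sleep(0.1)          # stand-in for the CG solve
    return 10 + u            # fake iteration count

num_updates = 3
for u in range(1, num_updates + 1):
    # each fragment is written without a newline and flushed immediately,
    # so the status line is visible while the slow steps are still running
    sys.stdout.write('update %i/%i, ' % (u, num_updates))
    sys.stdout.flush()

    cost = compute_gradient(u)
    sys.stdout.write('cost=%.4f, ' % cost)
    sys.stdout.flush()

    cg_iters = run_cg(u)
    sys.stdout.write('cg iterations=%d' % cg_iters)

    # terminate the line, as the bare print does in the original
    sys.stdout.write('\n')
    sys.stdout.flush()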

Example 49

Project: pokedex
Source File: load.py
View license
def _get_verbose_prints(verbose):
    """If `verbose` is true, returns three functions: one for printing a
    starting message, one for printing an interim status update, and one for
    printing a success or failure message when finished.

    If `verbose` is false, returns no-op functions.
    """

    if not verbose:
        # Return dummies
        def dummy(*args, **kwargs):
            pass

        return dummy, dummy, dummy

    ### Okay, verbose == True; print stuff

    def print_start(thing):
        # Truncate to 66 characters, leaving 10 characters for a success
        # or failure message
        truncated_thing = thing[:66]

        # Also, space-pad to keep the cursor in a known column
        num_spaces = 66 - len(truncated_thing)

        print("%s...%s" % (truncated_thing, ' ' * num_spaces), end='')
        sys.stdout.flush()

    if sys.stdout.isatty():
        # stdout is a terminal; stupid backspace tricks are OK.
        # Don't use print, because it always adds magical spaces, which
        # makes backspace accounting harder

        backspaces = [0]
        def print_status(msg):
            # Overwrite any status text with spaces before printing
            sys.stdout.write('\b' * backspaces[0])
            sys.stdout.write(' ' * backspaces[0])
            sys.stdout.write('\b' * backspaces[0])
            sys.stdout.write(msg)
            sys.stdout.flush()
            backspaces[0] = len(msg)

        def print_done(msg='ok'):
            # Overwrite any status text with spaces before printing
            sys.stdout.write('\b' * backspaces[0])
            sys.stdout.write(' ' * backspaces[0])
            sys.stdout.write('\b' * backspaces[0])
            sys.stdout.write(msg + "\n")
            sys.stdout.flush()
            backspaces[0] = 0

    else:
        # stdout is a file (or something); don't bother with status at all
        def print_status(msg):
            pass

        def print_done(msg='ok'):
            print(msg)

    return print_start, print_status, print_done
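
Example 49 only attempts in-place status updates when stdout is a terminal, and erases the previous message with backspaces before writing the new one; because none of the writes end in a newline, each is followed by an explicit flush. A condensed sketch of that backspace-and-flush technique (the function and variable names here are made up for illustration):

import sys
import time

def make_status_printer():
    """Return a print_status(msg) function that overwrites its previous
    output in place when stdout is a terminal, and stays silent otherwise."""
    if not sys.stdout.isatty():
        return lambda msg: None

    last_len = [0]  # mutable cell so the closure can remember the last length

    def print_status(msg):
        # erase the previous message: move back, blank it out, move back again
        sys.stdout.write('\b' * last_len[0])
        sys.stdout.write(' ' * last_len[0])
        sys.stdout.write('\b' * last_len[0])
        sys.stdout.write(msg)
        sys.stdout.flush()   # no newline is written, so an explicit flush is required
        last_len[0] = len(msg)

    return print_status

status = make_status_printer()
for i in range(1, 6):
    status('step %d/5' % i)
    time.sleep(0.2)
status('done\n')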

Example 50

Project: nrvr-commander
Source File: download.py
View license
    @classmethod
    def fromUrl(cls, url,
                force=False,
                dontDownload=False,
                ticker=True):
        """Download file or use previously downloaded file.
        
        As implemented uses urllib2.
        
        dontDownload
            whether you don't want to start a download, for some reason.
        
        Return file path."""
        urlFilename = cls.basename(url)
        downloadDir = ScriptUser.loggedIn.userHomeRelative("Downloads")
        downloadPath = os.path.join(downloadDir, urlFilename)
        semaphorePath = downloadPath + cls.semaphoreExtenstion
        #
        if os.path.exists(downloadPath) and not force:
            if not os.path.exists(semaphorePath):
                # file exists and not download in progress,
                # assume it is good
                return downloadPath
            else:
                # file exists and download in progress,
                # presumably from another script running in another process or thread,
                # wait for it to complete
                printed = False
                ticked = False
                # check the essential condition, initially and then repeatedly
                while os.path.exists(semaphorePath):
                    if not printed:
                        # first time only printing
                        print "waiting for " + semaphorePath + " to go away on completion"
                        sys.stdout.flush()
                        printed = True
                    if ticker:
                        if not ticked:
                            # first time only printing
                            sys.stdout.write("[")
                        sys.stdout.write(".")
                        sys.stdout.flush()
                        ticked = True
                    time.sleep(5)
                if ticked:
                    # final printing
                    sys.stdout.write("]\n")
                    sys.stdout.flush()
        elif not dontDownload: # it is normal to download
            if not os.path.exists(downloadDir): # possibly on an international version OS
                try:
                    os.makedirs(downloadDir)
                except OSError:
                    if os.path.exists(downloadDir): # concurrently made
                        pass
                    else: # failure
                        raise
            #
            # try downloading
            pid = os.getpid()
            try:
                with open(semaphorePath, "w") as semaphoreFile:
                    # create semaphore file
                    semaphoreFile.write("pid=" + str(pid))
                #
                print "looking for " + url
                # open connection to server
                urlFileLikeObject = urllib2.urlopen(url)
                with open(downloadPath, "wb") as downloadFile:
                    print "starting to download " + url
                    if ticker:
                        sys.stdout.write("[")
                    # was shutil.copyfileobj(urlFileLikeObject, downloadFile)
                    try:
                        while True:
                            data = urlFileLikeObject.read(1000000)
                            if not data:
                                break
                            downloadFile.write(data)
                            if ticker:
                                sys.stdout.write(".")
                                sys.stdout.flush()
                    finally:
                        if ticker:
                            sys.stdout.write("]\n")
                            sys.stdout.flush()
            except: # apparently a problem
                if os.path.exists(downloadPath):
                    # don't let a bad file sit around
                    try:
                        os.remove(downloadPath)
                    except:
                        pass
                print "problem downloading " + url
                raise
            else:
                print "done downloading " + url
            finally:
                try:
                    # close connection to server
                    urlFileLikeObject.close()
                except:
                    pass
                try:
                    # delete semaphore file
                    os.remove(semaphorePath)
                except:
                    pass
        if os.path.exists(downloadPath):
            # file exists now, assume it is good
            return downloadPath
        else:
            # apparently download has failed
            raise IOError("file not found " + downloadPath)
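
The ticker in Example 50 is a reusable pattern: write an opening bracket, emit one flushed dot per chunk so a long transfer shows visible progress, and close the bracket in a finally block so the line is terminated even if the copy fails. A self-contained sketch of that pattern copying between in-memory streams (the streams and chunk size are placeholders, not the project's download API):

import io
import sys

def copy_with_ticker(src, dst, chunk_size=1024):
    """Copy src to dst in chunks, printing one flushed dot per chunk."""
    sys.stdout.write("[")
    try:
        while True:
            data = src.read(chunk_size)
            if not data:
                break
            dst.write(data)
            sys.stdout.write(".")
            # flush after every dot; without it, nothing may appear until
            # the whole copy has finished
            sys.stdout.flush()
    finally:
        # close the bracket even if the copy raised, so the line is terminated
        sys.stdout.write("]\n")
        sys.stdout.flush()

src = io.BytesIO(b"x" * 10000)   # placeholder source stream
dst = io.BytesIO()               # placeholder destination
copy_with_ticker(src, dst)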