Here are examples of the Python API sys.stdout.write, collected from open source projects. Each example demonstrates a common idiom for writing to standard output without print's implicit newline, such as in-place progress counters, progress bars, backspace-overwritten status lines, and incrementally built log lines.
156 Examples
Example 1
Project: gratipay.com Source File: fake_data.py
def populate_db(db, num_participants=100, ntips=200, num_teams=5, num_transfers=5000):
"""Populate DB with fake data.
"""
print("Making Participants")
make_flag_tester = num_participants > 1
participants = []
for i in xrange(num_participants - 1 if make_flag_tester else num_participants):
participants.append(fake_participant(db))
if make_flag_tester:
# make a participant for testing weird flags
flag_tester = fake_participant(db, random_identities=False)
participants.append(flag_tester)
nepal = db.one("SELECT id FROM countries WHERE code='NP'")
flag_tester.store_identity_info(nepal, 'nothing-enforced', {})
flag_tester.set_identity_verification(nepal, True)
vatican = db.one("SELECT id FROM countries WHERE code='VA'")
flag_tester.store_identity_info(vatican, 'nothing-enforced', {})
flag_tester.set_identity_verification(vatican, True)
print("Making Teams")
teams = []
teamowners = random.sample(participants, num_teams)
for teamowner in teamowners:
teams.append(fake_team(db, teamowner))
# Creating a fake Gratipay Team
teamowner = random.choice(participants)
teams.append(fake_team(db, teamowner, "Gratipay"))
print("Making Payment Instructions")
npayment_instructions = 0
payment_instructions = []
for participant in participants:
for team in teams:
#eliminate self-payment
if participant.username != team.owner:
npayment_instructions += 1
if npayment_instructions > ntips:
break
payment_instructions.append(fake_payment_instruction(db, participant, team))
if npayment_instructions > ntips:
break
print("Making Elsewheres")
for p in participants:
#All participants get between 1 and 3 elsewheres
num_elsewheres = random.randint(1, 3)
for platform_name in random.sample(PLATFORMS, num_elsewheres):
fake_elsewhere(db, p, platform_name)
print("Making Tips")
tips = []
for i in xrange(ntips):
tipper, tippee = random.sample(participants, 2)
tips.append(fake_tip(db, tipper, tippee))
# Payments
payments = []
paymentcount = 0
team_amounts = defaultdict(int)
for payment_instruction in payment_instructions:
participant = Participant.from_id(payment_instruction['participant_id'])
team = Team.from_id(payment_instruction['team_id'])
amount = payment_instruction['amount']
assert participant.username != team.owner
paymentcount += 1
sys.stdout.write("\rMaking Payments (%i)" % (paymentcount))
sys.stdout.flush()
payments.append(fake_payment(db, participant.username, team.slug, amount, 'to-team'))
team_amounts[team.slug] += amount
for team in teams:
paymentcount += 1
sys.stdout.write("\rMaking Payments (%i)" % (paymentcount))
sys.stdout.flush()
payments.append(fake_payment(db, team.owner, team.slug, team_amounts[team.slug], 'to-participant'))
print("")
# Transfers
transfers = []
for i in xrange(num_transfers):
sys.stdout.write("\rMaking Transfers (%i/%i)" % (i+1, num_transfers))
sys.stdout.flush()
tipper, tippee = random.sample(participants, 2)
transfers.append(fake_transfer(db, tipper, tippee))
print("")
# Paydays
# First determine the boundaries - min and max date
min_date = min(min(x['ctime'] for x in payment_instructions + tips),
min(x['timestamp'] for x in payments + transfers))
max_date = max(max(x['ctime'] for x in payment_instructions + tips),
max(x['timestamp'] for x in payments + transfers))
# iterate through min_date, max_date one week at a time
payday_counter = 1
date = min_date
paydays_total = (max_date - min_date).days/7 + 1
while date < max_date:
sys.stdout.write("\rMaking Paydays (%i/%i)" % (payday_counter, paydays_total))
sys.stdout.flush()
payday_counter += 1
end_date = date + datetime.timedelta(days=7)
week_tips = filter(lambda x: date <= x['ctime'] < end_date, tips)
week_transfers = filter(lambda x: date <= x['timestamp'] < end_date, transfers)
week_payment_instructions = filter(lambda x: date <= x['ctime'] < end_date, payment_instructions)
week_payments = filter(lambda x: date <= x['timestamp'] < end_date, payments)
week_payments_to_teams = filter(lambda x: x['direction'] == 'to-team', week_payments)
week_payments_to_owners = filter(lambda x: x['direction'] == 'to-participant', week_payments)
for p in participants:
transfers_in = filter(lambda x: x['tippee'] == p.username, week_transfers)
payments_in = filter(lambda x: x['participant'] == p.username, week_payments_to_owners)
transfers_out = filter(lambda x: x['tipper'] == p.username, week_transfers)
payments_out = filter(lambda x: x['participant'] == p.username, week_payments_to_teams)
amount_in = sum([t['amount'] for t in transfers_in + payments_in])
amount_out = sum([t['amount'] for t in transfers_out + payments_out])
amount = amount_out - amount_in
fee = amount * D('0.02')
fee = abs(fee.quantize(D('.01')))
if amount != 0:
fee = amount * D('0.02')
fee = abs(fee.quantize(D('.01')))
fake_exchange(
db=db,
participant=p,
amount=amount,
fee=fee,
timestamp=(end_date - datetime.timedelta(seconds=1))
)
actives=set()
tippers=set()
#week_tips, week_transfers
for xfers in week_tips, week_transfers:
actives.update(x['tipper'] for x in xfers)
actives.update(x['tippee'] for x in xfers)
tippers.update(x['tipper'] for x in xfers)
# week_payment_instructions
actives.update(x['participant_id'] for x in week_payment_instructions)
tippers.update(x['participant_id'] for x in week_payment_instructions)
# week_payments
actives.update(x['participant'] for x in week_payments)
tippers.update(x['participant'] for x in week_payments_to_owners)
payday = {
'ts_start': date,
'ts_end': end_date,
'nusers': len(actives),
'volume': sum(x['amount'] for x in week_transfers)
}
insert_fake_data(db, "paydays", **payday)
date = end_date
print("")
Example 2
Project: pyOCD Source File: flash_test.py
def flash_test(board_id):
with MbedBoard.chooseBoard(board_id=board_id, frequency=1000000) as board:
target_type = board.getTargetType()
test_clock = 10000000
if target_type == "nrf51":
# Override clock since 10MHz is too fast
test_clock = 1000000
if target_type == "ncs36510":
# Override clock since 10MHz is too fast
test_clock = 1000000
memory_map = board.target.getMemoryMap()
ram_regions = [region for region in memory_map if region.type == 'ram']
ram_region = ram_regions[0]
ram_start = ram_region.start
ram_size = ram_region.length
# Grab boot flash and any regions coming immediately after
rom_region = memory_map.getBootMemory()
rom_start = rom_region.start
rom_size = rom_region.length
for region in memory_map:
if region.isFlash and (region.start == rom_start + rom_size):
rom_size += region.length
target = board.target
link = board.link
flash = board.flash
link.set_clock(test_clock)
link.set_deferred_transfer(True)
test_pass_count = 0
test_count = 0
result = FlashTestResult()
def print_progress(progress):
assert progress >= 0.0
assert progress <= 1.0
assert (progress == 0 and print_progress.prev_progress == 1.0) or (progress >= print_progress.prev_progress)
# Reset state on 0.0
if progress == 0.0:
print_progress.prev_progress = 0
print_progress.backwards_progress = False
print_progress.done = False
# Check for backwards progress
if progress < print_progress.prev_progress:
print_progress.backwards_progress = True
print_progress.prev_progress = progress
# print progress bar
if not print_progress.done:
sys.stdout.write('\r')
i = int(progress * 20.0)
sys.stdout.write("[%-20s] %3d%%" % ('=' * i, round(progress * 100)))
sys.stdout.flush()
# Finish on 1.0
if progress >= 1.0:
if not print_progress.done:
print_progress.done = True
sys.stdout.write("\n")
if print_progress.backwards_progress:
print("Progress went backwards during flash")
print_progress.prev_progress = 0
binary_file = os.path.join(parentdir, 'binaries', board.getTestBinary())
with open(binary_file, "rb") as f:
data = f.read()
data = struct.unpack("%iB" % len(data), data)
unused = rom_size - len(data)
addr = rom_start
size = len(data)
# Turn on extra checks for the next 4 tests
flash.setFlashAlgoDebug(True)
print("\r\n\r\n------ Test Basic Page Erase ------")
info = flash.flashBlock(addr, data, False, False, progress_cb=print_progress)
data_flashed = target.readBlockMemoryUnaligned8(addr, size)
if same(data_flashed, data) and info.program_type is FlashBuilder.FLASH_PAGE_ERASE:
print("TEST PASSED")
test_pass_count += 1
else:
print("TEST FAILED")
test_count += 1
print("\r\n\r\n------ Test Basic Chip Erase ------")
info = flash.flashBlock(addr, data, False, True, progress_cb=print_progress)
data_flashed = target.readBlockMemoryUnaligned8(addr, size)
if same(data_flashed, data) and info.program_type is FlashBuilder.FLASH_CHIP_ERASE:
print("TEST PASSED")
test_pass_count += 1
else:
print("TEST FAILED")
test_count += 1
print("\r\n\r\n------ Test Smart Page Erase ------")
info = flash.flashBlock(addr, data, True, False, progress_cb=print_progress)
data_flashed = target.readBlockMemoryUnaligned8(addr, size)
if same(data_flashed, data) and info.program_type is FlashBuilder.FLASH_PAGE_ERASE:
print("TEST PASSED")
test_pass_count += 1
else:
print("TEST FAILED")
test_count += 1
print("\r\n\r\n------ Test Smart Chip Erase ------")
info = flash.flashBlock(addr, data, True, True, progress_cb=print_progress)
data_flashed = target.readBlockMemoryUnaligned8(addr, size)
if same(data_flashed, data) and info.program_type is FlashBuilder.FLASH_CHIP_ERASE:
print("TEST PASSED")
test_pass_count += 1
else:
print("TEST FAILED")
test_count += 1
flash.setFlashAlgoDebug(False)
print("\r\n\r\n------ Test Basic Page Erase (Entire chip) ------")
new_data = list(data)
new_data.extend(unused * [0x77])
info = flash.flashBlock(addr, new_data, False, False, progress_cb=print_progress)
if info.program_type == FlashBuilder.FLASH_PAGE_ERASE:
print("TEST PASSED")
test_pass_count += 1
result.page_erase_rate = float(len(new_data)) / float(info.program_time)
else:
print("TEST FAILED")
test_count += 1
print("\r\n\r\n------ Test Fast Verify ------")
info = flash.flashBlock(addr, new_data, progress_cb=print_progress, fast_verify=True)
if info.program_type == FlashBuilder.FLASH_PAGE_ERASE:
print("TEST PASSED")
test_pass_count += 1
else:
print("TEST FAILED")
test_count += 1
print("\r\n\r\n------ Test Offset Write ------")
addr = rom_start + rom_size / 2
page_size = flash.getPageInfo(addr).size
new_data = [0x55] * page_size * 2
info = flash.flashBlock(addr, new_data, progress_cb=print_progress)
data_flashed = target.readBlockMemoryUnaligned8(addr, len(new_data))
if same(data_flashed, new_data) and info.program_type is FlashBuilder.FLASH_PAGE_ERASE:
print("TEST PASSED")
test_pass_count += 1
else:
print("TEST FAILED")
test_count += 1
print("\r\n\r\n------ Test Multiple Block Writes ------")
addr = rom_start + rom_size / 2
page_size = flash.getPageInfo(addr).size
more_data = [0x33] * page_size * 2
addr = (rom_start + rom_size / 2) + 1 #cover multiple pages
fb = flash.getFlashBuilder()
fb.addData(rom_start, data)
fb.addData(addr, more_data)
fb.program(progress_cb=print_progress)
data_flashed = target.readBlockMemoryUnaligned8(rom_start, len(data))
data_flashed_more = target.readBlockMemoryUnaligned8(addr, len(more_data))
if same(data_flashed, data) and same(data_flashed_more, more_data):
print("TEST PASSED")
test_pass_count += 1
else:
print("TEST FAILED")
test_count += 1
print("\r\n\r\n------ Test Overlapping Blocks ------")
test_pass = False
addr = (rom_start + rom_size / 2) #cover multiple pages
page_size = flash.getPageInfo(addr).size
new_data = [0x33] * page_size
fb = flash.getFlashBuilder()
fb.addData(addr, new_data)
try:
fb.addData(addr + 1, new_data)
except ValueError as e:
print("Exception: %s" % e)
test_pass = True
if test_pass:
print("TEST PASSED")
test_pass_count += 1
else:
print("TEST FAILED")
test_count += 1
print("\r\n\r\n------ Test Empty Block Write ------")
# Freebee if nothing asserts
fb = flash.getFlashBuilder()
fb.program()
print("TEST PASSED")
test_pass_count += 1
test_count += 1
print("\r\n\r\n------ Test Missing Progress Callback ------")
# Freebee if nothing asserts
addr = rom_start
flash.flashBlock(rom_start, data, True)
print("TEST PASSED")
test_pass_count += 1
test_count += 1
# Only run test if the reset handler can be programmed (rom start at address 0)
if rom_start == 0:
print("\r\n\r\n------ Test Non-Thumb reset handler ------")
non_thumb_data = list(data)
# Clear bit 0 of 2nd word - reset handler
non_thumb_data[4] = non_thumb_data[4] & ~1
flash.flashBlock(rom_start, non_thumb_data)
flash.flashBlock(rom_start, data)
print("TEST PASSED")
test_pass_count += 1
test_count += 1
# Note - The decision based tests below are order dependent since they
# depend on the previous state of the flash
print("\r\n\r\n------ Test Chip Erase Decision ------")
new_data = list(data)
new_data.extend([0xff] * unused) # Pad with 0xFF
info = flash.flashBlock(addr, new_data, progress_cb=print_progress)
if info.program_type == FlashBuilder.FLASH_CHIP_ERASE:
print("TEST PASSED")
test_pass_count += 1
result.chip_erase_rate_erased = float(len(new_data)) / float(info.program_time)
else:
print("TEST FAILED")
test_count += 1
print("\r\n\r\n------ Test Chip Erase Decision 2 ------")
new_data = list(data)
new_data.extend([0x00] * unused) # Pad with 0x00
info = flash.flashBlock(addr, new_data, progress_cb=print_progress)
if info.program_type == FlashBuilder.FLASH_CHIP_ERASE:
print("TEST PASSED")
test_pass_count += 1
result.chip_erase_rate = float(len(new_data)) / float(info.program_time)
else:
print("TEST FAILED")
test_count += 1
print("\r\n\r\n------ Test Page Erase Decision ------")
new_data = list(data)
new_data.extend([0x00] * unused) # Pad with 0x00
info = flash.flashBlock(addr, new_data, progress_cb=print_progress)
if info.program_type == FlashBuilder.FLASH_PAGE_ERASE:
print("TEST PASSED")
test_pass_count += 1
result.page_erase_rate_same = float(len(new_data)) / float(info.program_time)
result.analyze = info.analyze_type
result.analyze_time = info.analyze_time
result.analyze_rate = float(len(new_data)) / float(info.analyze_time)
else:
print("TEST FAILED")
test_count += 1
print("\r\n\r\n------ Test Page Erase Decision 2 ------")
new_data = list(data)
size_same = unused * 5 / 6
size_differ = unused - size_same
new_data.extend([0x00] * size_same) # Pad 5/6 with 0x00 and 1/6 with 0xFF
new_data.extend([0x55] * size_differ)
info = flash.flashBlock(addr, new_data, progress_cb=print_progress)
if info.program_type == FlashBuilder.FLASH_PAGE_ERASE:
print("TEST PASSED")
test_pass_count += 1
else:
print("TEST FAILED")
test_count += 1
print("\r\n\r\nTest Summary:")
print("Pass count %i of %i tests" % (test_pass_count, test_count))
if test_pass_count == test_count:
print("FLASH TEST SCRIPT PASSED")
else:
print("FLASH TEST SCRIPT FAILED")
target.reset()
result.passed = test_count == test_pass_count
return result
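The reusable piece of this test is the fixed-width progress bar: '%-20s' left-justifies the '=' fill inside a constant 20-column field, so successive '\r' overwrites line up exactly. A stripped-down sketch of just the rendering (the assertions and backwards-progress bookkeeping from the test are omitted):

import sys

def print_progress(progress):
    # progress is a float in [0.0, 1.0]
    filled = int(progress * 20.0)
    sys.stdout.write("\r[%-20s] %3d%%" % ('=' * filled, round(progress * 100)))
    sys.stdout.flush()
    if progress >= 1.0:
        sys.stdout.write("\n")  # terminate the bar once it completes

for step in range(101):
    print_progress(step / 100.0)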
Example 3
Project: LasagneNLP Source File: bi_rnn.py
def main():
parser = argparse.ArgumentParser(description='Tuning with bi-directional RNN')
parser.add_argument('--fine_tune', action='store_true', help='Fine tune the word embeddings')
parser.add_argument('--embedding', choices=['word2vec', 'glove', 'senna'], help='Embedding for words',
required=True)
parser.add_argument('--embedding_dict', default='data/word2vec/GoogleNews-vectors-negative300.bin',
help='path for embedding dict')
parser.add_argument('--batch_size', type=int, default=10, help='Number of sentences in each batch')
parser.add_argument('--num_units', type=int, default=100, help='Number of hidden units in RNN')
parser.add_argument('--learning_rate', type=float, default=0.1, help='Learning rate')
parser.add_argument('--decay_rate', type=float, default=0.1, help='Decay rate of learning rate')
parser.add_argument('--grad_clipping', type=float, default=0, help='Gradient clipping')
parser.add_argument('--gamma', type=float, default=1e-6, help='weight for regularization')
parser.add_argument('--oov', choices=['random', 'embedding'], help='Embedding for oov word', required=True)
parser.add_argument('--update', choices=['sgd', 'momentum', 'nesterov'], help='update algorithm', default='sgd')
parser.add_argument('--regular', choices=['none', 'l2'], help='regularization for training',
required=True)
parser.add_argument('--dropout', action='store_true', help='Apply dropout layers')
parser.add_argument('--output_prediction', action='store_true', help='Output predictions to temp files')
parser.add_argument('--train') # "data/POS-penn/wsj/split1/wsj1.train.original"
parser.add_argument('--dev') # "data/POS-penn/wsj/split1/wsj1.dev.original"
parser.add_argument('--test') # "data/POS-penn/wsj/split1/wsj1.test.original"
args = parser.parse_args()
def construct_input_layer():
if fine_tune:
layer_input = lasagne.layers.InputLayer(shape=(None, max_length), input_var=input_var, name='input')
layer_embedding = lasagne.layers.EmbeddingLayer(layer_input, input_size=alphabet_size,
output_size=embedd_dim, W=embedd_table, name='embedding')
return layer_embedding
else:
layer_input = lasagne.layers.InputLayer(shape=(None, max_length, embedd_dim), input_var=input_var,
name='input')
return layer_input
logger = utils.get_logger("BiRNN")
fine_tune = args.fine_tune
oov = args.oov
regular = args.regular
embedding = args.embedding
embedding_path = args.embedding_dict
train_path = args.train
dev_path = args.dev
test_path = args.test
update_algo = args.update
grad_clipping = args.grad_clipping
gamma = args.gamma
output_predict = args.output_prediction
dropout = args.dropout
X_train, Y_train, mask_train, X_dev, Y_dev, mask_dev, X_test, Y_test, mask_test, \
embedd_table, label_alphabet, _, _, _, _ = data_processor.load_dataset_sequence_labeling(train_path, dev_path,
test_path, oov=oov,
fine_tune=fine_tune,
embedding=embedding,
embedding_path=embedding_path)
num_labels = label_alphabet.size() - 1
logger.info("constructing network...")
# create variables
target_var = T.imatrix(name='targets')
mask_var = T.matrix(name='masks', dtype=theano.config.floatX)
if fine_tune:
input_var = T.imatrix(name='inputs')
num_data, max_length = X_train.shape
alphabet_size, embedd_dim = embedd_table.shape
else:
input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
num_data, max_length, embedd_dim = X_train.shape
# construct input and mask layers
layer_incoming = construct_input_layer()
layer_mask = lasagne.layers.InputLayer(shape=(None, max_length), input_var=mask_var, name='mask')
# construct bi-rnn
num_units = args.num_units
bi_rnn = build_BiRNN(layer_incoming, num_units, mask=layer_mask, grad_clipping=grad_clipping,
dropout=dropout)
# reshape bi-rnn to [batch * max_length, num_units]
bi_rnn = lasagne.layers.reshape(bi_rnn, (-1, [2]))
# construct output layer (dense layer with softmax)
layer_output = lasagne.layers.DenseLayer(bi_rnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
name='softmax')
# get output of bi-rnn shape=[batch * max_length, #label]
prediction_train = lasagne.layers.get_output(layer_output)
prediction_eval = lasagne.layers.get_output(layer_output, deterministic=True)
final_prediction = T.argmax(prediction_eval, axis=1)
# flat target_var to vector
target_var_flatten = target_var.flatten()
# flat mask_var to vector
mask_var_flatten = mask_var.flatten()
# compute loss
num_loss = mask_var_flatten.sum(dtype=theano.config.floatX)
# for training, we use mean of loss over number of labels
loss_train = lasagne.objectives.categorical_crossentropy(prediction_train, target_var_flatten)
loss_train = (loss_train * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss
############################################
# l2 regularization?
if regular == 'l2':
l2_penalty = lasagne.regularization.regularize_network_params(layer_output, lasagne.regularization.l2)
loss_train = loss_train + gamma * l2_penalty
# dima regularization?
# if regular == 'dima':
# params_regular = utils.get_all_params_by_name(layer_output, name=['forward.hidden_to_hidden.W',
# 'backward.hidden_to_hidden.W'])
# dima_penalty = lasagne.regularization.apply_penalty(params_regular, dima)
# loss_train = loss_train + gamma * dima_penalty
loss_eval = lasagne.objectives.categorical_crossentropy(prediction_eval, target_var_flatten)
loss_eval = (loss_eval * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss
# compute number of correct labels
corr_train = lasagne.objectives.categorical_accuracy(prediction_train, target_var_flatten)
corr_train = (corr_train * mask_var_flatten).sum(dtype=theano.config.floatX)
corr_eval = lasagne.objectives.categorical_accuracy(prediction_eval, target_var_flatten)
corr_eval = (corr_eval * mask_var_flatten).sum(dtype=theano.config.floatX)
# Create update expressions for training.
# hyper parameters to tune: learning rate, momentum, regularization.
batch_size = args.batch_size
learning_rate = args.learning_rate
decay_rate = args.decay_rate
momentum = 0.9
params = lasagne.layers.get_all_params(layer_output, trainable=True)
updates = utils.create_updates(loss_train, params, update_algo, learning_rate, momentum=momentum)
# Compile a function performing a training step on a mini-batch
train_fn = theano.function([input_var, target_var, mask_var], [loss_train, corr_train, num_loss], updates=updates)
# Compile a second function evaluating the loss and accuracy of network
eval_fn = theano.function([input_var, target_var, mask_var], [loss_eval, corr_eval, num_loss, final_prediction])
# Finally, launch the training loop.
logger.info(
"Start training: %s with regularization: %s(%f), dropout: %s, fine tune: %s (#training data: %d, batch size: %d, clip: %.1f)..." \
% (
update_algo, regular, (0.0 if regular == 'none' else gamma), dropout, fine_tune, num_data, batch_size, grad_clipping))
num_batches = num_data / batch_size
num_epochs = 1000
best_loss = 1e+12
best_acc = 0.0
best_epoch_loss = 0
best_epoch_acc = 0
best_loss_test_err = 0.
best_loss_test_corr = 0.
best_acc_test_err = 0.
best_acc_test_corr = 0.
stop_count = 0
lr = learning_rate
patience = 5
for epoch in range(1, num_epochs + 1):
print 'Epoch %d (learning rate=%.4f, decay rate=%.4f): ' % (epoch, lr, decay_rate)
train_err = 0.0
train_corr = 0.0
train_total = 0
start_time = time.time()
num_back = 0
train_batches = 0
for batch in utils.iterate_minibatches(X_train, Y_train, masks=mask_train, batch_size=batch_size, shuffle=True):
inputs, targets, masks, _ = batch
err, corr, num = train_fn(inputs, targets, masks)
train_err += err * num
train_corr += corr
train_total += num
train_batches += 1
time_ave = (time.time() - start_time) / train_batches
time_left = (num_batches - train_batches) * time_ave
# update log
sys.stdout.write("\b" * num_back)
log_info = 'train: %d/%d loss: %.4f, acc: %.2f%%, time left (estimated): %.2fs' % (
min(train_batches * batch_size, num_data), num_data,
train_err / train_total, train_corr * 100 / train_total, time_left)
sys.stdout.write(log_info)
num_back = len(log_info)
# update training log after each epoch
sys.stdout.write("\b" * num_back)
print 'train: %d/%d loss: %.4f, acc: %.2f%%, time: %.2fs' % (
min(train_batches * batch_size, num_data), num_data,
train_err / train_total, train_corr * 100 / train_total, time.time() - start_time)
# evaluate performance on dev data
dev_err = 0.0
dev_corr = 0.0
dev_total = 0
for batch in utils.iterate_minibatches(X_dev, Y_dev, masks=mask_dev, batch_size=batch_size):
inputs, targets, masks, _ = batch
err, corr, num, predictions = eval_fn(inputs, targets, masks)
dev_err += err * num
dev_corr += corr
dev_total += num
if output_predict:
utils.output_predictions(predictions, targets, masks, 'tmp/dev%d' % epoch, label_alphabet)
print 'dev loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
dev_err / dev_total, dev_corr, dev_total, dev_corr * 100 / dev_total)
if best_loss < dev_err and best_acc > dev_corr / dev_total:
stop_count += 1
else:
update_loss = False
update_acc = False
stop_count = 0
if best_loss > dev_err:
update_loss = True
best_loss = dev_err
best_epoch_loss = epoch
if best_acc < dev_corr / dev_total:
update_acc = True
best_acc = dev_corr / dev_total
best_epoch_acc = epoch
# evaluate on test data when better performance detected
test_err = 0.0
test_corr = 0.0
test_total = 0
for batch in utils.iterate_minibatches(X_test, Y_test, masks=mask_test, batch_size=batch_size):
inputs, targets, masks, _ = batch
err, corr, num, predictions = eval_fn(inputs, targets, masks)
test_err += err * num
test_corr += corr
test_total += num
if output_predict:
utils.output_predictions(predictions, targets, masks, 'tmp/test%d' % epoch, label_alphabet)
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
test_err / test_total, test_corr, test_total, test_corr * 100 / test_total)
if update_loss:
best_loss_test_err = test_err
best_loss_test_corr = test_corr
if update_acc:
best_acc_test_err = test_err
best_acc_test_corr = test_corr
# stop early if dev performance has not improved for 'patience' epochs in a row
if stop_count == patience:
break
# re-compile a function with new learning rate for training
lr = learning_rate / (1.0 + epoch * decay_rate)
updates = utils.create_updates(loss_train, params, update_algo, lr, momentum=momentum)
train_fn = theano.function([input_var, target_var, mask_var], [loss_train, corr_train, num_loss],
updates=updates)
# print best performance on test data.
logger.info("final best loss test performance (at epoch %d)" % best_epoch_loss)
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
best_loss_test_err / test_total, best_loss_test_corr, test_total, best_loss_test_corr * 100 / test_total)
logger.info("final best acc test performance (at epoch %d)" % best_epoch_acc)
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
best_acc_test_err / test_total, best_acc_test_corr, test_total, best_acc_test_corr * 100 / test_total)
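Rather than '\r', this training loop erases its status line with backspaces: it remembers how many characters it last wrote (num_back) and emits that many '\b' before the replacement text. Note that '\b' only moves the cursor and does not blank characters, so the pattern assumes each update is at least as long as the previous one. A minimal sketch of the bookkeeping under that same assumption:

import sys
import time

num_back = 0
for batch in range(1, 6):
    # back the cursor up over the previous status, then overwrite it
    sys.stdout.write("\b" * num_back)
    log_info = "train: %d/5 loss: %.4f" % (batch, 1.0 / batch)
    sys.stdout.write(log_info)
    sys.stdout.flush()
    num_back = len(log_info)
    time.sleep(0.1)  # placeholder for a training step
sys.stdout.write("\n")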
Example 4
Project: CommunityCellularManager Source File: setup_test_db.py
def create_data(self, username, password, usernum, kind, prefix,
endaga_version):
# Create a user.
sys.stdout.write('creating user: %s %s %s..\n' % (
username, password, usernum))
user = User(username=username, email="%[email protected]" % username)
user.set_password(password)
user.save()
# Get user profile and add some credit.
sys.stdout.write('setting user profile..\n')
user_profile = UserProfile.objects.get(user=user)
user_profile.save()
# Add some towers.
towers_to_add = random.randint(4, 7)
added_towers = []
print 'adding %s towers..' % towers_to_add
for index in range(towers_to_add):
nickname = None
if random.random() < 0.5:
nickname = 'Test Tower %s' % index
bts = BTS(uuid=str(uuid.uuid4()), nickname=nickname, secret='mhm',
inbound_url='http://localhost:8090',
network=user_profile.network)
added_towers.append(bts)
# Set the last_active time and uptime randomly.
random_seconds = random.randint(0, 24*60*60)
random_date = (timezone.now() -
datetime.timedelta(seconds=random_seconds))
bts.last_active = random_date
bts.uptime = random.randint(24*60*60, 100*24*60*60)
bts.status = random.choice(['no-data','active','inactive'])
bts.save()
# Set the metapackage version. This has to be done after initially
# creating the BTS or the post-create hook will override.
if endaga_version is not None:
endaga_version = bts.sortable_version(endaga_version)
versions = {
'endaga_version': endaga_version,
'freeswitch_version': None,
'gsm_version': None,
'python_endaga_core_version': None,
'python_gsm_version': None,
}
bts.package_versions = json.dumps(versions)
bts.save()
# Add some TimeseriesStats for each tower.
stats_to_add = random.randint(100, 1000)
print 'adding %s TimeseriesStats..' % stats_to_add
for _ in range(stats_to_add):
date = (
timezone.now() -
datetime.timedelta(seconds=random.randint(0, 7*24*60*60)))
key = random.choice(stats_app.views.TIMESERIES_STAT_KEYS)
if key in ('noise_rssi_db', 'noise_ms_rssi_target_db'):
value = random.randint(-75, -20)
elif 'percent' in key:
value = random.randint(0, 100)
elif 'bytes' in key:
value = random.randint(0, 10000)
else:
value = random.randint(0, 10)
stat = TimeseriesStat(key=key, value=value, date=date, bts=bts,
network=user_profile.network)
stat.save()
# Add some SystemEvents for each tower (either small or large number)
number_of_events = [0,1,2,5,18,135,264]
events_to_add = random.choice(number_of_events)
print 'adding %s SystemEvents..' % events_to_add
for _ in range(events_to_add):
# Actual events should be in order. But we should support
# out-of-order events just in case
date = (
timezone.now() -
datetime.timedelta(seconds=random.randint(0, 7*24*60*60)))
event = SystemEvent(date=date, bts=bts,
type=random.choice(['bts up','bts down']))
event.save()
# Make at least one BTS active recently.
bts.last_active = timezone.now()
bts.status = 'active'
bts.save()
# Make one BTS in the no-data state.
bts = BTS(uuid=str(uuid.uuid4()), nickname='No-data tower', secret='z',
inbound_url='http://localhost:5555',
network=user_profile.network,
package_versions=json.dumps(versions))
bts.save()
# Add some subscribers.
sys.stdout.write("adding subscribers and numbers..\n")
added_subscribers = []
for index in range(random.randint(3, 20)):
imsi = "IMSI%d999900000000%s" % (usernum, index)
if random.random() < 0.5:
name = "test name %s" % index
else:
name = ''
balance = random.randint(40000000, 60000000)
state = "active"
bts = BTS.objects.filter(
network=user_profile.network).order_by('?').first()
subscriber = Subscriber(network=user_profile.network, imsi=imsi,
name=name, balance=balance, state=state,
bts=bts, last_camped=bts.last_active)
subscriber.save()
added_subscribers.append(subscriber)
# And attach some numbers.
for _ in range(random.randint(1, 5)):
msisdn = int(prefix + str(random.randint(1000, 9999)))
number = Number(
number=msisdn, state="inuse", network=user_profile.network,
kind=kind, subscriber=subscriber)
number.save()
# Add one last subscriber so we have at least one sub with no activity.
imsi = "IMSI%d8888000000000" % usernum
name = 'test name (no activity)'
subscriber = Subscriber(network=user_profile.network, imsi=imsi,
bts=bts, name=name, balance=1000,
state='active')
subscriber.save()
# Add some UsageEvents attached to random subscribers.
events_to_add = random.randint(100, 4000)
sys.stdout.write("adding %s usage events..\n" % events_to_add)
all_destinations = list(Destination.objects.all())
with transaction.atomic():
for _ in range(events_to_add):
random_sub = random.choice(added_subscribers)
time_delta = datetime.timedelta(
minutes=random.randint(0, 60000))
date = (timezone.now() - time_delta)
kinds = [
('outside_sms', 10000), ('incoming_sms', 2000),
('local_sms', 4000),
('local_recv_sms', 1000), ('free_sms', 0),
('error_sms', 0),
('outside_call', 8000), ('incoming_call', 3000),
('local_call', 2000),
('local_recv_call', 1000),
('free_call', 0), ('error_call', 0), ('gprs', 5000)]
(kind, tariff) = random.choice(kinds)
to_number, billsec, up_bytes, call_duration = 4 * [None]
from_number, down_bytes, timespan, change = 4 * [None]
if 'call' in kind:
billsec = random.randint(0, 120)
change = tariff * billsec
call_duration = billsec + random.randint(0, 10)
to_number = str(random.randint(1234567890, 9876543210))
from_number = str(random.randint(1234567890, 9876543210))
reason = '%s sec call to %s (%s)' % (billsec, to_number,
kind)
elif 'sms' in kind:
change = tariff
to_number = str(random.randint(1234567890, 9876543210))
from_number = str(random.randint(1234567890, 9876543210))
reason = '%s to %s' % (kind, to_number)
elif kind == 'gprs':
up_bytes = random.randint(20000, 400000)
down_bytes = random.randint(20000, 400000)
change = (down_bytes/1024) * tariff
timespan = 60
reason = 'gprs_usage, %sB uploaded, %sB downloaded' % (
up_bytes, down_bytes)
old_amount = random_sub.balance
random_sub.change_balance(change)
usage_event = UsageEvent(
subscriber=random_sub, bts=random.choice(added_towers),
date=date, kind=kind,
reason=reason, oldamt=old_amount,
newamt=random_sub.balance, change=-change, billsec=billsec,
call_duration=call_duration, uploaded_bytes=up_bytes,
downloaded_bytes=down_bytes,
timespan=timespan, to_number=to_number,
from_number=from_number,
destination=random.choice(all_destinations), tariff=tariff)
try:
usage_event.save()
except DataError:
from django.db import connection
print connection.queries[-1]
random_sub.save()
# Create one more UE with a negative "oldamt" to test display
# handling of such events.
usage_event = UsageEvent(
subscriber=random_sub, bts=random.choice(added_towers),
date=date, kind='local_sms',
reason='negative oldamt', oldamt=-200000,
newamt=0, change=200000,
billsec=0, to_number='19195551234',
destination=random.choice(all_destinations))
usage_event.save()
# Add some transaction history.
sys.stdout.write("adding transactions..\n")
for _ in range(random.randint(10, 50)):
time_delta = datetime.timedelta(
minutes=random.randint(0, 60000))
date = (timezone.now() - time_delta)
new_transaction = Transaction(
ledger=user_profile.network.ledger, kind='credit',
reason='Automatic Recharge',
amount=1e3*random.randint(1000, 100000),
created=date,
)
new_transaction.save()
# And some floating numbers for release testing.
sys.stdout.write("adding floating phone numbers..\n")
for num in random.sample(range(10000, 99999), 300):
#need to be e164, that's what we use
msisdn = int('155555%s' % str(num))
state = random.choice(('available', 'pending'))
kind = random.choice(('number.nexmo.monthly',
'number.telecom.permanent'))
number = Number(
number=msisdn, state=state, kind=kind, country_id='US')
number.save()
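This script mixes print with sys.stdout.write for its status messages; the practical difference is that write() appends nothing, so each message must carry its own trailing '\n'. A tiny sketch of that style (the message text is illustrative):

import sys

def status(msg):
    # unlike print, sys.stdout.write adds no newline and no spaces,
    # so each status line is terminated explicitly
    sys.stdout.write(msg + "\n")

status("creating user: alice ..")
status("adding subscribers and numbers..")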
Example 5
Project: cgstudiomap Source File: miniterm.py
def writer(self):
"""\
Loop and copy console->serial until EXITCHARCTER character is
found. When MENUCHARACTER is found, interpret the next key
locally.
"""
menu_active = False
try:
while self.alive:
try:
b = console.getkey()
except KeyboardInterrupt:
b = serial.to_bytes([3])
c = character(b)
if menu_active:
if c == MENUCHARACTER or c == EXITCHARCTER: # Menu character again/exit char -> send itself
self.serial.write(b) # send character
if self.echo:
sys.stdout.write(c)
elif c == '\x15': # CTRL+U -> upload file
sys.stderr.write('\n--- File to upload: ')
sys.stderr.flush()
console.cleanup()
filename = sys.stdin.readline().rstrip('\r\n')
if filename:
try:
file = open(filename, 'r')
sys.stderr.write('--- Sending file %s ---\n' % filename)
while True:
line = file.readline().rstrip('\r\n')
if not line:
break
self.serial.write(line)
self.serial.write('\r\n')
# Wait for output buffer to drain.
self.serial.flush()
sys.stderr.write('.') # Progress indicator.
sys.stderr.write('\n--- File %s sent ---\n' % filename)
except IOError, e:
sys.stderr.write('--- ERROR opening file %s: %s ---\n' % (filename, e))
console.setup()
elif c in '\x08hH?': # CTRL+H, h, H, ? -> Show help
sys.stderr.write(get_help_text())
elif c == '\x12': # CTRL+R -> Toggle RTS
self.rts_state = not self.rts_state
self.serial.setRTS(self.rts_state)
sys.stderr.write('--- RTS %s ---\n' % (self.rts_state and 'active' or 'inactive'))
elif c == '\x04': # CTRL+D -> Toggle DTR
self.dtr_state = not self.dtr_state
self.serial.setDTR(self.dtr_state)
sys.stderr.write('--- DTR %s ---\n' % (self.dtr_state and 'active' or 'inactive'))
elif c == '\x02': # CTRL+B -> toggle BREAK condition
self.break_state = not self.break_state
self.serial.setBreak(self.break_state)
sys.stderr.write('--- BREAK %s ---\n' % (self.break_state and 'active' or 'inactive'))
elif c == '\x05': # CTRL+E -> toggle local echo
self.echo = not self.echo
sys.stderr.write('--- local echo %s ---\n' % (self.echo and 'active' or 'inactive'))
elif c == '\x09': # CTRL+I -> info
self.dump_port_settings()
elif c == '\x01': # CTRL+A -> cycle escape mode
self.repr_mode += 1
if self.repr_mode > 3:
self.repr_mode = 0
sys.stderr.write('--- escape data: %s ---\n' % (
REPR_MODES[self.repr_mode],
))
elif c == '\x0c': # CTRL+L -> cycle linefeed mode
self.convert_outgoing += 1
if self.convert_outgoing > 2:
self.convert_outgoing = 0
self.newline = NEWLINE_CONVERISON_MAP[self.convert_outgoing]
sys.stderr.write('--- line feed %s ---\n' % (
LF_MODES[self.convert_outgoing],
))
elif c in 'pP': # P -> change port
dump_port_list()
sys.stderr.write('--- Enter port name: ')
sys.stderr.flush()
console.cleanup()
try:
port = sys.stdin.readline().strip()
except KeyboardInterrupt:
port = None
console.setup()
if port and port != self.serial.port:
# reader thread needs to be shut down
self._stop_reader()
# save settings
settings = self.serial.getSettingsDict()
try:
try:
new_serial = serial.serial_for_url(port, do_not_open=True)
except AttributeError:
# happens when the installed pyserial is older than 2.5. use the
# Serial class directly then.
new_serial = serial.Serial()
new_serial.port = port
# restore settings and open
new_serial.applySettingsDict(settings)
new_serial.open()
new_serial.setRTS(self.rts_state)
new_serial.setDTR(self.dtr_state)
new_serial.setBreak(self.break_state)
except Exception, e:
sys.stderr.write('--- ERROR opening new port: %s ---\n' % (e,))
new_serial.close()
else:
self.serial.close()
self.serial = new_serial
sys.stderr.write('--- Port changed to: %s ---\n' % (self.serial.port,))
# and restart the reader thread
self._start_reader()
elif c in 'bB': # B -> change baudrate
sys.stderr.write('\n--- Baudrate: ')
sys.stderr.flush()
console.cleanup()
backup = self.serial.baudrate
try:
self.serial.baudrate = int(sys.stdin.readline().strip())
except ValueError, e:
sys.stderr.write('--- ERROR setting baudrate: %s ---\n' % (e,))
self.serial.baudrate = backup
else:
self.dump_port_settings()
console.setup()
elif c == '8': # 8 -> change to 8 bits
self.serial.bytesize = serial.EIGHTBITS
self.dump_port_settings()
elif c == '7': # 7 -> change to 7 bits
self.serial.bytesize = serial.SEVENBITS
self.dump_port_settings()
elif c in 'eE': # E -> change to even parity
self.serial.parity = serial.PARITY_EVEN
self.dump_port_settings()
elif c in 'oO': # O -> change to odd parity
self.serial.parity = serial.PARITY_ODD
self.dump_port_settings()
elif c in 'mM': # M -> change to mark parity
self.serial.parity = serial.PARITY_MARK
self.dump_port_settings()
elif c in 'sS': # S -> change to space parity
self.serial.parity = serial.PARITY_SPACE
self.dump_port_settings()
elif c in 'nN': # N -> change to no parity
self.serial.parity = serial.PARITY_NONE
self.dump_port_settings()
elif c == '1': # 1 -> change to 1 stop bits
self.serial.stopbits = serial.STOPBITS_ONE
self.dump_port_settings()
elif c == '2': # 2 -> change to 2 stop bits
self.serial.stopbits = serial.STOPBITS_TWO
self.dump_port_settings()
elif c == '3': # 3 -> change to 1.5 stop bits
self.serial.stopbits = serial.STOPBITS_ONE_POINT_FIVE
self.dump_port_settings()
elif c in 'xX': # X -> change software flow control
self.serial.xonxoff = (c == 'X')
self.dump_port_settings()
elif c in 'rR': # R -> change hardware flow control
self.serial.rtscts = (c == 'R')
self.dump_port_settings()
else:
sys.stderr.write('--- unknown menu character %s --\n' % key_description(c))
menu_active = False
elif c == MENUCHARACTER: # next char will be for menu
menu_active = True
elif c == EXITCHARCTER:
self.stop()
break # exit app
elif c == '\n':
self.serial.write(self.newline) # send newline character(s)
if self.echo:
sys.stdout.write(c) # local echo is a real newline in any case
sys.stdout.flush()
else:
self.serial.write(b) # send byte
if self.echo:
sys.stdout.write(c)
sys.stdout.flush()
except:
self.alive = False
raise
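In this terminal writer loop, sys.stdout.write implements local echo: each keystroke is written back one character at a time, immediately followed by flush() so it appears without waiting for a newline. A minimal sketch of that echo path (iterating over a string stands in for console.getkey()):

import sys

def echo(ch):
    # echo a single character immediately; without flush() it would
    # sit in the stdout buffer until a '\n' arrived
    sys.stdout.write(ch)
    sys.stdout.flush()

for ch in "hello\n":  # stand-in for keystrokes from console.getkey()
    echo(ch)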
Example 6
Project: LasagneNLP Source File: bi_lstm.py
def main():
parser = argparse.ArgumentParser(description='Tuning with bi-directional LSTM')
parser.add_argument('--fine_tune', action='store_true', help='Fine tune the word embeddings')
parser.add_argument('--embedding', choices=['word2vec', 'glove', 'senna'], help='Embedding for words',
required=True)
parser.add_argument('--embedding_dict', default='data/word2vec/GoogleNews-vectors-negative300.bin',
help='path for embedding dict')
parser.add_argument('--batch_size', type=int, default=10, help='Number of sentences in each batch')
parser.add_argument('--num_units', type=int, default=100, help='Number of hidden units in LSTM')
parser.add_argument('--learning_rate', type=float, default=0.1, help='Learning rate')
parser.add_argument('--decay_rate', type=float, default=0.1, help='Decay rate of learning rate')
parser.add_argument('--grad_clipping', type=float, default=0, help='Gradient clipping')
parser.add_argument('--gamma', type=float, default=1e-6, help='weight for regularization')
parser.add_argument('--peepholes', action='store_true', help='Peepholes for LSTM')
parser.add_argument('--oov', choices=['random', 'embedding'], help='Embedding for oov word', required=True)
parser.add_argument('--update', choices=['sgd', 'momentum', 'nesterov'], help='update algorithm', default='sgd')
parser.add_argument('--regular', choices=['none', 'l2'], help='regularization for training',
required=True)
parser.add_argument('--dropout', action='store_true', help='Apply dropout layers')
parser.add_argument('--output_prediction', action='store_true', help='Output predictions to temp files')
parser.add_argument('--train') # "data/POS-penn/wsj/split1/wsj1.train.original"
parser.add_argument('--dev') # "data/POS-penn/wsj/split1/wsj1.dev.original"
parser.add_argument('--test') # "data/POS-penn/wsj/split1/wsj1.test.original"
args = parser.parse_args()
def construct_input_layer():
if fine_tune:
layer_input = lasagne.layers.InputLayer(shape=(None, max_length), input_var=input_var, name='input')
layer_embedding = lasagne.layers.EmbeddingLayer(layer_input, input_size=alphabet_size,
output_size=embedd_dim,
W=embedd_table, name='embedding')
return layer_embedding
else:
layer_input = lasagne.layers.InputLayer(shape=(None, max_length, embedd_dim), input_var=input_var,
name='input')
return layer_input
logger = utils.get_logger("BiLSTM")
fine_tune = args.fine_tune
oov = args.oov
regular = args.regular
embedding = args.embedding
embedding_path = args.embedding_dict
train_path = args.train
dev_path = args.dev
test_path = args.test
update_algo = args.update
grad_clipping = args.grad_clipping
peepholes = args.peepholes
gamma = args.gamma
output_predict = args.output_prediction
dropout = args.dropout
X_train, Y_train, mask_train, X_dev, Y_dev, mask_dev, X_test, Y_test, mask_test, \
embedd_table, label_alphabet, _, _, _, _ = data_processor.load_dataset_sequence_labeling(train_path, dev_path,
test_path, oov=oov,
fine_tune=fine_tune,
embedding=embedding,
embedding_path=embedding_path)
num_labels = label_alphabet.size() - 1
logger.info("constructing network...")
# create variables
target_var = T.imatrix(name='targets')
mask_var = T.matrix(name='masks', dtype=theano.config.floatX)
if fine_tune:
input_var = T.imatrix(name='inputs')
num_data, max_length = X_train.shape
alphabet_size, embedd_dim = embedd_table.shape
else:
input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
num_data, max_length, embedd_dim = X_train.shape
# construct input and mask layers
layer_incoming = construct_input_layer()
layer_mask = lasagne.layers.InputLayer(shape=(None, max_length), input_var=mask_var, name='mask')
# construct bi-lstm
num_units = args.num_units
bi_lstm = build_BiLSTM(layer_incoming, num_units, mask=layer_mask, grad_clipping=grad_clipping,
peepholes=peepholes, dropout=dropout)
# reshape bi-rnn to [batch * max_length, num_units]
bi_lstm = lasagne.layers.reshape(bi_lstm, (-1, [2]))
# construct output layer (dense layer with softmax)
layer_output = lasagne.layers.DenseLayer(bi_lstm, num_units=num_labels, nonlinearity=nonlinearities.softmax,
name='softmax')
# get output of bi-rnn shape=[batch * max_length, #label]
prediction_train = lasagne.layers.get_output(layer_output)
prediction_eval = lasagne.layers.get_output(layer_output, deterministic=True)
final_prediction = T.argmax(prediction_eval, axis=1)
# flat target_var to vector
target_var_flatten = target_var.flatten()
# flat mask_var to vector
mask_var_flatten = mask_var.flatten()
# compute loss
num_loss = mask_var_flatten.sum(dtype=theano.config.floatX)
# for training, we use mean of loss over number of labels
loss_train = lasagne.objectives.categorical_crossentropy(prediction_train, target_var_flatten)
loss_train = (loss_train * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss
# l2 regularization?
if regular == 'l2':
l2_penalty = lasagne.regularization.regularize_network_params(layer_output, lasagne.regularization.l2)
loss_train = loss_train + gamma * l2_penalty
loss_eval = lasagne.objectives.categorical_crossentropy(prediction_eval, target_var_flatten)
loss_eval = (loss_eval * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss
# compute number of correct labels
corr_train = lasagne.objectives.categorical_accuracy(prediction_train, target_var_flatten)
corr_train = (corr_train * mask_var_flatten).sum(dtype=theano.config.floatX)
corr_eval = lasagne.objectives.categorical_accuracy(prediction_eval, target_var_flatten)
corr_eval = (corr_eval * mask_var_flatten).sum(dtype=theano.config.floatX)
# Create update expressions for training.
# hyper parameters to tune: learning rate, momentum, regularization.
batch_size = args.batch_size
learning_rate = args.learning_rate
decay_rate = args.decay_rate
momentum = 0.9
params = lasagne.layers.get_all_params(layer_output, trainable=True)
updates = utils.create_updates(loss_train, params, update_algo, learning_rate, momentum=momentum)
# Compile a function performing a training step on a mini-batch
train_fn = theano.function([input_var, target_var, mask_var], [loss_train, corr_train, num_loss], updates=updates)
# Compile a second function evaluating the loss and accuracy of network
eval_fn = theano.function([input_var, target_var, mask_var], [loss_eval, corr_eval, num_loss, final_prediction])
# Finally, launch the training loop.
logger.info(
"Start training: %s with regularization: %s(%f), dropout: %s, fine tune: %s (#training data: %d, batch size: %d, clip: %.1f, peepholes: %s)..." \
% (
update_algo, regular, (0.0 if regular == 'none' else gamma), dropout, fine_tune, num_data, batch_size,
grad_clipping,
peepholes))
num_batches = num_data / batch_size
num_epochs = 1000
best_loss = 1e+12
best_acc = 0.0
best_epoch_loss = 0
best_epoch_acc = 0
best_loss_test_err = 0.
best_loss_test_corr = 0.
best_acc_test_err = 0.
best_acc_test_corr = 0.
stop_count = 0
lr = learning_rate
patience = 5
for epoch in range(1, num_epochs + 1):
print 'Epoch %d (learning rate=%.4f, decay rate=%.4f): ' % (epoch, lr, decay_rate)
train_err = 0.0
train_corr = 0.0
train_total = 0
start_time = time.time()
num_back = 0
train_batches = 0
for batch in utils.iterate_minibatches(X_train, Y_train, masks=mask_train, batch_size=batch_size, shuffle=True):
inputs, targets, masks, _ = batch
err, corr, num = train_fn(inputs, targets, masks)
train_err += err * num
train_corr += corr
train_total += num
train_batches += 1
time_ave = (time.time() - start_time) / train_batches
time_left = (num_batches - train_batches) * time_ave
# update log
sys.stdout.write("\b" * num_back)
log_info = 'train: %d/%d loss: %.4f, acc: %.2f%%, time left (estimated): %.2fs' % (
min(train_batches * batch_size, num_data), num_data,
train_err / train_total, train_corr * 100 / train_total, time_left)
sys.stdout.write(log_info)
num_back = len(log_info)
# update training log after each epoch
sys.stdout.write("\b" * num_back)
print 'train: %d/%d loss: %.4f, acc: %.2f%%, time: %.2fs' % (
min(train_batches * batch_size, num_data), num_data,
train_err / train_total, train_corr * 100 / train_total, time.time() - start_time)
# evaluate performance on dev data
dev_err = 0.0
dev_corr = 0.0
dev_total = 0
for batch in utils.iterate_minibatches(X_dev, Y_dev, masks=mask_dev, batch_size=batch_size):
inputs, targets, masks, _ = batch
err, corr, num, predictions = eval_fn(inputs, targets, masks)
dev_err += err * num
dev_corr += corr
dev_total += num
if output_predict:
utils.output_predictions(predictions, targets, masks, 'tmp/dev%d' % epoch, label_alphabet)
print 'dev loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
dev_err / dev_total, dev_corr, dev_total, dev_corr * 100 / dev_total)
if best_loss < dev_err and best_acc > dev_corr / dev_total:
stop_count += 1
else:
update_loss = False
update_acc = False
stop_count = 0
if best_loss > dev_err:
update_loss = True
best_loss = dev_err
best_epoch_loss = epoch
if best_acc < dev_corr / dev_total:
update_acc = True
best_acc = dev_corr / dev_total
best_epoch_acc = epoch
# evaluate on test data when better performance detected
test_err = 0.0
test_corr = 0.0
test_total = 0
for batch in utils.iterate_minibatches(X_test, Y_test, masks=mask_test, batch_size=batch_size):
inputs, targets, masks, _ = batch
err, corr, num, predictions = eval_fn(inputs, targets, masks)
test_err += err * num
test_corr += corr
test_total += num
if output_predict:
utils.output_predictions(predictions, targets, masks, 'tmp/test%d' % epoch, label_alphabet)
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
test_err / test_total, test_corr, test_total, test_corr * 100 / test_total)
if update_loss:
best_loss_test_err = test_err
best_loss_test_corr = test_corr
if update_acc:
best_acc_test_err = test_err
best_acc_test_corr = test_corr
# stop early if dev performance has not improved for 'patience' epochs in a row
if stop_count == patience:
break
# re-compile a function with new learning rate for training
lr = learning_rate / (1.0 + epoch * decay_rate)
updates = utils.create_updates(loss_train, params, update_algo, lr, momentum=momentum)
train_fn = theano.function([input_var, target_var, mask_var], [loss_train, corr_train, num_loss],
updates=updates)
# print best performance on test data.
logger.info("final best loss test performance (at epoch %d)" % (best_epoch_loss))
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
best_loss_test_err / test_total, best_loss_test_corr, test_total, best_loss_test_corr * 100 / test_total)
logger.info("final best acc test performance (at epoch %d)" % (best_epoch_acc))
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
best_acc_test_err / test_total, best_acc_test_corr, test_total, best_acc_test_corr * 100 / test_total)
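This is the same backspace-overwrite idiom as in Example 3. When the pattern recurs across several loops, the num_back bookkeeping is worth encapsulating; a small helper along these lines (a sketch, not part of the project) also pads with spaces so a shorter update cannot leave residue on screen:

import sys

class InlineLogger(object):
    """A single status line rewritten in place via backspaces."""

    def __init__(self):
        self.num_back = 0

    def update(self, msg):
        sys.stdout.write("\b" * self.num_back)  # cursor back over old text
        pad = max(0, self.num_back - len(msg))
        # blank any leftover characters with spaces, then back the
        # cursor up over the padding so the next update aligns
        sys.stdout.write(msg + " " * pad + "\b" * pad)
        sys.stdout.flush()
        self.num_back = len(msg)

    def close(self):
        sys.stdout.write("\n")

log = InlineLogger()
for epoch in range(1, 4):
    log.update("epoch %d/3" % epoch)
log.close()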
Example 7
Project: pius Source File: signer.py
def sign_all_uids(self, key, level):
'''The main function that signs all the UIDs on a given key.'''
signed_any_uids = False
uids = self.get_uids(key)
print ' There %s %s UID%s on this key to sign' % (
['is', 'are'][len(uids) != 1], len(uids), "s"[len(uids) == 1:]
)
# From the user key ring make a clean copy
self.export_clean_key(key)
for uid in uids:
if uid['status'] == 'r':
print ' Skipping revoked uid %s' % uid['index']
continue
elif uid['status'] == 'e':
print ' Skipping expired uid %s' % uid['index']
continue
sys.stdout.write(' UID %s (%s): ' % (uid['index'], uid['id']))
# Make sure we have a clean keyring, and then import the key we care
# about
self.clean_working_keyring()
self.import_clean_key(key)
# Sign the key...
if self.mode in (MODE_CACHE_PASSPHRASE, MODE_AGENT):
try:
res = self.sign_uid(key, uid['index'], level)
except AgentError:
print '\ngpg-agent problems, bailing out!'
sys.exit(1)
except PassphraseError:
print ('\nThe passphrase that worked a moment ago now doesn\'t work.'
' I\'m bailing out!')
sys.exit(1)
except NoSelfKeyError:
print '\nWe don\'t have our own key, according to GnuPG.'
# No need to say anything else
sys.exit(1)
else:
res = self.sign_uid_expect(key, uid['index'], level)
if not res:
uid['result'] = False
continue
sys.stdout.write('signed')
uid['result'] = True
signed_any_uids = True
# Export the signed key...
self.export_signed_uid(key, uid['file'])
# If requested, encrypt the signed key...
if self.encrypt_outfiles:
try:
uid['enc_file'] = self._outfile_path(
self.encrypt_signed_uid(key, uid['file'])
)
sys.stdout.write(', encrypted')
except EncryptionKeyError:
print ('\nEncryption failed due to invalid key error. User may not'
' have an encryption subkey or it may be expired.')
uid['enc_file'] = None
# If we can't encrypt, we don't want to mail - even if we're using
# PGP/Mime the encryption for that will also fail. So we move on to
# the next key
continue
# If requested, send keys out. Note this doesn't depend on
# encrypt_outfiles, because if we use PGP/Mime, the default, the email
# itself is encrypted
if self.mail:
try:
if uid['email'] == None:
print ' WARNING: No email for %s, cannot send key.' % uid['id']
continue
# This is a bit ugly. The mailer needs to be able to call
# encrypt_and_sign_file() to generate the PGP/MIME file,
# so we pass ourselves in so it can call it...
self.mailer.send_sig_mail(self.signer, key, uid, self)
sys.stdout.write(', mailed')
except MailSendError, msg:
print ('\nThere was a problem talking to the mail server (%s): %s'
% (self.mail_host, msg))
# add a newline to all the sys.stdout.write()s
print ''
# remove the signed file, if it exists (it might not, if it's
# expired, the user chose not to sign it, etc.)
# But don't do this if the ONLY action we're performing is creating those
# files - then the desired result is these files.
if self.encrypt_outfiles or self.mail:
if os.path.exists(uid['file']):
os.unlink(uid['file'])
if self.verbose:
self.print_filenames(uids)
# Remove the clean keyfile we temporarily created
self.clean_clean_key(key)
return signed_any_uids
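Here sys.stdout.write is chosen over print precisely because it leaves the line open: the per-UID status starts with 'UID ...: ', then 'signed', ', encrypted', and ', mailed' are appended by later writes as each optional step succeeds, and a final print '' supplies the newline. A condensed sketch of that accumulation (the step results are hard-coded for illustration):

import sys

sys.stdout.write('  UID 1 (Example User <[email protected]>): ')
sys.stdout.write('signed')        # written once signing succeeds
sys.stdout.write(', encrypted')   # appended only if encryption was requested
sys.stdout.write(', mailed')      # appended only if the mail was sent
sys.stdout.write('\n')            # close out the accumulated status line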
Example 8
Project: statsmodels Source File: ipython_directive.py
def process_input(self, data, input_prompt, lineno):
"""
Process data block for INPUT token.
"""
decorator, input, rest = data
image_file = None
image_directive = None
is_verbatim = decorator=='@verbatim' or self.is_verbatim
is_doctest = (decorator is not None and \
decorator.startswith('@doctest')) or self.is_doctest
is_suppress = decorator=='@suppress' or self.is_suppress
is_okexcept = decorator=='@okexcept' or self.is_okexcept
is_okwarning = decorator=='@okwarning' or self.is_okwarning
is_savefig = decorator is not None and \
decorator.startswith('@savefig')
input_lines = input.split('\n')
if len(input_lines) > 1:
if input_lines[-1] != "":
input_lines.append('') # make sure there's a blank line
# so splitter buffer gets reset
continuation = ' %s:'%''.join(['.']*(len(str(lineno))+2))
if is_savefig:
image_file, image_directive = self.process_image(decorator)
ret = []
is_semicolon = False
# Hold the execution count, if requested to do so.
if is_suppress and self.hold_count:
store_history = False
else:
store_history = True
# Note: catch_warnings is not thread safe
with warnings.catch_warnings(record=True) as ws:
for i, line in enumerate(input_lines):
if line.endswith(';'):
is_semicolon = True
if i == 0:
# process the first input line
if is_verbatim:
self.process_input_line('')
self.IP.execution_count += 1 # increment it anyway
else:
# only submit the line in non-verbatim mode
self.process_input_line(line, store_history=store_history)
formatted_line = '%s %s'%(input_prompt, line)
else:
# process a continuation line
if not is_verbatim:
self.process_input_line(line, store_history=store_history)
formatted_line = '%s %s'%(continuation, line)
if not is_suppress:
ret.append(formatted_line)
if not is_suppress and len(rest.strip()) and is_verbatim:
# The "rest" is the standard output of the input. This needs to be
# added when in verbatim mode. If there is no "rest", then we don't
# add it, as the new line will be added by the processed output.
ret.append(rest)
# Fetch the processed output. (This is not the submitted output.)
self.cout.seek(0)
processed_output = self.cout.read()
if not is_suppress and not is_semicolon:
#
# In IPythonDirective.run, the elements of `ret` are eventually
# combined such that '' entries correspond to newlines. So if
# `processed_output` is equal to '', then the adding it to `ret`
# ensures that there is a blank line between consecutive inputs
# that have no outputs, as in:
#
# In [1]: x = 4
#
# In [2]: x = 5
#
# When there is processed output, it has a '\n' at the tail end. So
# adding the output to `ret` will provide the necessary spacing
# between consecutive input/output blocks, as in:
#
# In [1]: x
# Out[1]: 5
#
# In [2]: x
# Out[2]: 5
#
# When there is stdout from the input, it also has a '\n' at the
# tail end, and so this ensures proper spacing as well. E.g.:
#
# In [1]: print x
# 5
#
# In [2]: x = 5
#
# When in verbatim mode, `processed_output` is empty (because
# nothing was passed to IP. Sometimes the submitted code block has
# an Out[] portion and sometimes it does not. When it does not, we
# need to ensure proper spacing, so we have to add '' to `ret`.
# However, if there is an Out[] in the submitted code, then we do
# not want to add a newline as `process_output` has stuff to add.
# The difficulty is that `process_input` doesn't know if
# `process_output` will be called---so it doesn't know if there is
# Out[] in the code block. The requires that we include a hack in
# `process_block`. See the comments there.
#
ret.append(processed_output)
elif is_semicolon:
# Make sure there is a newline after the semicolon.
ret.append('')
# context information
filename = "Unknown"
lineno = 0
if self.directive.state:
filename = self.directive.state.docuement.current_source
lineno = self.directive.state.docuement.current_line
# output any exceptions raised during execution to stdout
# unless :okexcept: has been specified.
if not is_okexcept and "Traceback" in processed_output:
s = "\nException in %s at block ending on line %s\n" % (filename, lineno)
s += "Specify :okexcept: as an option in the ipython:: block to suppress this message\n"
sys.stdout.write('\n\n>>>' + ('-' * 73))
sys.stdout.write(s)
sys.stdout.write(processed_output)
sys.stdout.write('<<<' + ('-' * 73) + '\n\n')
# output any warning raised during execution to stdout
# unless :okwarning: has been specified.
if not is_okwarning:
for w in ws:
s = "\nWarning in %s at block ending on line %s\n" % (filename, lineno)
s += "Specify :okwarning: as an option in the ipython:: block to suppress this message\n"
sys.stdout.write('\n\n>>>' + ('-' * 73))
sys.stdout.write(s)
sys.stdout.write(('-' * 76) + '\n')
s=warnings.formatwarning(w.message, w.category,
w.filename, w.lineno, w.line)
sys.stdout.write(s)
sys.stdout.write('<<<' + ('-' * 73) + '\n')
self.cout.truncate(0)
return (ret, input_lines, processed_output,
is_doctest, decorator, image_file, image_directive)
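A note on the pattern above: each exception or warning report is bracketed between a '>>>---' rule and a '<<<---' rule so it stands out in the Sphinx build output. A minimal, self-contained sketch of the same idea (the report_problem name and the sample values are illustrative, not part of the directive):

import sys

def report_problem(kind, filename, lineno, body):
    # Bracket the diagnostic between eye-catching rules, in the spirit
    # of the directive's exception/warning reporting.
    sys.stdout.write('\n\n>>>' + ('-' * 73) + '\n')
    sys.stdout.write('%s in %s at block ending on line %s\n' % (kind, filename, lineno))
    sys.stdout.write(body)
    sys.stdout.write('<<<' + ('-' * 73) + '\n\n')

report_problem('Warning', 'example.rst', 42, 'something odd happened\n')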
Example 9
Project: pycamiface Source File: simple.py
def doit(device_num=0,
mode_num=None,
num_buffers=30,
save=False,
max_frames=None,
trigger_mode=None,
roi=None,
):
num_modes = cam_iface.get_num_modes(device_num)
for this_mode_num in range(num_modes):
mode_str = cam_iface.get_mode_string(device_num,this_mode_num)
print 'mode %d: %s'%(this_mode_num,mode_str)
if mode_num is None:
if 'DC1394_VIDEO_MODE_FORMAT7_0' in mode_str and 'MONO8' in mode_str:
mode_num=this_mode_num
if mode_num is None:
mode_num=0
print 'choosing mode %d'%(mode_num,)
cam = cam_iface.Camera(device_num,num_buffers,mode_num)
if save:
format = cam.get_pixel_coding()
depth = cam.get_pixel_depth()
filename = time.strftime( 'simple%Y%m%d_%H%M%S.fmf' )
fly_movie = FlyMovieFormat.FlyMovieSaver(filename,
version=3,
format=format,
bits_per_pixel=depth,
)
save_queue = Queue.Queue()
save_thread = threading.Thread( target=save_func, args=(fly_movie,save_queue))
save_thread.setDaemon(True)
save_thread.start()
num_props = cam.get_num_camera_properties()
#for i in range(num_props):
# print "property %d: %s"%(i,str(cam.get_camera_property_info(i)))
n_trigger_modes = cam.get_num_trigger_modes()
print "Trigger modes:"
for i in range(n_trigger_modes):
print ' %d: %s'%(i,cam.get_trigger_mode_string(i))
if trigger_mode is not None:
cam.set_trigger_mode_number( trigger_mode )
print 'Using trigger mode %d'%(cam.get_trigger_mode_number())
cam.start_camera()
if roi is not None:
cam.set_frame_roi( *roi )
actual_roi = cam.get_frame_roi()
if roi != actual_roi:
raise ValueError("could not set ROI. Actual ROI is %s."%(actual_roi,))
frametick = 0
framecount = 0
last_fps_print = time.time()
last_fno = None
while 1:
try:
buf = nx.asarray(cam.grab_next_frame_blocking())
except cam_iface.FrameDataMissing:
sys.stdout.write('M')
sys.stdout.flush()
continue
except cam_iface.FrameSystemCallInterruption:
sys.stdout.write('I')
sys.stdout.flush()
continue
timestamp = cam.get_last_timestamp()
fno = cam.get_last_framenumber()
if last_fno is not None:
skip = (fno-last_fno)-1
if skip != 0:
print 'WARNING: skipped %d frames'%skip
## if frametick==50:
## print 'sleeping'
## time.sleep(10.0)
## print 'wake'
last_fno=fno
now = time.time()
sys.stdout.write('.')
sys.stdout.flush()
frametick += 1
framecount += 1
t_diff = now-last_fps_print
if t_diff > 5.0:
fps = frametick/t_diff
print "%.1f fps"%fps
last_fps_print = now
frametick = 0
if save:
save_queue.put( (buf,timestamp) )
if max_frames:
if framecount >= max_frames:
break
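The grab loop above reduces each iteration to a one-character status marker written with sys.stdout.write ('M' for missing frame data, 'I' for a system-call interruption, '.' for a good frame) and flushes immediately so the markers appear in real time. A self-contained sketch of the idiom, with the camera replaced by random outcomes:

import random
import sys

for _ in range(60):
    outcome = random.choice('..........MI')  # mostly good frames
    sys.stdout.write(outcome)
    sys.stdout.flush()  # without the flush, markers sit in the buffer until a newline
sys.stdout.write('\n')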
Example 10
Project: info-flow-experiments Source File: reader.py
def read_old_log(log_file):
treatnames = []
fo = open(log_file, "r")
line = fo.readline()
chunks = re.split("\|\|", line)
if(chunks[0] == 'g'):
old = True
gmarker = 'g'
treatments = 2
treatnames = ['0', '1']
samples = len(chunks)-1
else:
old = False
gmarker = 'assign'
treatments = int(chunks[2])
samples = int(chunks[1])
line = fo.readline()
chunks = re.split("\|\|", line)
for i in range(1, len(chunks)):
treatnames.append(chunks[i].strip())
fo.close()
assert treatments == len(treatnames)
for i in range(0, treatments):
print "Treatment ", i, " = ", treatnames[i]
adv = []
ints = []
newsv = []
for i in range(0, samples):
adv.append(adVector.AdVector())
ints.append(interest.Interests())
newsv.append(news.NewsVector())
loadtimes = [timedelta(minutes=0)]*samples
reloads = [0]*samples
errors = [0]*samples
xvfbfails = []
breakout = False
par_adv = []
ass = []
fo = open(log_file, "r")
r = 0
sys.stdout.write("Scanning ads")
for line in fo:
chunks = re.split("\|\|", line)
chunks[len(chunks)-1] = chunks[len(chunks)-1].rstrip()
if(chunks[0] == gmarker and r==0):
r += 1
ass = chunks[2:]
if(old):
ass = chunks[1:]
assert len(ass) == samples
apply_labels_to_vecs(adv, ints, newsv, ass, samples, treatments)
#print ass
elif(chunks[0] == gmarker and r >0 ):
r += 1
par_adv.append({'advector':adv, 'newsvector':newsv, 'assignment':ass, 'xf':xvfbfails, 'intvector':ints,
'break':breakout, 'loadtimes':loadtimes, 'reloads':reloads, 'errors':errors})
sys.stdout.write(".")
sys.stdout.flush()
adv = []
ints = []
newsv = []
for i in range(0, samples):
adv.append(adVector.AdVector())
ints.append(interest.Interests())
newsv.append(news.NewsVector())
loadtimes = [timedelta(minutes=0)]*samples
reloads = [0]*samples
errors = [0]*samples
xvfbfails = []
breakout = False
ass = chunks[2:]
if(old):
ass = chunks[1:]
assert len(ass) == samples
apply_labels_to_vecs(adv, ints, newsv, ass, samples, treatments)
elif(chunks[0] == 'Xvfbfailure'):
xtreat, xid = chunks[1], chunks[2]
xvfbfails.append(xtreat)
elif(chunks[1] == 'breakingout'):
breakout = True
elif(chunks[1] == 'loadtime'):
t = (datetime.strptime(chunks[2], "%H:%M:%S.%f"))
delta = timedelta(hours=t.hour, minutes=t.minute, seconds=t.second)
id = int(chunks[3])
loadtimes[id] += delta
elif(chunks[1] == 'reload'):
id = int(chunks[2])
reloads[id] += 1
elif(chunks[1] == 'errorcollecting'):
id = int(chunks[2])
errors[id] += 1
elif(chunks[1] == 'prepref'):
id = int(chunks[4])
ints[id].remove_interest()
elif(chunks[1] == 'pref'):
id = int(chunks[4])
int_str = chunks[3]
ints[id].set_from_string(int_str)
elif(chunks[0] == 'news'):
ind_news = news.News({'Time':datetime.strptime(chunks[3], "%Y-%m-%d %H:%M:%S.%f"), 'Title':chunks[4],
'Agency': chunks[5], 'Ago': chunks[6], 'Body': chunks[7].rstrip(), 'Label':chunks[2]})
newsv[int(chunks[1])].add(ind_news)
elif(chunks[0] == 'ad'):
ind_ad = ad.Ad({'Time':datetime.strptime(chunks[3], "%Y-%m-%d %H:%M:%S.%f"), 'Title':chunks[4],
'URL': chunks[5], 'Body': chunks[6].rstrip(), 'cat': "", 'Label':chunks[2]})
adv[int(chunks[1])].add(ind_ad)
else: # to analyze old log files
try:
ind_ad = ad.Ad({'Time':datetime.strptime(chunks[2], "%Y-%m-%d %H:%M:%S.%f"), 'Title':chunks[3],
'URL': chunks[4], 'Body': chunks[5].rstrip(), 'cat': "", 'label':chunks[1]})
# ind_ad = ad.Ad({'Time':datetime.strptime(chunks[1], "%Y-%m-%d %H:%M:%S.%f"), 'Title':chunks[2],
# 'URL': chunks[3], 'Body': chunks[4].rstrip(), 'cat': "", 'label':""})
adv[int(chunks[0])].add(ind_ad)
except:
pass
r += 1
par_adv.append({'advector':adv, 'newsvector':newsv, 'assignment':ass, 'xf':xvfbfails, 'intvector':ints,
'break':breakout, 'loadtimes':loadtimes, 'reloads':reloads, 'errors':errors})
sys.stdout.write(".Scanning complete\n")
sys.stdout.flush()
return par_adv, treatnames
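read_old_log writes a "Scanning ads" label once, one '.' per round parsed, and a closing "Scanning complete" message, so a long parse shows steady one-line progress. The same shape in isolation (the parsing work is simulated with time.sleep):

import sys
import time

sys.stdout.write('Scanning')
sys.stdout.flush()
for _ in range(10):
    time.sleep(0.05)   # stand-in for parsing one round of the log
    sys.stdout.write('.')
    sys.stdout.flush()
sys.stdout.write('Scanning complete\n')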
Example 11
Project: Udacity-SDC-Radar-Driver-Micro-Challenge Source File: esr_ros_can_source.py
Function: parse_message
def parseMessage(self, msgId, rawmsg, dlc, flg, time):
msgToFunc = {
1248: self.status_one,
1249: self.status_two,
1250: self.status_three,
1251: self.status_four,
1280: self.track_msg,
1281: self.track_msg,
1282: self.track_msg,
1283: self.track_msg,
1284: self.track_msg,
1285: self.track_msg,
1286: self.track_msg,
1287: self.track_msg,
1288: self.track_msg,
1289: self.track_msg,
1290: self.track_msg,
1291: self.track_msg,
1292: self.track_msg,
1293: self.track_msg,
1294: self.track_msg,
1295: self.track_msg,
1296: self.track_msg,
1297: self.track_msg,
1298: self.track_msg,
1299: self.track_msg,
1300: self.track_msg,
1301: self.track_msg,
1302: self.track_msg,
1303: self.track_msg,
1304: self.track_msg,
1305: self.track_msg,
1306: self.track_msg,
1307: self.track_msg,
1308: self.track_msg,
1309: self.track_msg,
1310: self.track_msg,
1311: self.track_msg,
1312: self.track_msg,
1313: self.track_msg,
1314: self.track_msg,
1315: self.track_msg,
1316: self.track_msg,
1317: self.track_msg,
1318: self.track_msg,
1319: self.track_msg,
1320: self.track_msg,
1321: self.track_msg,
1322: self.track_msg,
1323: self.track_msg,
1324: self.track_msg,
1325: self.track_msg,
1326: self.track_msg,
1327: self.track_msg,
1328: self.track_msg,
1329: self.track_msg,
1330: self.track_msg,
1331: self.track_msg,
1332: self.track_msg,
1333: self.track_msg,
1334: self.track_msg,
1335: self.track_msg,
1336: self.track_msg,
1337: self.track_msg,
1338: self.track_msg,
1339: self.track_msg,
1340: self.track_msg,
1341: self.track_msg,
1342: self.track_msg,
1343: self.track_msg,
1344: self.track_status_msg,
1488: self.validation_msg_one,
1489: self.validation_msg_two,
1508: self.additional_status_one,
1509: self.additional_status_two,
1510: self.additional_status_three,
1511: self.additional_status_four,
1512: self.additional_status_five,
}
retData = {}
msg = []
if self.debug == True:
sys.stdout.write("In radar_data_parser and this is a message\n")
sys.stdout.write("msgId: %9d time: %9d flg: 0x%02x dlc: %d " % (msgId, time, flg, dlc))
msg = rawmsg
for i in xrange(dlc):
if self.debug == True:
sys.stdout.write(" 0x%0.2x " % (msg[i]))
if self.debug == True:
sys.stdout.write("\n")
if msgId in msgToFunc:
# This message is valid, so we need to parse it
if msgId >= 1280 and msgId <= 1343:
msgToFunc[msgId](msgId, msg)
else:
if self.debug == True:
sys.stdout.write("In radar_data_parser and this is msgId %d\n" % (msgId))
if (msgId == 1344):
msgToFunc[msgId](self.msg_counter, msg)
self.msg_counter += 1
elif (msgId > 1344 and self.msg_counter > 0):
msgToFunc[msgId](msg)
self.msg_counter = 0
else:
msgToFunc[msgId](msg)
if (msgId == 1512):
retData = self.data
self.data = {} # Start with a fresh object
return retData
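When self.debug is set, the parser above dumps each frame header and its payload bytes as zero-padded hex through sys.stdout.write, holding the line open until an explicit '\n'. A standalone sketch of that hex-dump idiom (the message ID and payload below are made up):

import sys

def dump_frame(msg_id, payload):
    # One line per frame: header fields first, then each byte as 0xNN.
    sys.stdout.write('msgId: %9d dlc: %d ' % (msg_id, len(payload)))
    for byte in payload:
        sys.stdout.write(' 0x%02x ' % byte)
    sys.stdout.write('\n')

dump_frame(1280, [0x01, 0xff, 0x10, 0x20])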
Example 12
Project: ptsa Source File: timeseries.py
def resampled(self, resampled_rate, window=None,
loop_axis=None, num_mp_procs=0, pad_to_pow2=False):
"""
Resample the data and reset all the time ranges.
Uses the resample function from scipy. This method seems to
be more accurate than the decimate method.
Parameters
----------
resampled_rate : {float}
New sample rate to resample to.
window : {None,str,float,tuple}, optional
See scipy.signal.resample for details
loop_axis: {None,str,int}, optional
Sometimes it might be faster to loop over an axis.
num_mp_procs: int, optional
Whether to try and use multiprocessing to loop over axis.
0 means no multiprocessing
>0 specifies num procs to use
None means yes, and use all possible procs
pad_to_pow2: bool, optional
Pad along the time dimension to the next power of 2 so
that the resampling is much faster (experimental).
Returns
-------
ts : {TimeSeries}
A TimeSeries instance with the resampled data.
See Also
--------
scipy.signal.resample
"""
# resample the data, getting new time range
time_range = self[self.tdim]
new_length = int(np.round(len(time_range)*resampled_rate/self.samplerate))
if pad_to_pow2:
padded_length = 2**next_pow2(len(time_range))
padded_new_length = int(np.round(padded_length*resampled_rate/self.samplerate))
time_range = np.hstack([time_range,
(np.arange(1,padded_length-len(time_range)+1)*np.diff(time_range[-2:]))+time_range[-1]])
if loop_axis is None:
# just do standard method on all data at once
if pad_to_pow2:
newdat,new_time_range = resample(pad_to_next_pow2(np.asarray(self),axis=self.taxis),
padded_new_length, t=time_range,
axis=self.taxis, window=window)
else:
newdat,new_time_range = resample(np.asarray(self),
new_length, t=time_range,
axis=self.taxis, window=window)
else:
# loop over specified axis
# get the loop axis name and length
loop_dim = self.get_dim_name(loop_axis)
loop_dim_len = len(self[loop_dim])
# specify empty boolean index
ind = np.zeros(loop_dim_len,dtype=np.bool)
newdat = []
if has_mp and num_mp_procs != 0:
po = mp.Pool(num_mp_procs)
for i in range(loop_dim_len):
ind[i] = True
dat = self.select(**{loop_dim:ind})
taxis = dat.taxis
if has_mp and num_mp_procs != 0:
# start async proc
if pad_to_pow2:
dat = pad_to_next_pow2(np.asarray(dat), axis=dat.taxis)
newdat.append(po.apply_async(resample,
(np.asarray(dat), padded_new_length, time_range,
taxis, window)))
else:
newdat.append(po.apply_async(resample,
(np.asarray(dat), new_length, time_range,
taxis, window)))
else:
# just call on that dataset
sys.stdout.write('%d '%i)
sys.stdout.flush()
if pad_to_pow2:
dat = pad_to_next_pow2(np.asarray(dat), axis=dat.taxis)
ndat,new_time_range = resample(np.asarray(dat), padded_new_length, t=time_range,
axis=taxis, window=window)
else:
ndat,new_time_range = resample(np.asarray(dat), new_length, t=time_range,
axis=taxis, window=window)
newdat.append(ndat)
ind[i] = False
if has_mp and num_mp_procs != 0:
# aggregate mp results
po.close()
#po.join()
out = []
for i in range(len(newdat)):
sys.stdout.write('%d '%i)
sys.stdout.flush()
out.append(newdat[i].get())
#out = [newdat[i].get() for i in range(len(newdat))]
newdat = [out[i][0] for i in range(len(out))]
new_time_range = out[i][1]
# concatenate the new data
newdat = np.concatenate(newdat,axis=self.get_axis(loop_axis))
sys.stdout.write('\n')
sys.stdout.flush()
# remove pad if we padded it
if pad_to_pow2:
newdat = newdat.take(range(new_length),axis=self.taxis)
new_time_range = new_time_range[:new_length]
# set the time dimension
newdims = self.dims.copy()
attrs = self.dims[self.taxis]._attrs.copy()
for k in self.dims[self.taxis]._required_attrs.keys():
attrs.pop(k,None)
newdims[self.taxis] = Dim(new_time_range,
self.dims[self.taxis].name,
**attrs)
attrs = self._attrs.copy()
for k in self._required_attrs.keys():
attrs.pop(k,None)
return TimeSeries(newdat, self.tdim, resampled_rate,
dims=newdims, **attrs)
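While looping over an axis (and again while collecting the multiprocessing results), resampled() writes the bare loop index followed by a space and flushes, which is about the cheapest possible progress readout for a numeric loop. Reduced to its essentials, with the resample call replaced by a sleep:

import sys
import time

for i in range(8):
    sys.stdout.write('%d ' % i)
    sys.stdout.flush()   # show the index before the slow step completes
    time.sleep(0.1)      # stand-in for one resample call
sys.stdout.write('\n')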
Example 13
def writer(self):
"""\
Loop and copy console->serial until EXITCHARCTER character is
found. When MENUCHARACTER is found, interpret the next key
locally.
"""
menu_active = False
try:
while self.alive:
try:
b = console.getkey()
except KeyboardInterrupt:
b = serial.to_bytes([3])
c = character(b)
if menu_active:
if c == MENUCHARACTER or c == EXITCHARCTER: # Menu character again/exit char -> send itself
self.serial.write(b) # send character
if self.echo:
sys.stdout.write(c)
elif c == '\x15': # CTRL+U -> upload file
sys.stderr.write('\n--- File to upload: ')
sys.stderr.flush()
console.cleanup()
filename = sys.stdin.readline().rstrip('\r\n')
if filename:
try:
file = open(filename, 'r')
sys.stderr.write('--- Sending file %s ---\n' % filename)
while True:
line = file.readline().rstrip('\r\n')
if not line:
break
self.serial.write(line)
self.serial.write('\r\n')
# Wait for output buffer to drain.
self.serial.flush()
sys.stderr.write('.') # Progress indicator.
sys.stderr.write('\n--- File %s sent ---\n' % filename)
except IOError, e:
sys.stderr.write('--- ERROR opening file %s: %s ---\n' % (filename, e))
console.setup()
elif c in '\x08hH?': # CTRL+H, h, H, ? -> Show help
sys.stderr.write(get_help_text())
elif c == '\x12': # CTRL+R -> Toggle RTS
self.rts_state = not self.rts_state
self.serial.setRTS(self.rts_state)
sys.stderr.write('--- RTS %s ---\n' % (self.rts_state and 'active' or 'inactive'))
elif c == '\x04': # CTRL+D -> Toggle DTR
self.dtr_state = not self.dtr_state
self.serial.setDTR(self.dtr_state)
sys.stderr.write('--- DTR %s ---\n' % (self.dtr_state and 'active' or 'inactive'))
elif c == '\x02': # CTRL+B -> toggle BREAK condition
self.break_state = not self.break_state
self.serial.setBreak(self.break_state)
sys.stderr.write('--- BREAK %s ---\n' % (self.break_state and 'active' or 'inactive'))
elif c == '\x05': # CTRL+E -> toggle local echo
self.echo = not self.echo
sys.stderr.write('--- local echo %s ---\n' % (self.echo and 'active' or 'inactive'))
elif c == '\x09': # CTRL+I -> info
self.dump_port_settings()
elif c == '\x01': # CTRL+A -> cycle escape mode
self.repr_mode += 1
if self.repr_mode > 3:
self.repr_mode = 0
sys.stderr.write('--- escape data: %s ---\n' % (
REPR_MODES[self.repr_mode],
))
elif c == '\x0c': # CTRL+L -> cycle linefeed mode
self.convert_outgoing += 1
if self.convert_outgoing > 2:
self.convert_outgoing = 0
self.newline = NEWLINE_CONVERISON_MAP[self.convert_outgoing]
sys.stderr.write('--- line feed %s ---\n' % (
LF_MODES[self.convert_outgoing],
))
elif c in 'pP': # P -> change port
sys.stderr.write('\n--- Enter port name: ')
sys.stderr.flush()
console.cleanup()
try:
port = sys.stdin.readline().strip()
except KeyboardInterrupt:
port = None
console.setup()
if port and port != self.serial.port:
# reader thread needs to be shut down
self._stop_reader()
# save settings
settings = self.serial.getSettingsDict()
try:
try:
new_serial = serial.serial_for_url(port, do_not_open=True)
except AttributeError:
# happens when the installed pyserial is older than 2.5. use the
# Serial class directly then.
new_serial = serial.Serial()
new_serial.port = port
# restore settings and open
new_serial.applySettingsDict(settings)
new_serial.open()
new_serial.setRTS(self.rts_state)
new_serial.setDTR(self.dtr_state)
new_serial.setBreak(self.break_state)
except Exception, e:
sys.stderr.write('--- ERROR opening new port: %s ---\n' % (e,))
new_serial.close()
else:
self.serial.close()
self.serial = new_serial
sys.stderr.write('--- Port changed to: %s ---\n' % (self.serial.port,))
# and restart the reader thread
self._start_reader()
elif c in 'bB': # B -> change baudrate
sys.stderr.write('\n--- Baudrate: ')
sys.stderr.flush()
console.cleanup()
backup = self.serial.baudrate
try:
self.serial.baudrate = int(sys.stdin.readline().strip())
except ValueError, e:
sys.stderr.write('--- ERROR setting baudrate: %s ---\n' % (e,))
self.serial.baudrate = backup
else:
self.dump_port_settings()
console.setup()
elif c == '8': # 8 -> change to 8 bits
self.serial.bytesize = serial.EIGHTBITS
self.dump_port_settings()
elif c == '7': # 7 -> change to 7 bits
self.serial.bytesize = serial.SEVENBITS
self.dump_port_settings()
elif c in 'eE': # E -> change to even parity
self.serial.parity = serial.PARITY_EVEN
self.dump_port_settings()
elif c in 'oO': # O -> change to odd parity
self.serial.parity = serial.PARITY_ODD
self.dump_port_settings()
elif c in 'mM': # M -> change to mark parity
self.serial.parity = serial.PARITY_MARK
self.dump_port_settings()
elif c in 'sS': # S -> change to space parity
self.serial.parity = serial.PARITY_SPACE
self.dump_port_settings()
elif c in 'nN': # N -> change to no parity
self.serial.parity = serial.PARITY_NONE
self.dump_port_settings()
elif c == '1': # 1 -> change to 1 stop bits
self.serial.stopbits = serial.STOPBITS_ONE
self.dump_port_settings()
elif c == '2': # 2 -> change to 2 stop bits
self.serial.stopbits = serial.STOPBITS_TWO
self.dump_port_settings()
elif c == '3': # 3 -> change to 1.5 stop bits
self.serial.stopbits = serial.STOPBITS_ONE_POINT_FIVE
self.dump_port_settings()
elif c in 'xX': # X -> change software flow control
self.serial.xonxoff = (c == 'X')
self.dump_port_settings()
elif c in 'rR': # R -> change hardware flow control
self.serial.rtscts = (c == 'R')
self.dump_port_settings()
else:
sys.stderr.write('--- unknown menu character %s --\n' % key_description(c))
menu_active = False
elif c == MENUCHARACTER: # next char will be for menu
menu_active = True
elif c == EXITCHARCTER:
self.stop()
break # exit app
elif c == '\n':
self.serial.write(self.newline) # send newline character(s)
if self.echo:
sys.stdout.write(c) # local echo is a real newline in any case
sys.stdout.flush()
else:
self.serial.write(b) # send byte
if self.echo:
sys.stdout.write(c)
sys.stdout.flush()
except:
self.alive = False
raise
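The writer loop only echoes keystrokes locally when self.echo is enabled, writing each character with sys.stdout.write and flushing so it appears before the next key is read. A minimal echo loop over a canned character sequence (real stdin handling and the serial port are omitted):

import sys

echo = True
for c in 'hello\n':
    # in the real program, the character is also sent to the serial port here
    if echo:
        sys.stdout.write(c)
        sys.stdout.flush()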
Example 14
Project: tools-iuc Source File: htseqsams2mx.py
def htseqMX(gff_filename, sam_filenames, colnames, sam_exts, sam_bais, opts):
"""
Code taken from count.py in Simon Anders' HTSeq distribution
Wrapped in a loop to accept multiple bam/sam files and their names from galaxy to
produce a matrix of contig counts by sample for downstream use in edgeR and DESeq tools
"""
class UnknownChrom( Exception ):
pass
def my_showwarning( message, category, filename, lineno=None, line=None ):
sys.stdout.write( "Warning: %s\n" % message )
def invert_strand( iv ):
iv2 = iv.copy()
if iv2.strand == "+":
iv2.strand = "-"
elif iv2.strand == "-":
iv2.strand = "+"
else:
raise ValueError("Illegal strand")
return iv2
def count_reads_in_features( sam_filenames, colnames, gff_filename, opts ):
""" Hacked version of htseq count.py
"""
if opts.quiet:
warnings.filterwarnings( action="ignore", module="HTSeq" )
features = HTSeq.GenomicArrayOfSets( "auto", opts.stranded != "no" )
mapqMin = int(opts.mapqMin)
counts = {}
nreads = 0
empty = 0
ambiguous = 0
notaligned = 0
lowqual = 0
nonunique = 0
filtered = 0 # new filter_extras - need a better way to do this - independent filter tool?
gff = HTSeq.GFF_Reader( gff_filename )
try:
for i, f in enumerate(gff):
if f.type == opts.feature_type:
try:
feature_id = f.attr[ opts.id_attribute ]
except KeyError:
try:
feature_id = f.attr[ 'gene_id' ]
except KeyError:
sys.exit( "Feature at row %d %s does not contain a '%s' attribute OR a gene_id attribute - faulty GFF?" %
( (i + 1), f.name, opts.id_attribute ) )
if opts.stranded != "no" and f.iv.strand == ".":
sys.exit( "Feature %s at %s does not have strand information but you are "
"running htseq-count in stranded mode. Use '--stranded=no'." %
( f.name, f.iv ) )
features[ f.iv ] += feature_id
counts[ feature_id ] = [0 for x in colnames] # we use sami as an index here to bump counts later
except:
sys.stderr.write( "Error occurred in %s.\n" % gff.get_line_number_string() )
raise
if not opts.quiet:
sys.stdout.write( "%d GFF lines processed.\n" % i )
if len( counts ) == 0 and not opts.quiet:
sys.stdout.write( "Warning: No features of type '%s' found.\n" % opts.feature_type )
for sami, sam_filename in enumerate(sam_filenames):
colname = colnames[sami]
isbam = sam_exts[sami] == 'bam'
hasbai = sam_bais[sami] > ''
if hasbai:
tempname = os.path.splitext(os.path.basename(sam_filename))[0]
tempbam = '%s_TEMP.bam' % tempname
tempbai = '%s_TEMP.bai' % tempname
os.link(sam_filename, tempbam)
os.link(sam_bais[sami], tempbai)
try:
if isbam:
if hasbai:
read_seq = HTSeq.BAM_Reader( tempbam )
else:
read_seq = HTSeq.BAM_Reader( sam_filename )
else:
read_seq = HTSeq.SAM_Reader( sam_filename )
first_read = iter(read_seq).next()
pe_mode = first_read.paired_end
except:
if isbam:
print >> sys.stderr, "Error occurred when reading first line of bam file %s colname=%s \n" % (sam_filename, colname )
else:
print >> sys.stderr, "Error occurred when reading first line of sam file %s colname=%s \n" % (sam_filename, colname )
raise
try:
if pe_mode:
read_seq_pe_file = read_seq
read_seq = HTSeq.pair_SAM_alignments( read_seq )
for seqi, r in enumerate(read_seq):
nreads += 1
if not pe_mode:
if not r.aligned:
notaligned += 1
continue
try:
if len(opts.filter_extras) > 0:
for extra in opts.filter_extras:
if r.optional_field(extra):
filtered += 1
continue
if r.optional_field( "NH" ) > 1:
nonunique += 1
continue
except KeyError:
pass
if r.aQual < mapqMin:
lowqual += 1
continue
if opts.stranded != "reverse":
iv_seq = ( co.ref_iv for co in r.cigar if co.type == "M" and co.size > 0 )
else:
iv_seq = ( invert_strand( co.ref_iv ) for co in r.cigar if co.type == "M" and co.size > 0 )
else:
if r[0] is not None and r[0].aligned:
if opts.stranded != "reverse":
iv_seq = ( co.ref_iv for co in r[0].cigar if co.type == "M" and co.size > 0 )
else:
iv_seq = ( invert_strand( co.ref_iv ) for co in r[0].cigar if co.type == "M" and co.size > 0 )
else:
iv_seq = tuple()
if r[1] is not None and r[1].aligned:
if opts.stranded != "reverse":
iv_seq = itertools.chain( iv_seq,
( invert_strand( co.ref_iv ) for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
else:
iv_seq = itertools.chain( iv_seq,
( co.ref_iv for co in r[1].cigar if co.type == "M" and co.size > 0 ) )
else:
if r[0] is None or not r[0].aligned:
notaligned += 1
continue
try:
if ( r[0] is not None and r[0].optional_field( "NH" ) > 1 ) or \
( r[1] is not None and r[1].optional_field( "NH" ) > 1 ):
nonunique += 1
continue
except KeyError:
pass
if ( r[0] and r[0].aQual < mapqMin ) or ( r[1] and r[1].aQual < mapqMin ):
lowqual += 1
continue
try:
if opts.mode == "union":
fs = set()
for iv in iv_seq:
if iv.chrom not in features.chrom_vectors:
raise UnknownChrom
for iv2, fs2 in features[ iv ].steps():
fs = fs.union( fs2 )
elif opts.mode == "intersection-strict" or opts.mode == "intersection-nonempty":
fs = None
for iv in iv_seq:
if iv.chrom not in features.chrom_vectors:
raise UnknownChrom
for iv2, fs2 in features[ iv ].steps():
if len(fs2) > 0 or opts.mode == "intersection-strict":
if fs is None:
fs = fs2.copy()
else:
fs = fs.intersection( fs2 )
else:
sys.exit( "Illegal overlap mode %s" % opts.mode )
if fs is None or len( fs ) == 0:
empty += 1
elif len( fs ) > 1:
ambiguous += 1
else:
ck = list(fs)[0]
counts[ck][sami] += 1 # end up with counts for each sample as a list
except UnknownChrom:
if not pe_mode:
rr = r
else:
rr = r[0] if r[0] is not None else r[1]
empty += 1
if not opts.quiet:
sys.stdout.write( ( "Warning: Skipping read '%s', because chromosome " +
"'%s', to which it has been aligned, did not appear in the GFF file.\n" ) %
( rr.read.name, iv.chrom ) )
except:
if not pe_mode:
sys.stderr.write( "Error occurred in %s.\n" % read_seq.get_line_number_string() )
else:
sys.stderr.write( "Error occurred in %s.\n" % read_seq_pe_file.get_line_number_string() )
raise
if not opts.quiet:
sys.stdout.write( "%d sam %s processed for %s.\n" % ( seqi, "lines " if not pe_mode else "line pairs", colname ) )
return counts, empty, ambiguous, lowqual, notaligned, nonunique, filtered, nreads
warnings.showwarning = my_showwarning
assert os.path.isfile(gff_filename), '## unable to open supplied gff file %s' % gff_filename
try:
counts, empty, ambiguous, lowqual, notaligned, nonunique, filtered, nreads = count_reads_in_features( sam_filenames, colnames, gff_filename, opts)
except:
sys.stderr.write( "Error: %s\n" % str( sys.exc_info()[1] ) )
sys.stderr.write( "[Exception type: %s, raised in %s:%d]\n" %
( sys.exc_info()[1].__class__.__name__,
os.path.basename(traceback.extract_tb( sys.exc_info()[2] )[-1][0]),
traceback.extract_tb( sys.exc_info()[2] )[-1][1] ) )
sys.exit( 1 )
return counts, empty, ambiguous, lowqual, notaligned, nonunique, filtered, nreads
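htseqMX installs my_showwarning as warnings.showwarning, so warnings raised by the libraries it calls are routed through sys.stdout.write rather than stderr and land in the tool's captured output. The same redirection in a few self-contained lines:

import sys
import warnings

def my_showwarning(message, category, filename, lineno, file=None, line=None):
    sys.stdout.write('Warning: %s\n' % message)

warnings.showwarning = my_showwarning
warnings.warn('this now goes to stdout')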
Example 15
Project: kamaelia_ Source File: spam_grab.py
def main(self):
yield self.getMailStats()
print "Number of emails waiting for us:", self.stat_mails
print "Size of inbox", self.stat_size
self.spamcount = self.getSpamStoreMeta()
lower = self.stat_mails
print self.whitelist
while lower > 1:
deletions = []
greyzone = []
higher = lower
# lower = max(1, lower-50)
# lower = max(1, lower-200)
lower = max(1, lower-600)
if 1:
higher = lower
lower = max(1, lower-600)
higher = lower
lower = max(1, lower-600)
higher = lower
lower = max(1, lower-600)
higher = lower
lower = max(1, lower-600)
higher = lower
lower = max(1, lower-600)
higher = lower
lower = max(1, lower-600)
higher = lower
lower = max(1, lower-600)
higher = lower+200
lower = max(1, lower-500)
# if 0:
higher = lower
lower = max(1, lower-600)
print "lower, higher",lower, higher
l = 0
for mailid in range(lower, higher+1):
l +=1
if (l % 100) == 0: print
# print "Retrieving HEADERS of mail", mailid
yield self.getMessageHeaders(mailid)
# print "-------- HEADERS RECEIVED --------"
delete = False
whitelisted = False
for sender in self.headers.get("from",[]):
if whitelisted:
continue
if self.blacklisted_sender(sender):
delete = True
if self.whitelisted_sender(sender):
delete = False
whitelisted = True
continue
if "mail delivery subsystem" in sender:
delete = True
if "system administrator" in sender:
if "undeliverable" in self.headers["subject"][0]:
delete = True
for phrase in self.phrases: # hideously inefficient, but works
if phrase in self.headers["subject"][0]:
delete = True
if not delete: # handled differently now
pass
# print self.headers["subject"][0]
if delete:
deletions.append( (mailid, self.headers["from"], self.headers) )
sys.stdout.write("D")
else:
sys.stdout.write(".")
sys.stdout.flush()
if not delete and not whitelisted:
# print "THIS /MAY/ BE SPAM", self.headers["subject"][0]
greyzone.append( (mailid, self.headers.get("from",[]), self.headers) )
print
if len(deletions) != 0:
print
print "============ CANDIDATES FOR DELETION ============"
pprint.pprint( [ (ID, FROM, HEADERS.get("subject",[]) ) for (ID, FROM, HEADERS) in deletions ])
print "TOTAL Suggested", len(deletions)
print "To delete these, don't type 'quit'"
X = raw_input()
if X.lower() == "quit":
break
if X.lower() != "skip":
for deletion in deletions:
ID, FROM, HEADERS = deletion
sys.stdout.write(".")
sys.stdout.flush()
yield self.grabStoreSpam(ID)
# print "SPAM GRABBED"
# print "INCREASING SPAMCOUNT"
f = str(self.spamcount)
self.spamcount +=1
self.storeSpamStoreMeta(self.spamcount)
print "DELETING SPAM FROM SERVER, you still have mail",ID,"here", "SPAMSTORE/"+f
yield self.deleteMessage(ID)
#print self.result
print "RECOMMENDED DELETIONS COMPLETE"
print "To delete more, don't type 'quit'"
X = raw_input()
if X.lower() == "quit":
break
else:
print "skipping, moving on"
deletions = []
if len(greyzone) != 0:
print "============ EMAILS WHICH ARE GREY ============"
# pprint.pprint( [ (ID, FROM, HEADERS["subject"]) for (ID, FROM, HEADERS) in greyzone ])
for (ID, FROM, HEADERS) in greyzone:
senders = []
for sender in FROM:
if ("<" in sender) and (">" in sender):
sender = sender[sender.find("<")+1:sender.rfind(">")]
senders.append(sender)
print " ".join(senders), ":", "".join( HEADERS["subject"]), ":", "".join(FROM)
print "JUST SENDERS ---------------------------------------"
allsenders = []
for senders in [ FROM for (ID, FROM, HEADERS) in greyzone ]:
for sender in senders:
if ("<" in sender) and (">" in sender):
sender = sender[sender.find("<")+1:sender.rfind(">")]
if sender not in allsenders:
allsenders.append(sender)
for sender in allsenders:
print sender
print "JUST SUBJECTS --------------------------------------"
for subjects in [ HEADERS["subject"] for (ID, FROM, HEADERS) in greyzone ]:
for subject in subjects:
print subject
print "=====End of report on currently grey mails====="
print "To keep doing, don't type quit"
X = raw_input()
if X.lower() == "quit":
break
print "Done, call again"
self.send(["QUIT"], "outbox")
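While scanning headers, the component above prints 'D' for a mail queued for deletion and '.' for one it keeps, flushing after each so the scan reads like a live tally. Sketched with a toy classifier standing in for the real blacklist/whitelist rules:

import sys

subjects = ['meeting notes', 'CHEAP PILLS', 'lunch?', 'FREE MONEY', 'report']
for subject in subjects:
    is_spam = subject.isupper()   # toy stand-in for the real rules
    sys.stdout.write('D' if is_spam else '.')
    sys.stdout.flush()
sys.stdout.write('\n')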
Example 16
Project: liffy Source File: liffy.py
def main():
# Terminal Colors
t = Terminal()
def banner():
print(t.cyan("""
.____ .__ _____ _____
| | |__|/ ____\/ ____\__.__.
| | | \ __\ __< | |
| |___| || | | | \___ |
|_______ \__||__| |__| / ____| v1.2
\/ \/
"""))
def progressbar():
bar_width = 70
sys.stdout.write(t.cyan("[{0}] ".format(datetime.datetime.now())) + " " * bar_width)
sys.stdout.flush()
sys.stdout.write("\b" * (bar_width + 1))
for w in xrange(bar_width):
time.sleep(0.01)
sys.stdout.write(".")
sys.stdout.flush()
sys.stdout.write("\n")
#---------------------------------------------------------------------------------------------------
banner()
if len(sys.argv) < 2:
print(t.red("[{0}] ".format(datetime.datetime.now())) + "Not Enough Arguments!")
print(t.red("[{0}] ".format(datetime.datetime.now())) + "Example: ./liffy.py --url \
http://target/files.php?file= --data\n")
sys.exit(0)
#---------------------------------------------------------------------------------------------------
""" Command Line Arguments """
parser = argparse.ArgumentParser()
parser.add_argument("--url", help="target url")
parser.add_argument("--data", help="data technique", action="store_true")
parser.add_argument("--input", help="input technique", action="store_true")
parser.add_argument("--expect", help="expect technique", action="store_true")
parser.add_argument("--environ", help="/proc/self/environ technique", action="store_true")
parser.add_argument("--access", help="access logs technique", action="store_true")
parser.add_argument("--ssh", help="auth logs technique", action="store_true")
parser.add_argument("--filter", help="filter technique", action="store_true")
parser.add_argument("--location", help="path to target file (access log, auth log, etc.)")
parser.add_argument("--nostager", help="execute payload directly, do not use stager", action="store_true")
parser.add_argument("--relative", help="use path traversal sequences for attack", action="store_true")
parser.add_argument("--cookies", help="session cookies")
args = parser.parse_args()
#---------------------------------------------------------------------------------------------------
""" Assign argument values """
url = args.url
nostager = args.nostager
relative = args.relative
c = args.cookies
#---------------------------------------------------------------------------------------------------
""" Check to make sure target is actually up """
print(t.cyan("[{0}] ".format(datetime.datetime.now())) + "Checking Target: {0}".format(url))
parsed = urlparse.urlsplit(url)
domain = parsed.scheme + "://" + parsed.netloc
progressbar()
try:
r = requests.get(domain)
if r.status_code != 200:
print(t.red("[{0}] ".format(datetime.datetime.now())) + "Did Not Receive Correct Response From Target URL!")
else:
print(t.red("[{0}] ".format(datetime.datetime.now())) + "Target URL Looks Good!")
if args.data:
print(t.red("[{0}] ".format(datetime.datetime.now())) + "Data Technique Selected!")
d = core.Data(url, nostager, c)
d.execute_data()
elif args.input:
print(t.red("[{0}] ".format(datetime.datetime.now())) + "Input Technique Selected!")
i = core.Input(url, nostager, c)
i.execute_input()
elif args.expect:
print(t.red("[{0}] ".format(datetime.datetime.now())) + "Expect Technique Selected!")
e = core.Expect(url, nostager, c)
e.execute_expect()
elif args.environ:
print(t.red("[{0}] ".format(datetime.datetime.now())) + "/proc/self/environ Technique Selected!")
i = core.Environ(url, nostager, relative, c)
i.execute_environ()
elif args.access:
if not args.location:
print(t.red("[{0}] ".format(datetime.datetime.now())) + "Log Location Not Provided! Using Default")
l = '/var/log/apache2/access.log'
else:
l = args.location
a = core.Logs(url, l, nostager, relative, c)
a.execute_logs()
elif args.ssh:
if not args.location:
print(t.red("[{0}] ".format(datetime.datetime.now())) + "Log Location Not Provided! Using Default")
l = '/var/log/auth.log'
else:
l = args.location
a = core.SSHLogs(url, l, relative, c)
a.execute_ssh()
elif args.filter:
print(t.red("[{0}] ".format(datetime.datetime.now())) + "Filter Technique Selected!")
f = core.Filter(url, c)
f.execute_filter()
else:
print(t.red("[{0}] ".format(datetime.datetime.now())) + "Technique Not Selected!")
sys.exit(0)
except requests.HTTPError as e:
print(t.red("[{0}] HTTP Error!".format(datetime.datetime.now())) + str(e))
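progressbar() above pre-prints the empty bar, backs the cursor up with '\b' * (bar_width + 1), and then fills the bar in dot by dot; the cursor never leaves the line until the final '\n'. The core of the trick on its own (bracket characters added here for visibility):

import sys
import time

bar_width = 40
sys.stdout.write('[%s]' % (' ' * bar_width))
sys.stdout.flush()
sys.stdout.write('\b' * (bar_width + 1))   # move back to just after '['
for _ in range(bar_width):
    time.sleep(0.02)
    sys.stdout.write('.')
    sys.stdout.flush()
sys.stdout.write('\n')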
Example 17
Project: LasagneNLP Source File: bi_lstm_cnn_crf.py
def main():
parser = argparse.ArgumentParser(description='Tuning with bi-directional LSTM-CNN-CRF')
parser.add_argument('--fine_tune', action='store_true', help='Fine tune the word embeddings')
parser.add_argument('--embedding', choices=['word2vec', 'glove', 'senna', 'random'], help='Embedding for words',
required=True)
parser.add_argument('--embedding_dict', default=None, help='path for embedding dict')
parser.add_argument('--batch_size', type=int, default=10, help='Number of sentences in each batch')
parser.add_argument('--num_units', type=int, default=100, help='Number of hidden units in LSTM')
parser.add_argument('--num_filters', type=int, default=20, help='Number of filters in CNN')
parser.add_argument('--learning_rate', type=float, default=0.1, help='Learning rate')
parser.add_argument('--decay_rate', type=float, default=0.1, help='Decay rate of learning rate')
parser.add_argument('--grad_clipping', type=float, default=0, help='Gradient clipping')
parser.add_argument('--gamma', type=float, default=1e-6, help='weight for regularization')
parser.add_argument('--peepholes', action='store_true', help='Peepholes for LSTM')
parser.add_argument('--oov', choices=['random', 'embedding'], help='Embedding for oov word', required=True)
parser.add_argument('--update', choices=['sgd', 'momentum', 'nesterov', 'adadelta'], help='update algorithm',
default='sgd')
parser.add_argument('--regular', choices=['none', 'l2'], help='regularization for training', required=True)
parser.add_argument('--dropout', action='store_true', help='Apply dropout layers')
parser.add_argument('--patience', type=int, default=5, help='Patience for early stopping')
parser.add_argument('--output_prediction', action='store_true', help='Output predictions to temp files')
parser.add_argument('--train') # "data/POS-penn/wsj/split1/wsj1.train.original"
parser.add_argument('--dev') # "data/POS-penn/wsj/split1/wsj1.dev.original"
parser.add_argument('--test') # "data/POS-penn/wsj/split1/wsj1.test.original"
args = parser.parse_args()
def construct_input_layer():
if fine_tune:
layer_input = lasagne.layers.InputLayer(shape=(None, max_length), input_var=input_var, name='input')
layer_embedding = lasagne.layers.EmbeddingLayer(layer_input, input_size=alphabet_size,
output_size=embedd_dim,
W=embedd_table, name='embedding')
return layer_embedding
else:
layer_input = lasagne.layers.InputLayer(shape=(None, max_length, embedd_dim), input_var=input_var,
name='input')
return layer_input
def construct_char_input_layer():
layer_char_input = lasagne.layers.InputLayer(shape=(None, max_sent_length, max_char_length),
input_var=char_input_var, name='char-input')
layer_char_input = lasagne.layers.reshape(layer_char_input, (-1, [2]))
layer_char_embedding = lasagne.layers.EmbeddingLayer(layer_char_input, input_size=char_alphabet_size,
output_size=char_embedd_dim, W=char_embedd_table,
name='char_embedding')
layer_char_input = lasagne.layers.DimshuffleLayer(layer_char_embedding, pattern=(0, 2, 1))
return layer_char_input
logger = utils.get_logger("BiLSTM-CNN-CRF")
fine_tune = args.fine_tune
oov = args.oov
regular = args.regular
embedding = args.embedding
embedding_path = args.embedding_dict
train_path = args.train
dev_path = args.dev
test_path = args.test
update_algo = args.update
grad_clipping = args.grad_clipping
peepholes = args.peepholes
num_filters = args.num_filters
gamma = args.gamma
output_predict = args.output_prediction
dropout = args.dropout
X_train, Y_train, mask_train, X_dev, Y_dev, mask_dev, X_test, Y_test, mask_test, \
embedd_table, label_alphabet, \
C_train, C_dev, C_test, char_embedd_table = data_processor.load_dataset_sequence_labeling(train_path, dev_path,
test_path, oov=oov,
fine_tune=fine_tune,
embedding=embedding,
embedding_path=embedding_path,
use_character=True)
num_labels = label_alphabet.size() - 1
logger.info("constructing network...")
# create variables
target_var = T.imatrix(name='targets')
mask_var = T.matrix(name='masks', dtype=theano.config.floatX)
if fine_tune:
input_var = T.imatrix(name='inputs')
num_data, max_length = X_train.shape
alphabet_size, embedd_dim = embedd_table.shape
else:
input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
num_data, max_length, embedd_dim = X_train.shape
char_input_var = T.itensor3(name='char-inputs')
num_data_char, max_sent_length, max_char_length = C_train.shape
char_alphabet_size, char_embedd_dim = char_embedd_table.shape
assert (max_length == max_sent_length)
assert (num_data == num_data_char)
# construct input and mask layers
layer_incoming1 = construct_char_input_layer()
layer_incoming2 = construct_input_layer()
layer_mask = lasagne.layers.InputLayer(shape=(None, max_length), input_var=mask_var, name='mask')
# construct bi-rnn-cnn
num_units = args.num_units
bi_lstm_cnn_crf = build_BiLSTM_CNN_CRF(layer_incoming1, layer_incoming2, num_units, num_labels, mask=layer_mask,
grad_clipping=grad_clipping, peepholes=peepholes, num_filters=num_filters,
dropout=dropout)
logger.info("Network structure: hidden=%d, filter=%d" % (num_units, num_filters))
# compute loss
num_tokens = mask_var.sum(dtype=theano.config.floatX)
# get outpout of bi-lstm-cnn-crf shape [batch, length, num_labels, num_labels]
energies_train = lasagne.layers.get_output(bi_lstm_cnn_crf)
energies_eval = lasagne.layers.get_output(bi_lstm_cnn_crf, deterministic=True)
loss_train = crf_loss(energies_train, target_var, mask_var).mean()
loss_eval = crf_loss(energies_eval, target_var, mask_var).mean()
# l2 regularization?
if regular == 'l2':
l2_penalty = lasagne.regularization.regularize_network_params(bi_lstm_cnn_crf, lasagne.regularization.l2)
loss_train = loss_train + gamma * l2_penalty
_, corr_train = crf_accuracy(energies_train, target_var)
corr_train = (corr_train * mask_var).sum(dtype=theano.config.floatX)
prediction_eval, corr_eval = crf_accuracy(energies_eval, target_var)
corr_eval = (corr_eval * mask_var).sum(dtype=theano.config.floatX)
# Create update expressions for training.
# hyper parameters to tune: learning rate, momentum, regularization.
batch_size = args.batch_size
learning_rate = 1.0 if update_algo == 'adadelta' else args.learning_rate
decay_rate = args.decay_rate
momentum = 0.9
params = lasagne.layers.get_all_params(bi_lstm_cnn_crf, trainable=True)
updates = utils.create_updates(loss_train, params, update_algo, learning_rate, momentum=momentum)
# Compile a function performing a training step on a mini-batch
train_fn = theano.function([input_var, target_var, mask_var, char_input_var], [loss_train, corr_train, num_tokens],
updates=updates)
# Compile a second function evaluating the loss and accuracy of network
eval_fn = theano.function([input_var, target_var, mask_var, char_input_var],
[loss_eval, corr_eval, num_tokens, prediction_eval])
# Finally, launch the training loop.
logger.info(
"Start training: %s with regularization: %s(%f), dropout: %s, fine tune: %s (#training data: %d, batch size: %d, clip: %.1f, peepholes: %s)..." \
% (
update_algo, regular, (0.0 if regular == 'none' else gamma), dropout, fine_tune, num_data, batch_size,
grad_clipping,
peepholes))
num_batches = num_data / batch_size
num_epochs = 1000
best_loss = 1e+12
best_acc = 0.0
best_epoch_loss = 0
best_epoch_acc = 0
best_loss_test_err = 0.
best_loss_test_corr = 0.
best_acc_test_err = 0.
best_acc_test_corr = 0.
stop_count = 0
lr = learning_rate
patience = args.patience
for epoch in range(1, num_epochs + 1):
print 'Epoch %d (learning rate=%.4f, decay rate=%.4f): ' % (epoch, lr, decay_rate)
train_err = 0.0
train_corr = 0.0
train_total = 0
train_inst = 0
start_time = time.time()
num_back = 0
train_batches = 0
for batch in utils.iterate_minibatches(X_train, Y_train, masks=mask_train, char_inputs=C_train,
batch_size=batch_size, shuffle=True):
inputs, targets, masks, char_inputs = batch
err, corr, num = train_fn(inputs, targets, masks, char_inputs)
train_err += err * inputs.shape[0]
train_corr += corr
train_total += num
train_inst += inputs.shape[0]
train_batches += 1
time_ave = (time.time() - start_time) / train_batches
time_left = (num_batches - train_batches) * time_ave
# update log
sys.stdout.write("\b" * num_back)
log_info = 'train: %d/%d loss: %.4f, acc: %.2f%%, time left (estimated): %.2fs' % (
min(train_batches * batch_size, num_data), num_data,
train_err / train_inst, train_corr * 100 / train_total, time_left)
sys.stdout.write(log_info)
num_back = len(log_info)
# update training log after each epoch
assert train_inst == num_data
sys.stdout.write("\b" * num_back)
print 'train: %d/%d loss: %.4f, acc: %.2f%%, time: %.2fs' % (
min(train_batches * batch_size, num_data), num_data,
train_err / num_data, train_corr * 100 / train_total, time.time() - start_time)
# evaluate performance on dev data
dev_err = 0.0
dev_corr = 0.0
dev_total = 0
dev_inst = 0
for batch in utils.iterate_minibatches(X_dev, Y_dev, masks=mask_dev, char_inputs=C_dev, batch_size=batch_size):
inputs, targets, masks, char_inputs = batch
err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
dev_err += err * inputs.shape[0]
dev_corr += corr
dev_total += num
dev_inst += inputs.shape[0]
if output_predict:
utils.output_predictions(predictions, targets, masks, 'tmp/dev%d' % epoch, label_alphabet,
is_flattened=False)
print 'dev loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
dev_err / dev_inst, dev_corr, dev_total, dev_corr * 100 / dev_total)
if best_loss < dev_err and best_acc > dev_corr / dev_total:
stop_count += 1
else:
update_loss = False
update_acc = False
stop_count = 0
if best_loss > dev_err:
update_loss = True
best_loss = dev_err
best_epoch_loss = epoch
if best_acc < dev_corr / dev_total:
update_acc = True
best_acc = dev_corr / dev_total
best_epoch_acc = epoch
# evaluate on test data when better performance detected
test_err = 0.0
test_corr = 0.0
test_total = 0
test_inst = 0
for batch in utils.iterate_minibatches(X_test, Y_test, masks=mask_test, char_inputs=C_test,
batch_size=batch_size):
inputs, targets, masks, char_inputs = batch
err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
test_err += err * inputs.shape[0]
test_corr += corr
test_total += num
test_inst += inputs.shape[0]
if output_predict:
utils.output_predictions(predictions, targets, masks, 'tmp/test%d' % epoch, label_alphabet,
is_flattened=False)
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
test_err / test_inst, test_corr, test_total, test_corr * 100 / test_total)
if update_loss:
best_loss_test_err = test_err
best_loss_test_corr = test_corr
if update_acc:
best_acc_test_err = test_err
best_acc_test_corr = test_corr
# stop if dev accuracy fails to improve `patience` times in a row.
if stop_count == patience:
break
# re-compile a function with new learning rate for training
if update_algo != 'adadelta':
lr = learning_rate / (1.0 + epoch * decay_rate)
updates = utils.create_updates(loss_train, params, update_algo, lr, momentum=momentum)
train_fn = theano.function([input_var, target_var, mask_var, char_input_var],
[loss_train, corr_train, num_tokens],
updates=updates)
# print best performance on test data.
logger.info("final best loss test performance (at epoch %d)" % best_epoch_loss)
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
best_loss_test_err / test_inst, best_loss_test_corr, test_total, best_loss_test_corr * 100 / test_total)
logger.info("final best acc test performance (at epoch %d)" % best_epoch_acc)
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
best_acc_test_err / test_inst, best_acc_test_corr, test_total, best_acc_test_corr * 100 / test_total)
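The training loop above overwrites its status line by remembering how many characters it last wrote (num_back) and emitting that many backspaces before the next update, instead of using '\r'. Reduced to its essentials (like the original, this assumes each new status line is at least as long as the previous one, since backspaces move the cursor but do not erase):

import sys
import time

num_back = 0
for step in range(1, 11):
    sys.stdout.write('\b' * num_back)   # back up over the previous status
    log_info = 'train: %d/10 loss: %.4f' % (step, 1.0 / step)
    sys.stdout.write(log_info)
    sys.stdout.flush()
    num_back = len(log_info)
    time.sleep(0.1)
sys.stdout.write('\n')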
Example 18
Project: apogee Source File: turbospec.py
def turbosynth(*args,**kwargs):
"""
NAME:
turbosynth
PURPOSE:
Run a Turbospectrum synthesis (direct interface to the Turbospectrum code; use 'synth' for a general routine that generates the non-continuum-normalized spectrum, convolves with the LSF and macroturbulence, and optionally continuum normalizes the output)
INPUT ARGUMENTS:
lists with abundances:
[Atomic number1,diff1]
[Atomic number2,diff2]
...
[Atomic numberM,diffM]
SYNTHESIS KEYWORDS:
isotopes= ('solar') use 'solar' or 'arcturus' isotope ratios; can also be a dictionary with isotope ratios (e.g., isotopes= {'6.012':'0.9375','6.013':'0.0625'})
wmin, wmax, dw, width= (15000.000, 17000.000, 0.10000000) spectral synthesis limits and step of calculation (see MOOG)
babsma_wmin, babsma_wmax= (wmin,wmax)) allows opacity limits to be different (broader) than for the synthesis itself
costheta= (1.) cosine of the viewing angle
LINELIST KEYWORDS:
air= (True) if True, perform the synthesis in air wavelengths (affects the default Hlinelist, nothing else; output is in air if air, vacuum otherwise); set to False at your own risk, as Turbospectrum expects the linelist in air wavelengths!
Hlinelist= (None) Hydrogen linelists to use; can be set to the path of a linelist file or to the name of an APOGEE linelist; if None, then we first search for the Hlinedata.vac in the APOGEE linelist directory (if air=False) or we use the internal Turbospectrum Hlinelist (if air=True)
linelist= (None) molecular and atomic linelists to use; can be set to the path of a linelist file or to the name of an APOGEE linelist, or lists of such files; if a single filename is given, the code will first search for files with extensions '.atoms', '.molec' or that start with 'turboatoms.' and 'turbomolec.'
ATMOSPHERE KEYWORDS:
modelatm= (None) model-atmosphere instance
vmicro= (2.) microturbulence (km/s)
modelopac= (None)
(a) if set to an existing filename: assume babsma_lu has already been run and use this continuous opacity in bsyn_lu
(b) if set to a non-existing filename: store the continuous opacity in this file
MISCELLANEOUS KEYWORDS:
dr= data release
saveTurboInput= if set to a string, the input to and output from Turbospectrum will be saved as a tar.gz file with this name; can be a filename in the current directory or a full path
OUTPUT:
(wavelengths,cont-norm. spectrum, spectrum (nwave))
HISTORY:
2015-04-13 - Written - Bovy (IAS)
"""
# Get the spectral synthesis limits
wmin= kwargs.pop('wmin',_WMIN_DEFAULT)
wmax= kwargs.pop('wmax',_WMAX_DEFAULT)
dw= kwargs.pop('dw',_DW_DEFAULT)
babsma_wmin= kwargs.pop('babsma_wmin',wmin)
babsma_wmax= kwargs.pop('babsma_wmax',wmax)
if babsma_wmin > wmin or babsma_wmax < wmax:
raise ValueError("Opacity wavelength range must encompass the synthesis range")
if int(numpy.ceil((wmax-wmin)/dw)) > 150000:
raise ValueError('Too many wavelengths for Turbospectrum synthesis, reduce the wavelength step dw (to, e.g., 0.016)')
costheta= kwargs.pop('costheta',1.)
# Linelists
Hlinelist= kwargs.pop('Hlinelist',None)
linelist= kwargs.pop('linelist',None)
# Parse isotopes
isotopes= kwargs.pop('isotopes','solar')
if isinstance(isotopes,str) and isotopes.lower() == 'solar':
isotopes= {}
elif isinstance(isotopes,str) and isotopes.lower() == 'arcturus':
isotopes= {'6.012':'0.9375',
'6.013':'0.0625'}
elif not isinstance(isotopes,dict):
raise ValueError("'isotopes=' input not understood, should be 'solar', 'arcturus', or a dictionary")
# We will run in a subdirectory of the current directory
tmpDir= tempfile.mkdtemp(dir=os.getcwd())
# Get the model atmosphere
modelatm= kwargs.pop('modelatm',None)
if not modelatm is None:
if isinstance(modelatm,str) and os.path.exists(modelatm):
raise ValueError('modelatm= input is an existing filename, but you need to give an Atmosphere object instead')
elif isinstance(modelatm,str):
raise ValueError('modelatm= input needs to be an Atmosphere instance')
else:
# Check temperature
if modelatm._teff > 7000.:
warnings.warn('Turbospectrum does not include all necessary physics to model stars hotter than about 7000 K; proceed with caution',RuntimeWarning)
# Write atmosphere to file
modelfilename= os.path.join(tmpDir,'atm.mod')
modelatm.writeto(modelfilename,turbo=True)
modeldirname= os.path.dirname(modelfilename)
modelbasename= os.path.basename(modelfilename)
# Get the name of the linelists
if Hlinelist is None:
if kwargs.get('air',True):
Hlinelist= 'DATA/Hlinedata' # will be symlinked
else:
Hlinelist= appath.linelistPath('Hlinedata.vac',
dr=kwargs.get('dr',None))
if not os.path.exists(Hlinelist) and not Hlinelist == 'DATA/Hlinedata':
Hlinelist= appath.linelistPath(Hlinelist,
dr=kwargs.get('dr',None))
if not os.path.exists(Hlinelist) and not kwargs.get('air',True):
print("Hlinelist in vacuum linelist not found, using Turbospectrum's, which is in air...")
Hlinelist= 'DATA/Hlinedata' # will be symlinked
linelistfilenames= [Hlinelist]
if isinstance(linelist,str):
if os.path.exists(linelist):
linelistfilenames.append(linelist)
else:
# Try finding the linelist
atomlinelistfilename= appath.linelistPath(\
'%s.atoms' % linelist,
dr=kwargs.get('dr',None))
moleclinelistfilename= appath.linelistPath(\
'%s.molec' % linelist,
dr=kwargs.get('dr',None))
if os.path.exists(atomlinelistfilename) \
and os.path.exists(moleclinelistfilename):
linelistfilenames.append(atomlinelistfilename)
linelistfilenames.append(moleclinelistfilename)
else:
atomlinelistfilename= appath.linelistPath(\
'turboatoms.%s' % linelist,
dr=kwargs.get('dr',None))
moleclinelistfilename= appath.linelistPath(\
'turbomolec.%s' % linelist,
dr=kwargs.get('dr',None))
if not os.path.exists(atomlinelistfilename) \
and '201404080919' in atomlinelistfilename \
and kwargs.get('air',True):
download.linelist(os.path.basename(atomlinelistfilename),
dr=kwargs.get('dr',None))
if not os.path.exists(moleclinelistfilename) \
and '201404080919' in moleclinelistfilename \
and kwargs.get('air',True):
download.linelist(os.path.basename(moleclinelistfilename),
dr=kwargs.get('dr',None))
if os.path.exists(atomlinelistfilename) \
and os.path.exists(moleclinelistfilename):
linelistfilenames.append(atomlinelistfilename)
linelistfilenames.append(moleclinelistfilename)
if linelist is None or len(linelistfilenames) == 1:
os.remove(modelfilename)
os.rmdir(tmpDir)
raise ValueError('linelist= must be set (see documentation) and given linelist must exist (either as absolute path or in the linelist directory)')
# Link the Turbospectrum DATA directory
os.symlink(os.getenv('TURBODATA'),os.path.join(tmpDir,'DATA'))
# Cut the linelist to the desired wavelength range, if necessary,
# Skipped because it is unnecessary, but left in case we still want to
# use it
rmLinelists= False
for ll, linelistfilename in enumerate(linelistfilenames[1:]):
if not _CUTLINELIST: continue #SKIP
if wmin == _WMIN_DEFAULT and wmax == _WMAX_DEFAULT: continue
rmLinelists= True
with open(os.path.join(tmpDir,'cutlines.awk'),'w') as awkfile:
awkfile.write('($1>%.3f && $1<%.3f) || ( substr($1,1,1) == "'
%(wmin-7.,wmax+7.) +"'"+'")\n')
keeplines= open(os.path.join(tmpDir,'lines.tmp'),'w')
stderr= open('/dev/null','w')
try:
subprocess.check_call(['awk','-f','cutlines.awk',
linelistfilename],
cwd=tmpDir,stdout=keeplines,stderr=stderr)
keeplines.close()
except subprocess.CalledProcessError:
os.remove(os.path.join(tmpDir,'lines.tmp'))
os.remove(os.path.join(tmpDir,'DATA'))
raise RuntimeError("Removing unnecessary linelist entries failed ...")
finally:
os.remove(os.path.join(tmpDir,'cutlines.awk'))
stderr.close()
# Remove elements that aren't used altogether, adjust nlines
with open(os.path.join(tmpDir,'lines.tmp'),'r') as infile:
lines= infile.readlines()
nl_list= [l[0] == "'" for l in lines]
nl= numpy.array(nl_list,dtype='int')
nl_list.append(True)
nl_list.append(True)
nlines= [numpy.sum(1-nl[ii:nl_list[ii+2:].index(True)+ii+2])
for ii in range(len(nl))]
with open(os.path.join(tmpDir,os.path.basename(linelistfilename)),
'w') \
as outfile:
for ii, line in enumerate(lines):
if ii < len(lines)-2:
if not lines[ii][0] == "'":
outfile.write(lines[ii])
elif not (lines[ii+2][0] == "'" and lines[ii+1][0] == "'"):
if lines[ii+1][0] == "'":
# Adjust nlines
outfile.write(lines[ii].replace(lines[ii].split()[-1]+'\n',
'%i\n' % nlines[ii]))
else:
outfile.write(lines[ii])
else:
if not lines[ii][0] == "'": outfile.write(lines[ii])
os.remove(os.path.join(tmpDir,'lines.tmp'))
# cp the linelists to the temporary directory
shutil.copy(linelistfilename,tmpDir)
linelistfilenames[ll]= os.path.basename(linelistfilename)
# Parse the abundances
if len(args) == 0: #special case that there are *no* differences
args= ([26,0.],)
indiv_abu= {}
for arg in args:
indiv_abu[arg[0]]= arg[1]+solarabundances._ASPLUND05[arg[0]]\
+modelatm._metals
if arg[0] == 6: indiv_abu[arg[0]]+= modelatm._cm
if arg[0] in [8,10,12,14,16,18,20,22]: indiv_abu[arg[0]]+= modelatm._am
modelopac= kwargs.get('modelopac',None)
if modelopac is None or \
(isinstance(modelopac,str) and not os.path.exists(modelopac)):
# Now write the script file for babsma_lu
scriptfilename= os.path.join(tmpDir,'babsma.par')
modelopacname= os.path.join(tmpDir,'mopac')
_write_script(scriptfilename,
babsma_wmin,babsma_wmax,dw,
None,
modelfilename,
None,
modelopacname,
modelatm._metals,
modelatm._am,
indiv_abu,
kwargs.get('vmicro',2.),
None,None,None,bsyn=False)
# Run babsma
sys.stdout.write('\r'+"Running Turbospectrum babsma_lu ...\r")
sys.stdout.flush()
if kwargs.get('verbose',False):
stdout= None
stderr= None
else:
stdout= open('/dev/null', 'w')
stderr= subprocess.STDOUT
try:
p= subprocess.Popen(['babsma_lu'],
cwd=tmpDir,
stdin=subprocess.PIPE,
stdout=stdout,
stderr=stderr)
with open(os.path.join(tmpDir,'babsma.par'),'r') as parfile:
for line in parfile:
p.stdin.write(line.encode('utf-8'))
stdout, stderr= p.communicate()
except subprocess.CalledProcessError:
for linelistfilename in linelistfilenames:
os.remove(os.path.join(tmpDir,linelistfilename))
if os.path.exists(os.path.join(tmpDir,'DATA')):
os.remove(os.path.join(tmpDir,'DATA'))
raise RuntimeError("Running babsma_lu failed ...")
finally:
if os.path.exists(os.path.join(tmpDir,'babsma.par')) \
and not 'saveTurboInput' in kwargs:
os.remove(os.path.join(tmpDir,'babsma.par'))
sys.stdout.write('\r'+download._ERASESTR+'\r')
sys.stdout.flush()
if isinstance(modelopac,str):
shutil.copy(modelopacname,modelopac)
else:
shutil.copy(modelopac,tmpDir)
modelopacname= os.path.join(tmpDir,os.path.basename(modelopac))
# Now write the script file for bsyn_lu
scriptfilename= os.path.join(tmpDir,'bsyn.par')
outfilename= os.path.join(tmpDir,'bsyn.out')
_write_script(scriptfilename,
wmin,wmax,dw,
costheta,
modelfilename,
None,
modelopacname,
modelatm._metals,
modelatm._am,
indiv_abu,
None,
outfilename,
isotopes,
linelistfilenames,
bsyn=True)
# Run bsyn
sys.stdout.write('\r'+"Running Turbospectrum bsyn_lu ...\r")
sys.stdout.flush()
if kwargs.get('verbose',False):
stdout= None
stderr= None
else:
stdout= open('/dev/null', 'w')
stderr= subprocess.STDOUT
try:
p= subprocess.Popen(['bsyn_lu'],
cwd=tmpDir,
stdin=subprocess.PIPE,
stdout=stdout,
stderr=stderr)
with open(os.path.join(tmpDir,'bsyn.par'),'r') as parfile:
for line in parfile:
p.stdin.write(line.encode('utf-8'))
stdout, stderr= p.communicate()
except subprocess.CalledProcessError:
raise RuntimeError("Running bsyn_lu failed ...")
finally:
if 'saveTurboInput' in kwargs:
turbosavefilename= kwargs['saveTurboInput']
if os.path.dirname(turbosavefilename) == '':
turbosavefilename= os.path.join(os.getcwd(),turbosavefilename)
try:
subprocess.check_call(['tar','cvzf',turbosavefilename,
os.path.basename(os.path.normpath(tmpDir))])
except subprocess.CalledProcessError:
raise RuntimeError("Tar-zipping the Turbospectrum input and output failed; you will have to manually delete the temporary directory ...")
# Need to remove babsma.par, bc not removed above
if os.path.exists(os.path.join(tmpDir,'babsma.par')):
os.remove(os.path.join(tmpDir,'babsma.par'))
if os.path.exists(os.path.join(tmpDir,'bsyn.par')):
os.remove(os.path.join(tmpDir,'bsyn.par'))
if os.path.exists(modelopacname):
os.remove(modelopacname)
if os.path.exists(modelopacname+'.mod'):
os.remove(modelopacname+'.mod')
if os.path.exists(os.path.join(tmpDir,'DATA')):
os.remove(os.path.join(tmpDir,'DATA'))
if os.path.exists(os.path.join(tmpDir,'dummy-output.dat')):
os.remove(os.path.join(tmpDir,'dummy-output.dat'))
if os.path.exists(modelfilename):
os.remove(modelfilename)
if rmLinelists:
for linelistfilename in linelistfilenames[1:]:
os.remove(linelistfilename)
sys.stdout.write('\r'+download._ERASESTR+'\r')
sys.stdout.flush()
# Now read the output
turboOut= numpy.loadtxt(outfilename)
# Clean up
os.remove(outfilename)
os.rmdir(tmpDir)
# Return wav, cont-norm, full spectrum
return (turboOut[:,0],turboOut[:,1],turboOut[:,2])
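The example above prints transient status lines ("Running Turbospectrum babsma_lu ...") bracketed by carriage returns, then blanks them with the package's download._ERASESTR once the subprocess finishes. A minimal self-contained sketch of that pattern (the ERASESTR constant here is a stand-in for the one defined in apogee's download module):

import sys, time

ERASESTR = ' ' * 79  # stand-in for download._ERASESTR

def run_with_status(msg, work):
    # show a transient status line while work() runs
    sys.stdout.write('\r' + msg + '\r')
    sys.stdout.flush()
    try:
        work()
    finally:
        # overwrite the status line with blanks, then return the cursor
        sys.stdout.write('\r' + ERASESTR + '\r')
        sys.stdout.flush()

run_with_status("Running a slow step ...", lambda: time.sleep(1))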
Example 19
Project: scikit-rf Source File: ipython_directive.py
def process_input(self, data, input_prompt, lineno):
"""
Process data block for INPUT token.
"""
decorator, input, rest = data
image_file = None
image_directive = None
is_verbatim = decorator=='@verbatim' or self.is_verbatim
is_doctest = (decorator is not None and \
decorator.startswith('@doctest')) or self.is_doctest
is_suppress = decorator=='@suppress' or self.is_suppress
is_okexcept = decorator=='@okexcept' or self.is_okexcept
is_okwarning = decorator=='@okwarning' or self.is_okwarning
is_savefig = decorator is not None and \
decorator.startswith('@savefig')
input_lines = input.split('\n')
if len(input_lines) > 1:
if input_lines[-1] != "":
input_lines.append('') # make sure there's a blank line
# so splitter buffer gets reset
continuation = ' %s:'%''.join(['.']*(len(str(lineno))+2))
if is_savefig:
image_file, image_directive = self.process_image(decorator)
ret = []
is_semicolon = False
# Hold the execution count, if requested to do so.
if is_suppress and self.hold_count:
store_history = False
else:
store_history = True
# Note: catch_warnings is not thread safe
with warnings.catch_warnings(record=True) as ws:
for i, line in enumerate(input_lines):
if line.endswith(';'):
is_semicolon = True
if i == 0:
# process the first input line
if is_verbatim:
self.process_input_line('')
self.IP.execution_count += 1 # increment it anyway
else:
# only submit the line in non-verbatim mode
self.process_input_line(line, store_history=store_history)
formatted_line = '%s %s'%(input_prompt, line)
else:
# process a continuation line
if not is_verbatim:
self.process_input_line(line, store_history=store_history)
formatted_line = '%s %s'%(continuation, line)
if not is_suppress:
ret.append(formatted_line)
if not is_suppress and len(rest.strip()) and is_verbatim:
# the "rest" is the standard output of the
# input, which needs to be added in
# verbatim mode
ret.append(rest)
self.cout.seek(0)
output = self.cout.read()
if not is_suppress and not is_semicolon:
ret.append(output)
elif is_semicolon: # get spacing right
ret.append('')
# context information
filename = "Unknown"
lineno = 0
if self.directive.state:
filename = self.directive.state.document.current_source
lineno = self.directive.state.document.current_line
# output any exceptions raised during execution to stdout
# unless :okexcept: has been specified.
if not is_okexcept and "Traceback" in output:
s = "\nException in %s at block ending on line %s\n" % (filename, lineno)
s += "Specify :okexcept: as an option in the ipython:: block to suppress this message\n"
sys.stdout.write('\n\n>>>' + ('-' * 73))
sys.stdout.write(s)
sys.stdout.write(output)
sys.stdout.write('<<<' + ('-' * 73) + '\n\n')
# output any warning raised during execution to stdout
# unless :okwarning: has been specified.
if not is_okwarning:
for w in ws:
s = "\nWarning in %s at block ending on line %s\n" % (filename, lineno)
s += "Specify :okwarning: as an option in the ipython:: block to suppress this message\n"
sys.stdout.write('\n\n>>>' + ('-' * 73))
sys.stdout.write(s)
sys.stdout.write(('-' * 76) + '\n')
s=warnings.formatwarning(w.message, w.category,
w.filename, w.lineno, w.line)
sys.stdout.write(s)
sys.stdout.write('<<<' + ('-' * 73) + '\n')
self.cout.truncate(0)
return (ret, input_lines, output, is_doctest, decorator, image_file,
image_directive)
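When the directive surfaces an exception or warning, it brackets the captured output between >>>--- and <<<--- rules so it stands out in a long Sphinx build log. A minimal sketch of the same bracketing, independent of Sphinx/IPython:

import sys

def write_bracketed(msg):
    # frame a message so it is easy to spot in a long build log
    sys.stdout.write('\n\n>>>' + ('-' * 73))
    sys.stdout.write('\n' + msg)
    sys.stdout.write('\n<<<' + ('-' * 73) + '\n\n')

write_bracketed("Exception in demo.rst at block ending on line 42\n"
                "Specify :okexcept: to suppress this message")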
Example 20
Project: flopy Source File: mfswi2.py
@staticmethod
def load(f, model, ext_unit_dict=None):
"""
Load an existing package.
Parameters
----------
f : filename or file handle
File to load.
model : model object
The model object (of type :class:`flopy.modflow.mf.Modflow`) to
which this package will be added.
ext_unit_dict : dictionary, optional
If the arrays in the file are specified using EXTERNAL,
or older style array control records, then `f` should be a file
handle. In this case ext_unit_dict is required, which can be
constructed using the function
:class:`flopy.utils.mfreadnam.parsenamefile`.
Returns
-------
swi2 : ModflowSwi2 object
Examples
--------
>>> import flopy
>>> m = flopy.modflow.Modflow()
>>> swi2 = flopy.modflow.ModflowSwi2.load('test.swi2', m)
"""
if model.verbose:
sys.stdout.write('loading swi2 package file...\n')
if not hasattr(f, 'read'):
filename = f
f = open(filename, 'r')
# dataset 0 -- header
while True:
line = f.readline()
if line[0] != '#':
break
# determine problem dimensions
nrow, ncol, nlay, nper = model.get_nrow_ncol_nlay_nper()
# --read dataset 1
if model.verbose:
sys.stdout.write(' loading swi2 dataset 1\n')
t = line.strip().split()
nsrf = int(t[0])
istrat = int(t[1])
nobs = int(t[2])
iswizt = 0
if int(t[3]) > 0:
model.add_pop_key_list(int(t[3]))
iswizt = 55
if int(t[4]) > 0:
model.add_pop_key_list(int(t[4]))
ipakcb = 56
else:
ipakcb = 0
iswiobs = 0
if int(t[5]) > 0:
model.add_pop_key_list(int(t[5]))
iswiobs = 1051
options = []
adaptive = False
for idx in range(6, len(t)):
if '#' in t[idx]:
break
options.append(t[idx])
if 'adaptive' in t[idx].lower():
adaptive = True
# read dataset 2a
if model.verbose:
sys.stdout.write(' loading swi2 dataset 2a\n')
while True:
line = f.readline()
if line[0] != '#':
break
t = line.strip().split()
nsolver = int(t[0])
iprsol = int(t[1])
mutsol = int(t[2])
# read dataset 2b
solver2params = {}
if nsolver == 2:
if model.verbose:
sys.stdout.write(' loading swi2 dataset 2b\n')
while True:
line = f.readline()
if line[0] != '#':
break
t = line.strip().split()
solver2params['mxiter'] = int(t[0])
solver2params['iter1'] = int(t[1])
solver2params['npcond'] = int(t[2])
solver2params['zclose'] = float(t[3])
solver2params['rclose'] = float(t[4])
solver2params['relax'] = float(t[5])
solver2params['nbpol'] = int(t[6])
solver2params['damp'] = float(t[7])
solver2params['dampt'] = float(t[8])
# read dataset 3a
if model.verbose:
sys.stdout.write(' loading swi2 dataset 3a\n')
while True:
line = f.readline()
if line[0] != '#':
break
t = line.strip().split()
toeslope = float(t[0])
tipslope = float(t[1])
alpha = None
beta = 0.1
if len(t) > 2:
try:
alpha = float(t[2])
beta = float(t[3])
except:
pass
# read dataset 3b
nadptmx, nadptmn, adptfct = None, None, None
if adaptive:
if model.verbose:
sys.stdout.write(' loading swi2 dataset 3b\n')
while True:
line = f.readline()
if line[0] != '#':
break
t = line.strip().split()
nadptmx = int(t[0])
nadptmn = int(t[1])
adptfct = float(t[2])
# read dataset 4
if model.verbose:
print(' loading nu...')
if istrat == 1:
nnu = nsrf + 1
else:
nnu = nsrf + 2
while True:
ipos = f.tell()
line = f.readline()
if line[0] != '#':
f.seek(ipos)
break
nu = Util2d.load(f, model, (1, nnu), np.float32, 'nu',
ext_unit_dict)
nu = nu.array.reshape((nnu))
# read dataset 5
if model.verbose:
print(' loading initial zeta surfaces...')
while True:
ipos = f.tell()
line = f.readline()
if line[0] != '#':
f.seek(ipos)
break
zeta = []
for n in range(nsrf):
ctxt = 'zeta_surf{:02d}'.format(n + 1)
zeta.append(Util3d.load(f, model, (nlay, nrow, ncol),
np.float32, ctxt, ext_unit_dict))
# read dataset 6
if model.verbose:
print(' loading initial ssz...')
while True:
ipos = f.tell()
line = f.readline()
if line[0] != '#':
f.seek(ipos)
break
ssz = Util3d.load(f, model, (nlay, nrow, ncol), np.float32,
'ssz', ext_unit_dict)
# read dataset 7
if model.verbose:
print(' loading initial isource...')
while True:
ipos = f.tell()
line = f.readline()
if line[0] != '#':
f.seek(ipos)
break
isource = Util3d.load(f, model, (nlay, nrow, ncol), np.int,
'isource', ext_unit_dict)
# read dataset 8
obsname = []
obslrc = []
if nobs > 0:
if model.verbose:
print(' loading observation locations...')
while True:
line = f.readline()
if line[0] != '#':
break
for i in range(nobs):
if i > 0:
try:
line = f.readline()
except:
break
t = line.strip().split()
obsname.append(t[0])
kk = int(t[1]) - 1
ii = int(t[2]) - 1
jj = int(t[3]) - 1
obslrc.append([kk, ii, jj])
nobs = len(obsname)
# create swi2 instance
swi2 = ModflowSwi2(model, nsrf=nsrf, istrat=istrat, nobs=nobs,
iswizt=iswizt, ipakcb=ipakcb,
iswiobs=iswiobs, options=options,
nsolver=nsolver, iprsol=iprsol, mutsol=mutsol,
solver2params=solver2params,
toeslope=toeslope, tipslope=tipslope, alpha=alpha,
beta=beta,
nadptmx=nadptmx, nadptmn=nadptmn, adptfct=adptfct,
nu=nu, zeta=zeta, ssz=ssz, isource=isource,
obsnam=obsname, obslrc=obslrc)
# return swi2 instance
return swi2
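load writes one ' loading swi2 dataset ...' line per dataset, but only when model.verbose is set, so quiet runs stay quiet. A minimal sketch of that verbose gate (the Loader class and dataset names are placeholders, not flopy API):

import sys

class Loader:
    def __init__(self, verbose=False):
        self.verbose = verbose

    def _status(self, msg):
        # emit progress only in verbose mode, mirroring the pattern above
        if self.verbose:
            sys.stdout.write(msg + '\n')

    def load(self):
        self._status('loading package file...')
        for name in ('dataset 1', 'dataset 2a', 'dataset 3a'):
            self._status('   loading ' + name)

Loader(verbose=True).load()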
Example 21
Project: deepy Source File: server.py
def handle_control(self, req, worker_id):
"""
Handles a control_request received from a worker.
Returns:
string or dict: response
'stop' - the worker should quit
'wait' - wait for 1 second
'eval' - evaluate on valid and test set to start a new epoch
'sync_hyperparams' - set learning rate
'valid' - evaluate on valid and test set, then save the params
'train' - train next batches
"""
if self.start_time is None: self.start_time = time.time()
response = ""
if req == 'next':
if self.num_train_batches == 0:
response = "get_num_batches"
elif self._done:
response = "stop"
self.worker_is_done(worker_id)
elif self._evaluating:
response = 'wait'
elif not self.batch_pool:
# End of one iter
if self._train_costs:
with self._lock:
sys.stdout.write("\r")
sys.stdout.flush()
mean_costs = []
for i in range(len(self._training_names)):
mean_costs.append(np.mean([c[i] for c in self._train_costs]))
self.log("train (epoch={:2d}) {}".format(
self.epoch,
self.get_monitor_string(zip(self._training_names, mean_costs)))
)
response = {'eval': None, 'best_valid_cost': self._best_valid_cost}
self._evaluating = True
else:
# Continue training
if worker_id not in self.prepared_worker_pool:
response = {"sync_hyperparams": self.feed_hyperparams()}
self.prepared_worker_pool.add(worker_id)
elif self._iters_from_last_valid >= self._valid_freq:
response = {'valid': None, 'best_valid_cost': self._best_valid_cost}
self._iters_from_last_valid = 0
else:
response = {"train": self.feed_batches()}
elif 'eval_done' in req:
with self._lock:
self._evaluating = False
sys.stdout.write("\r")
sys.stdout.flush()
if 'test_costs' in req and req['test_costs']:
self.log("test (epoch={:2d}) {}".format(
self.epoch,
self.get_monitor_string(req['test_costs']))
)
if 'valid_costs' in req and req['valid_costs']:
valid_J = req['valid_costs'][0][1]
if valid_J < self._best_valid_cost:
self._best_valid_cost = valid_J
star_str = "*"
else:
star_str = ""
self.log("valid (epoch={:2d}) {} {} (worker {})".format(
self.epoch,
self.get_monitor_string(req['valid_costs']),
star_str,
worker_id))
# if star_str and 'auto_save' in req and req['auto_save']:
# self.log("(worker {}) save the model to {}".format(
# worker_id,
# req['auto_save']
# ))
continue_training = self.prepare_epoch()
self._epoch_start_time = time.time()
if not continue_training:
self._done = True
self.log("training time {:.4f}s".format(time.time() - self.start_time))
response = "stop"
elif 'valid_done' in req:
with self._lock:
sys.stdout.write("\r")
sys.stdout.flush()
if 'valid_costs' in req:
valid_J = req['valid_costs'][0][1]
if valid_J < self._best_valid_cost:
self._best_valid_cost = valid_J
star_str = "*"
else:
star_str = ""
self.log("valid ( dryrun ) {} {} (worker {})".format(
self.get_monitor_string(req['valid_costs']),
star_str,
worker_id
))
# if star_str and 'auto_save' in req and req['auto_save']:
# self.log("(worker {}) save the model to {}".format(
# worker_id,
# req['auto_save']
# ))
elif 'train_done' in req:
costs = req['costs']
self._train_costs.append(costs)
sys.stdout.write("\x1b[2K\r> %d%% | J=%.2f | %.1f batch/s" % (
self._current_iter * 100 / self.num_train_batches,
costs[0], float(len(self._train_costs)*self.step_len)/(time.time() - self._epoch_start_time)))
sys.stdout.flush()
elif 'get_num_batches_done' in req:
self.num_train_batches = req['get_num_batches_done']
elif 'get_easgd_alpha' in req:
response = self._easgd_alpha
elif 'sync_hyperparams' in req:
response = {"sync_hyperparams": self.feed_hyperparams()}
elif 'init_schedule' in req:
with self._lock:
sys.stdout.write("\r")
sys.stdout.flush()
self.log("worker {} connected".format(worker_id))
if self.epoch == 0:
schedule_params = req['init_schedule']
sch_str = " ".join("{}={}".format(a, b) for (a, b) in schedule_params.items())
self.log("initialize the schedule with {}".format(sch_str))
for key, val in schedule_params.items():
if not val: continue
if key == 'learning_rate':
self._lr = val
elif key == 'start_halving_at':
self.epoch_start_halving = val
elif key == 'end_at':
self.end_at = val
elif key == 'step_len':
self.step_len = val
elif key == 'valid_freq':
self._valid_freq = val
elif 'set_names' in req:
self._training_names = req['training_names']
self._evaluation_names = req['evaluation_names']
return response
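The 'train_done' branch redraws a single progress line with the ANSI erase-line sequence \x1b[2K followed by \r, so the percentage and throughput update in place instead of scrolling. A standalone sketch of that escape-sequence pattern (the batch loop and figures are made up):

import sys, time

num_batches = 50
start = time.time()
for i in range(1, num_batches + 1):
    time.sleep(0.01)  # stand-in for one training batch
    rate = i / (time.time() - start)
    # \x1b[2K erases the current line, \r returns the cursor to column 0
    sys.stdout.write("\x1b[2K\r> %d%% | %.1f batch/s"
                     % (i * 100 // num_batches, rate))
    sys.stdout.flush()
sys.stdout.write("\n")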
Example 22
def main(argv):
optparser = OptionParser(__doc__)
optparser.add_option("--debug", default=False, action='store_true',
help="show me the SchoonerSpores[tm]")
optparser.add_option("--exclude-dictionary", default='',
help="comma-separated list of words that will not be "
"considered to be dictionary words")
optparser.add_option("--disable-picking", default='',
help="comma-separated list of words that will be "
"not be picked from sentences")
optparser.add_option("--disable-swapping", default='',
help="comma-separated list of colon-separated "
"pairs of words that will be "
"not be considered for swapping")
optparser.add_option("--dictionary-words-only", default=False,
action='store_true',
help="only swap words when both words are "
"dictionary words")
optparser.add_option("--remove-quotes", default=False, action='store_true',
help="strip double quotes from input words")
(options, args) = optparser.parse_args(argv[1:])
filenames = args
load_dictionary(options.exclude_dictionary.split(','))
disable_picking = set([
w.upper() for w in options.disable_picking.split(',')
])
disable_swapping = set([
frozenset([z.upper() for z in x.split(':')])
for x in options.disable_swapping.split(',')
])
words = []
for filename in filenames:
with open(filename, 'r') as f:
for line in f:
line = line.strip().replace('--', '-- ')
words.extend(line.split())
if line == '' and words[-1] is not PARAGRAPH_BREAK:
words.append(PARAGRAPH_BREAK)
BASE_CLAUSE_ENDERS = ['.', '!', '?', ';', ':', ',', '--']
CLAUSE_ENDERS = tuple(
BASE_CLAUSE_ENDERS +
[c + '"' for c in BASE_CLAUSE_ENDERS] +
[c + "'" for c in BASE_CLAUSE_ENDERS]
)
sentences = [] # actually clauses. :/
sentence = []
for word in words:
if word is PARAGRAPH_BREAK:
if sentence:
sentences.append(sentence)
sentence = []
sentences.append(PARAGRAPH_BREAK)
continue
if options.remove_quotes:
if word.startswith(('"', "'")):
word = word[1:]
if word.endswith(('"', "'")):
word = word[:-1]
sentence.append(word)
if (word not in ('Mr.', 'Mrs.', 'Dr.') and
word.endswith(CLAUSE_ENDERS)):
sentences.append(sentence)
sentence = []
sentences.append(sentence)
for sentence in sentences:
if sentence is PARAGRAPH_BREAK:
sys.stdout.write('\n\n')
continue
scores = {} # frozenset of two (word, pos) tuples -> score
for (pos1, word1) in enumerate(sentence):
for (pos2, word2) in enumerate(sentence):
clean_word1 = clean(word1)
clean_word2 = clean(word2)
if clean_word1 == clean_word2:
continue
if len(clean_word1) <= 2 or len(clean_word2) <= 2:
continue
if clean_word1 in disable_picking or clean_word2 in disable_picking:
continue
if frozenset([clean_word1, clean_word2]) in disable_swapping:
continue
(pre1, cons1, base1) = strip_initial_consonants(word1)
(pre2, cons2, base2) = strip_initial_consonants(word2)
if len(cons1) == 0 and len(cons2) == 0:
continue
if cons1.upper() == cons2.upper():
continue
new1 = pre1 + cons2 + base1
new2 = pre2 + cons1 + base2
pair = frozenset([(word1, new1, pos1), (word2, new2, pos2)])
scores[pair] = calculate_schooner_spore(
cons1, word1, new1, pos1,
cons2, word2, new2, pos2,
dictionary_words_only=options.dictionary_words_only
)
if options.debug:
s = []
for pair, score in scores.iteritems():
s.append((score, pair))
print ' '.join(sentence)
for (score, pair) in sorted(s, reverse=True):
print score, pair
print
best_score = AWFUL_SCORE
best_pair = None
for pair, score in scores.iteritems():
if score > best_score:
best_score = score
best_pair = pair
if best_pair is None or best_score == AWFUL_SCORE:
sys.stdout.write(sentencify(sentence))
else:
best_pair = list(best_pair)
(word1, new1, pos1) = best_pair[0]
(word2, new2, pos2) = best_pair[1]
new1 = adjust_case(new1, word1)
new2 = adjust_case(new2, word2)
sentence[pos2] = new2
sentence[pos1] = new1
sys.stdout.write(sentencify(sentence))
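The script above emits its text with sys.stdout.write rather than print so it controls spacing itself: '\n\n' for a paragraph break and no implicit newline after each sentence. A minimal sketch of that stream-of-fragments pattern (the sentinel and fragments are illustrative):

import sys

PARAGRAPH_BREAK = object()

def emit(fragments):
    # join the fragments ourselves instead of letting print add separators
    for frag in fragments:
        if frag is PARAGRAPH_BREAK:
            sys.stdout.write('\n\n')
        else:
            sys.stdout.write(frag + ' ')
    sys.stdout.write('\n')

emit(["One sentence.", "Another.", PARAGRAPH_BREAK, "New paragraph."])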
Example 23
Project: D-TECT Source File: d-tect.py
def dtect():
print(" ____ _____ _____ ____ _____ ")
print(" | _ \ |_ _| ____/ ___|_ _|")
print(" | | | |__| | | _|| | | | ")
print(" | |_| |__| | | |__| |___ | | ")
print(" |____/ |_| |_____\____| |_| v1.0")
print("")
print(" D-TECT - Pentest the Modern Web")
print(" Author: Shawar Khan - ( https://shawarkhan.com )")
print("")
def menu():
global filedetector,wpenumerator,subdomainscan,portscan,wpscan,xssscanner,wpbackupscan,sqliscanner
print(" -- "+boldwhite+"Menu"+reset+" -- \n \n 1. "+boldwhite+"WordPress Username Enumerator"+reset+" \n 2. "+boldwhite+"Sensitive File Detector"+reset+" \n 3. "+boldwhite+"Sub-Domain Scanner"+reset+"\n 4. "+boldwhite+"Port Scanner"+reset+" \n 5. "+boldwhite+"Wordpress Scanner\n"+reset+" 6. "+boldwhite+"Cross-Site Scripting [ XSS ] Scanner\n"+reset+" 7. "+boldwhite+"Wordpress Backup Grabber\n"+reset+" 8. "+boldwhite+"SQL Injection [ SQLI ] Scanner\n"+reset)
option = raw_input("[+] Select Option\n > ")
if option == "1":
wpenumerator = "on"
elif option == "2":
filedetector = "on"
elif option == "3":
subdomainscan = "on"
elif option == "4":
portscan = "on"
elif option == "5":
wpscan = "on"
elif option == "6":
xssscanner = "on"
elif option == "7":
wpbackupscan = "on"
elif option == "8":
sqliscanner = "on"
else:
print("[+] Incorrect Option selected")
menu()
def sock(i,secretswitch=0):
secret = secretswitch
global data,page,sourcecode
if redirect == 1:
data = host+i
else:
data = host.strip("/")+'/'+i
page = urllib.urlopen(data)
sourcecode = page.read()
if secret == "1":
return sourcecode
def cloudflare():
data = host #+'/'
page = urllib.urlopen(data)
pagesource = page.read()
if "used CloudFlare to restrict access</title>" in pagesource:
print("[!] Cloudflare blocked the IP")
again()
def alive():
try:
global page,splithost,ip
data = host#+'/'
page = urllib.urlopen(data)
source = page.read()
splithost = str(data.split("://")[1].split("/")[0])
ip = socket.gethostbyname(splithost)
print("[i] "+green+"Site is up!"+reset)
print(" \n[+] Target Info:\n | URL: "+boldwhite+"%s"+reset+"\n | IP: "+boldwhite+"%s"+reset+"\n ")%(data,ip)
print("[+] Checking if any Cloudflare is blocking access...")
cloudflare()
redirectcheck()
except(IOError):
print("[!] "+red+"Error connecting to site! Site maybe down."+reset)
again()
def responseheadercheck():
print('')
headers = ['set-cookie','x-cache','Location','Date','Content-Type','Content-Length','Connection','Etag','Expires','Last-Modified','Pragma','Vary','Cache-Control','X-Pingback','Accept-Ranges']
headersfound = []
interesting = []
caution = []
cj = 0
for i in page.headers:
if i.lower() in str(headers).lower():
pass
elif i == "server":
structure = str(i)+" : "+str(page.headers[i])
headersfound.append(structure)
structure = "Server : "+boldwhite+str(page.headers[i])+reset
interesting.append(structure)
elif i == "x-powered-by":
structure = str(i)+" : "+str(page.headers[i])
headersfound.append(structure)
structure = "Powered by: "+boldwhite+str(page.headers[i])+reset
interesting.append(structure)
elif i == "x-frame-options":
cj = 1
pass
else:
structure = str(i)+" : "+str(page.headers[i])
headersfound.append(structure)
if cj == 0:
caution.append("[!]"+red+" X-Frame-Options header Missing\n"+reset+"[!] "+red+"Page might be vulnerable to "+boldred+"Click Jacking\n"+reset+"[!] "+boldred+page.geturl()+reset+"\n[i] About ClickJacking: [ "+green+"https://www.owasp.org/index.php/Clickjacking"+reset+" ]")
print("[+] Interesting Headers Found:")
for i in headersfound:
print(" | %s")%(i)
if len(interesting) != 0:
print("\n[i] Information from Headers:")
for i in interesting:
print(" | %s")%i
print('')
if cj == 0:
print(caution[0])
print('')
def parameterarrange(payload):
parsedurl = urlparse.urlparse(host)
parameters = urlparse.parse_qsl(parsedurl.query, keep_blank_values=True)
parameternames = []
parametervalues = []
for m in parameters:
parameternames.append(m[0])
parametervalues.append(m[1])
for n in parameters:
try:
print("Checking '%s' parameter")%n[0]
index = parameternames.index(n[0])
original = parametervalues[index]
parametervalues[index] = payload
return urllib.urlencode(dict(zip(parameternames,parametervalues)))
parametervalues[index] = original
except(KeyError):
pass
def SQLIscan(site):
print("[+] [ SQLI ] Scanner Started...\n")
vuln = []
payloads = {
'2':'"',
'1':'\''
}
errors = {
'MySQL':'You have an error in your SQL syntax;',
'Oracle':'SQL command not properly ended',
'MSSQL':'Unclosed quotation mark after the character string',
'PostgreSQL':'syntax error at or near'
}
path = urlparse.urlparse(site).scheme+"://"+urlparse.urlparse(site).netloc+urlparse.urlparse(site).path
parsedurl = urlparse.urlparse(host)
parameters = urlparse.parse_qsl(parsedurl.query, keep_blank_values=True)
parameternames = []
parametervalues = []
for m in parameters:
parameternames.append(m[0])
parametervalues.append(m[1])
for n in parameters:
found = 0
print("[+] Checking '%s' parameter")%n[0]
try:
for i in payloads:
pay = payloads[i]
index = parameternames.index(n[0])
original = parametervalues[index]
parametervalues[index] = pay
modifiedurl = urllib.urlencode(dict(zip(parameternames,parametervalues)))
parametervalues[index] = original
modifiedparams = modifiedurl
payload = urllib.quote_plus(payloads[i])
u = urllib.urlopen(path+"?"+modifiedparams)
source = u.read()
#print ("[+] Checking HTML Context...")
for i in errors:
if errors[i] in source:#htmlcode[0].contents[0]:
dbfound = " | Back-End Database: "+green+str(i)+reset
found = 1
break
if found != 1:
break
except(KeyError):
pass
if found == 1:
print("[!] "+red+"SQL Injection Vulnerability Found!"+reset)
print dbfound
vuln.append("'"+n[0]+"'")
found = 0
if len(vuln) != 0:
print(" | Vulnerable Parameter/s:"),
for i in vuln:
print(i),
else:
print("[!] Not Vulnerable")
def XSSscan(site):
print("[+] [ XSS ] Scanner Started...")
vuln = []
payloads = {
'3':'d4rk();"\'\\/}{d4rk',
'2':'d4rk</script><script>alert(1)</script>d4rk',
'1':'<d4rk>'
}
path = urlparse.urlparse(site).scheme+"://"+urlparse.urlparse(site).netloc+urlparse.urlparse(site).path
parsedurl = urlparse.urlparse(host)
parameters = urlparse.parse_qsl(parsedurl.query, keep_blank_values=True)
parameternames = []
parametervalues = []
for m in parameters:
parameternames.append(m[0])
parametervalues.append(m[1])
for n in parameters:
found = 0
print(" | Checking '%s' parameter")%n[0]
try:
for i in payloads:
pay = payloads[i]
index = parameternames.index(n[0])
original = parametervalues[index]
parametervalues[index] = pay
modifiedurl = urllib.urlencode(dict(zip(parameternames,parametervalues)))
parametervalues[index] = original
modifiedparams = modifiedurl
payload = urllib.quote_plus(payloads[i])
u = urllib.urlopen(path+"?"+modifiedparams)
source = u.read()
code = BeautifulSoup(source)
if str(i) == str(1):
#print ("[+] Checking HTML Context...")
if payloads[i] in source:#htmlcode[0].contents[0]:
#print("[+] XSS Vulnerability Found.")
found = 1
script = code.findAll('script')
if str(i) == str(3) or str(i) == str(2):
#print("[+] Checking JS Context...")
if str(i) == str(3):
#JS Context
for p in range(len(script)):
try:
if pay in script[p].contents[0]:
#print("[+] XSS Vulnerability Found")
found = 1
except(IndexError):
pass
if str(i) == str(2):
if payloads['2'] in source:
# print("[+] XSS Vulnerability Found")
found = 1
except(KeyError):
pass
if found == 1:
vuln.append("'"+n[0]+"'")
found = 0
if len(vuln) != 0:
print("[!] "+red+"Vulnerable Parameter/s:"+reset),
for i in vuln:
print(boldred+i+reset),
else:
print("[!] Not Vulnerable")
def portscanner():
print("[i] Syntax : Function")
print(" 23,80,120 : Scans Specific Ports, e.g, Scans Port 23,80 and 120")
print(" 23-80 : Scans a Range of Ports, e.g, Scans Port from 23 to 80")
print(" 23 : Scans a single port, e.g, Scans Port 23")
print(" all : Scans all ports from 20 to 5000")
print(" ")
portoption = raw_input("[+] Enter Range or Port:\n > ")
wasmultiple = 0
wasrange = 0
wasone = 0
if ',' in portoption:
wasmultiple = 1
multipleport = portoption.split(',')
notexpected = 0
for i in multipleport:
if not str(i).isdigit():
print("[!] Incorrect Syntax!")
notexpected = 1
if notexpected == 1:
again()
totallength = multipleport
elif '-' in portoption:
wasrange = 1
rangeport = portoption.split('-')
totalrange = range(int(rangeport[0]),int(rangeport[1])+1)
if len(rangeport) != 2:
print("[!] Incorrect Syntax!")
again()
totallength = totalrange
elif portoption == 'all':
totallength = range(20,5000)
elif portoption.isdigit():
wasone = 1
oneport = int(portoption)
totallength = range(1)
else:
print("[+] Incorrect Syntax!")
again()
print("[+] Scanning %s Port/s on Target: %s")%(len(totallength),ip)
ports = 5000
found = 1
protocolname = 'tcp'
progress = 20
loopcondition = range(20,5000)
if portoption == 'all':
loopcondition = range(20,5000)
ports = 5000
progress = 20
elif wasmultiple == 1:
loopcondition = multipleport
ports = int(len(multipleport))
progress = 0 #int(min(multipleport))
elif wasrange == 1:
loopcondition = totalrange
ports = int(rangeport[1])
progress = int(rangeport[0])-1
elif wasone == 1:
onlyport = []
onlyport.append(portoption)
loopcondition = onlyport
progress = 0
ports = 1
else:
loopcondition = range(20,5000)
for i in loopcondition:
i = int(i)
progress += 1
sys.stdout.write("\r[+] Progress %i / %s ..."% (progress,ports))
sys.stdout.flush()
portconnect = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
response = portconnect.connect_ex((ip, i))
if(response == 0) :
print ('\n | Port: '+boldwhite+'%d'+reset+' \n | Status: '+green+'OPEN'+reset+'\n | Service: '+boldwhite+'%s'+reset+'\n')% (i,socket.getservbyport(i, protocolname))
found += 1
portconnect.close()
if found == 1:
print("\n | "+red+"No Open Ports Found!"+reset)
def subdomainscanner():
import sys
print("\n[+] Subdomain Scanner Start!")
wordlist = ["mail","localhost","blog","forum","0","01","02","03","1","10","11","12","13","14","15","16","17","18","19","2","20","3","3com","4","5","6","7","8","9","ILMI","a","a.auth-ns","a01","a02","a1","a2","abc","about","ac","academico","acceso","access","accounting","accounts","acid","activestat","ad","adam","adkit","admin","administracion","administrador","administrator","administrators","admins","ads","adserver","adsl","ae","af","affiliate","affiliates","afiliados","ag","agenda","agent","ai","aix","ajax","ak","akamai","al","alabama","alaska","albuquerque","alerts","alpha","alterwind","am","amarillo","americas","an","anaheim","analyzer","announce","announcements","antivirus","ao","ap","apache","apollo","app","app01","app1","apple","application","applications","apps","appserver","aq","ar","archie","arcsight","argentina","arizona","arkansas","arlington","as","as400","asia","asterix","at","athena","atlanta","atlas","att","au","auction","austin","auth","auto","av","aw","ayuda","az","b","b.auth-ns","b01","b02","b1","b2","b2b","b2c","ba","back","backend","backup","baker","bakersfield","balance","balancer","baltimore","banking","bayarea","bb","bbdd","bbs","bd","bdc","be","bea","beta","bf","bg","bh","bi","billing","biz","biztalk","bj","black","blackberry","blogs","blue","bm","bn","bnc","bo","bob","bof","boise","bolsa","border","boston","boulder","boy","br","bravo","brazil","britian","broadcast","broker","bronze","brown","bs","bsd","bsd0","bsd01","bsd02","bsd1","bsd2","bt","bug","buggalo","bugs","bugzilla","build","bulletins","burn","burner","buscador","buy","bv","bw","by","bz","c","c.auth-ns","ca","cache","cafe","calendar","california","call","calvin","canada","canal","canon","careers","catalog","cc","cd","cdburner","cdn","cert","certificates","certify","certserv","certsrv","cf","cg","cgi","ch","channel","channels","charlie","charlotte","chat","chats","chatserver","check","checkpoint","chi","chicago","ci","cims","cincinnati","cisco","citrix","ck","cl","class","classes","classifieds","classroom","cleveland","clicktrack","client","clientes","clients","club","clubs","cluster","clusters","cm","cmail","cms","cn","co","cocoa","code","coldfusion","colombus","colorado","columbus","com","commerce","commerceserver","communigate","community","compaq","compras","con","concentrator","conf","conference","conferencing","confidential","connect","connecticut","consola","console","consult","consultant","consultants","consulting","consumer","contact","content","contracts","core","core0","core01","corp","corpmail","corporate","correo","correoweb","cortafuegos","counterstrike","courses","cr","cricket","crm","crs","cs","cso","css","ct","cu","cust1","cust10","cust100","cust101","cust102","cust103","cust104","cust105","cust106","cust107","cust108","cust109","cust11","cust110","cust111","cust112","cust113","cust114","cust115","cust116","cust117","cust118","cust119","cust12","cust120","cust121","cust122","cust123","cust124","cust125","cust126","cust13","cust14","cust15","cust16","cust17","cust18","cust19","cust2","cust20","cust21","cust22","cust23","cust24","cust25","cust26","cust27","cust28","cust29","cust3","cust30","cust31","cust32","cust33","cust34","cust35","cust36","cust37","cust38","cust39","cust4","cust40","cust41","cust42","cust43","cust44","cust45","cust46","cust47","cust48","cust49","cust5","cust50","cust51","cust52","cust53","cust54","cust55","cust56","cust57","cust58","cust59","cust6","cust60","cust61","cust62","cust63","cust64","cust65","cust66","cust67","cust68","cust69","cust7","cust70","cust71","cust72"
,"cust73","cust74","cust75","cust76","cust77","cust78","cust79","cust8","cust80","cust81","cust82","cust83","cust84","cust85","cust86","cust87","cust88","cust89","cust9","cust90","cust91","cust92","cust93","cust94","cust95","cust96","cust97","cust98","cust99","customer","customers","cv","cvs","cx","cy","cz","d","dallas","data","database","database01","database02","database1","database2","databases","datastore","datos","david","db","db0","db01","db02","db1","db2","dc","de","dealers","dec","def","default","defiant","delaware","dell","delta","delta1","demo","demonstration","demos","denver","depot","des","desarrollo","descargas","design","designer","detroit","dev","dev0","dev01","dev1","devel","develop","developer","developers","development","device","devserver","devsql","dhcp","dial","dialup","digital","dilbert","dir","direct","directory","disc","discovery","discuss","discussion","discussions","disk","disney","distributer","distributers","dj","dk","dm","dmail","dmz","dnews","dns","dns-2","dns0","dns1","dns2","dns3","do","docs","docuementacion","docuementos","domain","domains","dominio","domino","dominoweb","doom","download","downloads","downtown","dragon","drupal","dsl","dyn","dynamic","dynip","dz","e","e-com","e-commerce","e0","eagle","earth","east","ec","echo","ecom","ecommerce","edi","edu","education","edward","ee","eg","eh","ejemplo","elpaso","email","employees","empresa","empresas","en","enable","eng","eng01","eng1","engine","engineer","engineering","enterprise","epsilon","er","erp","es","esd","esm","espanol","estadisticas","esx","et","eta","europe","events","domain","exchange","exec","extern","external","extranet","f","f5","falcon","farm","faststats","fax","feedback","feeds","fi","field","file","files","fileserv","fileserver","filestore","filter","find","finger","firewall","fix","fixes","fj","fk","fl","flash","florida","flow","fm","fo","foobar","formacion","foro","foros","fortworth","forums","foto","fotos","foundry","fox","foxtrot","fr","france","frank","fred","freebsd","freebsd0","freebsd01","freebsd02","freebsd1","freebsd2","freeware","fresno","front","frontdesk","fs","fsp","ftp","ftp-","ftp0","ftp2","ftp_","ftpserver","fw","fw-1","fw1","fwsm","fwsm0","fwsm01","fwsm1","g","ga","galeria","galerias","galleries","gallery","games","gamma","gandalf","gate","gatekeeper","gateway","gauss","gd","ge","gemini","general","george","georgia","germany","gf","gg","gh","gi","gl","glendale","gm","gmail","gn","go","gold","goldmine","golf","gopher","gp","gq","gr","green","group","groups","groupwise","gs","gsx","gt","gu","guest","gw","gw1","gy","h","hal","halflife","hawaii","hello","help","helpdesk","helponline","henry","hermes","hi","hidden","hk","hm","hn","hobbes","hollywood","home","homebase","homer","honeypot","honolulu","host","host1","host3","host4","host5","hotel","hotjobs","houstin","houston","howto","hp","hpov","hr","ht","http","https","hu","hub","humanresources","i","ia","ias","ibm","ibmdb","id","ida","idaho","ids","ie","iis","il","illinois","im","images","imail","imap","imap4","img","img0","img01","img02","in","inbound","inc","include","incoming","india","indiana","indianapolis","info","informix","inside","install","int","intern","internal","international","internet","intl","intranet","invalid","investor","investors","invia","invio","io","iota","iowa","iplanet","ipmonitor","ipsec","ipsec-gw","iq","ir","irc","ircd","ircserver","ireland","iris","irvine","irving","is","isa","isaserv","isaserver","ism","israel","isync","it","italy","ix","j","japan","java","je","jedi","jm","jo","jobs","john","jp","j
run","juegos","juliet","juliette","juniper","k","kansas","kansascity","kappa","kb","ke","kentucky","kerberos","keynote","kg","kh","ki","kilo","king","km","kn","knowledgebase","knoxville","koe","korea","kp","kr","ks","kw","ky","kz","l","la","lab","laboratory","labs","lambda","lan","laptop","laserjet","lasvegas","launch","lb","lc","ldap","legal","leo","li","lib","library","lima","lincoln","link","linux","linux0","linux01","linux02","linux1","linux2","lista","lists","listserv","listserver","live","lk","load","loadbalancer","local","log","log0","log01","log02","log1","log2","logfile","logfiles","logger","logging","loghost","login","logs","london","longbeach","losangeles","lotus","louisiana","lr","ls","lt","lu","luke","lv","ly","lyris","m","ma","mac","mac1","mac10","mac11","mac2","mac3","mac4","mac5","mach","macintosh","madrid","mail2","mailer","mailgate","mailhost","mailing","maillist","maillists","mailroom","mailserv","mailsite","mailsrv","main","maine","maint","mall","manage","management","manager","manufacturing","map","mapas","maps","marketing","marketplace","mars","marvin","mary","maryland","massachusetts","master","max","mc","mci","md","mdaemon","me","media","member","members","memphis","mercury","merlin","messages","messenger","mg","mgmt","mh","mi","miami","michigan","mickey","midwest","mike","milwaukee","minneapolis","minnesota","mirror","mis","mississippi","missouri","mk","ml","mm","mn","mngt","mo","mobile","mom","monitor","monitoring","montana","moon","moscow","movies","mozart","mp","mp3","mpeg","mpg","mq","mr","mrtg","ms","ms-exchange","ms-sql","msexchange","mssql","mssql0","mssql01","mssql1","mt","mta","mtu","mu","multimedia","music","mv","mw","mx","my","mysql","mysql0","mysql01","mysql1","mz","n","na","name","names","nameserv","nameserver","nas","nashville","nat","nc","nd","nds","ne","nebraska","neptune","net","netapp","netdata","netgear","netmeeting","netscaler","netscreen","netstats","network","nevada","new","newhampshire","newjersey","newmexico","neworleans","news","newsfeed","newsfeeds","newsgroups","newton","newyork","newzealand","nf","ng","nh","ni","nigeria","nj","nl","nm","nms","nntp","no","node","nokia","nombres","nora","north","northcarolina","northdakota","northeast","northwest","noticias","novell","november","np","nr","ns","ns-","ns0","ns01","ns02","ns1","ns2","ns3","ns4","ns5","ns_","nt","nt4","nt40","ntmail","ntp","ntserver","nu","null","nv","ny","nz","o","oakland","ocean","odin","office","offices","oh","ohio","ok","oklahoma","oklahomacity","old","om","omaha","omega","omicron","online","ontario","open","openbsd","openview","operations","ops","ops0","ops01","ops02","ops1","ops2","opsware","or","oracle","orange","order","orders","oregon","orion","orlando","oscar","out","outbound","outgoing","outlook","outside","ov","owa","owa01","owa02","owa1","owa2","ows","oxnard","p","pa","page","pager","pages","paginas","papa","paris","parners","partner","partners","patch","patches","paul","payroll","pbx","pc","pc01","pc1","pc10","pc101","pc11","pc12","pc13","pc14","pc15","pc16","pc17","pc18","pc19","pc2","pc20","pc21","pc22","pc23","pc24","pc25","pc26","pc27","pc28","pc29","pc3","pc30","pc31","pc32","pc33","pc34","pc35","pc36","pc37","pc38","pc39","pc4","pc40","pc41","pc42","pc43","pc44","pc45","pc46","pc47","pc48","pc49","pc5","pc50","pc51","pc52","pc53","pc54","pc55","pc56","pc57","pc58","pc59","pc6","pc60","pc7","pc8","pc9","pcmail","pda","pdc","pe","pegasus","pennsylvania","peoplesoft","personal","pf","pg","pgp","ph","phi","philadelphia","phoenix","phoeniz","phone","phones","phot
os","pi","pics","pictures","pink","pipex-gw","pittsburgh","pix","pk","pki","pl","plano","platinum","pluto","pm","pm1","pn","po","policy","polls","pop","pop3","portal","portals","portfolio","portland","post","posta","posta01","posta02","posta03","postales","postoffice","ppp1","ppp10","ppp11","ppp12","ppp13","ppp14","ppp15","ppp16","ppp17","ppp18","ppp19","ppp2","ppp20","ppp21","ppp3","ppp4","ppp5","ppp6","ppp7","ppp8","ppp9","pptp","pr","prensa","press","print >> sys.stdout,er","print >> sys.stdout,serv","print >> sys.stdout,server","priv","privacy","private","problemtracker","products","profiles","project","projects","promo","proxy","prueba","pruebas","ps","psi","pss","pt","pub","public","pubs","purple","pw","py","q","qa","qmail","qotd","quake","quebec","queen","quotes","r","r01","r02","r1","r2","ra","radio","radius","rapidsite","raptor","ras","rc","rcs","rd","re","read","realserver","recruiting","red","redhat","ref","reference","reg","register","registro","registry","regs","relay","rem","remote","remstats","reports","research","reseller","reserved","resumenes","rho","rhodeisland","ri","ris","rmi","ro","robert","romeo","root","rose","route","router","router1","rs","rss","rtelnet","rtr","rtr01","rtr1","ru","rune","rw","rwhois","s","s1","s2","sa","sac","sacramento","sadmin","safe","sales","saltlake","sam","san","sanantonio","sandiego","sanfrancisco","sanjose","saskatchewan","saturn","sb","sbs","sc","scanner","schedules","scotland","scotty","sd","se","search","seattle","sec","secret","secure","secured","securid","security","sendmail","seri","serv","serv2","server","server1","servers","service","services","servicio","servidor","setup","sg","sh","shared","sharepoint","shareware","shipping","shop","shoppers","shopping","si","siebel","sierra","sigma","signin","signup","silver","sim","sirius","site","sj","sk","skywalker","sl","slackware","slmail","sm","smc","sms","smtp","smtphost","sn","sniffer","snmp","snmpd","snoopy","snort","so","socal","software","sol","solaris","solutions","soporte","source","sourcecode","sourcesafe","south","southcarolina","southdakota","southeast","southwest","spain","spam","spider","spiderman","splunk","spock","spokane","springfield","sprint >> 
sys.stdout,","sqa","sql","sql0","sql01","sql1","sql7","sqlserver","squid","sr","ss","ssh","ssl","ssl0","ssl01","ssl1","st","staff","stage","staging","start","stat","static","statistics","stats","stlouis","stock","storage","store","storefront","streaming","stronghold","strongmail","studio","submit","subversion","sun","sun0","sun01","sun02","sun1","sun2","superman","supplier","suppliers","support","sv","sw","sw0","sw01","sw1","sweden","switch","switzerland","sy","sybase","sydney","sysadmin","sysback","syslog","syslogs","system","sz","t","tacoma","taiwan","talk","tampa","tango","tau","tc","tcl","td","team","tech","technology","techsupport","telephone","telephony","telnet","temp","tennessee","terminal","terminalserver","termserv","test","test2k","testbed","testing","testlab","testlinux","testo","testserver","testsite","testsql","testxp","texas","tf","tftp","tg","th","thailand","theta","thor","tienda","tiger","time","titan","tivoli","tj","tk","tm","tn","to","tokyo","toledo","tom","tool","tools","toplayer","toronto","tour","tp","tr","tracker","train","training","transfers","trinidad","trinity","ts","ts1","tt","tucson","tulsa","tumb","tumblr","tunnel","tv","tw","tx","tz","u","ua","uddi","ug","uk","um","uniform","union","unitedkingdom","unitedstates","unix","unixware","update","updates","upload","ups","upsilon","uranus","urchin","us","usa","usenet","user","users","ut","utah","utilities","uy","uz","v","va","vader","vantive","vault","vc","ve","vega","vegas","vend","vendors","venus","vermont","vg","vi","victor","video","videos","viking","violet","vip","virginia","vista","vm","vmserver","vmware","vn","vnc","voice","voicemail","voip","voyager","vpn","vpn0","vpn01","vpn02","vpn1","vpn2","vt","vu","w","w1","w2","w3","wa","wais","wallet","wam","wan","wap","warehouse","washington","wc3","web","webaccess","webadmin","webalizer","webboard","webcache","webcam","webcast","webdev","webdocs","webfarm","webhelp","weblib","weblogic","webmail","webmaster","webproxy","webring","webs","webserv","webserver","webservices","website","websites","websphere","websrv","websrvr","webstats","webstore","websvr","webtrends","welcome","west","westvirginia","wf","whiskey","white","whois","wi","wichita","wiki","wililiam","win","win01","win02","win1","win2","win2000","win2003","win2k","win2k3","windows","windows01","windows02","windows1","windows2","windows2000","windows2003","windowsxp","wingate","winnt","winproxy","wins","winserve","winxp","wire","wireless","wisconsin","wlan","wordpress","work","world","write","ws","ws1","ws10","ws11","ws12","ws13","ws2","ws3","ws4","ws5","ws6","ws7","ws8","ws9","wusage","wv","ww","www","www-","www-01","www-02","www-1","www-2","www-int","www0","www01","www02","www1","www2","www3","www_","wwwchat","wwwdev","wwwmail","wy","wyoming","x","x-ray","xi","xlogan","xmail","xml","xp","y","yankee","ye","yellow","young","yt","yu","z","z-log","za","zebra","zera","zeus","zlog","zm","zulu","zw"]
progress = 0
for i in wordlist:
progress += 1
sys.stdout.write("\r[+] Progress %i / %s ..."% (progress,len(wordlist)))
sys.stdout.flush()
try:
s = socket.gethostbyname(i+'.'+splithost)
if (s):
so = socket.gethostbyname_ex(i+'.'+splithost)
print("\n[+] Subdomain found!\n | Subdomain: %s.%s \n | Nameserver: %s\n | IP: %s")%(i,splithost,so[0],s)
if s == '127.0.0.1':
print("[!] "+red+"Sub-domain is vulnerable to "+boldred+"Same-Site Scripting! "+reset+"\n[!] About Same-Site Scripting:\n[!] [ "+green+"https://www.acunetix.com/vulnerabilities/web/same-site-scripting"+reset+" ] ")
print('')
except(socket.gaierror):
pass
def enumform(listofIDs,listofnames):
lengthofnames = len(max(listofnames, key=len))
lengthofIDs = len(max(listofIDs, key=len))
if lengthofnames < 12:
lengthofnames = 12
print "[i] "+green+"Found the following Username/s:"+reset
print "\t+-"+'-'.center(6, '-')+'-+-'+'-'.center(lengthofnames, '-')+"-+"
print "\t| "+'ID/s'.center(6, ' ')+' | '+'Username/s'.center(lengthofnames, ' ')+" |"
print "\t+-"+'-'.center(6, '-')+'-+-'+'-'.center(lengthofnames, '-')+"-+"
for i,d in zip(listofnames,listofIDs):
print '\t| '+d.center(6, ' ')+" | "+i.center(lengthofnames, ' ')+' |'
print "\t+-"+'-'.center(6, '-')+'-+-'+'-'.center(lengthofnames, '-')+"-+"
print("")
def wpbackupscanner():
backups = ['wp-config.php~','wp-config.php.txt','wp-config.php.save','.wp-config.php.swp','wp-config.php.swp','wp-config.php.swo','wp-config.php_bak','wp-config.bak','wp-config.php.bak','wp-config.save','wp-config.old','wp-config.php.old','wp-config.php.orig','wp-config.orig','wp-config.php.original','wp-config.original','wp-config.txt']
print("[+] Scan Started")
print("[+] Searching Wordpress Backups...")
print("[?] Note: Press CTRL+C to skip\n ")
progress = 0
backup = []
backupurl = []
try:
for i in backups:
progress += 1
sys.stdout.write("\r[+] Progress %i / %s ..."% (progress,len(backups)))
sys.stdout.flush()
sock(i)
if page.getcode() == 200:
detecting = sock(i,"1")
if "define('DB_PASSWORD'" in detecting:
s1 = i
s2 = data
backup.append(s1)
backupurl.append(s2)
except(KeyboardInterrupt):
print("\n[+] File detection skipped")
print('')
for ifile,iurl in zip(backup,backupurl):
print("[!] "+boldred+"Backup Found!\n"+reset+" | "+red+"Filename: "+boldred+"%s"+reset+"\n | "+red+"URL: "+boldred+"%s\n"+reset)%(ifile,iurl)
def wpenumeration():
import time
global d4rk,dr1,host
page = urllib.urlopen(host)
url = page.geturl()
if page.geturl() != host:
print("[i] The remote host redirects to '"+str(url)+"' \n Following the redirection...")
host = page.geturl()
print("\n[+] Scan Started : "+lightgreen+"%s"+reset) % time.strftime("%c")
print "[+] Enumeration Usernames..."
T = 33
found = 0
listofusernames = []
listofids = []
for i in range(30):
authorlink = host+"?author="+str(i+1)
url = urllib.urlopen(authorlink)
source = url.read()
if url.geturl() == authorlink:
break
else:
com = str(host)+"/author/"
res = url.geturl()
res = res.split("/")
while len(res) >=3:
res.pop(0)
listofusernames.append(res[0])
listofids.append(str(i+1))
found = 1
d4rk = dr1+str(1)+str(T)+str(7)
if found == 0:
print("[+] "+red+"No Usernames detected"+reset)
else:
enumform(listofids,listofusernames)
print("[+] Enumeration Completed.")
print("[+] Scan Ended : "+lightgreen+"%s"+reset) % time.strftime("%c")
def wpscanner():
print(" \n[+] Detecting Wordpress")
wp = 0
i = 'wp-admin/'
sock(i)
if "wp-login.php?redirect_to" in page.geturl():
wp = 1
print(green+"[i] "+green+"Wordpress Detected!"+reset)
if wpenumeration == "on":
wpenumeration()
else:
wpenumeration()
if wp == 0:
i = 'wp-content/index.php'
sock(i)
if page.getcode() == 200 and "" in page.read():
print("[!] "+green+"Wordpress Detected!"+reset)
wp = 1
if wpenumeration == "on":
wpenumeration()
else:
wpbackupscanner()
wpenumeration()
if wp == 0:
print("[!] "+red+"No Wordpress Detected"+reset)
def redirectcheck():
global redirect,host
redirect = 0
print("[+] Checking Redirection")
page = urllib.urlopen(host)
url = page.geturl()
if page.geturl() != host:
option = raw_input("[i] "+boldgrey+"Host redirects to "+str(url)+reset+" \n Set this as default Host? [Y/N]:\n > ")
if option.lower() == "y":
host = page.geturl()
redirect = 1
else:
print("[+] URL isn't redirecting")
def again():
global wpenumerator,filedetector,subdomainscan,portscan,wpscan,xssscanner,wpbackupscan,sqliscanner
# -- Switches Reset --
wpenumerator = "off"
filedetector = "off"
subdomainscan = "off"
portscan = "off"
wpscan = "off"
xssscanner = "off"
wpbackupscan = "off"
sqliscanner = "off"
# -- Switches Reset --
inp = raw_input("\n[+] [E]xit or launch [A]gain? (e/a)").lower()
if inp == 'a':
dtect()
elif inp == 'e':
exit()
else:
print("[!] Incorrect option selected")
again()
# -- Program Structure Start --
menu()
try:
global host
host = raw_input("[+] Enter Domain \n e.g, site.com\n > ")
if 'https://' in host:
pass
elif 'http://' in host:
pass
else:
host = "http://"+host
print("[+] Checking Status...")
alive()
responseheadercheck()
if xssscanner == "on":
XSSscan(host)
if sqliscanner == "on":
SQLIscan(host)
if wpbackupscan == "on":
wpbackupscanner()
if filedetector == "on":
files = ['robots.txt','crossdomain.xml','.htaccess','clientaccesspolicy.xml','infophp.php','log.txt','logs.txt','CHANGELOG.txt','awstats/data/']
print("[+] Scan Started")
print("[+] Searching sensitive files...")
print("[?] Note: Press CTRL+C to skip\n ")
try:
for i in files:
if i == "awstats/data/":
sock(i)
if "<title>Index of /awstats/data</title>" in sourcecode:
print("[!] awstats detected!\n[!] URL: %s")%(data)
else:
sock(i)
if page.getcode() == 200:
print("[!] File Found!\n | Name: %s\n | URL: %s\n")%(i,data)
except(KeyboardInterrupt):
print("\n[+] File detection skipped")
if wpenumerator == "on":
print(" \n[+] Detecting Wordpress")
wp = 0
i = 'wp-admin/'
sock(i)
if "wp-login.php?redirect_to" in page.geturl():
wp = 1
print(green+"[i] "+green+"Wordpress Detected!"+reset)
wpenumeration()
if wp == 0:
i = 'wp-content/index.php'
sock(i)
if page.getcode() == 200 and "" in page.read():
print("[!] "+green+"Wordpress Detected!"+reset)
wp = 1
wpenumeration()
if wp == 0:
print("[!] "+red+"No Wordpress Detected"+reset)
if wpscan == "on":
wpscanner()
if subdomainscan == "on":
subdomainscanner()
if portscan == "on":
portscanner()
again()
except(KeyboardInterrupt) as Exit:
print("\n[+] Exiting...")
sys.exit()
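Every scanner in D-TECT reuses the same counter idiom: overwrite one line with "\r[+] Progress i / n ..." and flush explicitly, since without a newline the line-buffered stdout would never show the counter moving. A minimal sketch of it (the probe is simulated):

import sys, time

items = range(200)
total = len(items)
for progress, _ in enumerate(items, start=1):
    time.sleep(0.005)  # stand-in for one network probe
    sys.stdout.write("\r[+] Progress %i / %s ..." % (progress, total))
    sys.stdout.flush()  # needed: no newline, so the buffer will not flush itself
sys.stdout.write("\n")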
Example 24
Project: elijah-openstack Source File: cloudlet_client.py
def request_import_basevm(server_address, token,
endpoint, glance_endpoint,
import_filepath, basevm_name):
def _create_param(filepath, image_name, image_type, disk_size, mem_size):
properties = {
"image_type": "snapshot",
"image_location": "snapshot",
CLOUDLET_TYPE.PROPERTY_KEY_CLOUDLET: "True",
CLOUDLET_TYPE.PROPERTY_KEY_CLOUDLET_TYPE: image_type,
CLOUDLET_TYPE.PROPERTY_KEY_BASE_UUID: base_hashvalue,
}
param = {
"name": "%s" % image_name,
"data": open(filepath, "rb"),
"size": os.path.getsize(filepath),
"is_public": True,
"disk_format": "raw",
"container_format": "bare",
"min_disk": disk_size,
"min_ram": mem_size,
"properties": properties,
}
return param
(base_hashvalue, disk_name, memory_name, diskhash_name, memoryhash_name) = \
PackagingUtil._get_basevm_attribute(import_filepath)
# check duplicated base VM
image_list = get_list(server_address, token, endpoint, "images")
for image in image_list:
properties = image.get("metadata", None)
if properties is None or len(properties) == 0:
continue
if properties.get(CLOUDLET_TYPE.PROPERTY_KEY_CLOUDLET_TYPE) != \
CLOUDLET_TYPE.IMAGE_TYPE_BASE_DISK:
continue
base_sha256_uuid = properties.get(CLOUDLET_TYPE.PROPERTY_KEY_BASE_UUID)
if base_sha256_uuid == base_hashvalue:
msg = "Duplicated base VM is already exists on the system\n"
msg += "Image UUID of duplicated Base VM: %s\n" % image['id']
raise CloudletClientError(msg)
# decompress files
temp_dir = mkdtemp(prefix="cloudlet-base-")
sys.stdout.write(
"Decompressing zipfile(%s) to temp dir(%s)\n" %
(import_filepath, temp_dir))
zipbase = zipfile.ZipFile(
_FileFile("file:///%s" % os.path.abspath(import_filepath)), 'r')
zipbase.extractall(temp_dir)
disk_path = os.path.join(temp_dir, disk_name)
memory_path = os.path.join(temp_dir, memory_name)
diskhash_path = os.path.join(temp_dir, diskhash_name)
memoryhash_path = os.path.join(temp_dir, memoryhash_name)
# create new flavor if nothing matches
memory_header = elijah_memory_util._QemuMemoryHeader(open(memory_path))
libvirt_xml_str = memory_header.xml
cpu_count, memory_size_mb = get_resource_size(libvirt_xml_str)
disk_gb = int(math.ceil(os.path.getsize(disk_path)/1024/1024/1024))
flavor_list = get_list(server_address, token, endpoint, "flavors")
flavor_ref, flavor_id = find_matching_flavor(flavor_list, cpu_count,
memory_size_mb, disk_gb)
if flavor_id is None:
flavor_name = "cloudlet-flavor-%s" % basevm_name
flavor_ref, flavor_id = create_flavor(server_address,
token,
endpoint,
cpu_count,
memory_size_mb,
disk_gb,
flavor_name)
sys.stdout.write("Create new flavor for the base VM\n")
# upload Base VM
disk_param = _create_param(disk_path, basevm_name + "-disk",
CLOUDLET_TYPE.IMAGE_TYPE_BASE_DISK,
disk_gb, memory_size_mb)
memory_param = _create_param(memory_path, basevm_name + "-memory",
CLOUDLET_TYPE.IMAGE_TYPE_BASE_MEM,
disk_gb, memory_size_mb)
diskhash_param = _create_param(diskhash_path, basevm_name + "-diskhash",
CLOUDLET_TYPE.IMAGE_TYPE_BASE_DISK_HASH,
disk_gb, memory_size_mb)
memoryhash_param = _create_param(memoryhash_path, basevm_name + "-memhash",
CLOUDLET_TYPE.IMAGE_TYPE_BASE_MEM_HASH,
disk_gb, memory_size_mb)
url = "://".join((glance_endpoint.scheme, glance_endpoint.netloc))
gclient = glance_client.Client('1', url, token=token, insecure=True)
sys.stdout.write("upload base memory to glance\n")
glance_memory = gclient.images.create(**memory_param)
sys.stdout.write("upload base disk hash to glance\n")
glance_diskhash = gclient.images.create(**diskhash_param)
sys.stdout.write("upload base memory hash to glance\n")
glance_memoryhash = gclient.images.create(**memoryhash_param)
# upload Base disk at the last to have references for other image files
glance_ref = {
CLOUDLET_TYPE.IMAGE_TYPE_BASE_MEM: glance_memory.id,
CLOUDLET_TYPE.IMAGE_TYPE_BASE_DISK_HASH: glance_diskhash.id,
CLOUDLET_TYPE.IMAGE_TYPE_BASE_MEM_HASH: glance_memoryhash.id,
CLOUDLET_TYPE.PROPERTY_KEY_BASE_RESOURCE:
libvirt_xml_str.replace("\n", "") # API cannot send '\n'
}
disk_param['properties'].update(glance_ref)
sys.stdout.write("upload base disk to glance\n")
glance_disk = gclient.images.create(**disk_param)
# delete temp dir
if os.path.exists(temp_dir):
shutil.rmtree(temp_dir)
return glance_disk
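Here sys.stdout.write announces each step ("upload base memory to glance\n") immediately before the blocking glance call, so a stalled upload is attributable to the last line on screen. A minimal sketch of that announce-then-do pattern (announce and the lambda steps are placeholders, not the OpenStack client API):

import sys, time

def announce(msg, step):
    # print the step name before the blocking call, not after,
    # so a hang is attributable to the step shown on screen
    sys.stdout.write(msg + "\n")
    sys.stdout.flush()
    return step()

announce("upload base memory to glance", lambda: time.sleep(0.1))
announce("upload base disk to glance", lambda: time.sleep(0.1))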
Example 25
Project: onigiri Source File: onigiri.py
def acquire_ram(self, victim, alternative):
targets = victim.Targets
pm = re.compile(r'.*:pmem$')
self.logger.debug('Issue Discovery Request...')
for target in targets:
if pm.search(target.TargetName):
self.logger.info('Physical Memory found: {0} (DiskType={1})'.format(target.TargetName, target.DiskType))
dest_path = self.out_path + "\\" + victim.MachineNameOrIP
img_path = dest_path + "\\pmem"
if self.skip and (os.path.exists(img_path + '.dd4.001') or os.path.exists(img_path + '.dmp')):
self.logger.info('the RAM image already exists, so skip the acquisition ({0})'.format(img_path))
continue
if not os.path.exists(dest_path):
os.mkdir(dest_path)
if alternative:
self.logger.info('acquiring mapped physical memory using PsExec&DumpIt...')
#cmd_listen = [self.dumpit_path, '/l', '/f', img_path + '.dmp.lznt1']
cmd_listen = [self.dumpit_path, '/l', '/f', img_path + '.dmp']
self.logger.debug('DumpIt Listener cmdline: {}'.format(' '.join(cmd_listen)))
proc_listen = subprocess.Popen(cmd_listen, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
for i in range(3):
self.logger.info('trying... {0}'.format(i+1))
dest_host = socket.gethostbyname(socket.gethostname())
cmd_psexec = [self.psexec_path, r'\\' + victim.MachineNameOrIP, '-accepteula', '-c', '-f', '-u', self.domain + '\\' + self.user,
#'-p', self.password, '-r', 'onigiri', self.dumpit_path, '/t', dest_host, '/a', '/d', '/lznt1'] # /lznt1 through network NOT work
'-p', self.password, '-r', 'onigiri', self.dumpit_path, '/t', dest_host, '/a', '/d']
self.logger.debug('PsExec cmdline: {}'.format(' '.join(cmd_psexec)))
proc_psexec = subprocess.Popen(cmd_psexec, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
#stdout_data, stderr_data = proc_psexec.communicate()
''' # for Python 3.3
while 1:
try:
outs, errs = proc_psexec.communicate(timeout=5)
break
except subprocess.TimeoutExpired:
size = os.path.getsize(img_path + '.dmp')
sys.stdout.write('\r...{:8d}MB'.format(long(size / (1024 * 1024))))
continue
'''
sleep(2)
while proc_psexec.poll() is None:
sleep(0.1)
#size = os.path.getsize(img_path + '.dmp.lznt1')
try:
size = os.path.getsize(img_path + '.dmp')
sys.stdout.write('\r...{:8d}MB'.format(long(size / (1024 * 1024))))
except WindowsError:
self.logger.debug('WindowsError: os.path.getsize for {}'.format(img_path + '.dmp'))
sleep(1)
print '\r\t\t ...Done.'
if proc_psexec.returncode == 0:
break
else:
self.logger.error(stderr_data)
self.logger.error('PsExec&DumpIt failed.')
self.logger.debug('PsExec returncode={0}'.format(proc_psexec.returncode))
if proc_psexec.returncode != 0:
proc_listen.terminate()
self.logger.critical('RAM acquisition failed (PsExec&DumpIt).')
self.logger.error("check with the cmdline: {0}".format(' '.join(cmd_psexec)))
sys.exit(1)
else:
stdout_data, stderr_data = proc_listen.communicate()
self.logger.debug('DumpIt Listener returncode={0}'.format(proc_listen.returncode))
if proc_listen.returncode != 0:
self.logger.error(stderr_data)
self.logger.critical('RAM acquisition failed (DumpIt Listener).')
self.logger.error("check with the cmdline: {0}".format(' '.join(cmd_listen)))
sys.exit(1)
#self.logger.info('RAM crashdump image saved (lznt1 compressed): {0}'.format(img_path + '.dmp.lznt1'))
self.logger.info('RAM crashdump image saved: {0}'.format(img_path + '.dmp'))
'''
self.logger.info('decompressing...')
cmd_decomp = [self.dumpit_path, '/unpack', img_path + '.dmp.lznt1', img_path + '.dmp']
self.logger.debug('DumpIt unpack cmdline: {}'.format(' '.join(cmd_decomp)))
proc_decomp = subprocess.Popen(cmd_decomp, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
stdout_data, stderr_data = proc_decomp.communicate()
if proc_decomp.returncode != 0:
self.logger.critical('DumpIt decompression failed.')
self.logger.error("check with the cmdline: {0}".format(' '.join(cmd_decomp)))
sys.exit(1)
self.logger.info('Decompressed RAM crashdump image saved: {0}'.format(img_path + '.dmp'))
'''
else:
try:
self.logger.debug('Login to F-Response Disk...')
target.Login()
except win32com.client.pywintypes.com_error:
self.logger.critical('Login to F-Response Disk failed. Aborted in the previous acquisition? Please check the status on GUI console and logout the pmem manually.')
sys.exit(1)
#login_check = target.PhysicalDiskMapping
#device = target.PhysicalDiskName
if target.PhysicalDiskMapping == -1:
self.logger.critical('PhysicalDiskMapping failed due to timing issue. Simply try again.')
sys.exit(1)
device = r'\\.\PhysicalDrive' + str(target.PhysicalDiskMapping)
self.logger.info('acquiring mapped physical memory using F-Response&FTKImager ({0})...'.format(device))
cmd = [self.ftk_path, device, dest_path + "\\pmem"]
self.logger.debug('FTKImager cmdline: {}'.format(' '.join(cmd)))
proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=-1)
with io.open(proc.stderr.fileno(), closefd=False) as stream: # iter(proc.stdout.readline) doesn't work for '\r'?
for line in stream:
if line.find('MB') != -1 or line.find('complete') != -1:
sys.stdout.write('\r' + line.rstrip('\n'))
print ''
proc.wait()
self.logger.debug('Remove F-Response Disk...')
target.Logout()
self.logger.debug('returncode={0}'.format(proc.returncode))
if proc.returncode != 0:
self.logger.critical('RAM acquisition failed (F-Response&FTKImager).')
self.logger.error("check with the cmdline: {0}".format(' '.join(cmd)))
sys.exit(1)
self.logger.info('RAM raw image saved: {0}'.format(img_path + '.dd4.001'))
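The acquisition loop above redraws a single line with '\r' to report the growing dump size. A minimal sketch of that pattern, assuming Python 3 (the original uses Python 2's long()); dump.bin is a hypothetical file that some other process is growing:

import os
import sys
import time

path = "dump.bin"  # hypothetical: grown by another process
for _ in range(5):
    try:
        size = os.path.getsize(path)
    except OSError:  # the file may not exist yet
        size = 0
    sys.stdout.write('\r...{:8d}MB'.format(size // (1024 * 1024)))
    sys.stdout.flush()
    time.sleep(1)
sys.stdout.write('\n')  # move off the progress line when done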
2
Example 26
Project: core Source File: howmanynodes.py
def main():
usagestr = "usage: %prog [-h] [options] [args]"
parser = optparse.OptionParser(usage = usagestr)
parser.set_defaults(waittime = 0.2, numnodes = 0, bridges = 0, retries = 0,
logfile = None, services = None)
parser.add_option("-w", "--waittime", dest = "waittime", type = float,
help = "number of seconds to wait between node creation" \
" (default = %s)" % parser.defaults["waittime"])
parser.add_option("-n", "--numnodes", dest = "numnodes", type = int,
help = "number of nodes (default = unlimited)")
parser.add_option("-b", "--bridges", dest = "bridges", type = int,
help = "number of nodes per bridge; 0 = one bridge " \
"(def. = %s)" % parser.defaults["bridges"])
parser.add_option("-r", "--retry", dest = "retries", type = int,
help = "number of retries on error (default = %s)" % \
parser.defaults["retries"])
parser.add_option("-l", "--log", dest = "logfile", type = str,
help = "log memory usage to this file (default = %s)" % \
parser.defaults["logfile"])
parser.add_option("-s", "--services", dest = "services", type = str,
help = "pipe-delimited list of services added to each " \
"node (default = %s)\n(Example: 'zebra|OSPFv2|OSPFv3|" \
"vtysh|IPForward')" % parser.defaults["services"])
def usage(msg = None, err = 0):
sys.stdout.write("\n")
if msg:
sys.stdout.write(msg + "\n\n")
parser.print_help()
sys.exit(err)
(options, args) = parser.parse_args()
for a in args:
sys.stderr.write("ignoring command line argument: '%s'\n" % a)
start = datetime.datetime.now()
prefix = ipaddr.IPv4Prefix("10.83.0.0/16")
print "Testing how many network namespace nodes this machine can create."
print " - %s" % linuxversion()
mem = memfree()
print " - %.02f GB total memory (%.02f GB swap)" % \
(mem['total']/GBD, mem['stotal']/GBD)
print " - using IPv4 network prefix %s" % prefix
print " - using wait time of %s" % options.waittime
print " - using %d nodes per bridge" % options.bridges
print " - will retry %d times on failure" % options.retries
print " - adding these services to each node: %s" % options.services
print " "
lfp = None
if options.logfile is not None:
# initialize a csv log file header
lfp = open(options.logfile, "a")
lfp.write("# log from howmanynodes.py %s\n" % time.ctime())
lfp.write("# options = %s\n#\n" % options)
lfp.write("# numnodes,%s\n" % ','.join(MEMKEYS))
lfp.flush()
session = pycore.Session(persistent=True)
switch = session.addobj(cls = pycore.nodes.SwitchNode)
switchlist.append(switch)
print "Added bridge %s (%d)." % (switch.brname, len(switchlist))
i = 0
retry_count = options.retries
while True:
i += 1
# optionally add a bridge (options.bridges nodes per bridge)
try:
if options.bridges > 0 and switch.numnetif() >= options.bridges:
switch = session.addobj(cls = pycore.nodes.SwitchNode)
switchlist.append(switch)
print "\nAdded bridge %s (%d) for node %d." % \
(switch.brname, len(switchlist), i)
except Exception, e:
print "At %d bridges (%d nodes) caught exception:\n%s\n" % \
(len(switchlist), i-1, e)
break
# create a node
try:
n = session.addobj(cls = pycore.nodes.LxcNode, name = "n%d" % i)
n.newnetif(switch, ["%s/%s" % (prefix.addr(i), prefix.prefixlen)])
n.cmd([SYSCTL_BIN, "net.ipv4.icmp_echo_ignore_broadcasts=0"])
if options.services is not None:
session.services.addservicestonode(n, "", options.services,
verbose=False)
n.boot()
nodelist.append(n)
if i % 25 == 0:
print "\n%s nodes created " % i,
mem = memfree()
free = mem['free'] + mem['buff'] + mem['cached']
swap = mem['stotal'] - mem['sfree']
print "(%.02f/%.02f GB free/swap)" % (free/GBD , swap/GBD),
if lfp:
lfp.write("%d," % i)
lfp.write("%s\n" % ','.join(str(mem[x]) for x in MEMKEYS))
lfp.flush()
else:
sys.stdout.write(".")
sys.stdout.flush()
time.sleep(options.waittime)
except Exception, e:
print "At %d nodes caught exception:\n" % i, e
if retry_count > 0:
print "\nWill retry creating node %d." % i
shutil.rmtree(n.nodedir, ignore_errors = True)
retry_count -= 1
i -= 1
time.sleep(options.waittime)
continue
else:
print "Stopping at %d nodes!" % i
break
if i == options.numnodes:
print "Stopping at %d nodes due to numnodes option." % i
break
# node creation was successful at this point
retry_count = options.retries
if lfp:
lfp.flush()
lfp.close()
print "elapsed time: %s" % (datetime.datetime.now() - start)
print "Use the core-cleanup script to remove nodes and bridges."
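This example emits a dot per node and a summary line every 25 nodes. A minimal sketch of the dot-progress idiom, assuming Python 3:

import sys
import time

for i in range(1, 101):
    if i % 25 == 0:
        sys.stdout.write("\n%d nodes created\n" % i)
    else:
        sys.stdout.write(".")
        sys.stdout.flush()  # dots carry no newline, so flush explicitly
    time.sleep(0.01)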
2
Example 27
Project: ptsa Source File: lmer.py
def __init__(self, fe_formula, re_formula,
re_group, dep_data, ind_data,
factors=None, row_mask=None,
use_ranks=False, use_norm=True,
memmap=False, memmap_dir=None,
resid_formula=None,
null_formula=None, num_null_boot=0,
svd_terms=None, use_ssvd=False,
#nperms=500, nboot=100,
n_jobs=1, verbose=10,
lmer_opts=None):
"""
"""
if verbose>0:
sys.stdout.write('Initializing...')
sys.stdout.flush()
start_time = time.time()
# save the formula
self._formula_str = fe_formula + ' + ' + re_formula
# see if there's a resid formula
if resid_formula:
# the random effects are the same
self._resid_formula_str = resid_formula + ' + ' + re_formula
else:
self._resid_formula_str = None
# see if there's a null formula
if null_formula:
# the random effects are the same
self._null_formula_str = null_formula + ' + ' + re_formula
else:
self._null_formula_str = None
self._num_null_boot = num_null_boot
# save whether using ranks
self._use_ranks = use_ranks
# see whether to use sparse svd
self._use_ssvd = use_ssvd
# see if memmapping
self._memmap = memmap
# save job info
self._n_jobs = n_jobs
self._verbose = verbose
# eventually fill the feature shape
self._feat_shape = None
# fill A,M,O,D
self._A = {}
self._M = {}
self._O = {}
self._D = {}
O = []
# loop over unique grouping var
self._re_group = re_group
if isinstance(ind_data, dict):
# groups are the keys
self._groups = np.array(ind_data.keys())
else:
# groups need to be extracted from the recarray
self._groups = np.unique(ind_data[re_group])
for g in self._groups:
# get that subj inds
if isinstance(ind_data,dict):
# the index is just the group into that dict
ind_ind = g
else:
# select the rows based on the group
ind_ind = ind_data[re_group]==g
# process the row mask
if row_mask is None:
# no mask, so all good
row_ind = np.ones(len(ind_data[ind_ind]),dtype=np.bool)
elif isinstance(row_mask, dict):
# pull the row_mask from the dict
row_ind = row_mask[g]
else:
# index into it with ind_ind
row_ind = row_mask[ind_ind]
# extract that group's A,M,O
# first save the observations (rows of A)
self._O[g] = ind_data[ind_ind][row_ind]
if use_ranks:
# loop over non-factors and rank them
for n in self._O[g].dtype.names:
if (n in factors) or isinstance(self._O[g][n][0],str):
continue
self._O[g][n] = rankdata(self._O[g][n])
O.append(self._O[g])
# eventually allow for dict of data files for dep_data
if isinstance(dep_data,dict):
# the index is just the group into that dict
dep_ind = g
else:
# select the rows based on the group
dep_ind = ind_ind
# save feature shape if necessary
if self._feat_shape is None:
self._feat_shape = dep_data[dep_ind].shape[1:]
# Save D index into data
self._D[g] = dep_data[dep_ind][row_ind]
# reshape it
self._D[g] = self._D[g].reshape((self._D[g].shape[0],-1))
if use_ranks:
if verbose>0:
sys.stdout.write('Ranking %s...'%(str(g)))
sys.stdout.flush()
for i in xrange(self._D[g].shape[1]):
self._D[g][:,i] = rankdata(self._D[g][:,i])
# reshape M, so we don't have to do it repeatedly
self._M[g] = self._D[g].copy() #dep_data[ind].reshape((dep_data[ind].shape[0],-1))
# normalize M
if use_norm:
self._M[g] -= self._M[g].mean(0)
self._M[g] /= np.sqrt((self._M[g]**2).sum(0))
# determine A from the model.matrix
rdf = DataFrame({k:(FactorVector(self._O[g][k])
if k in factors else self._O[g][k])
for k in self._O[g].dtype.names})
# model spec as data frame
ms = r['data.frame'](r_model_matrix(Formula(fe_formula), data=rdf))
cols = list(r['names'](ms))
if svd_terms is None:
self._svd_terms = [c for c in cols if not 'Intercept' in c]
else:
self._svd_terms = svd_terms
self._A[g] = np.concatenate([np.array(ms.rx(c))
for c in self._svd_terms]).T
#for c in cols if not 'Intercept' in c]).T
if use_ranks:
for i in xrange(self._A[g].shape[1]):
self._A[g][:,i] = rankdata(self._A[g][:,i])
# normalize A
if True: #use_norm:
self._A[g] -= self._A[g].mean(0)
self._A[g] /= np.sqrt((self._A[g]**2).sum(0))
# memmap if desired
if self._memmap:
self._M[g] = _memmap_array(self._M[g], memmap_dir)
self._D[g] = _memmap_array(self._D[g], memmap_dir)
# concat the Os together and make an LMER instance
#O = np.concatenate(O)
#self._O = np.vstack(O)
#self._O = np.array(O)
self._O = O
if lmer_opts is None:
lmer_opts = {}
self._lmer_opts = lmer_opts
self._factors = factors
#self._lmer = LMER(self._formula_str, O, factors=factors, **lmer_opts)
# prepare for the perms and boots
self._perms = []
self._boots = []
self._tp = []
self._tb = []
if verbose>0:
sys.stdout.write('Done (%.2g sec)\n'%(time.time()-start_time))
sys.stdout.write('Processing actual data...')
sys.stdout.flush()
start_time = time.time()
global _global_meld
_global_meld[id(self)] = self
# run for actual data (returns both perm and boot vals)
self._R = None
self._ss = None
self._mer = None
self._mer_null = None
tp,tb,R,feat_mask,ss,mer,mer_null = _eval_model(id(self),None, None)
self._R = R
self._tp.append(tp)
self._tb.append(tb)
self._feat_mask = feat_mask
self._ss = ss
self._mer = mer
self._mer_null = mer_null
if verbose>0:
sys.stdout.write('Done (%.2g sec)\n'%(time.time()-start_time))
sys.stdout.flush()
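The constructor brackets each phase with 'Initializing...' and 'Done (%.2g sec)'. A minimal sketch of that phase-timing pattern, assuming Python 3; do_work() is a hypothetical placeholder:

import sys
import time

def do_work():  # hypothetical placeholder for model setup
    time.sleep(0.2)

verbose = 1
if verbose > 0:
    sys.stdout.write('Initializing...')
    sys.stdout.flush()  # no newline yet, so force the text out
start_time = time.time()
do_work()
if verbose > 0:
    sys.stdout.write('Done (%.2g sec)\n' % (time.time() - start_time))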
2
Example 28
Project: fixofx Source File: webunittest.py
def fetch(self, url, postdata=None, server=None, port=None, protocol=None,
ok_codes=None):
'''Run a single test request to the indicated url. Use the POST data
if supplied.
Raises failureException if the returned data contains any of the
strings indicated to be Error Content.
Returns an HTTPResponse object wrapping the response from the server.
'''
# see if the url is fully-qualified (not just a path)
t_protocol, t_server, t_url, x, t_args, x = urlparse.urlparse(url)
if t_server:
protocol = t_protocol
if ':' in t_server:
server, port = t_server.split(':')
else:
server = t_server
if protocol == 'http':
port = '80'
else:
port = '443'
url = t_url
if t_args:
url = url + '?' + t_args
# ignore the machine name if the URL is for localhost
if t_server == 'localhost':
server = None
elif not server:
# no server was specified with this fetch, or in the URL, so
# see if there's a base URL to use.
base = self.get_base_url()
if base:
t_protocol, t_server, t_url, x, x, x = urlparse.urlparse(base)
if t_protocol:
protocol = t_protocol
if t_server:
server = t_server
if t_url:
url = urlparse.urljoin(t_url, url)
# TODO: allow override of the server and port from the URL!
if server is None: server = self.server
if port is None: port = self.port
if protocol is None: protocol = self.protocol
if ok_codes is None: ok_codes = self.expect_codes
if protocol == 'http':
handler = self.scheme_handlers.get('http')
h = handler(server, int(port))
if int(port) == 80:
host_header = server
else:
host_header = '%s:%s'%(server, port)
elif protocol == 'https':
#if httpslib is None:
#raise ValueError, "Can't fetch HTTPS: M2Crypto not installed"
handler = self.scheme_handlers.get('https')
h = handler(server, int(port))
if int(port) == 443:
host_header = server
else:
host_header = '%s:%s'%(server, port)
else:
raise ValueError, protocol
params = None
if postdata:
for field,value in postdata.items():
if type(value) == type({}):
postdata[field] = []
for k,selected in value.items():
if selected: postdata[field].append(k)
# Do a post with the data file
params = mimeEncode(postdata)
h.putrequest('POST', url)
h.putheader('Content-type', 'multipart/form-data; boundary=%s'%
boundary)
h.putheader('Content-length', str(len(params)))
else:
# Normal GET
h.putrequest('GET', url)
# Other Full Request headers
if self.authinfo:
h.putheader('Authorization', "Basic %s"%self.authinfo)
h.putheader('Host', host_header)
# Send cookies
# - check the domain, max-age (seconds), path and secure
# (http://www.ietf.org/rfc/rfc2109.txt)
cookies_used = []
cookie_list = []
for domain, cookies in self.cookies.items():
# check cookie domain
if not server.endswith(domain):
continue
for path, cookies in cookies.items():
# check that the path matches
urlpath = urlparse.urlparse(url)[2]
if not urlpath.startswith(path) and not (path == '/' and
urlpath == ''):
continue
for sendcookie in cookies.values():
# and that the cookie is or isn't secure
if sendcookie['secure'] and protocol != 'https':
continue
# TODO: check max-age
cookie_list.append("%s=%s;"%(sendcookie.key,
sendcookie.coded_value))
cookies_used.append(sendcookie.key)
if cookie_list:
h.putheader('Cookie', ' '.join(cookie_list))
# check that we sent the cookies we expected to
if self.expect_cookies is not None:
assert cookies_used == self.expect_cookies, \
"Didn't use all cookies (%s expected, %s used)"%(
self.expect_cookies, cookies_used)
# finish the headers
h.endheaders()
if params is not None:
h.send(params)
# handle the reply
errcode, errmsg, headers = h.getreply()
# get the body and save it
f = h.getfile()
g = cStringIO.StringIO()
d = f.read()
while d:
g.write(d)
d = f.read()
response = HTTPResponse(self.cookies, protocol, server, port, url,
errcode, errmsg, headers, g.getvalue(), self.error_content)
f.close()
if errcode not in ok_codes:
if VERBOSE:
sys.stdout.write('e')
sys.stdout.flush()
raise HTTPError(response)
# decode the cookies
if self.accept_cookies:
try:
# decode the cookies and update the cookies store
cookie.decodeCookies(url, server, headers, self.cookies)
except:
if VERBOSE:
sys.stdout.write('c')
sys.stdout.flush()
raise
# Check errors
if self.error_content:
data = response.body
for content in self.error_content:
if data.find(content) != -1:
msg = "Matched error: %s"%content
if hasattr(self, 'results') and self.results:
self.writeError(url, msg)
self.log('Matched error'+`(url, content)`, data)
if VERBOSE:
sys.stdout.write('c')
sys.stdout.flush()
raise self.failureException, msg
if VERBOSE:
sys.stdout.write('_')
sys.stdout.flush()
return response
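Here sys.stdout.write emits one status character per request ('_', 'e' or 'c'). A minimal sketch of that marker pattern, assuming Python 3; fetch_one() is a hypothetical stand-in for a single fetch:

import random
import sys

def fetch_one():  # hypothetical stand-in for a single HTTP fetch
    return random.choice('____ec')

for _ in range(40):
    sys.stdout.write(fetch_one())  # '_' ok, 'e' HTTP error, 'c' content/cookie error
    sys.stdout.flush()
sys.stdout.write('\n')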
2
Example 29
Project: Udacity-SDC-Radar-Driver-Micro-Challenge Source File: view_rosbag_radar.py
Function: parse_message
def parseMessage(self, msgId, rawmsg, dlc, flg, time):
msgToFunc = {
1248: self.status_one,
1249: self.status_two,
1250: self.status_three,
1251: self.status_four,
1280: self.track_msg,
1281: self.track_msg,
1282: self.track_msg,
1283: self.track_msg,
1284: self.track_msg,
1285: self.track_msg,
1286: self.track_msg,
1287: self.track_msg,
1288: self.track_msg,
1289: self.track_msg,
1290: self.track_msg,
1291: self.track_msg,
1292: self.track_msg,
1293: self.track_msg,
1294: self.track_msg,
1295: self.track_msg,
1296: self.track_msg,
1297: self.track_msg,
1298: self.track_msg,
1299: self.track_msg,
1300: self.track_msg,
1301: self.track_msg,
1302: self.track_msg,
1303: self.track_msg,
1304: self.track_msg,
1305: self.track_msg,
1306: self.track_msg,
1307: self.track_msg,
1308: self.track_msg,
1309: self.track_msg,
1310: self.track_msg,
1311: self.track_msg,
1312: self.track_msg,
1313: self.track_msg,
1314: self.track_msg,
1315: self.track_msg,
1316: self.track_msg,
1317: self.track_msg,
1318: self.track_msg,
1319: self.track_msg,
1320: self.track_msg,
1321: self.track_msg,
1322: self.track_msg,
1323: self.track_msg,
1324: self.track_msg,
1325: self.track_msg,
1326: self.track_msg,
1327: self.track_msg,
1328: self.track_msg,
1329: self.track_msg,
1330: self.track_msg,
1331: self.track_msg,
1332: self.track_msg,
1333: self.track_msg,
1334: self.track_msg,
1335: self.track_msg,
1336: self.track_msg,
1337: self.track_msg,
1338: self.track_msg,
1339: self.track_msg,
1340: self.track_msg,
1341: self.track_msg,
1342: self.track_msg,
1343: self.track_msg,
1344: self.track_status_msg,
1488: self.validation_msg_one,
1489: self.validation_msg_two,
1508: self.additional_status_one,
1509: self.additional_status_two,
1510: self.additional_status_three,
1511: self.additional_status_four,
1512: self.additional_status_five,
}
msg = []
if self.debug == True:
sys.stdout.write("In radar_data_parser and this is a message\n")
sys.stdout.write("msgId: %9d time: %9d flg: 0x%02x dlc: %d " % (msgId, time, flg, dlc))
for i in xrange(dlc):
msg[:0] = [ int(struct.unpack('B', rawmsg[i])[0]) ]
if self.debug == True:
sys.stdout.write(" 0x%0.2x " % (msg[i]))
if self.debug == True:
sys.stdout.write("\n")
if msgId in msgToFunc:
# This message is valid, so we need to parse it
if msgId >= 1280 and msgId <= 1343:
msgToFunc[msgId](msgId, msg)
else:
if self.debug == True:
sys.stdout.write("In radar_data_parser and this is msgId %d\n" % (msgId))
if (msgId == 1344):
msgToFunc[msgId](self.msg_counter, msg)
self.msg_counter += 1
elif (msgId > 1344 and self.msg_counter > 0):
msgToFunc[msgId](msg)
self.msg_counter = 0
else:
msgToFunc[msgId](msg)
if (msgId == 1512):
if self.first == True:
print json.dumps(self.data)
self.first = False
else:
print ",", json.dumps(self.data)
self.data = {} # Start with a fresh object
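The parser builds its debug dump from several writes: a header, one hex field per byte, then the newline. A minimal sketch, assuming Python 3 and a bytes payload in place of the raw CAN message:

import sys

def dump_message(msg_id, payload, debug=True):
    if debug:
        sys.stdout.write("msgId: %9d dlc: %d " % (msg_id, len(payload)))
        for b in payload:  # iterating bytes yields ints in Python 3
            sys.stdout.write(" 0x%0.2x " % b)
        sys.stdout.write("\n")

dump_message(1280, b"\x01\x02\xff")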
2
Example 30
Project: iktomi Source File: base.py
def manage(commands, argv=None, delim=':'):
'''
Parses argv and runs the necessary command. Intended to be used in a manage.py file.
Accepts a dict with digest names as keys and instances of
:class:`Cli<iktomi.management.commands.Cli>`
objects as values.
The format of command is the following::
./manage.py digest_name:command_name[ arg1[ arg2[...]]][ --key1=kwarg1[...]]
where command_name is a part of digest instance method name, args and kwargs
are passed to the method. For details, see
:class:`Cli<iktomi.management.commands.Cli>` docs.
'''
commands = {(k.decode('utf-8') if isinstance(k, six.binary_type) else k): v
for k, v in commands.items()}
# Default django autocompletion script is registered to manage.py
# We use the same name for this script and it seems to be ok
# to implement the same interface
def perform_auto_complete(commands):
from .lazy import LazyCli
cwords = os.environ['COMP_WORDS'].split()[1:]
cword = int(os.environ['COMP_CWORD'])
try:
curr = cwords[cword - 1]
except IndexError:
curr = ''
suggest = []
if len(cwords) > 1 and cwords[0] in commands.keys():
value = commands[cwords[0]]
if isinstance(value, LazyCli):
value = value.get_digest()
for cmd_name, _ in value.get_funcs():
cmd_name = cmd_name[8:]
suggest.append(cmd_name)
if curr == ":":
curr = ''
else:
suggest += list(commands.keys()) + [x+":" for x in commands.keys()]
suggest.sort()
output = u" ".join(filter(lambda x: x.startswith(curr), suggest))
sys.stdout.write(output)
auto_complete = 'IKTOMI_AUTO_COMPLETE' in os.environ or \
'DJANGO_AUTO_COMPLETE' in os.environ
if auto_complete:
perform_auto_complete(commands)
sys.exit(0)
argv = sys.argv if argv is None else argv
if len(argv) > 1:
cmd_name = argv[1]
raw_args = argv[2:]
args, kwargs = [], {}
# parsing params
for item in raw_args:
if item.startswith('--'):
splited = item[2:].split('=', 1)
if len(splited) == 2:
k,v = splited
elif len(splited) == 1:
k,v = splited[0], True
kwargs[k] = v
else:
args.append(item)
# trying to get command instance
if delim in cmd_name:
digest_name, command = cmd_name.split(delim)
else:
digest_name = cmd_name
command = None
try:
digest = commands[digest_name]
except KeyError:
_command_list(commands)
sys.exit('ERROR: Command "{}" not found'.format(digest_name))
try:
if command is None:
if isinstance(digest, Cli):
help_ = digest.description(argv[0], digest_name)
sys.stdout.write(help_)
sys.exit('ERROR: "{}" command digest requires command name'\
.format(digest_name))
digest(*args, **kwargs)
else:
digest(command, *args, **kwargs)
except CommandNotFound:
help_ = digest.description(argv[0], digest_name)
sys.stdout.write(help_)
sys.exit('ERROR: Command "{}:{}" not found'.format(digest_name, command))
else:
_command_list(commands)
sys.exit('Please provide any command')
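manage() writes help text to stdout and then calls sys.exit() with a message, which Python prints to stderr before exiting with status 1. A minimal sketch of that pattern, assuming Python 3; the command table is hypothetical:

import sys

def manage(argv, commands):
    if len(argv) < 2 or argv[1] not in commands:
        sys.stdout.write("usage: %s <%s>\n" % (argv[0], "|".join(sorted(commands))))
        sys.exit('ERROR: Command not found')  # string goes to stderr, exit code 1
    commands[argv[1]]()

manage(["manage.py", "hello"], {"hello": lambda: sys.stdout.write("hi\n")})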
2
Example 31
Project: MIDS Source File: Movies1MDataset.py
def convert_csv(consts):
print("converting csv...")
timezones_cvs = pandas.read_csv(
consts.timezone_path
,dtype = {
'zip':numpy.str
,'city':numpy.str
,'state':numpy.str
,'latitude':numpy.float32
,'longitude':numpy.float32
,'timezone':numpy.int32
,'dst':numpy.int32
}
,index_col = False
)
print("timezone data was loaded")
movies_cvs = pandas.read_csv(
consts.movies_path
,sep=";"
,header=None
,quotechar='"'
,encoding="cp1251"
,names=("MovieID","Name","Genders")
,dtype = {
'MovieID':numpy.int32
,'Name':numpy.str
,'Genders':numpy.str
}
,index_col = False
)
print("movies data was loaded")
users_cvs = pandas.read_csv(
consts.users_path
,sep=";"
,header=None
,quotechar='"'
,encoding="cp1251"
,names=("UserID","Gender","Age","Occupation","ZipCode")
,dtype = {
'UserID':numpy.int32
,'Gender':numpy.str
,'Age':numpy.int32
,'Occupation':numpy.int32
,"ZipCode":numpy.str
}
,index_col = False
)
print("users data was loaded")
ratings_cvs = pandas.read_csv(
consts.ratings_path
,sep=";"
,header=None
,quotechar='"'
,encoding="cp1251"
,names=("UserID","MovieID","Rating","Timestamp")
,dtype = {
'UserID':numpy.int32
,'MovieID':numpy.int32
,'Rating':numpy.float32
,'Timestamp':numpy.int32
}
,index_col = False
)
print("ratings data was loaded")
lt = time.time()
prog = re.compile(pattern = "\((\d+)\)$")
movies_cvs['year'] = int(consts.min_year)
for i in numpy.arange(movies_cvs.shape[0]-1):
name = str(movies_cvs.at[i,"Name"])
m = prog.search(name)
if m:
movies_cvs.at[i,'year'] = int(m.group(1))
pass
t1 = time.time()
if t1>lt+1:
p = float(i)/float(movies_cvs.shape[0])*100.0
sys.stdout.write("\t\t\t\t\t\t\t\t\t\r")
sys.stdout.write("movies csv data process %f %%\r" % (p,))
lt = lt+1
pass
print("movies cvs data was prepared")
users_cvs['latitude'] = float(0)
users_cvs['longitude'] = float(0)
users_cvs['timezone'] = int(0)
users_cvs['dts'] = int(0)
for i in numpy.arange(users_cvs.shape[0]-1):
zipcode = users_cvs.loc[i,'ZipCode']
zc = timezones_cvs[timezones_cvs.zip.isin([zipcode])]
if len(zc)==1:
users_cvs.at[i,'timezone'] = int(zc['timezone'])
users_cvs.at[i,'latitude'] = float(zc['latitude'])
users_cvs.at[i,'longitude'] = float(zc['longitude'])
users_cvs.at[i,'dts'] = int(zc['dst'])
pass
t1 = time.time()
if t1>lt+1:
p = float(i)/float(users_cvs.shape[0])*100.0
sys.stdout.write("\t\t\t\t\t\t\t\t\t\r")
sys.stdout.write("users csv data process %f %%\r" % (p,))
lt = lt+1
pass
print("users cvs data was prepared")
ratings_cvs["wday"] = int(0)
ratings_cvs["yday"] = int(0)
ratings_cvs["year"] = int(consts.min_year)
for i in numpy.arange(ratings_cvs.shape[0]-1):
user_id = int(ratings_cvs.at[i,"UserID"])
t0 = ratings_cvs.at[i,"Timestamp"]
ui = users_cvs[users_cvs.UserID.isin([user_id])]
if len(ui)==1:
timezone = int(ui["timezone"]) - 2
tt = datetime.datetime.fromtimestamp(t0,datetime.timezone(datetime.timedelta(hours=timezone))).timetuple()
ratings_cvs.at[i,"wday"] = tt.tm_wday
ratings_cvs.at[i,"yday"] = tt.tm_yday
ratings_cvs.at[i,"year"] = tt.tm_year
pass
t1 = time.time()
if t1>lt+1:
p = float(i)/float(ratings_cvs.shape[0])*100.0
sys.stdout.write("\t\t\t\t\t\t\t\t\t\r")
sys.stdout.write("ratings csv data process %f %%\r" % (p,))
lt = lt+1
pass
print("ratings cvs data was prepared")
users_cvs.to_csv(
path_or_buf = consts.users_csv_file_name,
sep = ";"
,header = False
,index = False
,encoding = "utf-8"
,quoting = csv.QUOTE_ALL
,quotechar = '"'
,line_terminator = "\n"
,doublequote = True
)
movies_cvs.to_csv(
path_or_buf = consts.movies_csv_file_name,
sep = ";"
,header = False
,index = False
,encoding = "utf-8"
,quoting = csv.QUOTE_ALL
,quotechar = '"'
,line_terminator = "\n"
,doublequote = True
)
ratings_cvs.to_csv(
path_or_buf = consts.ratings_csv_file_name
,sep = ";"
,header = False
,index = False
,encoding = "utf-8"
,quoting = csv.QUOTE_ALL
,quotechar = '"'
,line_terminator = "\n"
,doublequote = True
)
print("converting done")
return
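Each conversion loop throttles its progress output to roughly once per second and redraws the line with '\r'. A minimal sketch of that throttled-progress pattern, assuming Python 3:

import sys
import time

total = 300
last = time.time()
for i in range(total):
    time.sleep(0.01)  # stand-in for per-row work
    now = time.time()
    if now > last + 1:  # update at most once per second
        p = float(i) / float(total) * 100.0
        sys.stdout.write("\t\t\t\t\t\t\t\t\t\r")  # mirror the original's tab write before returning to column 0
        sys.stdout.write("processing %f %%\r" % p)
        sys.stdout.flush()
        last += 1
sys.stdout.write("\n")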
2
Example 32
Project: rPGA Source File: discover.py
def discover_junctions_bychrom(self,chrom,hetsnps,snpids,editpositions):
bam1 = pysam.Samfile(self.hap1Bam)
bam2 = pysam.Samfile(self.hap2Bam)
snpreads1,snpreads2 = defaultdict(list),defaultdict(list)
snpreads = defaultdict(lambda:defaultdict(list))
reads1,reads2 = defaultdict(list),defaultdict(list)
spec1,spec2 = list(),list()
snps1,snps2 = defaultdict(list),defaultdict(list)
refalt1,refalt2 = defaultdict(list),defaultdict(list)
edit1,edit2= defaultdict(list),defaultdict(list)
hap1only,hap2only = list(),list()
sys.stdout.write( "Reading in hap1 bam file: "+ self.hap1Bam + "\n")
for r in bam1.fetch('chr'+str(chrom)):
tags = self.get_tags(r)
if int(tags['NH'])>1: ## read is multimapped, deal with separately
continue
spec,editpos,snppos,refalt = self.haplotype_specific_read(r,hetsnps,0, editpositions)
snpreads1[spec].append(r.qname)
reads1[r.qname].append(r)
edit1[r.qname] += editpos
snps1[r.qname] += snppos
refalt1[r.qname] += refalt
sys.stdout.write( "Reading in hap2 bam file: "+ self.hap2Bam+"\n")
for r in bam2.fetch('chr'+str(chrom)):
tags = self.get_tags(r)
if int(tags['NH'])>1: ## read is multimapped
continue
spec,editpos,snppos,refalt = self.haplotype_specific_read(r,hetsnps,1, editpositions)
snpreads2[spec].append(r.qname)
reads2[r.qname].append(r)
edit2[r.qname]+=editpos
snps2[r.qname]+=snppos
refalt2[r.qname] += refalt
sys.stdout.write("Assign haplotype specific reads\n")
conflicting = list(set([ r for r in snpreads1[0] if r in snpreads2[0]] + snpreads1[2] + snpreads2[2]))
multimapped = list(set(snpreads1[3] + snpreads2[3]))
rnaeditreads = list(set(snpreads1[4] + snpreads2[4]))
snpreads1[0] = list(set([ r for r in snpreads1[0] if ((r not in conflicting) and (r not in snpreads2[3]))]))
snpreads2[0] = list(set([ r for r in snpreads2[0] if ((r not in conflicting) and (r not in snpreads1[3]))]))
for qname in snpreads1[0]:
if qname in reads2:
if all([True if reads1[qname][i].pos==reads2[qname][i].pos else False for i in range(len(reads1[qname]))]): # reads have same starting position in hap1 and hap2
if self.num_mismatches(reads1[qname][0]) < self.num_mismatches(reads2[qname][0]):
spec1.append(qname)
else:
hap1only.append(qname)
for qname in snpreads2[0]:
if qname in reads1:
if all([True if reads1[qname][i].pos==reads2[qname][i].pos else False for i in range(len(reads2[qname]))]): # reads have same starting position in hap1 and hap2
if self.num_mismatches(reads2[qname][0]) < self.num_mismatches(reads1[qname][0]):
spec2.append(qname)
else:
hap2only.append(qname)
conflictCount = len(set(conflicting))
hap1Count = len(set(spec1))
hap2Count = len(set(spec2))
geneGroup,gtf,geneInfo = self.read_in_gtf()
bamr = pysam.Samfile(self.refBam,"rb")
junctions = defaultdict(lambda: defaultdict(set))
for qname in reads1:
if ((qname not in spec2) and (qname not in conflicting)):
for r in reads1[qname]:
juncs = self.get_junction_coordinates(r)
for j in juncs:
start,end = j
junctions['1'][start,end].add(r.pos)
for qname in reads2:
if ((qname not in spec1) and (qname not in conflicting)):
for r in reads2[qname]:
juncs = self.get_junction_coordinates(r)
for j in juncs:
start,end = j
junctions['2'][start,end].add(r.pos)
for r in bamr.fetch('chr'+str(chrom)):
if (r.qname not in conflicting):
juncs = self.get_junction_coordinates(r)
for j in juncs:
start,end = j
junctions['R'][start,end].add(r.pos)
nR = {}
for i in ['1','2','R']:
nR[i] = {j:len(junctions[i][j]) for j in junctions[i]}
spec = {}
spec['1'] = [j for j in nR['1'] if ((j not in nR['2']) and (j not in nR['R']) and nR['1'][j]>1 and len(self.get_splicesite_snp(j[0],j[1],snpids))>0)]
spec['2'] = [j for j in nR['2'] if ((j not in nR['1']) and (j not in nR['R']) and nR['2'][j]>1 and len(self.get_splicesite_snp(j[0],j[1],snpids))>0)]
spec['12'] = [j for j in nR['1'] if ((j in nR['2']) and (j not in nR['R']) and nR['1'][j]>1 and nR['2'][j]>1 and len(self.get_splicesite_snp(j[0],j[1],snpids))>0)]
spec['R'] = [j for j in nR['R'] if ((j not in nR['1']) and (j not in nR['2'])and nR['R'][j]>1 and len(self.get_splicesite_snp(j[0],j[1],snpids))>0)]
bed = defaultdict(list)
for h in ['1','2','R']:
counter = 0
for start,end in spec[h]:
counter += 1
num_overlapping_reads = sum([nR[h][j] for j in nR[h]
if ( j[0]<end and j[1]>start and
self.characterize_junction(str(chrom),j[0],j[1],geneGroup,gtf,geneInfo)[0]=="R")])
if num_overlapping_reads > 0:
freq = float(nR[h][start,end])/float(num_overlapping_reads)
else:
freq = 1
n_or_r,strand = self.characterize_junction(str(chrom),start,end,geneGroup,gtf,geneInfo)
snp = ','.join(self.get_splicesite_snp(start,end,snpids))
bed[h].append('chr'+str(chrom)+' '+str(start) + ' ' + str(end) + ' J_'+str(counter)+'_'+n_or_r+'_'+snp+ ' ' + str(strand) + ' ' +str(freq))
counter = 0
for start,end in spec['12']:
counter += 1
num_overlapping_reads1 = sum([nR['1'][j] for j in nR['1'] if
( j[0]<end and j[1]>start and
self.characterize_junction(str(chrom),j[0],j[1],geneGroup,gtf,geneInfo)[0]=="R")])
num_overlapping_reads2 = sum([nR['2'][j] for j in nR['2'] if
( j[0]<end and j[1]>start and
self.characterize_junction(str(chrom),j[0],j[1],geneGroup,gtf,geneInfo)[0]=="R")])
if num_overlapping_reads1 == 0:
freq1=1.0
else:
freq1 = float(nR['1'][start,end])/float(num_overlapping_reads1)
if num_overlapping_reads2==0:
freq2 = 1.0
else:
freq2 = float(nR['2'][start,end])/float(num_overlapping_reads2)
n_or_r,strand = self.characterize_junction(str(chrom),start,end,geneGroup,gtf,geneInfo)
snp =','.join(self.get_splicesite_snp(start,end,snpids))
bed['12'].append('chr'+str(chrom)+' '+str(start) + ' ' + str(end) + ' J_'+str(counter)+'_'+n_or_r+'_'+snp+ ' ' + str(strand)+' '+str((freq1+freq2)/2))
return '\n'.join(bed['1']),'\n'.join(bed['2']),'\n'.join(bed['12']), '\n'.join(bed['R'])
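The method announces each phase with a plain banner line written directly to stdout. A minimal sketch, assuming Python 3; the bam paths are hypothetical:

import sys

def read_bam(path):  # hypothetical stand-in for the pysam parsing
    sys.stdout.write("Reading in bam file: " + path + "\n")

for path in ("hap1.bam", "hap2.bam"):
    read_bam(path)
sys.stdout.write("Assign haplotype specific reads\n")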
2
Example 33
def run(self, config, options, args, help=None):
def fmt_details(pkg_config, req_version, installed_version):
fmt_list = []
if pkg_config:
fmt_list.append(pkg_config)
if req_version:
fmt_list.append(_('required=%s') % req_version)
if installed_version and installed_version != 'unknown':
fmt_list.append(_('installed=%s') % installed_version)
# Translators: This is used to separate items of package metadata
fmt_str = _(', ').join(fmt_list)
if fmt_str:
return _('(%s)') % fmt_str
else:
return ''
config.set_from_cmdline_options(options)
module_set = jhbuild.moduleset.load(config)
modules = args or config.modules
module_list = module_set.get_full_module_list(modules, config.skip)
if options.dump_all:
for module in module_list:
if (isinstance(module, SystemModule) or isinstance(module.branch, TarballBranch) and
module.pkg_config is not None):
if module.pkg_config is not None:
print 'pkgconfig:{0}'.format(module.pkg_config[:-3]) # remove .pc
if module.systemdependencies is not None:
for dep_type, value, altdeps in module.systemdependencies:
sys.stdout.write('{0}:{1}'.format(dep_type, value))
for dep_type, value, empty in altdeps:
sys.stdout.write(',{0}:{1}'.format(dep_type, value))
sys.stdout.write('\n')
return
module_state = module_set.get_module_state(module_list)
have_new_enough = False
have_too_old = False
if options.dump:
for module, (req_version, installed_version, new_enough, systemmodule) in module_state.iteritems():
if new_enough:
continue
if installed_version is not None and systemmodule:
# it's already installed but it's too old and we
# don't know how to build a new one for ourselves
have_too_old = True
# request installation in two cases:
# 1) we don't know how to build it
# 2) we don't want to build it ourselves
#
# partial_build is on by default so this check will only
# fail if someone explicitly turned it off
if systemmodule or config.partial_build:
assert (module.pkg_config or module.systemdependencies)
if module.pkg_config is not None:
print 'pkgconfig:{0}'.format(module.pkg_config[:-3]) # remove .pc
if module.systemdependencies is not None:
for dep_type, value, altdeps in module.systemdependencies:
sys.stdout.write('{0}:{1}'.format(dep_type, value))
for dep_type, value, empty in altdeps:
sys.stdout.write(',{0}:{1}'.format(dep_type, value))
sys.stdout.write('\n')
if have_too_old:
return 1
return
print _('System installed packages which are new enough:')
for module,(req_version, installed_version, new_enough, systemmodule) in module_state.iteritems():
if (installed_version is not None) and new_enough and (config.partial_build or systemmodule):
have_new_enough = True
print (' %s %s' % (module.name,
fmt_details(module.pkg_config,
req_version,
installed_version)))
if not have_new_enough:
print _(' (none)')
print _('Required packages:')
print _(' System installed packages which are too old:')
for module, (req_version, installed_version, new_enough, systemmodule) in module_state.iteritems():
if (installed_version is not None) and (not new_enough) and systemmodule:
have_too_old = True
print (' %s %s' % (module.name,
fmt_details(module.pkg_config,
req_version,
installed_version)))
if not have_too_old:
print _(' (none)')
print _(' No matching system package installed:')
uninstalled = []
for module, (req_version, installed_version, new_enough, systemmodule) in module_state.iteritems():
if installed_version is None and (not new_enough) and systemmodule:
print (' %s %s' % (module.name,
fmt_details(module.pkg_config,
req_version,
installed_version)))
if module.pkg_config is not None:
uninstalled.append((module.name, 'pkgconfig', module.pkg_config[:-3])) # remove .pc
elif module.systemdependencies is not None:
for dep_type, value, altdeps in module.systemdependencies:
uninstalled.append((module.name, dep_type, value))
if len(uninstalled) == 0:
print _(' (none)')
have_too_old = False
if config.partial_build:
print _('Optional packages: (JHBuild will build the missing packages)')
print _(' System installed packages which are too old:')
for module, (req_version, installed_version, new_enough, systemmodule) in module_state.iteritems():
if (installed_version is not None) and (not new_enough) and (not systemmodule):
have_too_old = True
print (' %s %s' % (module.name,
fmt_details(module.pkg_config,
req_version,
installed_version)))
if not have_too_old:
print _(' (none)')
print _(' No matching system package installed:')
for module,(req_version, installed_version, new_enough, systemmodule) in module_state.iteritems():
if installed_version is None and (not new_enough) and (not systemmodule):
print (' %s %s' % (module.name,
fmt_details(module.pkg_config,
req_version,
installed_version)))
if module.pkg_config is not None:
uninstalled.append((module.name, 'pkgconfig', module.pkg_config[:-3])) # remove .pc
if len(uninstalled) == 0:
print _(' (none)')
if options.install:
installer = SystemInstall.find_best()
if installer is None:
# FIXME: This should be implemented per Colin's design:
# https://bugzilla.gnome.org/show_bug.cgi?id=682104#c3
if cmds.has_command('apt-get'):
raise FatalError(_("%(cmd)s is required to install "
"packages on this system. Please "
"install %(cmd)s.")
% {'cmd' : 'apt-file'})
raise FatalError(_("Don't know how to install packages on this system"))
if len(uninstalled) == 0:
logging.info(_("No uninstalled system dependencies to install for modules: %r") % (modules, ))
else:
logging.info(_("Installing dependencies on system: %s") % \
' '.join(pkg[0] for pkg in uninstalled))
installer.install(uninstalled)
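The dump mode composes each dependency line from several writes, appending comma-separated alternates before the final newline. A minimal sketch, assuming Python 3; the dependency records are hypothetical:

import sys

# hypothetical dependency records: (dep_type, value, alternatives)
deps = [("c_include", "readline/readline.h", [("pkgconfig", "readline")])]
for dep_type, value, altdeps in deps:
    sys.stdout.write('{0}:{1}'.format(dep_type, value))
    for alt_type, alt_value in altdeps:
        sys.stdout.write(',{0}:{1}'.format(alt_type, alt_value))
    sys.stdout.write('\n')  # end the line only after all alternates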
2
Example 34
Project: pyfilesystem Source File: fscp.py
def do_run(self, options, args):
self.options = options
if len(args) < 2:
self.error("at least two filesystems required\n")
return 1
srcs = args[:-1]
dst = args[-1]
dst_fs, dst_path = self.open_fs(dst, writeable=True, create_dir=True)
if dst_path is not None and dst_fs.isfile(dst_path):
self.error('Destination must be a directory\n')
return 1
if dst_path:
dst_fs = dst_fs.makeopendir(dst_path)
dst_path = None
copy_fs_paths = []
progress = options.progress
if progress:
sys.stdout.write(self.progress_bar(len(srcs), 0, 'scanning...'))
sys.stdout.flush()
self.root_dirs = []
for i, fs_url in enumerate(srcs):
src_fs, src_path = self.open_fs(fs_url)
if src_path is None:
src_path = '/'
if iswildcard(src_path):
for file_path in src_fs.listdir(wildcard=src_path, full=True):
copy_fs_paths.append((self.FILE, src_fs, file_path, file_path))
else:
if src_fs.isdir(src_path):
self.root_dirs.append((src_fs, src_path))
src_sub_fs = src_fs.opendir(src_path)
for dir_path, file_paths in src_sub_fs.walk():
if dir_path not in ('', '/'):
copy_fs_paths.append((self.DIR, src_sub_fs, dir_path, dir_path))
sub_fs = src_sub_fs.opendir(dir_path)
for file_path in file_paths:
copy_fs_paths.append((self.FILE, sub_fs, file_path, pathjoin(dir_path, file_path)))
else:
if src_fs.exists(src_path):
copy_fs_paths.append((self.FILE, src_fs, src_path, src_path))
else:
self.error('%s is not a file or directory\n' % src_path)
return 1
if progress:
sys.stdout.write(self.progress_bar(len(srcs), i + 1, 'scanning...'))
sys.stdout.flush()
if progress:
sys.stdout.write(self.progress_bar(len(copy_fs_paths), 0, self.get_verb()))
sys.stdout.flush()
if self.options.threads > 1:
copy_fs_dirs = [r for r in copy_fs_paths if r[0] == self.DIR]
copy_fs_paths = [r for r in copy_fs_paths if r[0] == self.FILE]
for path_type, fs, path, dest_path in copy_fs_dirs:
dst_fs.makedir(path, allow_recreate=True, recursive=True)
self.lock = threading.RLock()
self.total_files = len(copy_fs_paths)
self.done_files = 0
file_queue = queue.Queue()
threads = [FileOpThread(self.get_action(),
'T%i' % i,
dst_fs,
file_queue,
self.on_done,
self.on_error)
for i in xrange(options.threads)]
for thread in threads:
thread.start()
self.action_errors = []
complete = False
try:
enqueue = file_queue.put
for resource in copy_fs_paths:
enqueue(resource)
while not file_queue.empty():
time.sleep(0)
if self.any_error():
raise SystemExit
# Can't use queue.join here, or KeyboardInterrupt will not be
# caught until the queue is finished
#file_queue.join()
except KeyboardInterrupt:
options.progress = False
self.output("\nCancelling...\n")
except SystemExit:
options.progress = False
finally:
sys.stdout.flush()
for thread in threads:
thread.finish_event.set()
for thread in threads:
thread.join()
complete = True
if not self.any_error():
self.post_actions()
dst_fs.close()
if self.action_errors:
for error in self.action_errors:
self.error(self.wrap_error(unicode(error)) + '\n')
sys.stdout.flush()
else:
if complete and options.progress:
sys.stdout.write(self.progress_bar(self.total_files, self.done_files, ''))
sys.stdout.write('\n')
sys.stdout.flush()
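do_run() repaints a textual progress bar in place during scanning and copying. A minimal sketch of an in-place bar, assuming Python 3; progress_bar() here is a hypothetical helper, not the class's own:

import sys
import time

def progress_bar(total, done, msg):  # hypothetical helper
    width = 30
    filled = int(width * done / total) if total else width
    return '\r[%s%s] %s' % ('#' * filled, '-' * (width - filled), msg)

total = 20
for done in range(total + 1):
    sys.stdout.write(progress_bar(total, done, 'copying...'))
    sys.stdout.flush()
    time.sleep(0.05)
sys.stdout.write('\n')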
2
Example 35
Project: easybuild-easyblocks Source File: quantumespresso.py
def configure_step(self):
"""Custom configuration procedure for Quantum ESPRESSO."""
if self.toolchain.options.get('openmp', False) or self.cfg['hybrid']:
self.cfg.update('configopts', '--enable-openmp')
if not self.toolchain.options.get('usempi', None):
self.cfg.update('configopts', '--disable-parallel')
if not self.cfg['with_scalapack']:
self.cfg.update('configopts', '--without-scalapack')
repls = []
if self.toolchain.comp_family() in [toolchain.INTELCOMP]:
# set preprocessor command (-E to stop after preprocessing, -C to preserve comments)
cpp = "%s -E -C" % os.getenv('CC')
repls.append(('CPP', cpp, False))
env.setvar('CPP', cpp)
# also define $FCCPP, but do *not* include -C (comments should not be preserved when preprocessing Fortran)
env.setvar('FCCPP', "%s -E" % os.getenv('CC'))
super(EB_QuantumESPRESSO, self).configure_step()
# compose list of DFLAGS (flag, value, keep_stuff)
# for guidelines, see include/defs.h.README in sources
dflags = []
comp_fam_dflags = {
toolchain.INTELCOMP: '-D__INTEL',
toolchain.GCC: '-D__GFORTRAN -D__STD_F95',
}
dflags.append(comp_fam_dflags[self.toolchain.comp_family()])
if self.toolchain.options.get('openmp', False):
libfft = os.getenv('LIBFFT_MT')
else:
libfft = os.getenv('LIBFFT')
if libfft:
if "fftw3" in libfft:
dflags.append('-D__FFTW3')
else:
dflags.append('-D__FFTW')
env.setvar('FFTW_LIBS', libfft)
if get_software_root('ACML'):
dflags.append('-D__ACML')
if self.toolchain.options.get('usempi', None):
dflags.append('-D__MPI -D__PARA')
if self.toolchain.options.get('openmp', False) or self.cfg['hybrid']:
dflags.append(" -D__OPENMP")
if self.cfg['with_scalapack']:
dflags.append(" -D__SCALAPACK")
# always include -w to suppress warnings
dflags.append('-w')
repls.append(('DFLAGS', ' '.join(dflags), False))
# complete C/Fortran compiler and LD flags
if self.toolchain.options.get('openmp', False) or self.cfg['hybrid']:
repls.append(('LDFLAGS', self.toolchain.get_flag('openmp'), True))
repls.append(('(?:C|F90|F)FLAGS', self.toolchain.get_flag('openmp'), True))
# obtain library settings
libs = []
for lib in ['BLAS', 'LAPACK', 'FFT', 'SCALAPACK']:
if self.toolchain.options.get('openmp', False):
val = os.getenv('LIB%s_MT' % lib)
else:
val = os.getenv('LIB%s' % lib)
repls.append(('%s_LIBS' % lib, val, False))
libs.append(val)
libs = ' '.join(libs)
repls.append(('BLAS_LIBS_SWITCH', 'external', False))
repls.append(('LAPACK_LIBS_SWITCH', 'external', False))
repls.append(('LD_LIBS', os.getenv('LIBS'), False))
self.log.debug("List of replacements to perform: %s" % repls)
# patch make.sys file
fn = os.path.join(self.cfg['start_dir'], 'make.sys')
try:
for line in fileinput.input(fn, inplace=1, backup='.orig.eb'):
for (k, v, keep) in repls:
# need to use [ \t]* instead of \s*, because vars may be undefined as empty,
# and we don't want to include newlines
if keep:
line = re.sub(r"^(%s\s*=[ \t]*)(.*)$" % k, r"\1\2 %s" % v, line)
else:
line = re.sub(r"^(%s\s*=[ \t]*).*$" % k, r"\1%s" % v, line)
# fix preprocessing directives for .f90 files in make.sys if required
if self.toolchain.comp_family() in [toolchain.GCC]:
line = re.sub(r"\$\(MPIF90\) \$\(F90FLAGS\) -c \$<",
"$(CPP) -C $(CPPFLAGS) $< -o $*.F90\n" +
"\t$(MPIF90) $(F90FLAGS) -c $*.F90 -o $*.o",
line)
sys.stdout.write(line)
except IOError, err:
raise EasyBuildError("Failed to patch %s: %s", fn, err)
self.log.debug("Contents of patched %s: %s" % (fn, open(fn, "r").read()))
# patch default make.sys for wannier
if LooseVersion(self.version) >= LooseVersion("5"):
fn = os.path.join(self.cfg['start_dir'], 'install', 'make_wannier90.sys')
else:
fn = os.path.join(self.cfg['start_dir'], 'plugins', 'install', 'make_wannier90.sys')
try:
for line in fileinput.input(fn, inplace=1, backup='.orig.eb'):
line = re.sub(r"^(LIBS\s*=\s*).*", r"\1%s" % libs, line)
sys.stdout.write(line)
except IOError, err:
raise EasyBuildError("Failed to patch %s: %s", fn, err)
self.log.debug("Contents of patched %s: %s" % (fn, open(fn, "r").read()))
# patch Makefile of want plugin
wantprefix = 'want-'
wantdirs = [d for d in os.listdir(self.builddir) if d.startswith(wantprefix)]
if len(wantdirs) > 1:
raise EasyBuildError("Found more than one directory with %s prefix, help!", wantprefix)
if len(wantdirs) != 0:
wantdir = os.path.join(self.builddir, wantdirs[0])
make_sys_in_path = None
cand_paths = [os.path.join('conf', 'make.sys.in'), os.path.join('config', 'make.sys.in')]
for path in cand_paths:
full_path = os.path.join(wantdir, path)
if os.path.exists(full_path):
make_sys_in_path = full_path
break
if make_sys_in_path is None:
raise EasyBuildError("Failed to find make.sys.in in want directory %s, paths considered: %s",
wantdir, ', '.join(cand_paths))
try:
for line in fileinput.input(make_sys_in_path, inplace=1, backup='.orig.eb'):
# fix preprocessing directives for .f90 files in make.sys if required
if self.toolchain.comp_family() in [toolchain.GCC]:
line = re.sub("@f90rule@",
"$(CPP) -C $(CPPFLAGS) $< -o $*.F90\n" +
"\t$(MPIF90) $(F90FLAGS) -c $*.F90 -o $*.o",
line)
sys.stdout.write(line)
except IOError, err:
raise EasyBuildError("Failed to patch %s: %s", fn, err)
# move non-espresso directories to where they're expected and create symlinks
try:
dirnames = [d for d in os.listdir(self.builddir) if not d.startswith('espresso')]
targetdir = os.path.join(self.builddir, "espresso-%s" % self.version)
for dirname in dirnames:
shutil.move(os.path.join(self.builddir, dirname), os.path.join(targetdir, dirname))
self.log.info("Moved %s into %s" % (dirname, targetdir))
dirname_head = dirname.split('-')[0]
linkname = None
if dirname_head == 'sax':
linkname = 'SaX'
if dirname_head == 'wannier90':
linkname = 'W90'
elif dirname_head in ['gipaw', 'plumed', 'want', 'yambo']:
linkname = dirname_head.upper()
if linkname:
os.symlink(os.path.join(targetdir, dirname), os.path.join(targetdir, linkname))
except OSError, err:
raise EasyBuildError("Failed to move non-espresso directories: %s", err)
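Note the idiom in the patching loops: with fileinput's inplace mode, stdout is redirected into the file being edited, so sys.stdout.write(line) rewrites the file rather than printing to the terminal. A minimal sketch, assuming Python 3; the sketch creates a throwaway make.sys so it runs end to end:

import fileinput
import re
import sys

# create a throwaway file so the sketch runs end to end
with open("make.sys", "w") as f:
    f.write("CFLAGS = -g\n")

for line in fileinput.input("make.sys", inplace=True, backup=".orig"):
    line = re.sub(r"^(CFLAGS\s*=).*$", r"\1 -O2", line)
    sys.stdout.write(line)  # stdout is redirected into make.sys here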
2
Example 36
Project: LasagneNLP Source File: bi_lstm_cnn.py
def main():
parser = argparse.ArgumentParser(description='Tuning with bi-directional LSTM-CNN')
parser.add_argument('--fine_tune', action='store_true', help='Fine tune the word embeddings')
parser.add_argument('--embedding', choices=['word2vec', 'glove', 'senna'], help='Embedding for words',
required=True)
parser.add_argument('--embedding_dict', default='data/word2vec/GoogleNews-vectors-negative300.bin',
help='path for embedding dict')
parser.add_argument('--batch_size', type=int, default=10, help='Number of sentences in each batch')
parser.add_argument('--num_units', type=int, default=100, help='Number of hidden units in LSTM')
parser.add_argument('--num_filters', type=int, default=20, help='Number of filters in CNN')
parser.add_argument('--learning_rate', type=float, default=0.1, help='Learning rate')
parser.add_argument('--decay_rate', type=float, default=0.1, help='Decay rate of learning rate')
parser.add_argument('--grad_clipping', type=float, default=0, help='Gradient clipping')
parser.add_argument('--gamma', type=float, default=1e-6, help='weight for regularization')
parser.add_argument('--peepholes', action='store_true', help='Peepholes for LSTM')
parser.add_argument('--oov', choices=['random', 'embedding'], help='Embedding for oov word', required=True)
parser.add_argument('--update', choices=['sgd', 'momentum', 'nesterov', 'adadelta'], help='update algorithm', default='sgd')
parser.add_argument('--regular', choices=['none', 'l2'], help='regularization for training', required=True)
parser.add_argument('--dropout', action='store_true', help='Apply dropout layers')
parser.add_argument('--patience', type=int, default=5, help='Patience for early stopping')
parser.add_argument('--output_prediction', action='store_true', help='Output predictions to temp files')
parser.add_argument('--train') # "data/POS-penn/wsj/split1/wsj1.train.original"
parser.add_argument('--dev') # "data/POS-penn/wsj/split1/wsj1.dev.original"
parser.add_argument('--test') # "data/POS-penn/wsj/split1/wsj1.test.original"
args = parser.parse_args()
def construct_input_layer():
if fine_tune:
layer_input = lasagne.layers.InputLayer(shape=(None, max_length), input_var=input_var, name='input')
layer_embedding = lasagne.layers.EmbeddingLayer(layer_input, input_size=alphabet_size,
output_size=embedd_dim,
W=embedd_table, name='embedding')
return layer_embedding
else:
layer_input = lasagne.layers.InputLayer(shape=(None, max_length, embedd_dim), input_var=input_var,
name='input')
return layer_input
def construct_char_input_layer():
layer_char_input = lasagne.layers.InputLayer(shape=(None, max_sent_length, max_char_length),
input_var=char_input_var, name='char-input')
layer_char_input = lasagne.layers.reshape(layer_char_input, (-1, [2]))
layer_char_embedding = lasagne.layers.EmbeddingLayer(layer_char_input, input_size=char_alphabet_size,
output_size=char_embedd_dim, W=char_embedd_table,
name='char_embedding')
layer_char_input = lasagne.layers.DimshuffleLayer(layer_char_embedding, pattern=(0, 2, 1))
return layer_char_input
logger = utils.get_logger("BiLSTM-CNN")
fine_tune = args.fine_tune
oov = args.oov
regular = args.regular
embedding = args.embedding
embedding_path = args.embedding_dict
train_path = args.train
dev_path = args.dev
test_path = args.test
update_algo = args.update
grad_clipping = args.grad_clipping
peepholes = args.peepholes
num_filters = args.num_filters
gamma = args.gamma
output_predict = args.output_prediction
dropout = args.dropout
X_train, Y_train, mask_train, X_dev, Y_dev, mask_dev, X_test, Y_test, mask_test, \
embedd_table, label_alphabet, \
C_train, C_dev, C_test, char_embedd_table = data_processor.load_dataset_sequence_labeling(train_path, dev_path,
test_path, oov=oov,
fine_tune=fine_tune,
embedding=embedding,
embedding_path=embedding_path,
use_character=True)
num_labels = label_alphabet.size() - 1
logger.info("constructing network...")
# create variables
target_var = T.imatrix(name='targets')
mask_var = T.matrix(name='masks', dtype=theano.config.floatX)
if fine_tune:
input_var = T.imatrix(name='inputs')
num_data, max_length = X_train.shape
alphabet_size, embedd_dim = embedd_table.shape
else:
input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
num_data, max_length, embedd_dim = X_train.shape
char_input_var = T.itensor3(name='char-inputs')
num_data_char, max_sent_length, max_char_length = C_train.shape
char_alphabet_size, char_embedd_dim = char_embedd_table.shape
assert (max_length == max_sent_length)
assert (num_data == num_data_char)
# construct input and mask layers
layer_incoming1 = construct_char_input_layer()
layer_incoming2 = construct_input_layer()
layer_mask = lasagne.layers.InputLayer(shape=(None, max_length), input_var=mask_var, name='mask')
# construct bi-rnn-cnn
num_units = args.num_units
bi_lstm_cnn = build_BiLSTM_CNN(layer_incoming1, layer_incoming2, num_units, mask=layer_mask,
grad_clipping=grad_clipping, peepholes=peepholes, num_filters=num_filters,
dropout=dropout)
# reshape bi-rnn-cnn to [batch * max_length, num_units]
bi_lstm_cnn = lasagne.layers.reshape(bi_lstm_cnn, (-1, [2]))
# construct output layer (dense layer with softmax)
layer_output = lasagne.layers.DenseLayer(bi_lstm_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
name='softmax')
# get output of bi-lstm-cnn shape=[batch * max_length, #label]
prediction_train = lasagne.layers.get_output(layer_output)
prediction_eval = lasagne.layers.get_output(layer_output, deterministic=True)
final_prediction = T.argmax(prediction_eval, axis=1)
# flat target_var to vector
target_var_flatten = target_var.flatten()
# flat mask_var to vector
mask_var_flatten = mask_var.flatten()
# compute loss
num_loss = mask_var_flatten.sum(dtype=theano.config.floatX)
# for training, we use mean of loss over number of labels
loss_train = lasagne.objectives.categorical_crossentropy(prediction_train, target_var_flatten)
loss_train = (loss_train * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss
# l2 regularization?
if regular == 'l2':
l2_penalty = lasagne.regularization.regularize_network_params(layer_output, lasagne.regularization.l2)
loss_train = loss_train + gamma * l2_penalty
loss_eval = lasagne.objectives.categorical_crossentropy(prediction_eval, target_var_flatten)
loss_eval = (loss_eval * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss
# compute number of correct labels
corr_train = lasagne.objectives.categorical_accuracy(prediction_train, target_var_flatten)
corr_train = (corr_train * mask_var_flatten).sum(dtype=theano.config.floatX)
corr_eval = lasagne.objectives.categorical_accuracy(prediction_eval, target_var_flatten)
corr_eval = (corr_eval * mask_var_flatten).sum(dtype=theano.config.floatX)
# Create update expressions for training.
# hyper parameters to tune: learning rate, momentum, regularization.
batch_size = args.batch_size
learning_rate = 1.0 if update_algo == 'adadelta' else args.learning_rate
decay_rate = args.decay_rate
momentum = 0.9
params = lasagne.layers.get_all_params(layer_output, trainable=True)
updates = utils.create_updates(loss_train, params, update_algo, learning_rate, momentum=momentum)
# Compile a function performing a training step on a mini-batch
train_fn = theano.function([input_var, target_var, mask_var, char_input_var], [loss_train, corr_train, num_loss],
updates=updates)
# Compile a second function evaluating the loss and accuracy of network
eval_fn = theano.function([input_var, target_var, mask_var, char_input_var],
[loss_eval, corr_eval, num_loss, final_prediction])
# Finally, launch the training loop.
logger.info(
"Start training: %s with regularization: %s(%f), dropout: %s, fine tune: %s (#training data: %d, batch size: %d, clip: %.1f, peepholes: %s)..." \
% (
update_algo, regular, (0.0 if regular == 'none' else gamma), dropout, fine_tune, num_data, batch_size, grad_clipping,
peepholes))
num_batches = num_data / batch_size
num_epochs = 1000
best_loss = 1e+12
best_acc = 0.0
best_epoch_loss = 0
best_epoch_acc = 0
best_loss_test_err = 0.
best_loss_test_corr = 0.
best_acc_test_err = 0.
best_acc_test_corr = 0.
stop_count = 0
lr = learning_rate
patience = args.patience
for epoch in range(1, num_epochs + 1):
print 'Epoch %d (learning rate=%.4f, decay rate=%.4f): ' % (epoch, lr, decay_rate)
train_err = 0.0
train_corr = 0.0
train_total = 0
start_time = time.time()
num_back = 0
train_batches = 0
for batch in utils.iterate_minibatches(X_train, Y_train, masks=mask_train, char_inputs=C_train,
batch_size=batch_size, shuffle=True):
inputs, targets, masks, char_inputs = batch
err, corr, num = train_fn(inputs, targets, masks, char_inputs)
train_err += err * num
train_corr += corr
train_total += num
train_batches += 1
time_ave = (time.time() - start_time) / train_batches
time_left = (num_batches - train_batches) * time_ave
# update log
sys.stdout.write("\b" * num_back)
log_info = 'train: %d/%d loss: %.4f, acc: %.2f%%, time left (estimated): %.2fs' % (
min(train_batches * batch_size, num_data), num_data,
train_err / train_total, train_corr * 100 / train_total, time_left)
sys.stdout.write(log_info)
num_back = len(log_info)
# update training log after each epoch
sys.stdout.write("\b" * num_back)
print 'train: %d/%d loss: %.4f, acc: %.2f%%, time: %.2fs' % (
min(train_batches * batch_size, num_data), num_data,
train_err / train_total, train_corr * 100 / train_total, time.time() - start_time)
# evaluate performance on dev data
dev_err = 0.0
dev_corr = 0.0
dev_total = 0
for batch in utils.iterate_minibatches(X_dev, Y_dev, masks=mask_dev, char_inputs=C_dev, batch_size=batch_size):
inputs, targets, masks, char_inputs = batch
err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
dev_err += err * num
dev_corr += corr
dev_total += num
if output_predict:
utils.output_predictions(predictions, targets, masks, 'tmp/dev%d' % epoch, label_alphabet)
print 'dev loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
dev_err / dev_total, dev_corr, dev_total, dev_corr * 100 / dev_total)
if best_loss < dev_err and best_acc > dev_corr / dev_total:
stop_count += 1
else:
update_loss = False
update_acc = False
stop_count = 0
if best_loss > dev_err:
update_loss = True
best_loss = dev_err
best_epoch_loss = epoch
if best_acc < dev_corr / dev_total:
update_acc = True
best_acc = dev_corr / dev_total
best_epoch_acc = epoch
# evaluate on test data when better performance detected
test_err = 0.0
test_corr = 0.0
test_total = 0
for batch in utils.iterate_minibatches(X_test, Y_test, masks=mask_test, char_inputs=C_test,
batch_size=batch_size):
inputs, targets, masks, char_inputs = batch
err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
test_err += err * num
test_corr += corr
test_total += num
if output_predict:
utils.output_predictions(predictions, targets, masks, 'tmp/test%d' % epoch, label_alphabet)
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
test_err / test_total, test_corr, test_total, test_corr * 100 / test_total)
if update_loss:
best_loss_test_err = test_err
best_loss_test_corr = test_corr
if update_acc:
best_acc_test_err = test_err
best_acc_test_corr = test_corr
# early stopping: break if dev performance has not improved for patience consecutive epochs
if stop_count == patience:
break
# re-compile a function with new learning rate for training
if update_algo != 'adadelta':
lr = learning_rate / (1.0 + epoch * decay_rate)
updates = utils.create_updates(loss_train, params, update_algo, lr, momentum=momentum)
train_fn = theano.function([input_var, target_var, mask_var, char_input_var],
[loss_train, corr_train, num_loss],
updates=updates)
# print best performance on test data.
logger.info("final best loss test performance (at epoch %d)" % best_epoch_loss)
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
best_loss_test_err / test_total, best_loss_test_corr, test_total, best_loss_test_corr * 100 / test_total)
logger.info("final best acc test performance (at epoch %d)" % best_epoch_acc)
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
best_acc_test_err / test_total, best_acc_test_corr, test_total, best_acc_test_corr * 100 / test_total)
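This example keeps a single status line live by erasing with backspaces: it remembers how many characters it last wrote (num_back), backs the cursor up with "\b" * num_back, and writes the refreshed text over the top. A minimal, self-contained sketch of the same pattern (the loop body and names here are illustrative, not from the project):

import sys, time

num_back = 0                           # length of the status text currently on screen
for i in range(1, 11):
    sys.stdout.write("\b" * num_back)  # back the cursor up over the old status
    status = "processed %d/10 batches" % i
    sys.stdout.write(status)           # overwrite it in place
    sys.stdout.flush()                 # push the partial line out immediately
    num_back = len(status)
    time.sleep(0.2)
sys.stdout.write("\n")                 # move off the status line when done

Note that "\b" only repositions the cursor; if a new status could be shorter than the previous one it needs trailing padding, which is why both loops track the exact length they wrote.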
2
Example 37
def shell():
"""
The interactive shell: parse user commands
and set variables.
"""
global url
global port
global action_url
global user_agent
global html_file
global external_js
print_startup()
if os.path.exists("history.log"):
if os.stat("history.log").st_size == 0:
history = open("history.log", "w")
else:
history = open("history.log", "a")
else:
history = open("history.log", "w")
while True:
try:
# for Re-complete
complete(array)
an = raw_input("\033[01;37m>>> \033[00m") or "help"
prompt = an.split()
if not prompt:
print("Error: What? try help.")
elif prompt[0] == ";" or prompt[0] == "clear":
print("\033[H\033[J")
elif prompt[0] == "q" or prompt[0] == "quit":
printt(2,"bye bye!")
break
elif prompt[0] == "help" or prompt[0] == "?":
if prompt[1]:
print_help_option(str(prompt[1]))
else:
print_help()
elif prompt[0] == "show":
sys.stdout.write("\033[01;37m\t")
print("-" * 20)
print("\turl : %s " %url)
print("\tport : %d " %(port))
print("\taction_url : %s " %(action_url))
print("\tuser_agent : %s " %(user_agent))
print("\thtml_file : %s " %(html_file))
print("\texternal_js : %s " %(external_js))
sys.stdout.write("\t")
print("-" * 20)
sys.stdout.write("\033[01;00m")
elif prompt[0] == "set":
if prompt[1] == "port":
port = int(prompt[2])
## Check if port == 80 and not running as root
if port == 80 and os.getuid() != 0:
printt(2, "Permission denied, to bind port 80, you need to run weeman as root.");
history.write("port = %s\n" %port)
if prompt[1] == "url":
url = str(prompt[2])
history.write("url = %s\n" %url)
if prompt[1] == "action_url":
action_url = str(prompt[2])
history.write("action_url = %s\n" %action_url)
if prompt[1] == "user_agent":
prompt.pop(0)
u = str()
for x in prompt:
u+=" "+x
user_agent = str(u.replace("user_agent", ""))
history.write("user_agent = %s\n" %user_agent)
if prompt[1] == "html_file":
html_file = str(prompt[2])
if prompt[1] == "external_js":
external_js = str(prompt[2])
history.write("external_js = %s\n" %external_js)
elif prompt[0] == "run" or prompt[0] == "r":
if not url:
printt(3, "Error: \'url\' can't be \'None\', please use \'set\'.")
elif not action_url:
printt(3, "Error: \'action_url\' can't be \'None\', please use \'set\'.")
else:
# Here we start the server (:
s = weeman(url,port)
s.clone()
s.serve()
elif prompt[0] == "banner" or prompt[0] == "b":
print_startup()
elif prompt[0] == "framework":
fw = framework()
fw.shell()
else:
print("Error: \'%s\' What? try help." %prompt[0])
except KeyboardInterrupt:
s = weeman(url,port)
s.cleanup()
print("\nInterrupt ...")
except IndexError:
if prompt[0] == "help" or prompt[0] == "?":
print_help()
else:
printt(3, "Error: please provide option for \'%s\'." %prompt[0])
except Exception as e:
printt(3, "Error: (%s)" %(str(e)))
2
Example 38
def __init__(self, sshParameters, argv,
exceptionIfNotZero=True,
connectTimeoutSeconds=None,
maxConnectionRetries=10,
tickerForRetry=True,
checkForPermissionDenied=False):
"""Create new SshCommand instance.
Will wait until completed.
Captures returncode, and output.
Output may contain extraneous leading or trailing newlines and whitespace.
Example use::
example = SshCommand(exampleSshParameters, ["ls", "-al"])
print "returncode=" + str(example.returncode)
print "output=" + example.output
sshParameters
an SshParameters instance.
argv
list of command and arguments passed to ssh.
If given a string instead of a list, it is converted into a list via argv=argv.split().
That may only work as expected for some commands on some platforms.
It should work for a command without arguments.
Hence, if you don't want the string split, pass it wrapped as the sole item of a list."""
if not _gotPty:
# cannot use ssh if no pty
raise Exception("must have module pty available to use ssh command"
", which is known to be available in Python 2.6 on Linux, but not on Windows")
#
if isinstance(argv, basestring):
argv = argv.split()
maxConnectionRetries = int(maxConnectionRetries)
#
self._ipaddress = sshParameters.ipaddress
self._argv = argv
self._user = sshParameters.user
self._pwd = sshParameters.pwd
self._exceptionIfNotZero = exceptionIfNotZero
self._connectTimeoutSeconds = connectTimeoutSeconds
self._connectionRetriesRemaining = maxConnectionRetries if maxConnectionRetries else -1
self._output = ""
self._returncode = None
#
ticked = False
while self._connectionRetriesRemaining:
self._connectionRetriesRemaining -= 1
# fork and connect child to a pseudo-terminal
self._pid, self._fd = pty.fork()
if self._pid == 0:
# in child process
sshOptions = ["-l", self._user]
if connectTimeoutSeconds:
sshOptions.extend(["-o", "ConnectTimeout=" + str(connectTimeoutSeconds)])
sshOptions.append(self._ipaddress)
os.execvp("ssh", ["ssh"] + sshOptions + self._argv)
else:
# in parent process
if self._pwd:
# if given a password then apply
promptedForPassword = False
outputTillPrompt = ""
# look for password prompt
while not promptedForPassword:
try:
newOutput = os.read(self._fd, 1024)
if not len(newOutput):
# end has been reached
if not self._connectionRetriesRemaining:
# was raise Exception("unexpected end of output from ssh")
raise Exception("failing to connect via ssh\n" +
outputTillPrompt)
if tickerForRetry:
if not ticked:
# first time only printing
sys.stdout.write("retrying to connect via ssh [")
sys.stdout.write(".")
sys.stdout.flush()
ticked = True
break # break out of while not promptedForPassword:
# ssh has been observed returning "\r\n" for newline, but we want "\n"
newOutput = SshCommand._crLfRegex.sub("\n", newOutput)
outputTillPrompt += newOutput
if SshCommand._acceptPromptRegex.search(outputTillPrompt):
# e.g. "Are you sure you want to continue connecting (yes/no)? "
raise Exception("cannot proceed unless having accepted host key\n" +
outputTillPrompt +
'\nE.g. invoke SshCommand.acceptKnownHostKey(SshParameters("{0}",user,pwd)).'.format(self._ipaddress))
if SshCommand._pwdPromptRegex.search(outputTillPrompt):
# e.g. "10.123.45.67's password: "
promptedForPassword = True
except EnvironmentError:
# e.g. "@ WARNING: REMOTE HOST IDENTIFICATION HAS CHANGED! @" and closing
raise Exception("failing to connect via ssh\n" +
outputTillPrompt)
if not promptedForPassword: # i.e. if got here from breaking out of while not promptedForPassword:
continue # continue at while self._connectionRetriesRemaining:
else: # promptedForPassword is normal
# if connecting then no more retries,
# maxConnectionRetries is meant for retrying connecting only
self._connectionRetriesRemaining = 0
os.write(self._fd, self._pwd + "\n")
# look for output
endOfOutput = False
outputSincePrompt = ""
try:
while not endOfOutput:
try:
newOutput = os.read(self._fd, 1024)
if len(newOutput):
outputSincePrompt += newOutput
else:
# end has been reached
endOfOutput = True
if checkForPermissionDenied:
# seen stderr "Permission denied, please try again."
# and a repeat of stdout "10.123.45.67's password: "
if len(outputSincePrompt) <= 128: # limit to early in output
if SshCommand._permissionDeniedRegex.search(outputSincePrompt) and SshCommand._pwdPromptRegex.search(outputSincePrompt):
os.kill(self._pid, signal.SIGKILL)
except EnvironmentError as e:
# some ideas maybe at http://bugs.python.org/issue5380
if e.errno == 5: # errno.EIO:
# seen when pty closes OSError: [Errno 5] Input/output error
endOfOutput = True
else:
# we accept what we got so far, for now
endOfOutput = True
finally:
# remove any leading space (maybe there after "password:" prompt) and
# remove first newline (is there after entering password and "\n")
self._output = re.sub(SshCommand._removeLeadingSpaceAndFirstNewlineRegex, r"\1", outputSincePrompt)
#
# get returncode
signalled = False
try:
ignorePidAgain, waitEncodedStatusIndication = os.waitpid(self._pid, 0)
if os.WIFEXITED(waitEncodedStatusIndication):
# normal exit(status) call
self._returncode = os.WEXITSTATUS(waitEncodedStatusIndication)
else:
# e.g. os.WIFSIGNALED or os.WIFSTOPPED
# less common case
signalled = True
self._returncode = -1
# raise an exception if asked to and there is a reason
exceptionMessage = ""
if signalled:
# less common case
exceptionMessage += "ssh did not exit normally"
elif self._exceptionIfNotZero and self._returncode:
exceptionMessage += "returncode: " + str(self._returncode)
if exceptionMessage:
commandDescription = "ipaddress: " + self._ipaddress
commandDescription += "\ncommand:\n\t" + self._argv[0]
if len(self._argv) > 1:
commandDescription += "\narguments:\n\t" + "\n\t".join(self._argv[1:])
else:
commandDescription += "\nno arguments"
commandDescription += "\nuser: " + self._user
exceptionMessage = commandDescription + "\n" + exceptionMessage
exceptionMessage += "\noutput:\n" + self._output
raise SshCommandException(exceptionMessage)
except OSError:
# supposedly can occur
self._returncode = -1
raise SshCommandException("ssh did not exit normally")
if ticked:
# final printing
sys.stdout.write("]\n")
sys.stdout.flush()
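The retry ticker in this example shows why the explicit sys.stdout.flush() calls matter: stdout is buffered, so the opening "retrying to connect via ssh [" and each subsequent "." would otherwise stay invisible until a newline arrived. A distilled sketch of the open-bracket/dots/close-bracket ticker, with the actual connection attempt elided:

import sys, time

ticked = False
for attempt in range(10):
    # ... a connection attempt would go here; on failure, fall through ...
    if not ticked:
        sys.stdout.write("retrying to connect [")  # printed once, no newline
        ticked = True
    sys.stdout.write(".")       # one dot per retry
    sys.stdout.flush()          # make the partial line visible immediately
    time.sleep(0.1)
if ticked:
    sys.stdout.write("]\n")     # close the bracket and release the line
    sys.stdout.flush()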
2
Example 39
Project: DragonPy Source File: pager.py
def _manual_test_console():
print(("\nconsole size: width %s, height %s" % (getwidth(), getheight())))
echo("--<enter>--")
getch()
echo("\n")
print("\nsys.stdout.write() doesn't insert newlines automatically,")
print("that's why it is used for console output in non-trivial")
print("cases here.\n")
sys.stdout.write("--<enter>--")
sys.stdout.flush()
getch()
print("\rHowever, sys.stdout.write() requires explicit flushing")
print("to make the output immediately appear on the screen.")
print("echo() function from this module does this automatically.")
echo("\n--<enter>--")
getch()
print("\n\nThe following test outputs string equal to the width of the\n"
"screen and waits for you to press <enter>. It behaves\n"
"differently on Linux and Windows - W. scrolls the window and\n"
"places cursor on the next line immediately, while L. window\n"
"doesn't scroll until the next character is output.\n"
)
print("Tested on:")
print(" Windows Vista - cmd.exe console")
print(" Debian Lenny - native terminal")
print(" Debian Lenny - PuTTY SSH terminal from Windows Vista")
echo("\n--<enter>--")
getch()
echo("\n")
echo("<" + "-"*(getwidth()-2) + ">")
getch()
print("^ note there is no newline when the next character is printed")
print("")
print("At least this part works similar on all platforms. It is just\n"
"the state of the console after the last character on the line\n"
"is printed that is different.")
print("")
echo("--<enter>--")
getch()
print("")
print("\nBut there is one special case.")
print("")
print("It is when the next character is a newline.")
print("")
print("The following test prints line equal to the width of the\n"
"console, waits for <enter>, then outputs newline '\\n',\n"
"waits for another key press, then outputs 'x' char.")
print("")
echo("--<enter>--")
getch()
print("")
echo("<" + "-"*(getwidth()-2) + ">")
getch()
echo("\n")
getch()
echo("x")
getch()
print("\n^ here is the difference:")
print("")
print("On Windows you will get:\n"
" <----------->\n"
" \n"
" x")
print("")
print("Linux will show you:\n"
" <----------->\n"
" x")
print("")
echo("--<enter>--")
getch()
print("")
print("\nThe next test will fill the screen with '1' digits\n"
"numbering each line staring from 1.")
print("")
print("It works the same on Linux and Windows, because the next\n"
"character after the last on the line is not linefeed.\n")
echo("--<enter>--")
getch()
print("")
numwidth = len(str(getwidth()))
strlen = getwidth() - numwidth - 2 # 2 = '. ' after the line number
filler = '1' * strlen
for i in range(getheight()-1): # -1 to leave last line for --<enter>--
lineno = ("%" + str(numwidth) + "s. ") % (i+1)
sys.stdout.write(lineno + filler)
echo("--<enter>--")
getch()
print("")
print("\nNext test prints this source code using page() function")
print("")
echo("--<enter>--")
getch()
print("")
content = open(__file__)
page(content)
echo("--<enter>--")
getch()
print("")
2
Example 40
Project: LasagneNLP Source File: bi_lstm_highcnn.py
def main():
parser = argparse.ArgumentParser(description='Tuning with bi-directional LSTM-HighCNN')
parser.add_argument('--fine_tune', action='store_true', help='Fine tune the word embeddings')
parser.add_argument('--embedding', choices=['word2vec', 'glove', 'senna'], help='Embedding for words',
required=True)
parser.add_argument('--embedding_dict', default='data/word2vec/GoogleNews-vectors-negative300.bin',
help='path for embedding dict')
parser.add_argument('--batch_size', type=int, default=10, help='Number of sentences in each batch')
parser.add_argument('--num_units', type=int, default=100, help='Number of hidden units in LSTM')
parser.add_argument('--num_filters', type=int, default=20, help='Number of filters in CNN')
parser.add_argument('--learning_rate', type=float, default=0.1, help='Learning rate')
parser.add_argument('--decay_rate', type=float, default=0.1, help='Decay rate of learning rate')
parser.add_argument('--grad_clipping', type=float, default=0, help='Gradient clipping')
parser.add_argument('--gamma', type=float, default=1e-6, help='weight for regularization')
parser.add_argument('--peepholes', action='store_true', help='Peepholes for LSTM')
parser.add_argument('--oov', choices=['random', 'embedding'], help='Embedding for oov word', required=True)
parser.add_argument('--update', choices=['sgd', 'momentum', 'nesterov', 'adadelta'], help='update algorithm', default='sgd')
parser.add_argument('--regular', choices=['none', 'l2'], help='regularization for training', required=True)
parser.add_argument('--dropout', action='store_true', help='Apply dropout layers')
parser.add_argument('--patience', type=int, default=5, help='Patience for early stopping')
parser.add_argument('--output_prediction', action='store_true', help='Output predictions to temp files')
parser.add_argument('--train') # "data/POS-penn/wsj/split1/wsj1.train.original"
parser.add_argument('--dev') # "data/POS-penn/wsj/split1/wsj1.dev.original"
parser.add_argument('--test') # "data/POS-penn/wsj/split1/wsj1.test.original"
args = parser.parse_args()
def construct_input_layer():
if fine_tune:
layer_input = lasagne.layers.InputLayer(shape=(None, max_length), input_var=input_var, name='input')
layer_embedding = lasagne.layers.EmbeddingLayer(layer_input, input_size=alphabet_size,
output_size=embedd_dim,
W=embedd_table, name='embedding')
return layer_embedding
else:
layer_input = lasagne.layers.InputLayer(shape=(None, max_length, embedd_dim), input_var=input_var,
name='input')
return layer_input
def construct_char_input_layer():
layer_char_input = lasagne.layers.InputLayer(shape=(None, max_sent_length, max_char_length),
input_var=char_input_var, name='char-input')
layer_char_input = lasagne.layers.reshape(layer_char_input, (-1, [2]))
layer_char_embedding = lasagne.layers.EmbeddingLayer(layer_char_input, input_size=char_alphabet_size,
output_size=char_embedd_dim, W=char_embedd_table,
name='char_embedding')
layer_char_input = lasagne.layers.DimshuffleLayer(layer_char_embedding, pattern=(0, 2, 1))
return layer_char_input
logger = utils.get_logger("BiLSTM-HighCNN")
fine_tune = args.fine_tune
oov = args.oov
regular = args.regular
embedding = args.embedding
embedding_path = args.embedding_dict
train_path = args.train
dev_path = args.dev
test_path = args.test
update_algo = args.update
grad_clipping = args.grad_clipping
peepholes = args.peepholes
num_filters = args.num_filters
gamma = args.gamma
output_predict = args.output_prediction
dropout = args.dropout
X_train, Y_train, mask_train, X_dev, Y_dev, mask_dev, X_test, Y_test, mask_test, \
embedd_table, label_alphabet, \
C_train, C_dev, C_test, char_embedd_table = data_processor.load_dataset_sequence_labeling(train_path, dev_path,
test_path, oov=oov,
fine_tune=fine_tune,
embedding=embedding,
embedding_path=embedding_path,
use_character=True)
num_labels = label_alphabet.size() - 1
logger.info("constructing network...")
# create variables
target_var = T.imatrix(name='targets')
mask_var = T.matrix(name='masks', dtype=theano.config.floatX)
if fine_tune:
input_var = T.imatrix(name='inputs')
num_data, max_length = X_train.shape
alphabet_size, embedd_dim = embedd_table.shape
else:
input_var = T.tensor3(name='inputs', dtype=theano.config.floatX)
num_data, max_length, embedd_dim = X_train.shape
char_input_var = T.itensor3(name='char-inputs')
num_data_char, max_sent_length, max_char_length = C_train.shape
char_alphabet_size, char_embedd_dim = char_embedd_table.shape
assert (max_length == max_sent_length)
assert (num_data == num_data_char)
# construct input and mask layers
layer_incoming1 = construct_char_input_layer()
layer_incoming2 = construct_input_layer()
layer_mask = lasagne.layers.InputLayer(shape=(None, max_length), input_var=mask_var, name='mask')
# construct bi-rnn-cnn
num_units = args.num_units
bi_lstm_cnn = build_BiLSTM_HighCNN(layer_incoming1, layer_incoming2, num_units, mask=layer_mask,
grad_clipping=grad_clipping, peepholes=peepholes, num_filters=num_filters,
dropout=dropout)
# reshape bi-rnn-cnn to [batch * max_length, num_units]
bi_lstm_cnn = lasagne.layers.reshape(bi_lstm_cnn, (-1, [2]))
# construct output layer (dense layer with softmax)
layer_output = lasagne.layers.DenseLayer(bi_lstm_cnn, num_units=num_labels, nonlinearity=nonlinearities.softmax,
name='softmax')
# get output of bi-lstm-cnn shape=[batch * max_length, #label]
prediction_train = lasagne.layers.get_output(layer_output)
prediction_eval = lasagne.layers.get_output(layer_output, deterministic=True)
final_prediction = T.argmax(prediction_eval, axis=1)
# flat target_var to vector
target_var_flatten = target_var.flatten()
# flat mask_var to vector
mask_var_flatten = mask_var.flatten()
# compute loss
num_loss = mask_var_flatten.sum(dtype=theano.config.floatX)
# for training, we use mean of loss over number of labels
loss_train = lasagne.objectives.categorical_crossentropy(prediction_train, target_var_flatten)
loss_train = (loss_train * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss
# l2 regularization?
if regular == 'l2':
l2_penalty = lasagne.regularization.regularize_network_params(layer_output, lasagne.regularization.l2)
loss_train = loss_train + gamma * l2_penalty
loss_eval = lasagne.objectives.categorical_crossentropy(prediction_eval, target_var_flatten)
loss_eval = (loss_eval * mask_var_flatten).sum(dtype=theano.config.floatX) / num_loss
# compute number of correct labels
corr_train = lasagne.objectives.categorical_accuracy(prediction_train, target_var_flatten)
corr_train = (corr_train * mask_var_flatten).sum(dtype=theano.config.floatX)
corr_eval = lasagne.objectives.categorical_accuracy(prediction_eval, target_var_flatten)
corr_eval = (corr_eval * mask_var_flatten).sum(dtype=theano.config.floatX)
# Create update expressions for training.
# hyper parameters to tune: learning rate, momentum, regularization.
batch_size = args.batch_size
learning_rate = 1.0 if update_algo == 'adadelta' else args.learning_rate
decay_rate = args.decay_rate
momentum = 0.9
params = lasagne.layers.get_all_params(layer_output, trainable=True)
updates = utils.create_updates(loss_train, params, update_algo, learning_rate, momentum=momentum)
# Compile a function performing a training step on a mini-batch
train_fn = theano.function([input_var, target_var, mask_var, char_input_var], [loss_train, corr_train, num_loss],
updates=updates)
# Compile a second function evaluating the loss and accuracy of network
eval_fn = theano.function([input_var, target_var, mask_var, char_input_var],
[loss_eval, corr_eval, num_loss, final_prediction])
# Finally, launch the training loop.
logger.info(
"Start training: %s with regularization: %s(%f), dropout: %s, fine tune: %s (#training data: %d, batch size: %d, clip: %.1f, peepholes: %s)..." \
% (
update_algo, regular, (0.0 if regular == 'none' else gamma), dropout, fine_tune, num_data, batch_size, grad_clipping,
peepholes))
num_batches = num_data / batch_size
num_epochs = 1000
best_loss = 1e+12
best_acc = 0.0
best_epoch_loss = 0
best_epoch_acc = 0
best_loss_test_err = 0.
best_loss_test_corr = 0.
best_acc_test_err = 0.
best_acc_test_corr = 0.
stop_count = 0
lr = learning_rate
patience = args.patience
for epoch in range(1, num_epochs + 1):
print 'Epoch %d (learning rate=%.4f, decay rate=%.4f): ' % (epoch, lr, decay_rate)
train_err = 0.0
train_corr = 0.0
train_total = 0
start_time = time.time()
num_back = 0
train_batches = 0
for batch in utils.iterate_minibatches(X_train, Y_train, masks=mask_train, char_inputs=C_train,
batch_size=batch_size, shuffle=True):
inputs, targets, masks, char_inputs = batch
err, corr, num = train_fn(inputs, targets, masks, char_inputs)
train_err += err * num
train_corr += corr
train_total += num
train_batches += 1
time_ave = (time.time() - start_time) / train_batches
time_left = (num_batches - train_batches) * time_ave
# update log
sys.stdout.write("\b" * num_back)
log_info = 'train: %d/%d loss: %.4f, acc: %.2f%%, time left (estimated): %.2fs' % (
min(train_batches * batch_size, num_data), num_data,
train_err / train_total, train_corr * 100 / train_total, time_left)
sys.stdout.write(log_info)
num_back = len(log_info)
# update training log after each epoch
sys.stdout.write("\b" * num_back)
print 'train: %d/%d loss: %.4f, acc: %.2f%%, time: %.2fs' % (
min(train_batches * batch_size, num_data), num_data,
train_err / train_total, train_corr * 100 / train_total, time.time() - start_time)
# evaluate performance on dev data
dev_err = 0.0
dev_corr = 0.0
dev_total = 0
for batch in utils.iterate_minibatches(X_dev, Y_dev, masks=mask_dev, char_inputs=C_dev, batch_size=batch_size):
inputs, targets, masks, char_inputs = batch
err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
dev_err += err * num
dev_corr += corr
dev_total += num
if output_predict:
utils.output_predictions(predictions, targets, masks, 'tmp/dev%d' % epoch, label_alphabet)
print 'dev loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
dev_err / dev_total, dev_corr, dev_total, dev_corr * 100 / dev_total)
if best_loss < dev_err and best_acc > dev_corr / dev_total:
stop_count += 1
else:
update_loss = False
update_acc = False
stop_count = 0
if best_loss > dev_err:
update_loss = True
best_loss = dev_err
best_epoch_loss = epoch
if best_acc < dev_corr / dev_total:
update_acc = True
best_acc = dev_corr / dev_total
best_epoch_acc = epoch
# evaluate on test data when better performance detected
test_err = 0.0
test_corr = 0.0
test_total = 0
for batch in utils.iterate_minibatches(X_test, Y_test, masks=mask_test, char_inputs=C_test,
batch_size=batch_size):
inputs, targets, masks, char_inputs = batch
err, corr, num, predictions = eval_fn(inputs, targets, masks, char_inputs)
test_err += err * num
test_corr += corr
test_total += num
if output_predict:
utils.output_predictions(predictions, targets, masks, 'tmp/test%d' % epoch, label_alphabet)
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
test_err / test_total, test_corr, test_total, test_corr * 100 / test_total)
if update_loss:
best_loss_test_err = test_err
best_loss_test_corr = test_corr
if update_acc:
best_acc_test_err = test_err
best_acc_test_corr = test_corr
# early stopping: break if dev performance has not improved for patience consecutive epochs
if stop_count == patience:
break
# re-compile a function with new learning rate for training
if update_algo != 'adadelta':
lr = learning_rate / (1.0 + epoch * decay_rate)
updates = utils.create_updates(loss_train, params, update_algo, lr, momentum=momentum)
train_fn = theano.function([input_var, target_var, mask_var, char_input_var],
[loss_train, corr_train, num_loss],
updates=updates)
# print best performance on test data.
logger.info("final best loss test performance (at epoch %d)" % best_epoch_loss)
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
best_loss_test_err / test_total, best_loss_test_corr, test_total, best_loss_test_corr * 100 / test_total)
logger.info("final best acc test performance (at epoch %d)" % best_epoch_acc)
print 'test loss: %.4f, corr: %d, total: %d, acc: %.2f%%' % (
best_acc_test_err / test_total, best_acc_test_corr, test_total, best_acc_test_corr * 100 / test_total)
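Like the earlier bi-directional LSTM example, this training loop redraws its status line with backspaces. A carriage return gives the same in-place effect with less bookkeeping: "\r" jumps back to column 0 so the next write overwrites the line from the start. The one caveat is padding when the new text may be shorter, as in this illustrative sketch:

import sys, time

prev_len = 0
for remaining in range(100, -1, -20):
    status = "%d items left" % remaining
    # pad with spaces so leftovers from a longer previous status are blanked
    sys.stdout.write("\r" + status.ljust(prev_len))
    sys.stdout.flush()
    prev_len = len(status)
    time.sleep(0.1)
sys.stdout.write("\n")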
2
Example 41
Project: GeoVis Source File: messages.py
def __iter__(self):
iterable = self.iterable
if not "shellreport" in self.kwargs:
shellreport="progressbar"
else:
shellreport=self.kwargs["shellreport"]
if not "text" in self.kwargs:
text="unknown task"
else:
text=self.kwargs["text"]
if not "tkwidget" in self.kwargs:
tkwidget=None
else:
tkwidget=self.kwargs["tkwidget"]
if not "queue" in self.kwargs:
queue=None
else:
queue=self.kwargs["queue"]
if not "picklepath" in self.kwargs:
picklepath=None
else:
picklepath=self.kwargs["picklepath"]
if not "reportincr" in self.kwargs:
reportincr=1
else:
reportincr=self.kwargs["reportincr"]
if not "genlength" in self.kwargs:
genlength=None
else:
genlength=self.kwargs["genlength"]
if not "countmethod" in self.kwargs:
countmethod="auto"
else:
countmethod=self.kwargs["countmethod"]
#some error checking
if not hasattr(iterable, "__iter__"):
raise TypeError("The iterable argument was not iterable")
if not hasattr(iterable, "__len__") and not genlength:
raise TypeError("The iterable argument must have a length in order to asses its progress")
#determine report types
if not shellreport:
shellprogbar=False
shellprint=False
elif shellreport.lower() == "progressbar":
shellprogbar=True
shellprint=False
elif shellreport.lower() == "print":
shellprogbar=False
shellprint=True
else:
# unrecognized value: report nothing rather than leave the flags unbound
shellprogbar=False
shellprint=False
#do some startup things
if shellprogbar:
reportincr = 2
print "\n%s" %text
print "0%"+","*50+"100%"
sys.stdout.write(" ")
#convert report incr percent to fraction of one
reportincr = reportincr/100.0
#measure total length
if not genlength:
total = float(len(iterable))
else:
total = float(genlength)
nextthresh = reportincr
self.prog = 0
timer.start("task completed in")
for index, each in enumerate(iterable):
if countmethod == "auto":
#only if countmethod is set to "auto" will progress increase automatically
#otherwise, the user has to keep a reference to the ProgressReport obj
#and manually increment self.prog at the correct pace
self.prog = index
percent = self.prog/total
#report progress if reached threshold
if percent >= nextthresh:
nextthresh += reportincr
#if progressbar is true this will ignore the shellprint option
if shellprogbar:
sys.stdout.write("|")
elif shellprint:
print "%i percent task completion: %s" %(int(percent*100),text)
if queue:
queue.put({"percent":int(percent*100),"text":text})
if tkwidget:
#tkwidget.set(int(percent*100))
tkwidget.update()
if picklepath:
msgbox = open(picklepath,"wb")
pickle.dump({"percent":int(percent*100),"text":text}, msgbox)
msgbox.close()
#check for finish
if nextthresh >= 1:
if shellprogbar:
sys.stdout.write("\n"+" "*8)
timer.stop("task completed in")
sys.stdout.write("\n")
elif shellprint:
print "%i percent task completion: %s" %(100,text)
if queue:
queue.put({"percent":100,"text":text})
if tkwidget:
#tkwidget.set(int(percent*100))
tkwidget.update()
if picklepath:
msgbox = open(picklepath,"wb")
pickle.dump({"percent":100,"text":text}, msgbox)
msgbox.close()
#yield next element from iterable
yield each
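The progress reporter above draws a fixed-width text bar: it prints a ruler line ("0%" followed by 50 commas and "100%"), then emits one "|" each time completion crosses the next 2% threshold, so the bar fills in beneath the ruler. The threshold pattern reduced to its essentials:

import sys

items = list(range(200))
total = float(len(items))
reportincr = 0.02                # one tick per 2% of progress
nextthresh = reportincr

sys.stdout.write("0%" + "," * 50 + "100%\n ")  # 50-slot ruler; bar starts under it
for index, item in enumerate(items):
    percent = index / total
    if percent >= nextthresh:
        nextthresh += reportincr
        sys.stdout.write("|")    # one bar segment per threshold crossed
        sys.stdout.flush()
sys.stdout.write("\n")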
2
Example 42
Project: pysmc Source File: _smc.py
def initialize(self, gamma, particle_approximation=None,
num_mcmc_per_particle=10):
"""
Initialize SMC at a particular ``gamma``.
The method has basically three ways of initializing the particles:
+ If ``particle_approximation`` is not ``None``,
then it is assumed to contain the
particles at the corresponding value of ``gamma``.
+ If ``particle_approximation`` is ``None`` and the
MCMC sampler class has a method
called ``draw_from_prior()`` that works, then it is called to
initialize the particles.
+ In any other case, MCMC sampling is used to initialize the particles.
We are assuming that the MCMC sampler has already been tuned for
that particular gamma and that a sufficient burn-in period has passed.
Then we record the current state as the first particle, we sample
``num_mcmc_per_particle`` times and record the second particle, and
so on.
:param gamma: The initial ``gamma`` parameter. It must, of
course, be within the right range of
``gamma``.
:type gamma: float
:param particle_approximation: A dictionary of MCMC states representing
the particles. When using MPI, we are
assuming that each CPU has its own
collection of particles.
:type particle_approximation: :class:`pysmc.ParticleApproximation`
:param num_mcmc_per_particle: This parameter is ignored if
``particle_approximation`` is not ``None``. If the
only way to initialize the particles is
to use MCMC, then this is the number
of MCMC samples we drop before getting
an SMC particle.
"""
if self.verbose > 0:
print '------------------------'
print 'START SMC Initialization'
print '------------------------'
print '- initializing at', self.gamma_name, ':', gamma
# Zero out the MCMC step counter
self._total_num_mcmc = 0
# Set gamma
self._set_gamma(gamma)
# Set the weights and ESS
self.log_w.fill(-math.log(self.num_particles))
self._ess = float(self.num_particles)
if particle_approximation is not None:
if self.verbose > 0:
print '- initializing with a particle approximation.'
self._particles = particle_approximation.particles
self._log_w = particle_approximation.log_w
self._ess = self._get_ess_at(self.log_w)
return
else:
self.particles[0] = self.mcmc_sampler.get_state()
try:
if self.verbose > 0:
sys.stdout.write(
'- initializing by sampling from the prior: ')
if gamma != 0.:
raise AttributeError()
for i in range(1, self.my_num_particles):
self.mcmc_sampler.draw_from_prior()
self.particles[i] = self.mcmc_sampler.get_state()
if self.verbose > 0:
sys.stdout.write('SUCCESS\n')
except AttributeError:
if self.verbose > 0:
sys.stdout.write('FAILURE\n')
print '- initializing via MCMC'
if self.use_mpi:
total_samples = (self.my_num_particles
* num_mcmc_per_particle)
print '- taking a total of', total_samples, 'samples per process'
else:
total_samples = (self.num_particles
* num_mcmc_per_particle)
print '- taking a total of', total_samples, 'samples'
print '- creating a particle every', num_mcmc_per_particle
if self.verbose > 0:
pb = pymc.progressbar.ProgressBar(self.num_particles *
num_mcmc_per_particle)
# Only rank 0 keeps the first particle
if self.rank == 0:
start_idx = 1
else:
start_idx = 0
for i in range(start_idx, self.my_num_particles):
self.mcmc_sampler.sample(num_mcmc_per_particle)
self.particles[i] = self.mcmc_sampler.get_state()
self._total_num_mcmc += num_mcmc_per_particle
# TODO: Find bug in PyMC bar
#if self.verbose > 0:
# pb.update((i + 2) * self.size * num_mcmc_per_particle)
if self.verbose > 0:
print ''
pa = self.get_particle_approximation().gather()
sm_params = self.mcmc_sampler.get_params(comm=self.comm)
if self.update_db and self.rank == 0:
self.db.add(self.gamma, pa, sm_params)
self.db.commit()
if self.verbose > 0:
print '----------------------'
print 'END SMC Initialization'
print '----------------------'
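One detail worth isolating from initialize(): it opens a line with '- initializing by sampling from the prior: ' and only completes it later with 'SUCCESS\n' or 'FAILURE\n', depending on whether draw_from_prior() raised. The same open-ended-line pattern on its own (the failing fast path here is simulated):

import sys

def try_fast_path():
    # stand-in for a sampler without draw_from_prior()
    raise AttributeError("no draw_from_prior on this sampler")

sys.stdout.write("- initializing by sampling from the prior: ")
sys.stdout.flush()                   # show the open-ended line before the attempt
try:
    try_fast_path()
    sys.stdout.write("SUCCESS\n")    # completes the same physical line
except AttributeError:
    sys.stdout.write("FAILURE\n")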
2
Example 43
Project: flopy Source File: mfstr.py
@staticmethod
def load(f, model, nper=None, ext_unit_dict=None):
"""
Load an existing package.
Parameters
----------
f : filename or file handle
File to load.
model : model object
The model object (of type :class:`flopy.modflow.mf.Modflow`) to
which this package will be added.
nper : int
The number of stress periods. If nper is None, then nper will be
obtained from the model object. (default is None).
ext_unit_dict : dictionary, optional
If the arrays in the file are specified using EXTERNAL,
or older style array control records, then `f` should be a file
handle. In this case ext_unit_dict is required, which can be
constructed using the function
:class:`flopy.utils.mfreadnam.parsenamefile`.
Returns
-------
str : ModflowStr object
ModflowStr object.
Examples
--------
>>> import flopy
>>> m = flopy.modflow.Modflow()
>>> strm = flopy.modflow.ModflowStr.load('test.str', m)
"""
if model.verbose:
sys.stdout.write('loading str package file...\n')
if not hasattr(f, 'read'):
filename = f
f = open(filename, 'r')
# dataset 0 -- header
while True:
line = f.readline()
if line[0] != '#':
break
# read dataset 1 - optional parameters
npstr, mxl = 0, 0
t = line.strip().split()
if t[0].lower() == 'parameter':
if model.verbose:
sys.stdout.write(' loading str dataset 1\n')
npstr = int(t[1])
mxl = int(t[2])
# read next line
line = f.readline()
# data set 2
if model.verbose:
sys.stdout.write(' loading str dataset 2\n')
t = line.strip().split()
mxacts = int(t[0])
nss = int(t[1])
ntrib = int(t[2])
ndiv = int(t[3])
icalc = int(t[4])
const = float(t[5])
istcb1 = int(t[6])
istcb2 = int(t[7])
ipakcb = 0
try:
if istcb1 != 0:
ipakcb = 53
model.add_pop_key_list(istcb1)
except:
pass
try:
if istcb2 != 0:
ipakcb = 53
model.add_pop_key_list(istcb2)
except:
pass
options = []
aux_names = []
if len(t) > 8:
it = 8
while it < len(t):
toption = t[it]
if 'aux' in toption.lower():
options.append(' '.join(t[it:it + 2]))
aux_names.append(t[it + 1].lower())
it += 1
it += 1
# read parameter data
if npstr > 0:
dt = ModflowStr.get_empty(1, aux_names=aux_names).dtype
pak_parms = mfparbc.load(f, npstr, dt, model.verbose)
if nper is None:
nrow, ncol, nlay, nper = model.get_nrow_ncol_nlay_nper()
stress_period_data = {}
segment_data = {}
for iper in range(nper):
if model.verbose:
print(" loading " + str(ModflowStr) + " for kper {0:5d}".format(iper + 1))
line = f.readline()
if line == '':
break
t = line.strip().split()
itmp = int(t[0])
irdflg, iptflg = 0, 0
if len(t) > 1:
irdflg = int(t[1])
if len(t) > 2:
iptflg = int(t[2])
if itmp == 0:
bnd_output = None
seg_output = None
current, current_seg = ModflowStr.get_empty(itmp, nss, aux_names=aux_names)
elif itmp > 0:
if npstr > 0:
partype = ['cond']
if model.verbose:
print(" reading str dataset 7")
for iparm in range(itmp):
line = f.readline()
t = line.strip().split()
pname = t[0].lower()
iname = 'static'
try:
tn = t[1]
c = tn.lower()
instance_dict = pak_parms.bc_parms[pname][1]
if c in instance_dict:
iname = c
else:
iname = 'static'
except:
pass
par_dict, current_dict = pak_parms.get(pname)
data_dict = current_dict[iname]
current = ModflowStr.get_empty(par_dict['nlst'], aux_names=aux_names)
# get appropriate parval
if model.mfpar.pval is None:
parval = np.float(par_dict['parval'])
else:
try:
parval = np.float(model.mfpar.pval.pval_dict[pname])
except:
parval = np.float(par_dict['parval'])
# fill current parameter data (par_current)
for ibnd, t in enumerate(data_dict):
current[ibnd] = tuple(t[:len(current.dtype.names)])
else:
if model.verbose:
print(" reading str dataset 6")
current, current_seg = ModflowStr.get_empty(itmp, nss,
aux_names=aux_names)
for ibnd in range(itmp):
line = f.readline()
t = []
if model.free_format_input:
tt = line.strip().split()
#current[ibnd] = tuple(t[:len(current.dtype.names)])
for idx, v in enumerate(tt[:10]):
t.append(v)
for ivar in range(3):
t.append(-1.0E+10)
if len(aux_names) > 0:
for idx, v in enumerate(t[10:]):
t.append(v)
if len(tt) != len(current.dtype.names)-3:
raise Exception
else:
ipos = [5, 5, 5, 5, 5, 15, 10, 10, 10, 10]
istart = 0
for ivar in range(len(ipos)):
istop = istart + ipos[ivar]
txt = line[istart:istop]
try:
t.append(float(txt))
except:
t.append(0.)
istart = istop
for ivar in range(3):
t.append(-1.0E+10)
if len(aux_names) > 0:
tt = line[istart:].strip().split()
for ivar in range(len(aux_names)):
t.append(tt[ivar])
current[ibnd] = tuple(t[:len(current.dtype.names)])
# convert indices to zero-based
current['k'] -= 1
current['i'] -= 1
current['j'] -= 1
# read dataset 8
if icalc > 0:
if model.verbose:
print(" reading str dataset 8")
for ibnd in range(itmp):
line = f.readline()
if model.free_format_input:
t = line.strip().split()
v = [float(vt) for vt in t[:3]]
else:
v = []
ipos = [10, 10, 10]
istart = 0
for ivar in range(len(ipos)):
istop = istart + ipos[ivar]
v.append(float(line[istart:istop]))
istart = istop + 1
ipos = 0
for idx in range(10, 13):
current[ibnd][idx] = v[ipos]
ipos += 1
bnd_output = np.recarray.copy(current)
# read data set 9
if ntrib > 0:
if model.verbose:
print(" reading str dataset 9")
for iseg in range(nss):
line = f.readline()
if model.free_format_input:
t = line.strip().split()
v = [float(vt) for vt in t[:ntrib]]
else:
v = []
ipos = 5
istart = 0
for ivar in range(ntrib):
istop = istart + ipos
try:
v.append(float(line[istart:istop]))
except:
v.append(0.)
istart = istop
for idx in range(ntrib):
current_seg[iseg][idx] = v[idx]
# read data set 10
if ndiv > 0:
if model.verbose:
print(" reading str dataset 10")
for iseg in range(nss):
line = f.readline()
if model.free_format_input:
t = line.strip().split()
v = float(t[0])
else:
ipos = 10
istart = 0
for ivar in range(ndiv):
istop = istart + ipos
v = float(line[istart:istop])
istart = istop
current_seg[iseg][10] = v
seg_output = np.recarray.copy(current_seg)
else:
bnd_output = -1
seg_output = -1
if bnd_output is None:
stress_period_data[iper] = itmp
segment_data[iper] = itmp
else:
stress_period_data[iper] = bnd_output
segment_data[iper] = seg_output
strpak = ModflowStr(model, mxacts=mxacts, nss=nss,
ntrib=ntrib, ndiv=ndiv, icalc=icalc,
const=const, ipakcb=ipakcb,
stress_period_data=stress_period_data,
segment_data=segment_data,
options=options)
return strpak
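Throughout load(), diagnostic writes are gated on model.verbose and carry their own trailing '\n', which keeps the loader silent by default and makes each message a single explicit write. A minimal version of that convention, built around a hypothetical loader class:

import sys

class Loader(object):
    def __init__(self, verbose=False):
        self.verbose = verbose

    def _log(self, msg):
        # every message supplies its own newline, mirroring the style above
        if self.verbose:
            sys.stdout.write(msg)

    def load(self, path):
        self._log('loading %s...\n' % path)
        self._log('   loading dataset 1\n')
        # ... actual parsing would happen here ...

Loader(verbose=True).load('test.str')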
2
Example 44
Project: bits Source File: readline.py
def _readline(prompt=""):
global width, height, line_x, line_y, buffer_max, history, kill_ring, kill_accuemulate, ctrl_o_index, completer
with redirect.nolog():
with pager.nopager():
sys.stdout.write(prompt)
line_buffer = ''
pos = 0
prev_len = 0
term_count = bits.get_term_count()
width = [0] * term_count
height = [0] * term_count
line_x = [0] * term_count
line_y = [0] * term_count
for term in range(term_count):
width[term], height[term] = bits.get_width_height(term)
line_x[term], line_y[term] = bits.get_xy(term)
buffer_max = min((width[term] - 2 - line_x[term]) + ((height[term] - 1) * (width[term] - 1)) for term in range(term_count))
history_index = len(history)
history_state = dict()
completer_state = 0
last_yank_start = None
kill_accuemulate = False
if ctrl_o_index is not None:
if ctrl_o_index < len(history):
history_index = ctrl_o_index
line_buffer = history[history_index]
pos = len(line_buffer)
ctrl_o_index = None
while True:
# Update history
history_state[history_index] = (line_buffer, pos)
try:
# clear any characters after the current line buffer
trailing_len = prev_len - len(line_buffer)
if trailing_len > 0:
for term in range(term_count):
trailing_x, trailing_y = PositionCursor(len(line_buffer), line_x[term], line_y[term], term)
print_buffer(" " * trailing_len, trailing_x, trailing_y, term)
prev_len = len(line_buffer)
for term in range(term_count):
# print the current line buffer
print_buffer(line_buffer, line_x[term], line_y[term], term)
# move the cursor to location of pos within the line buffer
PositionCursor(pos, line_x[term], line_y[term], term)
c = bits.input.get_key()
key = bits.input.key
def ctrl(k):
return key(k, ctrl=True)
# Reset states that depend on last key
if c != key('y', alt=True):
last_yank_start = None
if c not in (ctrl('k'), ctrl('u'), ctrl('w')):
kill_accuemulate = False
if c == key('\r') or c == key('\n') or c == ctrl('o'):
if line_buffer or (history and history[-1]):
history.append(line_buffer)
if c == ctrl('o'): # Ctrl-O
ctrl_o_index = history_index + 1
sys.stdout.write('\n')
return line_buffer + '\n'
if not (c == key('\t') or c == ctrl('i')):
# reset completer state to force restart of the completer
completer_state = 0
if c == key(bits.input.KEY_HOME) or c == ctrl('a'):
# start of line
pos = 0
elif c == key(bits.input.KEY_LEFT) or c == ctrl('b'):
# left
if pos != 0:
pos -= 1
elif c == ctrl('d'):
# EOF
if len(line_buffer) == 0:
return ""
if pos < len(line_buffer):
line_buffer, pos = delete_char(line_buffer, pos)
elif c == key(bits.input.KEY_DELETE):
if pos < len(line_buffer):
line_buffer, pos = delete_char(line_buffer, pos)
elif c == key(bits.input.KEY_END) or c == ctrl('e'):
# end of line
pos = len(line_buffer)
elif c == key(bits.input.KEY_RIGHT) or c == ctrl('f'):
# right
if pos != len(line_buffer):
pos += 1
elif c == key('\b') or c == ctrl('h'):
# backspace
line_buffer, pos = delete_char_left(line_buffer, pos)
elif c == key('\t') or c == ctrl('i'):
# tab completion
if completer is not None:
if completer_state != 0:
for c in range(len(current_completion)):
line_buffer, pos = delete_char_left(line_buffer, pos)
else:
cur = pos
while pos != 0 and line_buffer[pos-1] != ' ':
pos -= 1
saved_str = line_buffer[pos:cur]
line_buffer = line_buffer[:pos] + line_buffer[cur:]
current_completion = completer(saved_str, completer_state)
completer_state += 1
if current_completion is not None:
for c in current_completion:
line_buffer, pos = insert_char(line_buffer, c, pos)
else:
for c in saved_str:
line_buffer, pos = insert_char(line_buffer, c, pos)
completer_state = 0
elif c == ctrl('k'):
# delete from current to end of line
killed_text = line_buffer[pos:]
line_buffer = line_buffer[:pos]
add_to_kill_ring(killed_text, to_right=True)
elif c == ctrl('l'):
# clear screen
bits.clear_screen()
sys.stdout.write(prompt)
for term in range(term_count):
line_x[term], line_y[term] = bits.get_xy(term)
elif c == key(bits.input.KEY_DOWN) or c == ctrl('n'):
# Next line in history
if history_index < len(history):
history_index += 1
if history_index == len(history):
line_buffer, pos = history_state.get(history_index, ('', 0))
else:
line_buffer, pos = history_state.get(history_index, (history[history_index], len(history[history_index])))
elif c == key(bits.input.KEY_UP) or c == ctrl('p'):
# Previous line in history
if history_index > 0:
history_index -= 1
line_buffer, pos = history_state.get(history_index, (history[history_index], len(history[history_index])))
elif c == ctrl('u'):
# delete from current to beginning of line
killed_text = line_buffer[:pos]
line_buffer = line_buffer[pos:]
pos = 0
add_to_kill_ring(killed_text, to_right=False)
elif c == ctrl(bits.input.KEY_LEFT):
# Move left by word
while pos != 0 and not line_buffer[pos-1].isalnum():
pos -= 1
while pos != 0 and line_buffer[pos-1].isalnum():
pos -= 1
elif c == ctrl(bits.input.KEY_RIGHT):
# Move right by word
end = len(line_buffer)
while pos != end and not line_buffer[pos].isalnum():
pos += 1
while pos != end and line_buffer[pos].isalnum():
pos += 1
elif c == ctrl('w'):
# delete previous word; note that this uses a different
# definition of "word" than Ctrl-Left and Ctrl-Right.
cur = pos
while pos != 0 and line_buffer[pos-1] == ' ':
pos -= 1
while pos != 0 and line_buffer[pos-1] != ' ':
pos -= 1
killed_text = line_buffer[pos:cur]
line_buffer = line_buffer[:pos] + line_buffer[cur:]
add_to_kill_ring(killed_text, to_right=False)
elif c == ctrl('y'):
# Yank
if kill_ring:
line_buffer, last_yank_start, pos = insert_string(line_buffer, kill_ring[-1], pos)
elif c == key('y', alt=True):
# If immediately after yank, rotate kill ring and yank
# the new top instead.
if last_yank_start is not None:
line_buffer = line_buffer[:last_yank_start] + line_buffer[pos:]
pos = last_yank_start
kill_ring.insert(0, kill_ring.pop()) # Rotate
line_buffer, last_yank_start, pos = insert_string(line_buffer, kill_ring[-1], pos)
elif c == ctrl('z') or c == key(bits.input.KEY_ESC):
if len(line_buffer) == 0:
return ""
elif c.key in key_hooks:
key_hooks[c.key]()
elif not(c.ctrl) and not(c.alt) and isinstance(c.key, basestring) and c.key in string.printable:
# printable
try:
line_buffer, pos = insert_char(line_buffer, c.key.encode('ascii'), pos)
except UnicodeError:
pass
else:
pass
except IOError:
pass
2
Example 45
Project: pySecurityCenter Source File: population.py
def gen(sc, asset, expire):
'''
Database population function.
What we are doing here is trying to interpret the output of plugin ID 20811
and use that information to help populate the database with individualized
entries of the software that is installed on the host. This information will
later be used to build the report.
'''
# The following regex patterns are used to pull out the needed fields from
# Plugin ID 20811
redate = re.compile(r'\[installed on (\d{4})/(\d{1,2})/(\d{1,2})\]')
reinvdate = re.compile(r'\[installed on (\d{1,2})/(\d{1,2})/(\d{4})\]')
rever = re.compile(r'\[version (.*?)\]')
resw = re.compile(r'^([\w\s\.\(\-\)\+]*)')
s = Session()
ts = datetime.datetime.now()
for vuln in sc.analysis(('pluginID','=','20811,22869'),
('asset', '=', {'id': str(asset)}),
tool='vulndetails'):
# First we need to get the host information...
nh = False
host = s.query(Host).filter_by(ip=vuln['ip']).first()
if not host:
host = Host()
nh = True
hdata = sc.analysis(('ip', '=', vuln['ip']),tool='sumip')[0]
host.ip = vuln['ip']
host.name = vuln['netbiosName']
host.cpe = hdata['osCPE']
host.dns = hdata['dnsName']
host.asset_id = asset
if nh:
s.add(host)
else:
s.merge(host)
s.commit()
sys.stdout.write('%4d\t%-16s\t%-40s' % (host.id, host.ip, host.dns))
sys.stdout.flush()
if vuln['pluginID'] == '22869':
if 'CentOS Linux system' in vuln['pluginText'] or 'Red Hat Linux system' in vuln['pluginText']:
software = re.findall(' ([a-zA-Z0-9\.\-]*)\|',vuln['pluginText'])
for item in software:
entry = Entry()
entry.name = item
entry.timestamp = ts
entry.host_id = host.id
s.add(entry)
s.commit()
elif 'SunOS 5.10' in vuln['pluginText']:
software = re.findall('Patch: ([^ ]*)', vuln['pluginText'])
for item in software:
entry = Entry()
entry.name = item
entry.timestamp = ts
entry.host_id = host.id
s.add(entry)
s.commit()
elif 'Solaris 11 system' in vuln['pluginText']:
software = re.findall('([\w\/]+)\W+([0-9\.\-]+).*\n',vuln['pluginText'])
for item in software:
entry = Entry()
entry.name = item[0]
entry.version = item[1]
entry.timestamp = ts
entry.host_id = host.id
s.add(entry)
s.commit()
elif 'Mac OS X system' in vuln['pluginText']:
software = re.findall(' ([a-zA-Z0-9\.\-\_]*\.pkg)\n',vuln['pluginText'])
for item in software:
entry = Entry()
entry.name = item
entry.timestamp = ts
entry.host_id = host.id
s.add(entry)
s.commit()
else:
sys.stdout.write('\t[NO FORMATTER]')
sys.stdout.flush()
if vuln['pluginID'] == '20811':
software = False
patches = False
sw = None
nh = False
s.commit()
for line in vuln['pluginText'].split('\n'):
if '</plugin_output>' in line:
continue
if line == u'The following software are installed on the remote host :':
software = True
patches = False
continue
if line == u'The following updates are installed :':
patches = True
continue
if software and line != '':
names = resw.findall(line)
vers = rever.findall(line)
dates = redate.findall(line)
new = Entry()
if len(names) > 0: new.name = names[0].strip()
if len(vers) > 0: new.version = vers[0]
try:
if len(dates) > 0:
date = datetime.date(year=int(dates[0][0]),
month=int(dates[0][1]),
day=int(dates[0][2]))
new.date = date
else:
dates = reinvdate.findall(line)
if len(dates) > 0:
date = datetime.date(year=int(dates[0][2]),
month=int(dates[0][0]),
day=int(dates[0][1]))
new.date = date
except:
pass
if patches:
if line[:2] != ' ':
sw = line.strip(':').strip()
continue
else:
new.name = '%s (%s)' % (new.name, sw)
new.timestamp = ts
new.host_id = host.id
s.add(new)
s.commit()
sys.stdout.write('\tdone\n')
sys.stdout.flush()
s.commit()
# Now to expire the old data out...
exp = datetime.datetime.now() - datetime.timedelta(days=expire)
print exp
# First to delete the aged out entries
for entry in s.query(Entry).filter(Entry.timestamp < exp).all():
s.delete(entry)
s.commit()
# Next to delete any hosts that we aren't pulling info for anymore...
for host in s.query(Host).all():
if len(host.entries) == 0:
s.delete(host)
s.commit()
s.close()
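gen() prints one aligned row per host with '%4d\t%-16s\t%-40s' and leaves the line open, so later writes such as '\t[NO FORMATTER]' or '\tdone\n' append to the same row once the host's outcome is known. The row-then-outcome pattern in isolation (the host data below is made up):

import sys

hosts = [(1, '10.0.0.5', 'db01.example.com', True),
         (2, '10.0.0.9', 'web01.example.com', False)]

for host_id, ip, dns, ok in hosts:
    # fixed-width columns; no newline yet, so the row stays open
    sys.stdout.write('%4d\t%-16s\t%-40s' % (host_id, ip, dns))
    sys.stdout.flush()
    # ... per-host processing would run here ...
    if not ok:
        sys.stdout.write('\t[NO FORMATTER]')
    sys.stdout.write('\tdone\n')    # close the row with its outcome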
2
Example 46
def process_input(self, data, input_prompt, lineno):
"""
Process data block for INPUT token.
"""
decorator, input, rest = data
image_file = None
image_directive = None
is_verbatim = decorator=='@verbatim' or self.is_verbatim
is_doctest = (decorator is not None and \
decorator.startswith('@doctest')) or self.is_doctest
is_suppress = decorator=='@suppress' or self.is_suppress
is_okexcept = decorator=='@okexcept' or self.is_okexcept
is_okwarning = decorator=='@okwarning' or self.is_okwarning
is_savefig = decorator is not None and \
decorator.startswith('@savefig')
# set the encodings to be used by DecodingStringIO
# to convert the execution output into unicode if
# needed. this attrib is set by IpythonDirective.run()
# based on the specified block options, defaulting to ['utf8']
self.cout.set_encodings(self.output_encoding)
input_lines = input.split('\n')
if len(input_lines) > 1:
if input_lines[-1] != "":
input_lines.append('') # make sure there's a blank line
# so splitter buffer gets reset
continuation = ' %s:'%''.join(['.']*(len(str(lineno))+2))
if is_savefig:
image_file, image_directive = self.process_image(decorator)
ret = []
is_semicolon = False
# Hold the execution count, if requested to do so.
if is_suppress and self.hold_count:
store_history = False
else:
store_history = True
# Note: catch_warnings is not thread safe
with warnings.catch_warnings(record=True) as ws:
for i, line in enumerate(input_lines):
if line.endswith(';'):
is_semicolon = True
if i == 0:
# process the first input line
if is_verbatim:
self.process_input_line('')
self.IP.execution_count += 1 # increment it anyway
else:
# only submit the line in non-verbatim mode
self.process_input_line(line, store_history=store_history)
formatted_line = '%s %s'%(input_prompt, line)
else:
# process a continuation line
if not is_verbatim:
self.process_input_line(line, store_history=store_history)
formatted_line = '%s %s'%(continuation, line)
if not is_suppress:
ret.append(formatted_line)
if not is_suppress and len(rest.strip()) and is_verbatim:
# the "rest" is the standard output of the
# input, which needs to be added in
# verbatim mode
ret.append(rest)
self.cout.seek(0)
output = self.cout.read()
if not is_suppress and not is_semicolon:
ret.append(output)
elif is_semicolon: # get spacing right
ret.append('')
# context information
filename = self.state.document.current_source
lineno = self.state.document.current_line
# output any exceptions raised during execution to stdout
# unless :okexcept: has been specified.
if not is_okexcept and "Traceback" in output:
s = "\nException in %s at block ending on line %s\n" % (filename, lineno)
s += "Specify :okexcept: as an option in the ipython:: block to suppress this message\n"
sys.stdout.write('\n\n>>>' + ('-' * 73))
sys.stdout.write(s)
sys.stdout.write(output)
sys.stdout.write('<<<' + ('-' * 73) + '\n\n')
# output any warning raised during execution to stdout
# unless :okwarning: has been specified.
if not is_okwarning:
for w in ws:
s = "\nWarning in %s at block ending on line %s\n" % (filename, lineno)
s += "Specify :okwarning: as an option in the ipython:: block to suppress this message\n"
sys.stdout.write('\n\n>>>' + ('-' * 73))
sys.stdout.write(s)
sys.stdout.write('-' * 76 + '\n')
s=warnings.formatwarning(w.message, w.category,
w.filename, w.lineno, w.line)
sys.stdout.write(s)
sys.stdout.write('<<<' + ('-' * 73) + '\n')
self.cout.truncate(0)
return (ret, input_lines, output, is_doctest, decorator, image_file,
image_directive)
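The directive brackets captured exceptions and warnings between '>>>' and '<<<' rules so they stand out from the surrounding build output. Reduced to its essentials (the message text is illustrative):

import sys

def report(title, body):
    # open and close the block with distinctive rules, as above
    sys.stdout.write('\n\n>>>' + ('-' * 73))
    sys.stdout.write('\n' + title + '\n')
    sys.stdout.write(body)
    sys.stdout.write('<<<' + ('-' * 73) + '\n\n')

report('Exception in conf.py at block ending on line 12',
       'Traceback (most recent call last): ...\n')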
2
Example 47
Project: raspberry_pwn Source File: sqlharvest.py
def main():
tables = dict()
cookies = cookielib.CookieJar()
cookie_processor = urllib2.HTTPCookieProcessor(cookies)
opener = urllib2.build_opener(cookie_processor)
opener.addheaders = [("User-Agent", USER_AGENT)]
conn = opener.open(SEARCH_URL)
page = conn.read() # set initial cookie values
config = ConfigParser.ConfigParser()
config.read(CONFIG_FILE)
if not config.has_section("options"):
config.add_section("options")
if not config.has_option("options", "index"):
config.set("options", "index", "0")
i = int(config.get("options", "index"))
try:
with open(TABLES_FILE, 'r') as f:
for line in f.xreadlines():
if len(line) > 0 and ',' in line:
temp = line.split(',')
tables[temp[0]] = int(temp[1])
except:
pass
socket.setdefaulttimeout(TIMEOUT)
files, old_files = None, None
try:
while True:
abort = False
old_files = files
files = []
try:
conn = opener.open("%s&q=%s&start=%d&sa=N" % (SEARCH_URL, QUERY.replace(' ', '+'), i * 10))
page = conn.read()
for match in re.finditer(REGEX_URLS, page):
files.append(urllib.unquote(match.group(1)))
if len(files) >= 10:
break
abort = (files == old_files)
except KeyboardInterrupt:
raise
except Exception, msg:
print msg
if abort:
break
sys.stdout.write("\n---------------\n")
sys.stdout.write("Result page #%d\n" % (i + 1))
sys.stdout.write("---------------\n")
for sqlfile in files:
print sqlfile
try:
req = urllib2.Request(sqlfile)
response = urllib2.urlopen(req)
if "Content-Length" in response.headers:
if int(response.headers.get("Content-Length")) > MAX_FILE_SIZE:
continue
page = response.read()
found = False
counter = 0
for match in re.finditer(REGEX_RESULT, page):
counter += 1
table = match.group("result").strip().strip("`\"'").replace('"."', ".").replace("].[", ".").strip('[]')
if table and not any(_ in table for _ in ('>', '<', '--', ' ')):
found = True
sys.stdout.write('*')
if table in tables:
tables[table] += 1
else:
tables[table] = 1
if found:
sys.stdout.write("\n")
except KeyboardInterrupt:
raise
except Exception, msg:
print msg
else:
i += 1
except KeyboardInterrupt:
pass
finally:
with open(TABLES_FILE, 'w+') as f:
tables = sorted(tables.items(), key=itemgetter(1), reverse=True)
for table, count in tables:
f.write("%s,%d\n" % (table, count))
config.set("options", "index", str(i + 1))
with open(CONFIG_FILE, 'w+') as f:
config.write(f)
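main() writes one '*' per table name it extracts from a result page, then breaks the line only if anything was found, giving a compact per-file tally. The same idea in a few lines (the match list is fabricated):

import sys

matches = ['users', 'orders', 'sessions']   # stand-ins for regex hits

found = False
for table in matches:
    found = True
    sys.stdout.write('*')      # one mark per extracted table
    sys.stdout.flush()
if found:
    sys.stdout.write('\n')     # only break the line if marks were printed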
2
Example 48
Project: bonding Source File: bonding.py
def peers(quiet=True):
    if os.geteuid() != 0:
        print ('%sroot privileges are needed to properly check for bonding '
               'peers. Skipping...%s' % (RED, RESET))
        return {}

    syslog.openlog('bonding')
    syslog.syslog('Scanning for bonding interface peers')

    ifaces = get_iface_list()

    # Enable all normal interfaces
    if not quiet:
        sys.stdout.write('Enabling interfaces')
        sys.stdout.flush()
    for iface in ifaces:
        if is_iface_loopback(iface) or is_iface_master(iface):
            continue
        if not quiet:
            sys.stdout.write('.')
            sys.stdout.flush()
        syslog.syslog('Enabling interface %s' % iface)
        try:
            set_iface_flag(iface, IFF_UP)
        except IOError, e:
            raise SystemExit('%s %s. This generally indicates a misconfigured '
                             'interface' % (e, iface))

    if not quiet:
        print '\nSleeping 5 seconds for switch port negotiation...'
    time.sleep(5)

    if not quiet:
        sys.stdout.write('Scanning')
        sys.stdout.flush()

    secondaries = []
    groups = {}
    for send_iface in ifaces:
        if not quiet:
            sys.stdout.write('.')
            sys.stdout.flush()
        if (is_iface_loopback(send_iface) or is_iface_master(send_iface) or
                send_iface in secondaries):
            continue

        # The data required for building the frame
        # Static data for frame payload that includes the sending interface
        static = 'IF%sIF' % send_iface
        # Build the rest of the payload using random data
        payload = '%s%s' % (static, os.urandom(46 - len(static)))
        # Broadcast FF:FF:FF:FF:FF:FF
        dst_mac = '\xff\xff\xff\xff\xff\xff'
        if USEREALSRCMAC:
            # The real MAC address of the sending interface
            src_mac = get_mac_addr_raw(send_iface)
        else:
            # Invalid source MAC
            src_mac = '\x00\x00\x00\x00\x00\x00'
        # Unregistered EtherType, in this case for Interface Peer Discovery
        frame_type = '\x50\x44'

        # Set up the sending interface socket
        s1 = socket.socket(socket.AF_PACKET, socket.SOCK_RAW,
                           socket.htons(ETH_P_ALL))
        s1.setsockopt(socket.SOL_SOCKET, SO_BINDTODEVICE, send_iface + '\0')
        s1.setsockopt(socket.SOL_SOCKET, socket.SO_BROADCAST, 1)
        s1.bind((send_iface, 0))
        s1.setblocking(0)

        for recv_iface in ifaces:
            if not quiet:
                sys.stdout.write('.')
                sys.stdout.flush()
            if (is_iface_loopback(recv_iface) or is_iface_master(recv_iface) or
                    recv_iface == send_iface):
                continue

            # Set up the receiving interface socket
            s2 = socket.socket(socket.AF_PACKET, socket.SOCK_RAW,
                               socket.htons(ETH_P_ALL))
            s2.setsockopt(socket.SOL_SOCKET, SO_BINDTODEVICE,
                          recv_iface + '\0')
            s2.bind((recv_iface, 0))
            s2.settimeout(TIMEOUT)

            # Place current receiving interface into promiscuous mode
            current_flags = 0
            ifreq = fcntl.ioctl(s2.fileno(), SIOCGIFFLAGS,
                                struct.pack('256s', recv_iface[:15]))
            (current_flags,) = struct.unpack('16xH', ifreq[:18])
            current_flags |= IFF_PROMISC
            ifreq = struct.pack('16sH', recv_iface, current_flags)
            fcntl.ioctl(s2.fileno(), SIOCSIFFLAGS, ifreq)

            # Try sending and receiving 3 times to give us better chances of
            # catching the send
            # Generally we always catch on the first time
            for i in xrange(0, 3):
                try:
                    s1.sendall('%s%s%s%s' % (dst_mac, src_mac, frame_type,
                                             payload))
                except (socket.timeout, socket.error):
                    continue
                try:
                    data = s2.recv(60)
                except (socket.timeout, socket.error):
                    continue
                recv_frame_type = data[12:14]
                recv_payload = data[14:]
                if payload == recv_payload and recv_frame_type == frame_type:
                    if send_iface not in groups:
                        groups[send_iface] = []
                    groups[send_iface].append(recv_iface)
                    secondaries.append(recv_iface)
                    break

            # Take the receiving interface out of promiscuous mode
            current_flags ^= IFF_PROMISC
            ifreq = struct.pack('16sH', recv_iface, current_flags)
            fcntl.ioctl(s1.fileno(), SIOCSIFFLAGS, ifreq)

            s2.close()
        s1.close()

    for iface in sorted(groups.keys()):
        syslog.syslog('Interface group: %s %s' %
                      (iface, ' '.join(groups[iface])))

    syslog.syslog('Scan for bonding interface peers completed')
    if not quiet:
        print 'Done'
    return groups
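Both loops above use the classic "label, then dots" progress style: write the label without a newline, flush, then write and flush a '.' for each unit of work so the dots appear in real time rather than when the buffer fills. A minimal sketch under the assumption of a work callable:

import sys
import time

def run_with_dots(label, tasks, work):
    # Print the label once, then one flushed dot per completed task.
    sys.stdout.write(label)
    sys.stdout.flush()
    for task in tasks:
        work(task)
        sys.stdout.write('.')
        sys.stdout.flush()  # without the flush, all dots appear at once at exit
    sys.stdout.write('\nDone\n')

run_with_dots('Scanning', range(5), lambda t: time.sleep(0.2))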
Example 49
Project: astor Source File: rtrip.py
def convert(srctree, dsttree=dsttree, readonly=False, dumpall=False):
    """Walk the srctree, and convert/copy all python files
    into the dsttree
    """
    allow_ast_comparison()

    parse_file = code_to_ast.parse_file
    find_py_files = code_to_ast.find_py_files
    srctree = os.path.normpath(srctree)

    if not readonly:
        dsttree = os.path.normpath(dsttree)
        logging.info('')
        logging.info('Trashing ' + dsttree)
        shutil.rmtree(dsttree, True)

    unknown_src_nodes = set()
    unknown_dst_nodes = set()
    badfiles = set()
    broken = []
    # TODO: When issue #26 resolved, remove UnicodeDecodeError
    handled_exceptions = SyntaxError, UnicodeDecodeError

    oldpath = None
    allfiles = find_py_files(srctree, None if readonly else dsttree)
    for srcpath, fname in allfiles:
        # Create destination directory
        if not readonly and srcpath != oldpath:
            oldpath = srcpath
            if srcpath >= srctree:
                dstpath = srcpath.replace(srctree, dsttree, 1)
                if not dstpath.startswith(dsttree):
                    raise ValueError("%s not a subdirectory of %s" %
                                     (dstpath, dsttree))
            else:
                assert srctree.startswith(srcpath)
                dstpath = dsttree
            os.makedirs(dstpath)

        srcfname = os.path.join(srcpath, fname)
        logging.info('Converting %s' % srcfname)
        try:
            srcast = parse_file(srcfname)
        except handled_exceptions:
            badfiles.add(srcfname)
            continue

        dsttxt = to_source(srcast)

        if not readonly:
            dstfname = os.path.join(dstpath, fname)
            try:
                with open(dstfname, 'w') as f:
                    f.write(dsttxt)
            except UnicodeEncodeError:
                badfiles.add(dstfname)

        # As a sanity check, make sure that ASTs themselves
        # round-trip OK
        try:
            dstast = ast.parse(dsttxt) if readonly else parse_file(dstfname)
        except SyntaxError:
            dstast = []
        unknown_src_nodes.update(strip_tree(srcast))
        unknown_dst_nodes.update(strip_tree(dstast))
        if dumpall or srcast != dstast:
            srcdump = dump_tree(srcast)
            dstdump = dump_tree(dstast)
            bad = srcdump != dstdump
            logging.warning('    calculating dump -- %s' %
                            ('bad' if bad else 'OK'))
            if bad:
                broken.append(srcfname)
            if dumpall or bad:
                if not readonly:
                    try:
                        with open(dstfname[:-3] + '.srcdmp', 'w') as f:
                            f.write(srcdump)
                    except UnicodeEncodeError:
                        badfiles.add(dstfname[:-3] + '.srcdmp')
                    try:
                        with open(dstfname[:-3] + '.dstdmp', 'w') as f:
                            f.write(dstdump)
                    except UnicodeEncodeError:
                        badfiles.add(dstfname[:-3] + '.dstdmp')
                elif dumpall:
                    sys.stdout.write('\n\nAST:\n\n    ')
                    sys.stdout.write(srcdump.replace('\n', '\n    '))
                    sys.stdout.write('\n\nDecompile:\n\n    ')
                    sys.stdout.write(dsttxt.replace('\n', '\n    '))
                    sys.stdout.write('\n\nNew AST:\n\n    ')
                    sys.stdout.write('(same as old)' if dstdump == srcdump
                                     else dstdump.replace('\n', '\n    '))
                    sys.stdout.write('\n')

    if badfiles:
        logging.warning('\nFiles not processed due to syntax errors:')
        for fname in sorted(badfiles):
            logging.warning('    %s' % fname)
    if broken:
        logging.warning('\nFiles failed to round-trip to AST:')
        for srcfname in broken:
            logging.warning('    %s' % srcfname)

    ok_to_strip = 'col_offset _precedence _use_parens lineno _p_op _pp'
    ok_to_strip = set(ok_to_strip.split())
    bad_nodes = (unknown_dst_nodes | unknown_src_nodes) - ok_to_strip
    if bad_nodes:
        logging.error('\nERROR -- UNKNOWN NODES STRIPPED: %s' % bad_nodes)
    logging.info('\n')
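In the elif dumpall: branch, multi-line dumps are indented by four spaces simply by rewriting every newline as a newline plus padding before handing the text to sys.stdout.write. The same trick in isolation (a sketch; the helper name is mine, and on Python 3 textwrap.indent does the equivalent):

import sys

def write_indented(title, body, pad='    '):
    # Print a section heading, then the body shifted right by `pad`.
    sys.stdout.write('\n\n%s:\n\n%s' % (title, pad))
    sys.stdout.write(body.replace('\n', '\n' + pad))
    sys.stdout.write('\n')

write_indented('AST', 'Module(\n  body=[...])')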
Example 50
Project: ptsa Source File: meld.py
def __init__(self, fe_formula, re_formula,
             re_group, dep_data, ind_data,
             factors=None, row_mask=None,
             dep_mask=None,
             use_ranks=False, use_norm=True,
             memmap=False, memmap_dir=None,
             resid_formula=None,
             svd_terms=None, feat_thresh=0.05,
             feat_nboot=1000, do_tfce=False,
             connectivity=None, shape=None,
             dt=.01, E=2/3., H=2.0,
             n_jobs=1, verbose=10,
             lmer_opts=None):
    """
    dep_data can be an array or a dict of arrays (possibly
    memmapped), one for each group.

    ind_data can be a rec_array for each group or one large rec_array
    with a grouping variable.
    """
    if verbose > 0:
        sys.stdout.write('Initializing...')
        sys.stdout.flush()
        start_time = time.time()

    # save the formula
    self._formula_str = fe_formula + ' + ' + re_formula

    # see if there's a resid formula
    if resid_formula:
        # the random effects are the same
        self._resid_formula_str = resid_formula + ' + ' + re_formula
    else:
        self._resid_formula_str = None

    # save whether using ranks
    self._use_ranks = use_ranks

    # save the thresh for keeping a feature
    self._feat_thresh = feat_thresh
    self._feat_nboot = feat_nboot
    self._do_tfce = do_tfce
    self._connectivity = connectivity
    self._dt = dt
    self._E = E
    self._H = H

    # see if memmapping
    self._memmap = memmap

    # save job info
    self._n_jobs = n_jobs
    self._verbose = verbose

    # eventually fill the feature shape
    self._feat_shape = None

    # handle the dep_mask
    self._dep_mask = dep_mask

    # fill A,M,O,D
    self._A = {}
    self._M = {}
    self._O = {}
    self._D = {}
    O = []

    # loop over unique grouping var
    self._re_group = re_group
    if isinstance(ind_data, dict):
        # groups are the keys
        self._groups = np.array(ind_data.keys())
    else:
        # groups need to be extracted from the recarray
        self._groups = np.unique(ind_data[re_group])
    for g in self._groups:
        # get that subj inds
        if isinstance(ind_data, dict):
            # the index is just the group into that dict
            ind_ind = g
        else:
            # select the rows based on the group
            ind_ind = ind_data[re_group] == g

        # process the row mask
        if row_mask is None:
            # no mask, so all good
            row_ind = np.ones(len(ind_data[ind_ind]), dtype=np.bool)
        elif isinstance(row_mask, dict):
            # pull the row_mask from the dict
            row_ind = row_mask[g]
        else:
            # index into it with ind_ind
            row_ind = row_mask[ind_ind]

        # extract that group's A,M,O
        # first save the observations (rows of A)
        self._O[g] = ind_data[ind_ind][row_ind]
        if use_ranks:
            # loop over non-factors and rank them
            for n in self._O[g].dtype.names:
                if (n in factors) or isinstance(self._O[g][n][0], str):
                    continue
                self._O[g][n] = rankdata(self._O[g][n])
        O.append(self._O[g])

        # eventually allow for dict of data files for dep_data
        if isinstance(dep_data, dict):
            # the index is just the group into that dict
            dep_ind = g
        else:
            # select the rows based on the group
            dep_ind = ind_ind

        # save feature shape if necessary
        if self._feat_shape is None:
            self._feat_shape = dep_data[dep_ind].shape[1:]

        # handle the mask
        if self._dep_mask is None:
            self._dep_mask = np.ones(self._feat_shape,
                                     dtype=np.bool)

        # create the connectivity (will mask later)
        if self._do_tfce and self._connectivity is None and \
           (len(self._dep_mask.flatten()) > self._dep_mask.sum()):
            # create the connectivity
            self._connectivity = cluster.sparse_dim_connectivity([cluster.simple_neighbors_1d(n)
                                                                  for n in self._feat_shape])

        # save D index into data (apply row and feature masks)
        # this will also reshape it
        self._D[g] = dep_data[dep_ind][row_ind][:, self._dep_mask].copy()
        # reshape it
        #self._D[g] = self._D[g].reshape((self._D[g].shape[0], -1))
        if use_ranks:
            if verbose > 0:
                sys.stdout.write('Ranking %s...' % (str(g)))
                sys.stdout.flush()
            for i in xrange(self._D[g].shape[1]):
                # rank it
                self._D[g][:, i] = rankdata(self._D[g][:, i])
                # normalize it
                self._D[g][:, i] = ((self._D[g][:, i] - 1) /
                                    (len(self._D[g][:, i]) - 1))

        # save M from D so we can have a normalized version
        self._M[g] = self._D[g].copy()

        # remove any NaN's in dep_data
        self._D[g][np.isnan(self._D[g])] = 0.0

        # normalize M
        if use_norm:
            self._M[g] -= self._M[g].mean(0)
            self._M[g] /= np.sqrt((self._M[g]**2).sum(0))

        # determine A from the model.matrix
        rdf = DataFrame({k: (FactorVector(self._O[g][k])
                             if k in factors else self._O[g][k])
                         for k in self._O[g].dtype.names})

        # model spec as data frame
        ms = r['data.frame'](r_model_matrix(Formula(fe_formula), data=rdf))

        cols = list(r['names'](ms))
        if svd_terms is None:
            self._svd_terms = [c for c in cols
                               if 'Intercept' not in c]
        else:
            self._svd_terms = svd_terms

        # self._A[g] = np.vstack([ms[c]  # np.array(ms.rx(c))
        self._A[g] = np.concatenate([np.array(ms.rx(c))
                                     for c in self._svd_terms]).T

        if use_ranks:
            for i in xrange(self._A[g].shape[1]):
                # rank it
                self._A[g][:, i] = rankdata(self._A[g][:, i])
                # normalize it
                self._A[g][:, i] = ((self._A[g][:, i] - 1) /
                                    (len(self._A[g][:, i]) - 1))

        # normalize A
        if True:  # use_norm:
            self._A[g] -= self._A[g].mean(0)
            self._A[g] /= np.sqrt((self._A[g]**2).sum(0))

        # memmap if desired
        if self._memmap:
            self._M[g] = _memmap_array(self._M[g], memmap_dir,
                                       unique_id=str(g))
            self._D[g] = _memmap_array(self._D[g], memmap_dir,
                                       unique_id=str(g))

    # save the new O
    self._O = O
    if lmer_opts is None:
        lmer_opts = {}
    self._lmer_opts = lmer_opts
    self._factors = factors

    # mask the connectivity
    if self._do_tfce and (len(self._dep_mask.flatten()) > self._dep_mask.sum()):
        self._connectivity = self._connectivity.tolil()[self._dep_mask.flatten()][:, self._dep_mask.flatten()].tocoo()

    # prepare for the perms and boots and jackknife
    self._perms = []
    self._tp = []
    self._tb = []
    self._tj = []
    self._pfmask = []

    if verbose > 0:
        sys.stdout.write('Done (%.2g sec)\n' % (time.time() - start_time))
        sys.stdout.write('Processing actual data...')
        sys.stdout.flush()
        start_time = time.time()

    global _global_meld
    _global_meld[id(self)] = self

    # run for actual data (returns both perm and boot vals)
    self._R = None
    self._ss = None
    self._mer = None
    tp, tb, R, feat_mask, ss, mer = _eval_model(id(self), None)
    self._R = R
    self._tp.append(tp)
    self._tb.append(tb)
    self._feat_mask = feat_mask
    self._fmask = ~feat_mask[0]
    self._pfmask.append(~feat_mask[0])
    self._ss = ss
    self._mer = mer

    if verbose > 0:
        sys.stdout.write('Done (%.2g sec)\n' % (time.time() - start_time))
        sys.stdout.flush()
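This constructor brackets each phase with a pair of writes: a status with no trailing newline ('Initializing...'), a flush so it appears before the work starts, and a 'Done (%.2g sec)' line once the phase completes. The pattern factors naturally into a context manager; the following sketch is my own packaging of it, not an API from ptsa:

import sys
import time
from contextlib import contextmanager

@contextmanager
def phase(label, verbose=True):
    # Announce the phase, run the body, then report elapsed time inline.
    if verbose:
        sys.stdout.write(label + '...')
        sys.stdout.flush()
    start = time.time()
    yield
    if verbose:
        sys.stdout.write('Done (%.2g sec)\n' % (time.time() - start))

with phase('Initializing'):
    time.sleep(0.5)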