#!/usr/bin/env python2.2 import string import sys import os import getopt import random class Dataset: fname = "" label = "" events = None def __init__(self,fname,label): self.fname = fname self.label = label self.events = [] F = open(fname,"r") while 1: line = F.readline() if not line: break self.events.append(line) def GenerateSteps(datasets,name,initial_step_size,step_size_multiplier,min_step_size,num_steps): print "Writing event stream to " + name + ".events." print "Writing event labels to " + name + ".labels." print "Writing summary to " + name + ".smry." event_F = open(name + ".events","w") label_F = open(name + ".labels","w") smry_F = open(name + ".smry","w") smry_F.write("\n") for dataset in datasets: smry_F.write("#" + dataset.fname + " as " + dataset.label + "\n") smry_F.write("\n") smry_F.write("#initial_step_size = " + str(initial_step_size) + "\n") smry_F.write("#step_size_multiplier = " + str(step_size_multiplier) + "\n") smry_F.write("#min_step_size = " + str(min_step_size) + "\n") smry_F.write("#num_steps = " + str(num_steps) + "\n") smry_F.write("\n") smry_F.write("#steps taken:\n") event_count = 0 step_count = 0 step_size = initial_step_size dataset_index = len(datasets) while step_count < num_steps: if dataset_index >= len(datasets): random.shuffle(datasets) dataset_index = 0 dataset = datasets[dataset_index] dataset_index = dataset_index + 1 smry_F.write(dataset.label + " " + str(step_size) + "\n") for i in range(0,step_size): event = random.choice(dataset.events) event_F.write(event) label_F.write("Event " + str(event_count) + " " + dataset.label + "\n") event_count = event_count + 1 step_count = step_count + 1 step_size = long(step_size * step_size_multiplier) if step_size < min_step_size: step_size = min_step_size def PrintUsage(): msg = """ USAGE: mixer.py options input_file1 [as label1] input_file2 [as label2] ... Mixes together a stream of records produced from two or more samples. Each "step" taken by the algorithm switches between input samples. Output files: *.events (output stream of mixed events) *.labels (label of each event) *.smry (summary of actions taken) OPTIONS: --output_name=X --initial_step_size=10000 --step_size_multiplier=1 --min_step_size=10 --steps=20 """ sys.stderr.write(msg) long_options = [ "initial_step_size=", "step_size_multiplier=", "min_step_size=", "steps=", "output_name=", "help" ] options,args = getopt.getopt(sys.argv[1:],"h",long_options) initial_step_size = 10000 step_size_multiplier = 1 min_step_size = 10 num_steps = 20 name = None for option,value in options: if option == "--initial_step_size": initial_step_size = long(value) elif option == "--step_size_multiplier": step_size_multiplier = float(value) elif option == "--min_step_size": min_step_size = long(value) elif option == "--steps": num_steps = long(value) elif option == "--output_name": name = value elif option == "--help" or option == "-h": PrintUsage() sys.exit(0) if not name: sys.stderr.write("You must specify an output name.\n") PrintUsage() sys.exit(2) #labels = [] #for c in string.ascii_uppercase: # if len(labels) == len(args): break # labels.append(c) #random.shuffle(labels) datasets = [] i = 0 while i < len(args): fname = args[i] i = i + 1 #label = labels[len(datasets)] label = os.path.basename(fname) if i+1