bcbio.distributed.transaction.file_transaction

Here are examples of the Python API bcbio.distributed.transaction.file_transaction, taken from open source projects. Voting an example up indicates that it is useful and appropriate.

199 Examples 7

Example 1

Project: bcbio-nextgen Source File: bubbletree.py
def _create_subset_file(in_file, het_region_bed, work_dir, data):
    """Extract a set of pre-calculated smaller regions from the input VCF.

    Intersects `het_region_bed` with the base CNV regions and runs
    `bcftools view` to write `<input basename>-origsubset.bcf` into
    `work_dir`. Nothing is run when `utils.file_uptodate` reports the output
    as current relative to `in_file`.

    Returns the path to the subset BCF.
    """
    base_cnv_bed = shared.get_base_cnv_regions(data, work_dir)
    region_bed = bedutils.intersect_two(het_region_bed, base_cnv_bed, work_dir, data)
    in_base = utils.splitext_plus(os.path.basename(in_file))[0]
    out_file = os.path.join(work_dir, "%s-origsubset.bcf" % in_base)
    if utils.file_uptodate(out_file, in_file):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        # Only pass a region restriction when the intersected BED file exists.
        if utils.file_exists(region_bed):
            regions = "-R %s" % region_bed
        else:
            regions = ""
        cmd = "bcftools view {regions} -o {tx_out_file} -O b {in_file}"
        do.run(cmd.format(regions=regions, tx_out_file=tx_out_file, in_file=in_file),
               "Extract regions for BubbleTree frequency determination")
    return out_file

Example 2

Project: bcbio-nextgen Source File: install.py
def _set_matplotlib_default_backend():
    """
    Rewrite the `backend` entry of the default matplotlibrc to 'agg'.

    matplotlib tries to draw to a display when one is available, which is not
    wanted for non-interactive runs. Setting the backend to 'Agg' before
    importing still caused issues, so as a hack (until a better solution is
    found) the default matplotlibrc is edited instead. Does nothing when
    matplotlib is not installed.
    """
    if not _matplotlib_installed():
        return
    import matplotlib
    matplotlib.use('Agg', force=True)
    rc_path = matplotlib.matplotlib_fname()
    with file_transaction(rc_path) as tx_out_file:
        with open(rc_path) as in_handle:
            with open(tx_out_file, "w") as out_handle:
                for line in in_handle:
                    # The setting name is everything before the first colon.
                    key = line.split(":")[0].strip()
                    out_handle.write("backend: agg\n" if key == "backend" else line)

Example 3

Project: bcbio-nextgen Source File: picardrun.py
def picard_sort(picard, align_bam, sort_order="coordinate",
                out_file=None, compression_level=None, pipe=False):
    """Sort a BAM file with Picard SortSam (coordinate order by default).

    `out_file` defaults to `<align_bam stem>-sort<ext>`; an existing output
    is reused. When `pipe` is true, the final path rather than the
    transactional one is handed to Picard as OUTPUT.

    Returns the output path.
    """
    stem, suffix = os.path.splitext(align_bam)
    if out_file is None:
        out_file = "%s-sort%s" % (stem, suffix)
    if file_exists(out_file):
        return out_file
    with tx_tmpdir(picard._config) as tmp_dir:
        with file_transaction(picard._config, out_file) as tx_out_file:
            target = out_file if pipe else tx_out_file
            opts = [("INPUT", align_bam),
                    ("OUTPUT", target),
                    ("TMP_DIR", tmp_dir),
                    ("SORT_ORDER", sort_order)]
            if compression_level:
                opts.append(("COMPRESSION_LEVEL", compression_level))
            picard.run("SortSam", opts, pipe=pipe)
    return out_file

Example 4

Project: bcbio-nextgen Source File: objectstore.py
    @classmethod
    def download(cls, filename, input_dir, dl_dir=None):
        """Fetch a remote blob to local disk and return the local path.

        When `dl_dir` is not supplied, a directory named after the container
        and the blob's directory is created under `input_dir`. A local file
        that already exists is reused rather than downloaded again.
        """
        remote = cls.parse_remote(filename)
        if not dl_dir:
            dl_dir = os.path.join(input_dir, remote.container,
                                  os.path.dirname(remote.blob))
            utils.safe_makedir(dl_dir)

        out_file = os.path.join(dl_dir, os.path.basename(remote.blob))
        if utils.file_exists(out_file):
            return out_file

        with file_transaction({}, out_file) as tx_out_file:
            service = cls.connect(filename)
            service.get_blob_to_path(container_name=remote.container,
                                     blob_name=remote.blob,
                                     file_path=tx_out_file)
        return out_file

Example 5

Project: bcbio-nextgen Source File: picardrun.py
def picard_fix_rgs(picard, in_bam, names):
    """Run Picard AddOrReplaceReadGroups, coordinate sorting the result.

    Read group values are taken from `names`: "rg", "pl", "pu" and "sample"
    must be present, while "lb" falls back to "unknown". Output is written to
    `<in_bam stem>-fixrgs.bam` and reused if it already exists.

    Returns the output BAM path.
    """
    out_file = "%s-fixrgs.bam" % os.path.splitext(in_bam)[0]
    if file_exists(out_file):
        return out_file
    with tx_tmpdir(picard._config) as tmp_dir:
        with file_transaction(picard._config, out_file) as tx_out_file:
            opts = [("INPUT", in_bam),
                    ("OUTPUT", tx_out_file),
                    ("SORT_ORDER", "coordinate")]
            opts += [("RGID", names["rg"]),
                     ("RGLB", names.get("lb", "unknown")),
                     ("RGPL", names["pl"]),
                     ("RGPU", names["pu"]),
                     ("RGSM", names["sample"])]
            opts.append(("TMP_DIR", tmp_dir))
            picard.run("AddOrReplaceReadGroups", opts)
    return out_file

Example 6

Project: bcbio-nextgen Source File: metrics.py
    def _collect_align_metrics(self, dup_bam, ref_file):
        """Run Picard CollectAlignmentSummaryMetrics unless the output exists.

        Returns the metrics path obtained from `_check_metrics_file`.
        """
        metrics_file = self._check_metrics_file(dup_bam, "align_metrics")
        if file_exists(metrics_file):
            return metrics_file
        with file_transaction(metrics_file) as tx_metrics:
            self._picard.run("CollectAlignmentSummaryMetrics",
                             [("INPUT", dup_bam),
                              ("OUTPUT", tx_metrics),
                              ("R", ref_file)])
        return metrics_file

Example 7

Project: bcbio-nextgen Source File: phylowgs.py
def _prep_inputs(vrn_info, cnv_info, somatic_info, work_dir, config):
    """Build the SSM and CNV input files for a PhyloWGS run.

    Invokes `create_phylowgs_inputs.py`, expected next to the running Python
    interpreter, on the prepared variant and CNV files. The script is only
    run when at least one of the two outputs is missing.

    Returns a tuple `(ssm_file, cnv_file)` of paths inside `work_dir`.
    """
    exe = os.path.join(os.path.dirname(sys.executable), "create_phylowgs_inputs.py")
    assert os.path.exists(exe), "Could not find input prep script for PhyloWGS runs."
    ssm_file = os.path.join(work_dir, "ssm_data.txt")
    cnv_file = os.path.join(work_dir, "cnv_data.txt")
    if utils.file_exists(ssm_file) and utils.file_exists(cnv_file):
        return ssm_file, cnv_file
    with file_transaction(somatic_info.tumor_data, ssm_file, cnv_file) as (tx_ssm_file, tx_cnv_file):
        variant_type, input_vcf_file = _prep_vrn_file(vrn_info["vrn_file"], vrn_info["variantcaller"],
                                                      work_dir, somatic_info, cnv_info["ignore"], config)
        input_cnv_file = _prep_cnv_file(cnv_info["subclones"], work_dir, somatic_info)
        cmd = [sys.executable, exe]
        cmd += ["--sample-size", str(config["sample_size"])]
        cmd += ["--tumor-sample", somatic_info.tumor_name]
        cmd += ["--battenberg", input_cnv_file]
        cmd += ["--cellularity", _read_contam(cnv_info["contamination"])]
        cmd += ["--output-cnvs", tx_cnv_file]
        cmd += ["--output-variants", tx_ssm_file]
        cmd += ["--variant-type", variant_type, input_vcf_file]
        do.run(cmd, "Prepare PhyloWGS inputs.")
    return ssm_file, cnv_file

Example 8

Project: bcbio-nextgen Source File: fastq.py
@utils.memoize_outfile(stem=".groom")
def groom(in_file, data, in_qual="illumina", out_dir=None, out_file=None):
    """
    Convert a FASTQ file with Illumina 1.3/1.5 quality scores to sanger
    format using `seqtk seq -Q64`, gzipping the result.

    When `in_qual` is "fastq-sanger" no conversion is run and `out_file` is
    returned as-is.
    """
    seqtk = config_utils.get_program("seqtk", data["config"])
    if in_qual != "fastq-sanger":
        with file_transaction(out_file) as tmp_out_file:
            template = "{seqtk} seq -Q64 {in_file} | gzip > {tmp_out_file}"
            cmd = template.format(seqtk=seqtk, in_file=in_file, tmp_out_file=tmp_out_file)
            do.run(cmd, "Converting %s to Sanger format." % in_file)
    else:
        logger.info("%s is already in Sanger format." % in_file)
    return out_file

Example 9

Project: bcbio-nextgen Source File: postalign.py
def umi_consensus(data):
    """Collapse UMI grouped reads into a consensus fastq pair for re-alignment.

    Pipes the work BAM through fgbio GroupReadsByUmi and
    CallMolecularConsensusReads into bamtofastq, writing
    `<bam stem>-cuemi-1.fq.gz` and `<bam stem>-cuemi-2.fq.gz`. Skipped when
    `utils.file_uptodate` reports the first output as current relative to
    the BAM.

    Returns the `(read 1, read 2)` fastq paths.
    """
    align_bam = dd.get_work_bam(data)
    bam_stem = utils.splitext_plus(align_bam)[0]
    f1_out = "%s-cuemi-1.fq.gz" % bam_stem
    f2_out = "%s-cuemi-2.fq.gz" % bam_stem
    if utils.file_uptodate(f1_out, align_bam):
        return f1_out, f2_out
    with file_transaction(data, f1_out, f2_out) as (tx_f1_out, tx_f2_out):
        jvm_opts = _get_fgbio_jvm_opts(data, os.path.dirname(tx_f1_out), 2)
        cmd = ("fgbio {jvm_opts} GroupReadsByUmi -m 1 -e 1 -s adjacency -i {align_bam} | "
               "fgbio {jvm_opts} CallMolecularConsensusReads -S queryname -i /dev/stdin -o /dev/stdout | "
               "bamtofastq F={tx_f1_out} F2={tx_f2_out} gz=1")
        do.run(cmd.format(jvm_opts=jvm_opts, align_bam=align_bam,
                          tx_f1_out=tx_f1_out, tx_f2_out=tx_f2_out),
               "UMI consensus fastq generation")
    return f1_out, f2_out

Example 10

Project: bcbio-nextgen Source File: __init__.py
def mapped(in_bam, config):
    """
    Write a BAM file containing only the mapped reads and return its path.

    Uses sambamba when `_get_sambamba` returns one and falls back to
    `samtools view -F 4` otherwise. An existing `<stem>.mapped.bam` is reused.
    """
    out_file = os.path.splitext(in_bam)[0] + ".mapped.bam"
    if utils.file_exists(out_file):
        return out_file
    sambamba = _get_sambamba(config)
    with file_transaction(config, out_file) as tx_out_file:
        if sambamba:
            template = ("{sambamba} view --format=bam -F 'not (unmapped or mate_is_unmapped)' "
                        "{in_bam} -o {tx_out_file}")
            cmd = template.format(sambamba=sambamba, in_bam=in_bam, tx_out_file=tx_out_file)
        else:
            samtools = config_utils.get_program("samtools", config)
            template = "{samtools} view -b -F 4 {in_bam} -o {tx_out_file}"
            cmd = template.format(samtools=samtools, in_bam=in_bam, tx_out_file=tx_out_file)
        do.run(cmd, "Filtering mapped reads to %s." % (tx_out_file))
    return out_file

Example 11

Project: bcbio-nextgen Source File: picardrun.py
def picard_fixmate(picard, align_bam):
    """Run Picard's FixMateInformation, producing a coordinate sorted BAM.

    Output is `<align_bam stem>-sort<ext>`; it is reused if already present.
    Returns the output path.
    """
    stem, suffix = os.path.splitext(align_bam)
    out_file = "%s-sort%s" % (stem, suffix)
    if file_exists(out_file):
        return out_file
    with tx_tmpdir(picard._config) as tmp_dir:
        with file_transaction(picard._config, out_file) as tx_out_file:
            picard.run("FixMateInformation",
                       [("INPUT", align_bam),
                        ("OUTPUT", tx_out_file),
                        ("TMP_DIR", tmp_dir),
                        ("SORT_ORDER", "coordinate")])
    return out_file

Example 12

Project: bcbio-nextgen Source File: picardrun.py
def picard_index_ref(picard, ref_file):
    """Create a Picard style `.dict` sequence dictionary for a reference genome.

    The dictionary is written beside the reference as `<ref stem>.dict` via
    CreateSequenceDictionary; an existing file is reused.
    Returns the dictionary path.
    """
    ref_stem = os.path.splitext(ref_file)[0]
    dict_file = "%s.dict" % ref_stem
    if file_exists(dict_file):
        return dict_file
    with file_transaction(picard._config, dict_file) as tx_dict_file:
        picard.run("CreateSequenceDictionary",
                   [("REFERENCE", ref_file),
                    ("OUTPUT", tx_dict_file)])
    return dict_file

Example 13

Project: bcbio-nextgen Source File: metrics.py
    def _gc_bias(self, dup_bam, ref_file):
        """Run Picard CollectGcBiasMetrics unless the metrics file exists.

        Returns `(gc_graph, gc_metrics)`, where the graph is a `-gc.pdf` file
        named after the metrics file.
        """
        gc_metrics = self._check_metrics_file(dup_bam, "gc_metrics")
        metrics_stem = os.path.splitext(gc_metrics)[0]
        gc_graph = "%s-gc.pdf" % metrics_stem
        if file_exists(gc_metrics):
            return gc_graph, gc_metrics
        with file_transaction(gc_graph, gc_metrics) as (tx_graph, tx_metrics):
            self._picard.run("CollectGcBiasMetrics",
                             [("INPUT", dup_bam),
                              ("OUTPUT", tx_metrics),
                              ("CHART", tx_graph),
                              ("R", ref_file)])
        return gc_graph, gc_metrics

Example 14

Project: bcbio-nextgen Source File: bubbletree.py
def _prep_cnv_file(cns_file, svcaller, work_dir, data):
    """Create a CSV file of CNV calls with log2 and number of marks.

    Reads the tab-delimited `cns_file`, skips its header row and writes one
    CSV row (`chrom, start, end, num.mark, seg.mean`) for each segment whose
    chromosome passes `chromhacks.is_autosomal`. Only the first six input
    columns are used. The output is `<cns basename>-<svcaller>-prep.csv` in
    `work_dir` and is regenerated only when out of date relative to the input.

    Returns the path to the prepared CSV file.
    """
    in_file = cns_file
    out_file = os.path.join(work_dir, "%s-%s-prep.csv" % (utils.splitext_plus(os.path.basename(in_file))[0],
                                                          svcaller))
    if not utils.file_uptodate(out_file, in_file):
        with file_transaction(data, out_file) as tx_out_file:
            with open(in_file) as in_handle:
                with open(tx_out_file, "w") as out_handle:
                    reader = csv.reader(in_handle, dialect="excel-tab")
                    writer = csv.writer(out_handle)
                    writer.writerow(["chrom", "start", "end", "num.mark", "seg.mean"])
                    # Skip the header. The builtin next() works on both
                    # Python 2 and 3 (reader.next() is Python 2 only), and the
                    # default avoids StopIteration on an empty input file.
                    next(reader, None)
                    for chrom, start, end, _, log2, probes in (xs[:6] for xs in reader):
                        if chromhacks.is_autosomal(chrom):
                            writer.writerow([_to_ucsc_style(chrom), start, end, probes, log2])
    return out_file

Example 15

Project: bipy Source File: tophat.py
    def __call__(self, in_file):
        """Align `in_file` and return the output path.

        Dispatches to the paired-end or single-end bowtie helper depending on
        `is_pair(in_file)`. If the output already exists it is returned
        immediately, without emitting the end message.
        """
        self._start_message(in_file)
        out_file = self.out_file(in_file)

        if file_exists(out_file):
            return out_file

        with file_transaction(out_file) as tmp_out_file:
            aligner = self._bowtie_pe if is_pair(in_file) else self._bowtie_se
            aligner(in_file, tmp_out_file)
        self._end_message(in_file)

        return out_file

Example 16

Project: bcbio-nextgen Source File: phylowgs.py
def _prepare_summary(evolve_file, ssm_file, cnv_file, work_dir, somatic_info):
    """Write a gene-labelled heterogeneity summary of PhyloWGS predictions.

    For every tree read from `evolve_file` the output holds a `* Tree N`
    heading, the tree lines, and one tab-separated row
    (`id, frequency, comma-joined genes`) per entry. The file is named
    `<tumor name>-phylowgs.txt` in `work_dir` and rebuilt only when out of
    date relative to `evolve_file`.

    Returns the summary file path.
    """
    out_file = os.path.join(work_dir, "%s-phylowgs.txt" % somatic_info.tumor_name)
    if utils.file_uptodate(out_file, evolve_file):
        return out_file
    with file_transaction(somatic_info.tumor_data, out_file) as tx_out_file:
        with open(tx_out_file, "w") as out_handle:
            ssm_locs = _read_ssm_locs(ssm_file)
            cnv_ssms = _read_cnv_ssms(cnv_file)
            for tree_idx, (ids, tree) in enumerate(_evolve_reader(evolve_file)):
                out_handle.write("* Tree %s\n" % (tree_idx + 1))
                out_handle.write("\n" + "\n".join(tree) + "\n\n")
                for nid, freq, gids in ids:
                    genes = _gids_to_genes(gids, ssm_locs, cnv_ssms, somatic_info.tumor_data)
                    row = (nid, freq, ",".join(genes))
                    out_handle.write("%s\t%s\t%s\n" % row)
                out_handle.write("\n")
    return out_file

Example 17

Project: bcbio-nextgen Source File: picardrun.py
def picard_rnaseq_metrics(picard, align_bam, ref, ribo="null", out_file=None):
    """Collect RNASeq metrics for a BAM file with Picard CollectRnaSeqMetrics.

    `out_file` defaults to `<align_bam stem>.metrics`; an existing output is
    reused. `ref` is passed as REF_FLAT and `ribo` as RIBOSOMAL_INTERVALS.
    Returns the metrics path.
    """
    stem = os.path.splitext(align_bam)[0]
    if out_file is None:
        out_file = "%s.metrics" % (stem)
    if file_exists(out_file):
        return out_file
    with tx_tmpdir(picard._config) as tmp_dir:
        with file_transaction(picard._config, out_file) as tx_out_file:
            picard.run("CollectRnaSeqMetrics",
                       [("INPUT", align_bam),
                        ("OUTPUT", tx_out_file),
                        ("TMP_DIR", tmp_dir),
                        ("REF_FLAT", ref),
                        ("STRAND_SPECIFICITY", "NONE"),
                        ("ASSUME_SORTED", "True"),
                        ("RIBOSOMAL_INTERVALS", ribo)])
    return out_file

Example 18

Project: bcbio-nextgen Source File: theta.py
def _merge_theta_calls(bounds_file, result_file, cnv_file, data):
    """Write a merged file of CNVkit and THetA copy and population estimates.

    Header lines (starting with `#`) of `bounds_file` gain the columns
    `cnv`, `pop_cnvs` and `pop_pvals`; every other line is rewritten by the
    updater built from `result_file` and `cnv_file`, which receives the
    zero-based index of that line among the non-header lines.

    Returns the merged output path.
    """
    out_file = "%s-merged.txt" % (result_file.replace(".BEST.results", ""))
    if utils.file_uptodate(out_file, result_file):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        updater = _update_with_calls(result_file, cnv_file)
        with open(bounds_file) as in_handle:
            with open(tx_out_file, "w") as out_handle:
                row_idx = 0
                for line in in_handle:
                    if not line.startswith("#"):
                        out_handle.write(updater(row_idx, line))
                        row_idx += 1
                        continue
                    header = line.rstrip().split("\t")
                    header += ["cnv", "pop_cnvs", "pop_pvals"]
                    out_handle.write("\t".join(header) + "\n")
    return out_file

Example 19

Project: bipy Source File: sam.py
def sortsam(in_file, out_file=None):
    """Sort `in_file` with the `sort` command and return the output path.

    The command is baked with `s=True, k="1,1"`, i.e. a stable sort keyed on
    the first field.

    in_file: path of the file to sort.
    out_file: destination path; defaults to `in_file` with a "sorted" stem
        appended. Previously this argument was silently ignored and the
        default name was always used.
    """
    if out_file is None:
        out_file = append_stem(in_file, "sorted")
    with file_transaction(out_file) as tmp_out_file:
        sort = sh.sort.bake(s=True, k="1,1", _out=tmp_out_file)
        sort(in_file)
    return out_file

Example 20

Project: bcbio-nextgen Source File: mosaik.py
def _convert_fastq(fastq_file, pair_file, rg_name, out_file, config):
    """Convert fastq inputs into the internal Mosaik representation.

    Derives the MosaikBuild program by replacing "Aligner" with "Build" in
    the configured mosaik program and writes `<out_file stem>-fq.mkb`.
    MOSAIK_TMP is exported as the directory of the transactional output.
    An existing output is reused. Returns the `.mkb` path.
    """
    out_file = "{0}-fq.mkb".format(os.path.splitext(out_file)[0])
    if file_exists(out_file):
        return out_file
    with file_transaction(config, out_file) as tx_out_file:
        aligner = config_utils.get_program("mosaik", config,
                                           default="MosaikAligner")
        platform = config["algorithm"].get("platform", "illumina").lower()
        cl = [aligner.replace("Aligner", "Build"),
              "-q", fastq_file,
              "-out", tx_out_file,
              "-st", platform]
        if pair_file:
            cl.extend(["-q2", pair_file])
        if rg_name:
            cl.extend(["-id", rg_name])
        env_set = "export MOSAIK_TMP={0}".format(os.path.dirname(tx_out_file))
        subprocess.check_call(env_set + " && " + " ".join(cl), shell=True)
    return out_file

Example 21

Project: bcbio-nextgen Source File: picardrun.py
def picard_index(picard, in_bam):
    """Ensure a BAM index exists, building one with Picard BuildBamIndex.

    Either `<in_bam>.bai` or `<in_bam stem>.bai` counts as an existing index.
    Returns `<in_bam>.bai` when it exists, otherwise the alternative name.
    """
    index_file = "%s.bai" % in_bam
    alt_index_file = "%s.bai" % os.path.splitext(in_bam)[0]
    have_index = file_exists(index_file) or file_exists(alt_index_file)
    if not have_index:
        with file_transaction(picard._config, index_file) as tx_index_file:
            picard.run("BuildBamIndex",
                       [("INPUT", in_bam),
                        ("OUTPUT", tx_index_file)])
    if file_exists(index_file):
        return index_file
    return alt_index_file

Example 22

Project: bcbio-nextgen Source File: picardrun.py
def picard_reorder(picard, in_bam, ref_file, out_file):
    """Reorder a BAM file to match the ordering of a reference file.

    Runs Picard ReorderSam with incomplete dictionary concordance allowed.
    An existing `out_file` is reused. Returns `out_file`.
    """
    if file_exists(out_file):
        return out_file
    with tx_tmpdir(picard._config) as tmp_dir:
        with file_transaction(picard._config, out_file) as tx_out_file:
            picard.run("ReorderSam",
                       [("INPUT", in_bam),
                        ("OUTPUT", tx_out_file),
                        ("REFERENCE", ref_file),
                        ("ALLOW_INCOMPLETE_DICT_CONCORDANCE", "true"),
                        ("TMP_DIR", tmp_dir)])
    return out_file

Example 23

Project: bipy Source File: sam.py
def only_mapped(in_file, out_file=None):
    """Run `samtools view` with `h`, `S`, `F=4` on `in_file`, writing `out_file`.

    `out_file` defaults to `in_file` with a "mapped" stem appended; an
    existing output is returned without rerunning samtools.
    """
    if out_file is None:
        out_file = append_stem(in_file, "mapped")
    if not file_exists(out_file):
        with file_transaction(out_file) as tmp_out_file:
            sh.samtools.view(in_file, h=True, S=True, F=4, o=tmp_out_file)
    return out_file

Example 24

Project: bcbio-nextgen Source File: picardrun.py
def picard_downsample(picard, in_bam, ds_pct, random_seed=None):
    """Downsample a BAM file with Picard DownsampleSam.

    picard: runner object providing `_config` and `run`.
    in_bam: input BAM path; output is `<stem>-downsample<ext>` beside it.
    ds_pct: value passed as PROBABILITY, formatted to three decimals.
    random_seed: optional RANDOM_SEED value. Any value other than None is
        forwarded, including 0, which the previous truthiness test dropped.

    An existing output is reused. Returns the output BAM path.
    """
    out_file = "%s-downsample%s" % os.path.splitext(in_bam)
    if not file_exists(out_file):
        with tx_tmpdir(picard._config) as tmp_dir:
            with file_transaction(picard._config, out_file) as tx_out_file:
                opts = [("INPUT", in_bam),
                        ("OUTPUT", tx_out_file),
                        ("PROBABILITY", "%.3f" % ds_pct),
                        ("TMP_DIR", tmp_dir)]
                # Compare against None so that an explicit seed of 0 is kept.
                if random_seed is not None:
                    opts += [("RANDOM_SEED", str(random_seed))]
                picard.run("DownsampleSam", opts)
    return out_file

Example 25

Project: bcbio-nextgen Source File: __init__.py
def sam_to_bam(in_sam, config):
    """Convert a SAM file to BAM with `samtools view` and return the BAM path.

    Input that is already BAM is returned unchanged; anything that is neither
    BAM nor SAM fails the assertion. The thread count comes from
    `config["algorithm"]["num_cores"]` (default 1). An existing `<stem>.bam`
    is reused.
    """
    if is_bam(in_sam):
        return in_sam

    assert is_sam(in_sam), "%s is not a SAM file" % in_sam
    out_file = os.path.splitext(in_sam)[0] + ".bam"
    if utils.file_exists(out_file):
        return out_file

    samtools = config_utils.get_program("samtools", config)
    num_cores = config["algorithm"].get("num_cores", 1)
    message = ("Convert SAM to BAM (%s cores): %s to %s"
               % (str(num_cores), in_sam, out_file))
    with file_transaction(config, out_file) as tx_out_file:
        cmd = "{samtools} view -@ {num_cores} -h -S -b {in_sam} -o {tx_out_file}"
        do.run(cmd.format(samtools=samtools, num_cores=num_cores,
                          in_sam=in_sam, tx_out_file=tx_out_file),
               message)
    return out_file

Example 26

Project: bcbio-nextgen Source File: picardrun.py
def picard_mark_duplicates(picard, align_bam, remove_dups=False):
    """Mark (or remove, with `remove_dups`) duplicates using Picard MarkDuplicates.

    Dots in the input stem are replaced by dashes to build the outputs
    `<stem>-dup<ext>` and `<stem>-dup.dup_metrics`. For Picard versions
    >= 1.82 PROGRAM_RECORD_ID is set to "null". An existing duplicate-marked
    BAM is reused.

    Returns `(dup_bam, dup_metrics)`.
    """
    stem, suffix = os.path.splitext(align_bam)
    stem = stem.replace(".", "-")
    dup_bam = "%s-dup%s" % (stem, suffix)
    dup_metrics = "%s-dup.dup_metrics" % stem
    if file_exists(dup_bam):
        return dup_bam, dup_metrics
    with tx_tmpdir(picard._config) as tmp_dir:
        with file_transaction(picard._config, dup_bam, dup_metrics) as (tx_dup_bam, tx_dup_metrics):
            remove_flag = "true" if remove_dups else "false"
            opts = [("INPUT", align_bam),
                    ("OUTPUT", tx_dup_bam),
                    ("TMP_DIR", tmp_dir),
                    ("REMOVE_DUPLICATES", remove_flag),
                    ("METRICS_FILE", tx_dup_metrics)]
            if picard.get_picard_version("MarkDuplicates") >= 1.82:
                opts.append(("PROGRAM_RECORD_ID", "null"))
            picard.run("MarkDuplicates", opts, memscale={"direction": "decrease", "magnitude": 2})
    return dup_bam, dup_metrics

Example 27

Project: bcbio-nextgen Source File: picardrun.py
def picard_bam_to_fastq(picard, in_bam, fastq_one, fastq_two=None):
    """Convert BAM file to fastq with Picard SamToFastq.

    picard: runner object providing `_config` and `run`.
    in_bam: input BAM path.
    fastq_one: output path for the first (or only) read file.
    fastq_two: optional output path for second-end reads.

    Both outputs are now written inside the file transaction. Previously
    `fastq_two` was written directly to its final location, so a failed run
    could leave a partial second-end file behind. The conversion is skipped
    when `fastq_one` already exists.

    Returns the tuple `(fastq_one, fastq_two)`.
    """
    if not file_exists(fastq_one):
        out_files = [fastq_one] if fastq_two is None else [fastq_one, fastq_two]
        with tx_tmpdir(picard._config) as tmp_dir:
            with file_transaction(picard._config, *out_files) as tx_out:
                # file_transaction yields a bare path for a single output and
                # a sequence of paths when several outputs are requested.
                tx_files = [tx_out] if fastq_two is None else list(tx_out)
                opts = [("INPUT", in_bam),
                        ("FASTQ", tx_files[0]),
                        ("TMP_DIR", tmp_dir)]
                if fastq_two is not None:
                    opts += [("SECOND_END_FASTQ", tx_files[1])]
                picard.run("SamToFastq", opts)
    return (fastq_one, fastq_two)

Example 28

Project: bcbio-nextgen Source File: __init__.py
def filter_primary(bam_file, data):
    """Write `<stem>.primary<ext>` beside `bam_file` and return its path.

    The command comes from `filter_primary_stream_cmd`, with its output
    redirected into the transactional file. An existing output is reused.
    """
    stem, ext = os.path.splitext(bam_file)
    out_file = stem + ".primary" + ext
    if utils.file_exists(out_file):
        return out_file
    with file_transaction(out_file) as tx_out_file:
        cmd = filter_primary_stream_cmd(bam_file, data)
        cmd += "> {tx_out_file}"
        # Placeholders are filled from this function's local variables, so
        # the local names here must stay in sync with the command template.
        do.run(cmd.format(**locals()), ("Filtering primary alignments in %s." %
                                        os.path.basename(bam_file)))
    return out_file

Example 29

Project: bcbio-nextgen Source File: peaks.py
def _prepare_bam(bam_file, bed_file, config):
    if not bam_file or not bed_file:
        return bam_file
    out_file = utils.append_stem(bam_file, '_filter')
    samtools = config_utils.get_program("samtools", config)
    if not utils.file_exists(out_file):
        with file_transaction(out_file) as tx_out:
            cmd = "{samtools} view -bh -L {bed_file} {bam_file} > {tx_out}"
            do.run(cmd.format(**locals()), "Clean %s" % bam_file)
    return out_file

Example 30

Project: bipy Source File: sam.py
def sam2bam(in_file, out_file=None):
    """Convert a SAM file to a BAM file and return the BAM file name.

    If `in_file` is already a BAM file it is returned as-is. `out_file`
    defaults to `in_file` with its suffix replaced by "bam"; an existing
    output is reused.
    """
    if is_bam(in_file):
        return in_file

    if out_file is None:
        out_file = replace_suffix(in_file, "bam")

    if not file_exists(out_file):
        with file_transaction(out_file) as tmp_out_file:
            to_bam = sh.samtools.view.bake(S=True, b=True, o=tmp_out_file)
            to_bam(in_file)
    return out_file

Example 31

Project: bcbio-nextgen Source File: nglims.py
def _concat_bgzip_fastq(finputs, out_dir, read, ldetail):
    """Concatenate several input fastq files into one bgzipped output file.

    Inputs are streamed through `zcat` and recompressed with `bgzip -c` into
    `<ldetail["name"]>_<read>.fastq.gz` in `out_dir`. An existing output is
    reused. Returns the output path.
    """
    out_name = "%s_%s.fastq.gz" % (ldetail["name"], read)
    out_file = os.path.join(out_dir, out_name)
    if utils.file_exists(out_file):
        return out_file
    with file_transaction(out_file) as tx_out_file:
        cmd = "zcat %s | bgzip -c > %s" % (" ".join(finputs), tx_out_file)
        subprocess.check_call(cmd, shell=True)
    return out_file

Example 32

Project: bcbio-nextgen Source File: metrics.py
    def _insert_sizes(self, dup_bam):
        """Run Picard CollectInsertSizeMetrics unless the metrics file exists.

        Returns `(insert_graph, insert_metrics)`, where the graph is an
        `-insert.pdf` file named after the metrics file.
        """
        insert_metrics = self._check_metrics_file(dup_bam, "insert_metrics")
        metrics_stem = os.path.splitext(insert_metrics)[0]
        insert_graph = "%s-insert.pdf" % metrics_stem
        if file_exists(insert_metrics):
            return insert_graph, insert_metrics
        with file_transaction(insert_graph, insert_metrics) as (tx_graph, tx_metrics):
            self._picard.run("CollectInsertSizeMetrics",
                             [("INPUT", dup_bam),
                              ("OUTPUT", tx_metrics),
                              ("H", tx_graph)])
        return insert_graph, insert_metrics

Example 33

Project: bcbio-nextgen Source File: bubbletree.py
def _identify_heterogeneity_blocks_shared(in_file, segment_fn, params, work_dir, somatic_info):
    """Write a BED file of heterogeneity blocks segmented from a CNV input file.

    For each chromosome yielded by `_freqs_by_chromosome`, `segment_fn` is
    called with `(chrom, freqs, coords)` and every `(start, end)` pair it
    produces becomes one tab-separated BED line. Output is
    `<input basename>-hetblocks.bed` in `work_dir`, rebuilt only when out of
    date relative to `in_file`.

    Returns the BED file path.
    """
    in_base = utils.splitext_plus(os.path.basename(in_file))[0]
    out_file = os.path.join(work_dir, "%s-hetblocks.bed" % in_base)
    if utils.file_uptodate(out_file, in_file):
        return out_file
    with file_transaction(somatic_info.tumor_data, out_file) as tx_out_file:
        with open(tx_out_file, "w") as out_handle:
            for chrom, freqs, coords in _freqs_by_chromosome(in_file, params, somatic_info):
                for start, end in segment_fn(chrom, freqs, coords):
                    bed_line = "%s\t%s\t%s\n" % (chrom, start, end)
                    out_handle.write(bed_line)
    return out_file

Example 34

Project: bipy Source File: sam.py
def bam2sam(in_file, out_file=None):
    """Convert a BAM file to a SAM file and return the SAM file name.

    If `in_file` is already a SAM file it is returned as-is. `out_file`
    defaults to `in_file` with its suffix replaced by "sam"; an existing
    output is reused.
    """
    if is_sam(in_file):
        return in_file

    if out_file is None:
        out_file = replace_suffix(in_file, "sam")

    if not file_exists(out_file):
        with file_transaction(out_file) as tmp_out_file:
            to_sam = sh.samtools.view.bake(h=True, _out=tmp_out_file)
            to_sam(in_file)

    return out_file

Example 35

Project: bcbio-nextgen Source File: chromhacks.py
def bed_to_standardonly(in_file, data, headers=None):
    """Copy to `<stem>-stdchrs<ext>` only the lines passing the chromosome check.

    A line is kept when its first whitespace-separated field passes
    `is_autosomal`, or when `headers` is given and the line starts with it.
    An existing output is reused. Returns the output path.
    """
    out_file = "%s-stdchrs%s" % utils.splitext_plus(in_file)
    if utils.file_exists(out_file):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        with open(in_file) as in_handle:
            with open(tx_out_file, "w") as out_handle:
                for line in in_handle:
                    keep = is_autosomal(line.split()[0])
                    if not keep and headers:
                        keep = line.startswith(headers)
                    if keep:
                        out_handle.write(line)
    return out_file

Example 36

Project: bcbio-nextgen Source File: metrics.py
    def _rnaseq_metrics(self, align_bam, gtf_file, rrna_file):
        """Collect RNA-seq metrics via `picard_rnaseq_metrics` unless present.

        Returns the metrics path obtained from `_check_metrics_file`.
        """
        metrics = self._check_metrics_file(align_bam, "rnaseq_metrics")
        if file_exists(metrics):
            return metrics
        with file_transaction(metrics) as tx_metrics:
            picard_rnaseq_metrics(self._picard, align_bam, gtf_file,
                                  rrna_file, tx_metrics)
        return metrics

Example 37

Project: bcbio-nextgen Source File: phylowgs.py
def _run_evolve(ssm_file, cnv_file, work_dir, data):
    """Run evolve.py to infer subclonal composition.

    The script is expected next to the running Python interpreter and is
    executed from inside a transactional `evolve` directory under `work_dir`.
    Skipped when `top_k_trees` is up to date relative to `cnv_file`.

    Returns the path to `evolve/top_k_trees`.
    """
    exe = os.path.join(os.path.dirname(sys.executable), "evolve.py")
    assert os.path.exists(exe), "Could not find evolve script for PhyloWGS runs."
    out_dir = os.path.join(work_dir, "evolve")
    out_file = os.path.join(out_dir, "top_k_trees")
    if utils.file_uptodate(out_file, cnv_file):
        return out_file
    with file_transaction(data, out_dir) as tx_out_dir:
        with utils.chdir(tx_out_dir):
            do.run([sys.executable, exe, "-r", "42", ssm_file, cnv_file],
                   "Run PhyloWGS evolution")
    return out_file

Example 38

Project: bcbio-nextgen Source File: cram.py
def index(in_cram, config):
    """Ensure a CRAM file has a `.crai` index file and return the index path.

    The CRAM is linked next to the transactional index path and indexed there
    with `samtools index`. Skipped when the index is already up to date.
    """
    out_file = in_cram + ".crai"
    if utils.file_uptodate(out_file, in_cram):
        return out_file
    with file_transaction(config, out_file) as tx_out_file:
        # Dropping the ".crai" extension gives the linked CRAM's path;
        # presumably samtools then writes the index at tx_out_file.
        tx_in_file = os.path.splitext(tx_out_file)[0]
        utils.symlink_plus(in_cram, tx_in_file)
        cmd = "samtools index {tx_in_file}".format(tx_in_file=tx_in_file)
        do.run(cmd, "Index CRAM file")
    return out_file

Example 39

Project: bcbio-nextgen Source File: phylowgs.py
def _prep_cnv_file(in_file, work_dir, somatic_info):
    """Prepare a Battenberg CNV file for ingest by PhyloWGS.

    The PhyloWGS preparation script does not handle 'chr' prefixed (hg19
    style) chromosomes correctly, so the second tab-separated column of every
    data line is passed through `_phylowgs_compatible_chroms` to match the
    preparation done in _prep_vrn_file. The header line is copied unchanged.

    Returns the path to `<input basename>-prep<ext>` in `work_dir`.
    """
    prep_name = "%s-prep%s" % utils.splitext_plus(os.path.basename(in_file))
    out_file = os.path.join(work_dir, prep_name)
    if utils.file_uptodate(out_file, in_file):
        return out_file
    with file_transaction(somatic_info.tumor_data, out_file) as tx_out_file:
        with open(in_file) as in_handle:
            with open(tx_out_file, "w") as out_handle:
                header = in_handle.readline()
                out_handle.write(header)
                for line in in_handle:
                    fields = line.split("\t")
                    fields[1] = _phylowgs_compatible_chroms(fields[1])
                    out_handle.write("\t".join(fields))
    return out_file

Example 40

Project: bcbio-nextgen Source File: picardrun.py
def picard_insert_metrics(picard, align_bam, out_file=None):
    """Collect insert size metrics for a BAM file with Picard.

    `out_file` defaults to `<align_bam stem>-insert-metrics.txt`. The
    histogram is always `<align_bam stem>-insert-histogram.pdf` and is passed
    to Picard by its final path (it is not part of the transaction).
    An existing metrics file is reused. Returns the metrics path.
    """
    stem = os.path.splitext(align_bam)[0]
    if out_file is None:
        out_file = "%s-insert-metrics.txt" % (stem)
    histogram = "%s-insert-histogram.pdf" % (stem)
    if file_exists(out_file):
        return out_file
    with tx_tmpdir(picard._config) as tmp_dir:
        with file_transaction(picard._config, out_file) as tx_out_file:
            picard.run("CollectInsertSizeMetrics",
                       [("INPUT", align_bam),
                        ("OUTPUT", tx_out_file),
                        ("HISTOGRAM_FILE", histogram),
                        ("TMP_DIR", tmp_dir)])
    return out_file

Example 41

Project: bcbio-nextgen Source File: optitype.py
def combine_hla_fqs(hlas, out_file, data):
    """Concatenate all extracted HLA fastqs into a single file.

    OptiType performs best on a combination of all extracted HLAs. `hlas` is
    an iterable of `(hla_type, fastq_path)` pairs; only the paths are used.
    An existing `out_file` is reused. Returns `out_file`.
    """
    if utils.file_exists(out_file):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        with open(tx_out_file, "w") as out_handle:
            for _, hla_fq in hlas:
                with open(hla_fq) as in_handle:
                    shutil.copyfileobj(in_handle, out_handle)
    return out_file

Example 42

Project: bipy Source File: fastq.py
def filter_single_reads_by_length(in_file, min_length=30):
    """
    Remove reads shorter than `min_length` bases from a fastq file.

    in_file: fastq file to filter; its quality encoding is detected with
        DetectFastqFormat.
    min_length: minimum read length to keep. Reads of exactly `min_length`
        bases are retained; the previous strict `>` comparison dropped them,
        contradicting the documented "less than" behaviour.

    Returns the path of the filtered file (`in_file` with a "fixed" stem),
    reusing it if it already exists.
    """
    logger.info("Removing reads in %s thare are less than %d bases."
                % (in_file, min_length))
    quality_type = QUALITY_TYPE[DetectFastqFormat.run(in_file)[0]]
    out_file = append_stem(in_file, "fixed")
    if file_exists(out_file):
        return out_file
    in_iterator = SeqIO.parse(in_file, quality_type)
    # Inclusive bound: only reads strictly shorter than min_length are removed.
    out_iterator = (record for record in in_iterator if
                    len(record.seq) >= min_length)
    with file_transaction(out_file) as tmp_out_file:
        with open(tmp_out_file, "w") as out_handle:
            SeqIO.write(out_iterator, out_handle, quality_type)
    return out_file

Example 43

Project: bcbio-nextgen Source File: hisat2.py
def create_splicesites_file(gtf_file, align_dir, data):
    """
    Create, if not already present, a splicesites file for use with hisat2.

    Runs `hisat2_extract_splice_sites.py` on `gtf_file`, redirecting its
    output to `ref-transcripts-splicesites.txt` in `align_dir` (created if
    needed). Returns the splicesites file path.
    """
    out_file = os.path.join(align_dir, "ref-transcripts-splicesites.txt")
    if file_exists(out_file):
        return out_file
    safe_makedir(align_dir)
    hisat2_ss = config_utils.get_program("hisat2_extract_splice_sites.py", data)
    message = "Creating hisat2 splicesites file from %s." % gtf_file
    with file_transaction(out_file) as tx_out_file:
        cmd = "{hisat2_ss} {gtf_file} > {tx_out_file}"
        do.run(cmd.format(hisat2_ss=hisat2_ss, gtf_file=gtf_file,
                          tx_out_file=tx_out_file),
               message)
    return out_file

Example 44

Project: bcbio-nextgen Source File: picardrun.py
def picard_merge(picard, in_files, out_file=None,
                 merge_seq_dicts=False):
    """Run Picard MergeSamFiles over in_files to produce one merged BAM.

    picard: runner exposing run() and a _config attribute.
    in_files: BAM paths, each passed as an INPUT option.
    out_file: merged BAM path; defaults to the common prefix of in_files
        followed by "merge.bam".
    merge_seq_dicts: sets MERGE_SEQUENCE_DICTIONARIES to "true" or "false".

    Returns out_file; nothing is run when it already exists.
    """
    if out_file is None:
        out_file = "%smerge.bam" % os.path.commonprefix(in_files)
    if file_exists(out_file):
        return out_file
    merge_dicts_flag = "true" if merge_seq_dicts else "false"
    with tx_tmpdir(picard._config) as tmp_dir, \
            file_transaction(picard._config, out_file) as tx_out_file:
        opts = [("OUTPUT", tx_out_file),
                ("SORT_ORDER", "coordinate"),
                ("MERGE_SEQUENCE_DICTIONARIES", merge_dicts_flag),
                ("USE_THREADING", "true"),
                ("TMP_DIR", tmp_dir)]
        # One INPUT entry per BAM, in the order given.
        opts.extend(("INPUT", bam_path) for bam_path in in_files)
        picard.run("MergeSamFiles", opts)
    return out_file

Example 45

Project: bcbio-nextgen Source File: fastq.py
@utils.memoize_outfile(stem=".fixed")
def filter_single_reads_by_length(in_file, quality_format, min_length=20,
                                  out_file=None):
    """
    removes reads from a fastq file which are shorter than a minimum
    length

    Reads of exactly min_length bases are kept; only strictly shorter
    reads are dropped.

    in_file: path of the fastq file to filter.
    quality_format: format name handed to SeqIO for parsing and writing.
    min_length: shortest read length, in bases, that is retained.
    out_file: destination path (presumably filled in by the
        memoize_outfile decorator when omitted -- confirm).

    Returns out_file.
    """
    logger.info("Removing reads in %s that are less than %d bases."
                % (in_file, min_length))
    in_iterator = SeqIO.parse(in_file, quality_format)
    # >= so that a read of exactly min_length survives, matching the
    # documented "shorter than a minimum length" contract (the old >
    # dropped it).
    out_iterator = (record for record in in_iterator if
                    len(record.seq) >= min_length)
    with file_transaction(out_file) as tmp_out_file:
        with open(tmp_out_file, "w") as out_handle:
            SeqIO.write(out_iterator, out_handle, quality_format)
    return out_file

Example 46

Project: bipy Source File: stages.py
    def _run_vep(self, in_file):
        """Run the script at self.vep through perl on in_file.

        The output path is append_stem(in_file, "vep"). self.species and
        self.options are forwarded as keyword options to the perl call.
        Returns the output path; the call is skipped when it already exists.
        """
        vep_out = append_stem(in_file, "vep")
        if not file_exists(vep_out):
            with file_transaction(vep_out) as tx_vep_out:
                sh.perl(self.vep, "-i", in_file, "-o", tx_vep_out,
                        species=self.species, _convert_underscore=False,
                        **self.options)
        return vep_out

Example 47

Project: bipy Source File: htseq_count.py
def run(input_file, gtf_file, out_file=None):
    """Count reads with htseq-count and return the path of the counts file.

    input_file: alignment file given to htseq-count.
    gtf_file: annotation file given to htseq-count.
    out_file: destination of the counts; derived from input_file with
        _get_outfilename when omitted.

    The directory of out_file is created first. An existing out_file is
    returned without rerunning htseq-count.
    """
    if out_file is None:
        out_file = _get_outfilename(input_file)
    safe_makedir(os.path.dirname(out_file))

    if not file_exists(out_file):
        with file_transaction(out_file) as tmp_out_file:
            # stdout of htseq-count is redirected into the transactional file.
            cmd = ("htseq-count --mode=union --stranded=no --type=exon "
                   "--idattr=gene_id {input_file} {gtf_file} > {tmp_out_file}"
                   ).format(input_file=input_file, gtf_file=gtf_file,
                            tmp_out_file=tmp_out_file)
            do.run(cmd, "Running htseq-count on %s." % (input_file), None)

    return out_file

Example 48

Project: bipy Source File: sam.py
def only_unmapped(in_file, out_file=None):
    """Write the reads selected by `samtools view -f 4` from in_file.

    in_file: input alignment file; S=True tells samtools it is SAM.
    out_file: destination path; defaults to append_stem(in_file, "unmapped").

    samtools is called with h=True (keep the header) and f=4 (the unmapped
    flag). Returns out_file; an existing one is returned unchanged.
    """
    if out_file is None:
        out_file = append_stem(in_file, "unmapped")
    if file_exists(out_file):
        return out_file
    with file_transaction(out_file) as tmp_out_file:
        # Write to the transactional path. Writing straight to out_file
        # bypassed the transaction, so a failed run could leave a partial
        # file that later calls would treat as finished.
        sh.samtools.view(in_file, h=True, S=True, f=4, o=tmp_out_file)
    return out_file

Example 49

Project: bcbio-nextgen Source File: __init__.py
def bam_to_sam(in_file, config):
    """Convert a BAM file to SAM with samtools view, returning the SAM path.

    in_file: input path; returned unchanged when is_sam() accepts it,
        otherwise it must satisfy is_bam().
    config: used to locate samtools, read algorithm/num_cores (default 1)
        and passed to file_transaction.

    The output path is in_file with its extension replaced by ".sam"; an
    existing output is returned without rerunning samtools.
    """
    if is_sam(in_file):
        return in_file

    assert is_bam(in_file), "%s is not a BAM file" % in_file
    out_file = os.path.splitext(in_file)[0] + ".sam"
    if not utils.file_exists(out_file):
        samtools = config_utils.get_program("samtools", config)
        num_cores = config["algorithm"].get("num_cores", 1)
        description = ("Convert BAM to SAM (%s cores): %s to %s"
                       % (str(num_cores), in_file, out_file))
        with file_transaction(config, out_file) as tx_out_file:
            cmd = "{samtools} view -@ {num_cores} -h {in_file} -o {tx_out_file}".format(
                samtools=samtools, num_cores=num_cores,
                in_file=in_file, tx_out_file=tx_out_file)
            do.run(cmd, description)
    return out_file

Example 50

Project: bcbio-nextgen Source File: picardrun.py
def picard_formatconverter(picard, align_sam):
    """Run Picard SamFormatConverter on align_sam to produce a BAM file.

    picard: runner exposing run() and a _config attribute.
    align_sam: input SAM path; the output is the same path with its
        extension replaced by ".bam".

    Returns the BAM path; nothing is run when it already exists.
    """
    sam_base = os.path.splitext(align_sam)[0]
    out_bam = "%s.bam" % sam_base
    if file_exists(out_bam):
        return out_bam
    with tx_tmpdir(picard._config) as tmp_dir, \
            file_transaction(picard._config, out_bam) as tx_out_bam:
        converter_opts = [("INPUT", align_sam),
                          ("OUTPUT", tx_out_bam),
                          ("TMP_DIR", tmp_dir)]
        picard.run("SamFormatConverter", converter_opts)
    return out_bam
See More Examples - Go to Next Page
Page 1 Selected Page 2 Page 3 Page 4