Secure your code as it's written. Use Snyk Code to scan source code in minutes - no build needed - and fix issues immediately.
# NOTE(review): this excerpt starts inside a branch whose opening `if` is not
# visible (the `else:` further down pairs with it) and its indentation has been
# stripped, so the nesting described here is inferred from the statement order.
# create a mapping from symbol -> gene locations
# Choose the gene model set (defGenes): an explicit "<type>/<geneset>" suffix in
# geneType takes precedence, otherwise a per-assembly default is used.
if "/" in geneType:
defGenes = geneType.split("/")[1]
elif genome=="hg38":
defGenes = "gencode24"
elif genome=="hg19":
defGenes = "gencode19"
elif genome=="mm10":
defGenes = "gencode-m13"
else:
# no default gene set is listed for this assembly
# (presumably errAbort terminates the program - confirm)
errAbort("Unclear how to map symbols to genome for db %s. Please adapt cellbrowser.py" % genome)
logging.info("Using %s to map symbols to genome" % defGenes)
# geneToSym is indexed by gene ID below to obtain the symbol
geneToSym = cellbrowser.readGeneSymbols({'geneIdType':defGenes})
geneLocsId = cellbrowser.parseGeneLocs(defGenes)
# re-key the locations: gene ID -> locs becomes symbol -> locs
geneLocs = {}
for geneId, locs in iterItems(geneLocsId):
sym = geneToSym[geneId]
geneLocs[sym] = locs
else:
# here geneType itself is passed to both helpers, no re-keying is done
geneToSym = cellbrowser.readGeneSymbols({'geneIdType':geneType})
geneLocs = cellbrowser.parseGeneLocs(geneType)
# both output files are opened for writing; they are not closed within this excerpt
matOfh = open(outMatrixFname, "w")
clustOfh = open(clusterFname, "w")
# open the input matrix and take its matrix type and sample (cell) names
mr = cellbrowser.MatrixTsvReader()
mr.open(inMatrixFname)
matType, cellNames = mr.matType, mr.sampleNames
# one integer index per cell name, in the order of cellNames
cellIds = range(0, len(cellNames))
# NOTE(review): indentation has been stripped from this excerpt; the nesting
# described in these comments is inferred from the statement order.
# When the matrix is keyed by gene symbols, locations have to be looked up via
# a gene model set and then re-keyed by symbol.
if geneType.startswith("symbol"):
# create a mapping from symbol -> gene locations
# An explicit "<type>/<geneset>" suffix in geneType takes precedence,
# otherwise a per-assembly default gene set is used.
if "/" in geneType:
defGenes = geneType.split("/")[1]
elif genome=="hg38":
defGenes = "gencode24"
elif genome=="hg19":
defGenes = "gencode19"
elif genome=="mm10":
defGenes = "gencode-m13"
else:
# no default gene set is listed for this assembly
# (presumably errAbort terminates the program - confirm)
errAbort("Unclear how to map symbols to genome for db %s. Please adapt cellbrowser.py" % genome)
logging.info("Using %s to map symbols to genome" % defGenes)
# geneToSym is indexed by gene ID below to obtain the symbol
geneToSym = cellbrowser.readGeneSymbols({'geneIdType':defGenes})
geneLocsId = cellbrowser.parseGeneLocs(defGenes)
# re-key the locations: gene ID -> locs becomes symbol -> locs
geneLocs = {}
for geneId, locs in iterItems(geneLocsId):
sym = geneToSym[geneId]
geneLocs[sym] = locs
else:
# geneType does not start with "symbol": it is passed to both helpers as-is
geneToSym = cellbrowser.readGeneSymbols({'geneIdType':geneType})
geneLocs = cellbrowser.parseGeneLocs(geneType)
# both output files are opened for writing; they are not closed within this excerpt
matOfh = open(outMatrixFname, "w")
clustOfh = open(clusterFname, "w")
# open the input matrix and take its matrix type and sample (cell) names
mr = cellbrowser.MatrixTsvReader()
mr.open(inMatrixFname)
matType, cellNames = mr.matType, mr.sampleNames
# NOTE(review): the function continues beyond this excerpt and its indentation
# has been stripped; comments below only describe the visible statements.
def cbTrackHub(options):
" make track hub given meta file and directory with bam files "
# --init mode: copy the sample config shipped with the package, then exit with status 0
if options.init:
cellbrowser.copyPkgFile("sampleConfig/cellbrowser.conf")
sys.exit(0)
# settings are only read from the config when the file exists
if isfile(options.inConf):
conf = cellbrowser.loadConfig(options.inConf)
# settings read by subscript have no fallback here
# (presumably a missing key raises - assumes conf is dict-like, confirm)
db = conf["ucscDb"]
inMatrixFname = conf["exprMatrix"]
metaFname = conf["meta"]
clusterFieldName = conf["clusterField"]
# settings read with .get() are optional; the second argument is the fallback
clusterOrderFile = conf.get("clusterOrder")
bamDir = conf.get("bamDir", "bam")
fixDot = conf.get("fixDot", False)
email = conf.get("hubEmail", CBEMAIL)
geneType = conf["geneIdType"]
outDir = conf["hubDir"]
unitName = conf.get("unit", "TPM")
# NOTE(review): the function continues beyond this excerpt and its indentation
# has been stripped; comments below only describe the visible statements.
def cbTrackHub(options):
" make track hub given meta file and directory with bam files "
# --init mode: copy the sample config shipped with the package, then exit with status 0
if options.init:
cellbrowser.copyPkgFile("sampleConfig/cellbrowser.conf")
sys.exit(0)
# settings are only read from the config when the file exists
if isfile(options.inConf):
conf = cellbrowser.loadConfig(options.inConf)
# settings read by subscript have no fallback here
# (presumably a missing key raises - assumes conf is dict-like, confirm)
db = conf["ucscDb"]
inMatrixFname = conf["exprMatrix"]
metaFname = conf["meta"]
clusterFieldName = conf["clusterField"]
# settings read with .get() are optional; the second argument is the fallback
clusterOrderFile = conf.get("clusterOrder")
bamDir = conf.get("bamDir", "bam")
fixDot = conf.get("fixDot", False)
email = conf.get("hubEmail", CBEMAIL)
geneType = conf["geneIdType"]
outDir = conf["hubDir"]
unitName = conf.get("unit", "TPM")
hubUrl = conf.get("hubUrl", "")
refHtmlFname = conf.get("refHtml", None)
# NOTE(review): tail of an option-parsing function; its `def` line, the parser
# construction and the options read below (exprMatrix, inConf, init, debug) are
# registered outside this excerpt. Indentation has been stripped.
# use name, shortLabel or hubName from conf
# Only --clusterOrder and -s/--skipBarchart are registered in this excerpt; the
# other add_option calls are commented out.
#parser.add_option("", "--fixDot", dest="fixDot", action="store_true", help="replace dots in cell meta IDs with dashes (for R)")
#parser.add_option("-t", "--geneType", dest="geneType", help="type of gene IDs in expression matrix. values like 'symbols', or 'gencode22', 'gencode28' or 'gencode-m13'.")
#parser.add_option("", "--bamDir", dest="bamDir", help="directory with BAM files, one per cell. Merges small BAM files into one per cell cluster.")
parser.add_option("", "--clusterOrder", dest="clusterOrder", help="file with cluster names in the order that they should appear in the track. default is alphabetical order.")
parser.add_option("-s", "--skipBarchart", dest="skipBarchart", help="do not create the bar chart graph", action="store_true")
#parser.add_option("", "--name", dest="name", help="name of track hub.")
#parser.add_option("", "--email", dest="email", help="contact email for track hub. Default is %default, taken from the env. variable CBEMAIL", default=CBEMAIL)
#parser.add_option("-f", "--file", dest="file", action="store", help="run on file")
#parser.add_option("", "--test", dest="test", action="store_true", help="do something")
(options, args) = parser.parse_args()
# Nothing to do without an expression matrix option, an existing config file
# or --init: print the usage text and exit with status 1.
# NOTE(review): bare exit() is used here while other code in this file calls
# sys.exit(); consider sys.exit(1) for consistency.
if not options.exprMatrix and not isfile(options.inConf) and not options.init:
parser.print_help()
exit(1)
cellbrowser.setDebug(options.debug)
# note the order: positional arguments first, then the options object
return args, options
# NOTE(review): tail of a function that writes a bed file; the code that opens
# bedFh and counts skipCount is outside this excerpt.
# terminate the current row, then close the bed file before it is read by `sort`
bedFh.write("\n")
bedFh.close()
if skipCount != 0:
logging.info("Could not place %d genes, these were skipped" % skipCount)
# sort by column 1, then numerically by column 2, using C collation;
# the sorted copy is written next to the input as *.sorted.bed
bedFname2 = bedFname.replace(".bed", ".sorted.bed")
cmd = "LC_COLLATE=C sort -k1,1 -k2,2n %s > %s" % (bedFname, bedFname2)
cellbrowser.runCommand(cmd)
# convert to .bb using .as file
# from https://genome.ucsc.edu/goldenpath/help/examples/barChart/barChartBed.as
#asFname = join(dataDir, )
asFname = cellbrowser.getStaticFile(["genomes", "barChartBed.as"])
sizesFname = cellbrowser.getSizesFname(genome)
# bedToBigBed arguments: sorted bed input, chromosome sizes file, output bbFname
cmd = "bedToBigBed -as=%s -type=bed6+5 -tab %s %s %s" % (asFname, bedFname2, sizesFname, bbFname)
cellbrowser.runCommand(cmd)
# NOTE(review): excerpt from the middle of a function; the loop body continues
# beyond the visible lines.
# Write the meta/BAM matching report into outDir; the value returned by
# writeDebugReport is kept as cellCount.
idReportFname = join(outDir, "metaBamMatch.txt")
cellCount = writeDebugReport(allMetaCellIds, cellIdToBams, clusterBams, idReportFname)

jlFh = open("jobList", "w")
#cellCount = 0
# .items() instead of the Python-2-only .iteritems(), which raises
# AttributeError on Python 3 dicts; .items() works on both versions.
for clusterName, (cellIds, bamFnames) in clusterBams.items():
    # de-duplicate the cell IDs of this cluster
    uniqueCellIds = set(cellIds)
    #cellCount += len(uniqueCellIds)
# NOTE(review): excerpt from the middle of a function; the loop body continues
# beyond the visible lines.
logging.info("Merging BAM files and writing hub")

# the sanitized hub name is passed alongside the original one
saneHubName = sanitizeName(hubName)
writeParentStanzas(tfh, saneHubName, hubName, cellCount)

chromSizes = cellbrowser.getSizesFname(db)

jobNo = 0
emptyClusterCount = 0
# .items() instead of the Python-2-only .iteritems(), which raises
# AttributeError on Python 3 dicts; .items() works on both versions.
for clusterName, (cellIds, bamFnames) in clusterBams.items():
    saneClusterName = sanitizeName(clusterName)
    logging.info("Processing cluster %s, %d cellIds/BAM files, examples: %s" % (clusterName, len(cellIds), cellIds[0]))
    cmds = []

    # per-cluster output paths, all in outDir and named after the sanitized cluster name
    outBam = join(outDir, saneClusterName+".bam")
    outStat = join(outDir, saneClusterName+".stats.txt")
    outCalls = join(outDir, saneClusterName+".calls.tsv")
    junctionBed = join(outDir, saneClusterName+".junctions.bed")
    intronBed = join(outDir, saneClusterName+".introns.bed")
    # sorted variants get a ".sorted.bed" suffix in place of ".bed"
    junctionBedSorted = junctionBed.replace(".bed", ".sorted.bed")
    intronBedSorted = intronBed.replace(".bed", ".sorted.bed")
# NOTE(review): this excerpt starts in the middle of an if/elif chain whose
# earlier branches are not visible, and its indentation has been stripped; the
# nesting described here is inferred from the statement order.
elif genome=="mm10":
defGenes = "gencode-m13"
else:
# no default gene set is listed for this assembly
# (presumably errAbort terminates the program - confirm)
errAbort("Unclear how to map symbols to genome for db %s. Please adapt cellbrowser.py" % genome)
logging.info("Using %s to map symbols to genome" % defGenes)
# geneToSym is indexed by gene ID below to obtain the symbol
geneToSym = cellbrowser.readGeneSymbols({'geneIdType':defGenes})
geneLocsId = cellbrowser.parseGeneLocs(defGenes)
# re-key the locations: gene ID -> locs becomes symbol -> locs
geneLocs = {}
for geneId, locs in iterItems(geneLocsId):
sym = geneToSym[geneId]
geneLocs[sym] = locs
else:
# here geneType itself is passed to both helpers, no re-keying is done
geneToSym = cellbrowser.readGeneSymbols({'geneIdType':geneType})
geneLocs = cellbrowser.parseGeneLocs(geneType)
# both output files are opened for writing; they are not closed within this excerpt
matOfh = open(outMatrixFname, "w")
clustOfh = open(clusterFname, "w")
# open the input matrix and take its matrix type and sample (cell) names
mr = cellbrowser.MatrixTsvReader()
mr.open(inMatrixFname)
matType, cellNames = mr.matType, mr.sampleNames
# map every cell name to its integer position in cellNames
cellIds = range(0, len(cellNames))
cellNameToId = dict(zip(cellNames, cellIds))
# make a list of lists of cellIds, one per cluster, in the right order
clusterCellIds = [] # list of tuples with cell-indexes, one per cluster
allCellNames = [] # list for cellIds, with a matrix, meta and with bam file
allCellIndices = [] # position of all cellIds in allCellNames
# the loop body is outside this excerpt
for clusterName in clusterOrder:
# NOTE(review): this excerpt starts inside a branch whose opening `if` is not
# visible (the `else:` below pairs with it) and its indentation has been
# stripped; the nesting described here is inferred from the statement order.
logging.info("Using %s to map symbols to genome" % defGenes)
# geneToSym is indexed by gene ID below to obtain the symbol
geneToSym = cellbrowser.readGeneSymbols({'geneIdType':defGenes})
geneLocsId = cellbrowser.parseGeneLocs(defGenes)
# re-key the locations: gene ID -> locs becomes symbol -> locs
geneLocs = {}
for geneId, locs in iterItems(geneLocsId):
sym = geneToSym[geneId]
geneLocs[sym] = locs
else:
# here geneType itself is passed to both helpers, no re-keying is done
geneToSym = cellbrowser.readGeneSymbols({'geneIdType':geneType})
geneLocs = cellbrowser.parseGeneLocs(geneType)
# both output files are opened for writing; they are not closed within this excerpt
matOfh = open(outMatrixFname, "w")
clustOfh = open(clusterFname, "w")
# open the input matrix and take its matrix type and sample (cell) names
mr = cellbrowser.MatrixTsvReader()
mr.open(inMatrixFname)
matType, cellNames = mr.matType, mr.sampleNames
# map every cell name to its integer position in cellNames
cellIds = range(0, len(cellNames))
cellNameToId = dict(zip(cellNames, cellIds))
# make a list of lists of cellIds, one per cluster, in the right order
clusterCellIds = [] # list of tuples with cell-indexes, one per cluster
allCellNames = [] # list for cellIds, with a matrix, meta and with bam file
allCellIndices = [] # position of all cellIds in allCellNames
# NOTE(review): excerpt; the bodies of both loops continue beyond the visible lines.
# Walk the clusters in the requested order and look at each cluster's cell names.
for clusterName in clusterOrder:
    cellIdxList = []
    for cellName in clusterToCells[clusterName]:
        # cells listed in the meta data but missing from the matrix are
        # reported and skipped
        if cellName not in cellNameToId:
            # logging.warning() replaces the deprecated alias logging.warn()
            logging.warning("%s is in meta but not in expression matrix." % cellName)
            continue
# NOTE(review): tail of a function that writes a bed file; the code that opens
# bedFh and builds bedRow / skipCount is outside this excerpt.
# write the row as one tab-separated line
bedFh.write("\t".join(bedRow))
bedFh.write("\n")
# close the bed file before it is read by `sort`
bedFh.close()
if skipCount != 0:
logging.info("Could not place %d genes, these were skipped" % skipCount)
# sort by column 1, then numerically by column 2, using C collation;
# the sorted copy is written next to the input as *.sorted.bed
bedFname2 = bedFname.replace(".bed", ".sorted.bed")
cmd = "LC_COLLATE=C sort -k1,1 -k2,2n %s > %s" % (bedFname, bedFname2)
cellbrowser.runCommand(cmd)
# convert to .bb using .as file
# from https://genome.ucsc.edu/goldenpath/help/examples/barChart/barChartBed.as
#asFname = join(dataDir, )
asFname = cellbrowser.getStaticFile(["genomes", "barChartBed.as"])
sizesFname = cellbrowser.getSizesFname(genome)
# bedToBigBed arguments: sorted bed input, chromosome sizes file, output bbFname
cmd = "bedToBigBed -as=%s -type=bed6+5 -tab %s %s %s" % (asFname, bedFname2, sizesFname, bbFname)
cellbrowser.runCommand(cmd)