libStatGen Software 1
Loading...
Searching...
No Matches
BamIndex.h
1/*
2 * Copyright (C) 2010-2012 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#ifndef __BAM_INDEX_H__
19#define __BAM_INDEX_H__
20
21#include <stdint.h>
22#include <vector>
23#include <map>
24#include <stdlib.h>
25
26#include "IndexBase.h"
27
28#include "InputFile.h"
29#include "SamStatus.h"
30
31class BamIndex : public IndexBase // BAM index built on IndexBase: reads an index file and answers chunk, file-offset and read-count queries per reference ID.
32{
33public:
34
35 BamIndex();
36 virtual ~BamIndex();
37
38 /// Reset the member data for a new index file.
39 virtual void resetIndex();
40
41 /// Read & parse the specified index file.
42 /// \param filename the bam index file to be read.
43 /// \return the status of the read.
44 SamStatus::Status readIndex(const char* filename);
45
46 /// Get the list of chunks associated with this region.
47 /// For an entire reference ID, set start and end to -1.
48 /// To start at the beginning of the region, set start to 0/-1.
49 /// To go to the end of the region, set end to -1.
50 bool getChunksForRegion(int32_t refID, int32_t start, int32_t end,
51 SortedChunkList& chunkList);
52 /// \return a maximum file offset. NOTE(review): presumably maxOverallOffset, i.e. the max over the whole index - confirm in BamIndex.cpp.
53 uint64_t getMaxOffset() const;
54
55 /// Get the minimum and maximum file offsets for the specified reference ID.
56 /// \param refID the reference ID to locate in the file.
57 /// \param minOffset returns the min file offset for the specified reference
58 /// \param maxOffset returns the max file offset for the specified reference
59 /// \return whether or not the reference was found in the file
60 bool getReferenceMinMax(int32_t refID,
61 uint64_t& minOffset,
62 uint64_t& maxOffset) const;
63
64 /// Get the number of mapped reads for this reference id. Returns -1 for
65 /// out of range refIDs.
66 /// \param refID reference ID for which to extract the number of mapped reads.
67 /// \return number of mapped reads for the specified reference id.
68 int32_t getNumMappedReads(int32_t refID);
69
70 /// Get the number of unmapped reads for this reference id. Returns -1 for
71 /// out of range refIDs.
72 /// \param refID reference ID for which to extract the number of unmapped reads.
73 /// \return number of unmapped reads for the specified reference id
74 int32_t getNumUnMappedReads(int32_t refID);
75
76 /// Print the index information.
77 /// \param refID reference ID for which to print info for. -1 means print for all references.
78 /// \param summary whether or not to just print a summary (defaults to false). The summary just contains summary info for each reference and not every bin/chunk.
79 void printIndex(int32_t refID, bool summary = false);
80
81 // Sentinel values used by this class's interface.
82 /// The number used for an unknown number of reads.
83 static const int32_t UNKNOWN_NUM_READS = -1;
84
85 /// The number used for the reference id of unmapped reads.
86 static const int32_t REF_ID_UNMAPPED = -1;
87
88 /// The number used to indicate that all reference ids should be used.
89 static const int32_t REF_ID_ALL = -2;
90
91private:
92 uint64_t maxOverallOffset;
93
94 int32_t myUnMappedNumReads;
95};
96
97
98#endif
virtual void resetIndex()
Reset the member data for a new index file.
Definition BamIndex.cpp:35
bool getChunksForRegion(int32_t refID, int32_t start, int32_t end, SortedChunkList &chunkList)
Get the list of chunks associated with this region.
Definition BamIndex.cpp:218
int32_t getNumMappedReads(int32_t refID)
Get the number of mapped reads for this reference id.
Definition BamIndex.cpp:355
SamStatus::Status readIndex(const char *filename)
Definition BamIndex.cpp:45
static const int32_t REF_ID_ALL
The number used to indicate that all reference ids should be used.
Definition BamIndex.h:89
static const int32_t UNKNOWN_NUM_READS
The number used for an unknown number of reads.
Definition BamIndex.h:83
bool getReferenceMinMax(int32_t refID, uint64_t &minOffset, uint64_t &maxOffset) const
Get the minimum and maximum file offsets for the specified reference ID.
Definition BamIndex.cpp:337
int32_t getNumUnMappedReads(int32_t refID)
Get the number of unmapped reads for this reference id.
Definition BamIndex.cpp:377
static const int32_t REF_ID_UNMAPPED
The number used for the reference id of unmapped reads.
Definition BamIndex.h:86
void printIndex(int32_t refID, bool summary=false)
Print the index information.
Definition BamIndex.cpp:398
Status
Return value enum for StatGenFile methods.