libStatGen Software 1
Loading...
Searching...
No Matches
SamRecord Class Reference

Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record. More...

#include <SamRecord.h>

Public Types

enum  SequenceTranslation { NONE , EQUAL , BASES }
 Enum containing the settings on how to translate the sequence if a reference is available. More...
 

Public Member Functions

 SamRecord ()
 Default Constructor.
 
 SamRecord (ErrorHandler::HandlingType errorHandlingType)
 Constructor that sets the error handling type.
 
 ~SamRecord ()
 Destructor.
 
void resetRecord ()
 Reset the fields of the record to a default value.
 
bool isValid (SamFileHeader &header)
 Returns whether or not the record is valid, setting the status to indicate success or failure.
 
void setReference (GenomeSequence *reference)
 Set the reference to the specified genome sequence object.
 
void setSequenceTranslation (SequenceTranslation translation)
 Set the type of sequence translation to use when getting the sequence.
 
Set Alignment Data

Set methods for record fields.

All of the "set" methods set the status to indicate success or the failure reason.

bool setReadName (const char *readName)
 Set QNAME to the passed in name.
 
bool setFlag (uint16_t flag)
 Set the bitwise FLAG to the specified value.
 
bool setReferenceName (SamFileHeader &header, const char *referenceName)
 Set the reference sequence name (RNAME) to the specified name, using the header to determine the reference id.
 
bool set1BasedPosition (int32_t position)
 Set the leftmost position (POS) using the specified 1-based (SAM format) value.
 
bool set0BasedPosition (int32_t position)
 Set the leftmost position using the specified 0-based (BAM format) value.
 
bool setMapQuality (uint8_t mapQuality)
 Set the mapping quality (MAPQ).
 
bool setCigar (const char *cigar)
 Set the CIGAR to the specified SAM formatted cigar string.
 
bool setCigar (const Cigar &cigar)
 Set the CIGAR to the specified Cigar object.
 
bool setMateReferenceName (SamFileHeader &header, const char *mateReferenceName)
 Set the mate/next fragment's reference sequence name (RNEXT) to the specified name, using the header to determine the mate reference id.
 
bool set1BasedMatePosition (int32_t matePosition)
 Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value.
 
bool set0BasedMatePosition (int32_t matePosition)
 Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value.
 
bool setInsertSize (int32_t insertSize)
 Sets the inferred insert size (ISIZE)/observed template length (TLEN).
 
bool setSequence (const char *seq)
 Sets the sequence (SEQ) to the specified SAM formatted sequence string.
 
bool setQuality (const char *quality)
 Sets the quality (QUAL) to the specified SAM formatted quality string.
 
bool shiftIndelsLeft ()
 Shift the indels (if any) to the left by updating the CIGAR.
 
SamStatus::Status setBuffer (const char *fromBuffer, uint32_t fromBufferSize, SamFileHeader &header)
 Sets the SamRecord to contain the information in the BAM formatted fromBuffer.
 
SamStatus::Status setBufferFromFile (IFILE filePtr, SamFileHeader &header)
 Read the BAM record from a file.
 
Set Tag Data

Set methods for tags.

bool addIntTag (const char *tag, int32_t value)
 Add the specified integer tag to the record.
 
bool addTag (const char *tag, char vtype, const char *value)
 Add the specified tag,vtype,value to the record.
 
void clearTags ()
 Clear the tags in this record.
 
bool rmTag (const char *tag, char type)
 Remove a tag.
 
bool rmTags (const char *tags)
 Remove tags.
 
Get Alignment Data

Get methods for record fields.

All of the "get" methods set the status to indicate success or the failure reason.

const void * getRecordBuffer ()
 Get a const pointer to the buffer that contains the BAM representation of the record.
 
const void * getRecordBuffer (SequenceTranslation translation)
 Get a const pointer to the buffer that contains the BAM representation of the record using the specified translation on the sequence.
 
SamStatus::Status writeRecordBuffer (IFILE filePtr)
 Write the record as a BAM into the specified already opened file.
 
SamStatus::Status writeRecordBuffer (IFILE filePtr, SequenceTranslation translation)
 Write the record as a BAM into the specified already opened file using the specified translation on the sequence.
 
int32_t getBlockSize ()
 Get the block size of the record (BAM format).
 
const char * getReferenceName ()
 Get the reference sequence name (RNAME) of the record.
 
int32_t getReferenceID ()
 Get the reference sequence id of the record (BAM format rid).
 
int32_t get1BasedPosition ()
 Get the 1-based(SAM) leftmost position (POS) of the record.
 
int32_t get0BasedPosition ()
 Get the 0-based(BAM) leftmost position of the record.
 
uint8_t getReadNameLength ()
 Get the length of the readname (QNAME) including the null.
 
uint8_t getMapQuality ()
 Get the mapping quality (MAPQ) of the record.
 
uint16_t getBin ()
 Get the BAM bin for the record.
 
uint16_t getCigarLength ()
 Get the length of the BAM formatted CIGAR.
 
uint16_t getFlag ()
 Get the flag (FLAG).
 
int32_t getReadLength ()
 Get the length of the read.
 
const char * getMateReferenceName ()
 Get the mate/next fragment's reference sequence name (RNEXT).
 
const char * getMateReferenceNameOrEqual ()
 Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned.
 
int32_t getMateReferenceID ()
 Get the mate reference id of the record (BAM format: mate_rid/next_refID).
 
int32_t get1BasedMatePosition ()
 Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT).
 
int32_t get0BasedMatePosition ()
 Get the 0-based(BAM) leftmost mate/next fragment's position.
 
int32_t getInsertSize ()
 Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN).
 
int32_t get0BasedAlignmentEnd ()
 Returns the 0-based inclusive rightmost position of the clipped sequence.
 
int32_t get1BasedAlignmentEnd ()
 Returns the 1-based inclusive rightmost position of the clipped sequence.
 
int32_t getAlignmentLength ()
 Returns the length of the clipped sequence, returning 0 if the cigar is '*'.
 
int32_t get0BasedUnclippedStart ()
 Returns the 0-based inclusive left-most position adjusted for clipped bases.
 
int32_t get1BasedUnclippedStart ()
 Returns the 1-based inclusive left-most position adjusted for clipped bases.
 
int32_t get0BasedUnclippedEnd ()
 Returns the 0-based inclusive right-most position adjusted for clipped bases.
 
int32_t get1BasedUnclippedEnd ()
 Returns the 1-based inclusive right-most position adjusted for clipped bases.
 
const char * getReadName ()
 Returns the SAM formatted Read Name (QNAME).
 
const char * getCigar ()
 Returns the SAM formatted CIGAR string.
 
const char * getSequence ()
 Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTranslation.
 
const char * getSequence (SequenceTranslation translation)
 Returns the SAM formatted sequence string (SEQ) performing the specified sequence translation.
 
const char * getQuality ()
 Returns the SAM formatted quality string (QUAL).
 
char getSequence (int index)
 Get the sequence base at the specified index into this sequence 0 to readLength - 1, translating the base as specified by setSequenceTranslation.
 
char getSequence (int index, SequenceTranslation translation)
 Get the sequence base at the specified index into this sequence 0 to readLength - 1 performing the specified sequence translation.
 
char getQuality (int index)
 Get the quality character at the specified index into the quality 0 to readLength - 1.
 
Cigar * getCigarInfo ()
 Returns a pointer to the Cigar object associated with this record.
 
uint32_t getNumOverlaps (int32_t start, int32_t end)
 Return the number of bases in this read that overlap the passed in region.
 
bool getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality)
 Returns the values of all fields except the tags.
 
bool getFields (bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality, SequenceTranslation translation)
 Returns the values of all fields except the tags using the specified sequence translation.
 
GenomeSequence * getReference ()
 Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it was not set).
 

Get Tag Methods

Get methods for obtaining information on tags.

uint32_t getTagLength ()
 Returns the length of the BAM formatted tags.
 
bool getNextSamTag (char *tag, char &vtype, void **value)
 Get the next tag from the record.
 
void resetTagIter ()
 Reset the tag iterator to the beginning of the tags.
 
bool getTagsString (const char *tags, String &returnString, char delim='\t')
 Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE... Sets the Status to SUCCESS when the tags are successfully returned or the tags were not found.
 
const String * getStringTag (const char *tag)
 Get the string value for the specified tag.
 
int * getIntegerTag (const char *tag)
 Get the integer value for the specified tag, DEPRECATED, use one that returns a bool (success/failure).
 
bool getIntegerTag (const char *tag, int &tagVal)
 Get the integer value for the specified tag.
 
bool getFloatTag (const char *tag, float &tagVal)
 Get the float value for the specified tag.
 
const String & getString (const char *tag)
 Get the string value for the specified tag.
 
int & getInteger (const char *tag)
 Get the integer value for the specified tag, DEPRECATED, use getIntegerTag that returns a bool.
 
bool checkString (const char *tag)
 Check if the specified tag contains a string.
 
bool checkInteger (const char *tag)
 Check if the specified tag contains an integer.
 
bool checkFloat (const char *tag)
 Check if the specified tag contains a float.
 
bool checkTag (const char *tag, char type)
 Check if the specified tag contains a value of the specified vtype.
 
const SamStatus & getStatus ()
 Returns the status associated with the last method that sets the status.
 
static bool isIntegerType (char vtype)
 Returns whether or not the specified vtype is an integer type.
 
static bool isFloatType (char vtype)
 Returns whether or not the specified vtype is a float type.
 
static bool isCharType (char vtype)
 Returns whether or not the specified vtype is a char type.
 
static bool isStringType (char vtype)
 Returns whether or not the specified vtype is a string type.
 

Detailed Description

Class providing an easy to use interface to get/set/operate on the fields in a SAM/BAM record.


Definition at line 51 of file SamRecord.h.

Member Enumeration Documentation

◆ SequenceTranslation

Enum containing the settings on how to translate the sequence if a reference is available.

If no reference is available, no translation is done.

Enumerator
NONE 

Leave the sequence as is.

EQUAL 

Translate bases that match the reference to '='.

BASES 

Translate '=' to the actual base.

Definition at line 57 of file SamRecord.h.

57 {
58 NONE, ///< Leave the sequence as is.
59 EQUAL, ///< Translate bases that match the reference to '='
60 BASES, ///< Translate '=' to the actual base.
61 };
@ NONE
Leave the sequence as is.
Definition SamRecord.h:58
@ BASES
Translate '=' to the actual base.
Definition SamRecord.h:60
@ EQUAL
Translate bases that match the reference to '='.
Definition SamRecord.h:59

Constructor & Destructor Documentation

◆ SamRecord() [1/2]

SamRecord::SamRecord ( )

Default Constructor.

Definition at line 34 of file SamRecord.cpp.

35 : myStatus(),
36 myRefPtr(NULL),
37 mySequenceTranslation(NONE)
38{
39 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
40
41 myRecordPtr =
42 (bamRecordStruct *) malloc(defaultAllocSize);
43
44 myCigarTempBuffer = NULL;
45 myCigarTempBufferAllocatedSize = 0;
46
47 allocatedSize = defaultAllocSize;
48
50}
void resetRecord()
Reset the fields of the record to a default value.
Definition SamRecord.cpp:91
Structure of a BAM record.
Definition SamRecord.h:34

References resetRecord().

◆ SamRecord() [2/2]

SamRecord::SamRecord ( ErrorHandler::HandlingType  errorHandlingType)

Constructor that sets the error handling type.

Parameters
errorHandlingType: how to handle errors.

Definition at line 53 of file SamRecord.cpp.

54 : myStatus(errorHandlingType),
55 myRefPtr(NULL),
56 mySequenceTranslation(NONE)
57{
58 int32_t defaultAllocSize = DEFAULT_BLOCK_SIZE + sizeof(int32_t);
59
60 myRecordPtr =
61 (bamRecordStruct *) malloc(defaultAllocSize);
62
63 myCigarTempBuffer = NULL;
64 myCigarTempBufferAllocatedSize = 0;
65
66 allocatedSize = defaultAllocSize;
67
69}

References resetRecord().

◆ ~SamRecord()

SamRecord::~SamRecord ( )

Destructor.

Definition at line 72 of file SamRecord.cpp.

73{
75
76 if(myRecordPtr != NULL)
77 {
78 free(myRecordPtr);
79 myRecordPtr = NULL;
80 }
81 if(myCigarTempBuffer != NULL)
82 {
83 free(myCigarTempBuffer);
84 myCigarTempBuffer = NULL;
85 myCigarTempBufferAllocatedSize = 0;
86 }
87}

References resetRecord().

Member Function Documentation

◆ addIntTag()

bool SamRecord::addIntTag ( const char *  tag,
int32_t  value 
)

Add the specified integer tag to the record.

Internal processing handles switching between SAM/BAM formats when read/written and determining the type for BAM format. If the tag is already there this code will replace it if the specified value is different.

Parameters
tag: two character tag to be added to the SAM/BAM record.
value: value for the specified tag.
Returns
true if the tag was successfully added, false otherwise.

Definition at line 647 of file SamRecord.cpp.

648{
649 myStatus = SamStatus::SUCCESS;
650 int key = 0;
651 int index = 0;
652 char bamvtype;
653
654 int tagBufferSize = 0;
655
656 // First check to see if the tags need to be synced to the buffer.
657 if(myNeedToSetTagsFromBuffer)
658 {
659 if(!setTagsFromBuffer())
660 {
661 // Failed to read tags from the buffer, so cannot add new ones.
662 return(false);
663 }
664 }
665
666 // Ints come in as int. But it can be represented in fewer bits.
667 // So determine a more specific type that is in line with the
668 // types for BAM files.
669 // First check to see if it is a negative.
670 if(value < 0)
671 {
672 // The int is negative, so it will need to use a signed type.
673 // See if it is greater than the min value for a char.
674 if(value > ((std::numeric_limits<char>::min)()))
675 {
676 // It can be stored in a signed char.
677 bamvtype = 'c';
678 tagBufferSize += 4;
679 }
680 else if(value > ((std::numeric_limits<short>::min)()))
681 {
682 // It fits in a signed short.
683 bamvtype = 's';
684 tagBufferSize += 5;
685 }
686 else
687 {
688 // Just store it as a signed int.
689 bamvtype = 'i';
690 tagBufferSize += 7;
691 }
692 }
693 else
694 {
695 // It is positive, so an unsigned type can be used.
696 if(value < ((std::numeric_limits<unsigned char>::max)()))
697 {
698 // It is under the max of an unsigned char.
699 bamvtype = 'C';
700 tagBufferSize += 4;
701 }
702 else if(value < ((std::numeric_limits<unsigned short>::max)()))
703 {
704 // It is under the max of an unsigned short.
705 bamvtype = 'S';
706 tagBufferSize += 5;
707 }
708 else
709 {
710 // Just store it as an unsigned int.
711 bamvtype = 'I';
712 tagBufferSize += 7;
713 }
714 }
715
716 // Check to see if the tag is already there.
717 key = MAKEKEY(tag[0], tag[1], bamvtype);
718 unsigned int hashIndex = extras.Find(key);
719 if(hashIndex != LH_NOTFOUND)
720 {
721 // Tag was already found.
722 index = extras[hashIndex];
723
724 // Since the tagBufferSize was already updated with the new value,
725 // subtract the size for the previous tag (even if they are the same).
726 switch(intType[index])
727 {
728 case 'c':
729 case 'C':
730 case 'A':
731 tagBufferSize -= 4;
732 break;
733 case 's':
734 case 'S':
735 tagBufferSize -= 5;
736 break;
737 case 'i':
738 case 'I':
739 tagBufferSize -= 7;
740 break;
741 default:
743 "unknown tag inttype type found.\n");
744 return(false);
745 }
746
747 // Tag already existed, print message about overwriting.
748 // WARN about dropping duplicate tags.
749 if(myNumWarns++ < myMaxWarns)
750 {
751 String newVal;
752 String origVal;
753 appendIntArrayValue(index, origVal);
754 appendIntArrayValue(bamvtype, value, newVal);
755 fprintf(stderr, "WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
756 tag[0], tag[1], intType[index], origVal.c_str(), tag[0], tag[1], bamvtype, newVal.c_str());
757 if(myNumWarns == myMaxWarns)
758 {
759 fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
760 }
761 }
762
763 // Update the integer value and type.
764 integers[index] = value;
765 intType[index] = bamvtype;
766 }
767 else
768 {
769 // Tag is not already there, so add it.
770 index = integers.Length();
771
772 integers.Push(value);
773 intType.push_back(bamvtype);
774
775 extras.Add(key, index);
776 }
777
778 // The buffer tags are now out of sync.
779 myNeedToSetTagsInBuffer = true;
780 myIsTagsBufferValid = false;
781 myIsBufferSynced = false;
782 myTagBufferSize += tagBufferSize;
783
784 return(true);
785}
@ SUCCESS
method completed successfully.
@ INVALID
invalid other than for sorting.
void setStatus(Status newStatus, const char *newMessage)
Set the status with the specified status enum and message.

References StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

Referenced by addTag().

◆ addTag()

bool SamRecord::addTag ( const char *  tag,
char  vtype,
const char *  value 
)

Add the specified tag,vtype,value to the record.

Vtype can be SAM/BAM format. Internal processing handles switching between SAM/BAM formats when read/written. If the tag is already there this code will replace it if the specified value is different.

Parameters
tag: two character tag to be added to the SAM/BAM record.
vtype: vtype of the specified value - either SAM/BAM vtypes.
value: value as a string for the specified tag.
Returns
true if the tag was successfully added, false otherwise.

Definition at line 791 of file SamRecord.cpp.

792{
793 if(vtype == 'i')
794 {
795 // integer type. Call addIntTag to handle it.
796 int intVal = atoi(valuePtr);
797 return(addIntTag(tag, intVal));
798 }
799
800 // Non-int type.
801 myStatus = SamStatus::SUCCESS;
802 bool status = true; // default to successful.
803 int key = 0;
804 int index = 0;
805
806 int tagBufferSize = 0;
807
808 // First check to see if the tags need to be synced to the buffer.
809 if(myNeedToSetTagsFromBuffer)
810 {
811 if(!setTagsFromBuffer())
812 {
813 // Failed to read tags from the buffer, so cannot add new ones.
814 return(false);
815 }
816 }
817
818 // First check to see if the tag is already there.
819 key = MAKEKEY(tag[0], tag[1], vtype);
820 unsigned int hashIndex = extras.Find(key);
821 if(hashIndex != LH_NOTFOUND)
822 {
823 // The key was found in the hash, so get the lookup index.
824 index = extras[hashIndex];
825
826 String origTag;
827 char origType = vtype;
828
829 // Adjust the currently pointed to value to the new setting.
830 switch (vtype)
831 {
832 case 'A' :
833 // First check to see if the value changed.
834 if((integers[index] == (const int)*(valuePtr)) &&
835 (intType[index] == vtype))
836 {
837 // The value & type has not changed, so do nothing.
838 return(true);
839 }
840 else
841 {
842 // Tag buffer size changes if type changes, so subtract & add.
843 origType = intType[index];
844 appendIntArrayValue(index, origTag);
845 tagBufferSize -= getNumericTagTypeSize(intType[index]);
846 tagBufferSize += getNumericTagTypeSize(vtype);
847 integers[index] = (const int)*(valuePtr);
848 intType[index] = vtype;
849 }
850 break;
851 case 'Z' :
852 // First check to see if the value changed.
853 if(strings[index] == valuePtr)
854 {
855 // The value has not changed, so do nothing.
856 return(true);
857 }
858 else
859 {
860 // Adjust the tagBufferSize by removing the size of the old string.
861 origTag = strings[index];
862 tagBufferSize -= strings[index].Length();
863 strings[index] = valuePtr;
864 // Adjust the tagBufferSize by adding the size of the new string.
865 tagBufferSize += strings[index].Length();
866 }
867 break;
868 case 'B' :
869 // First check to see if the value changed.
870 if(strings[index] == valuePtr)
871 {
872 // The value has not changed, so do nothing.
873 return(true);
874 }
875 else
876 {
877 // Adjust the tagBufferSize by removing the size of the old field.
878 origTag = strings[index];
879 tagBufferSize -= getBtagBufferSize(strings[index]);
880 strings[index] = valuePtr;
881 // Adjust the tagBufferSize by adding the size of the new field.
882 tagBufferSize += getBtagBufferSize(strings[index]);
883 }
884 break;
885 case 'f' :
886 // First check to see if the value changed.
887 if(floats[index] == (float)atof(valuePtr))
888 {
889 // The value has not changed, so do nothing.
890 return(true);
891 }
892 else
893 {
894 // Tag buffer size doesn't change between different 'f' entries.
895 origTag.appendFullFloat(floats[index]);
896 floats[index] = (float)atof(valuePtr);
897 }
898 break;
899 default :
900 fprintf(stderr,
901 "samRecord::addTag() - Unknown custom field of type %c\n",
902 vtype);
904 "Unknown custom field in a tag");
905 status = false;
906 break;
907 }
908
909 // Duplicate tag in this record.
910 // Tag already existed, print message about overwriting.
911 // WARN about dropping duplicate tags.
912 if(myNumWarns++ < myMaxWarns)
913 {
914 fprintf(stderr, "WARNING: Duplicate Tags, overwritting %c%c:%c:%s with %c%c:%c:%s\n",
915 tag[0], tag[1], origType, origTag.c_str(), tag[0], tag[1], vtype, valuePtr);
916 if(myNumWarns == myMaxWarns)
917 {
918 fprintf(stderr, "Suppressing rest of Duplicate Tag warnings.\n");
919 }
920 }
921 }
922 else
923 {
924 // The key was not found in the hash, so add it.
925 switch (vtype)
926 {
927 case 'A' :
928 index = integers.Length();
929 integers.Push((const int)*(valuePtr));
930 intType.push_back(vtype);
931 tagBufferSize += 4;
932 break;
933 case 'Z' :
934 index = strings.Length();
935 strings.Push(valuePtr);
936 tagBufferSize += 4 + strings.Last().Length();
937 break;
938 case 'B' :
939 index = strings.Length();
940 strings.Push(valuePtr);
941 tagBufferSize += 3 + getBtagBufferSize(strings[index]);
942 break;
943 case 'f' :
944 index = floats.size();
945 floats.push_back((float)atof(valuePtr));
946 tagBufferSize += 7;
947 break;
948 default :
949 fprintf(stderr,
950 "samRecord::addTag() - Unknown custom field of type %c\n",
951 vtype);
953 "Unknown custom field in a tag");
954 status = false;
955 break;
956 }
957 if(status)
958 {
959 // If successful, add the key to extras.
960 extras.Add(key, index);
961 }
962 }
963
964 // Only add the tag if it has so far been successfully processed.
965 if(status)
966 {
967 // The buffer tags are now out of sync.
968 myNeedToSetTagsInBuffer = true;
969 myIsTagsBufferValid = false;
970 myIsBufferSynced = false;
971 myTagBufferSize += tagBufferSize;
972 }
973 return(status);
974}
bool addIntTag(const char *tag, int32_t value)
Add the specified integer tag to the record.
@ FAIL_PARSE
failed to parse a record/header - invalid format.

References addIntTag(), StatGenStatus::FAIL_PARSE, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ checkFloat()

bool SamRecord::checkFloat ( const char *  tag)
inline

Check if the specified tag contains a float.

Does not set SamStatus.

Parameters
tag: SAM tag to check contents of.
Returns
true if the value associated with the tag is a float.

Definition at line 613 of file SamRecord.h.

613{ return checkTag(tag, 'f'); }
bool checkTag(const char *tag, char type)
Check if the specified tag contains a value of the specified vtype.

References checkTag().

◆ checkInteger()

bool SamRecord::checkInteger ( const char *  tag)
inline

Check if the specified tag contains an integer.

Does not set SamStatus.

Parameters
tag: SAM tag to check contents of.
Returns
true if the value associated with the tag is an integer.

Definition at line 607 of file SamRecord.h.

607{ return checkTag(tag, 'i'); }

References checkTag().

◆ checkString()

bool SamRecord::checkString ( const char *  tag)
inline

Check if the specified tag contains a string.

Does not set SamStatus.

Parameters
tag: SAM tag to check contents of.
Returns
true if the value associated with the tag is a string.

Definition at line 600 of file SamRecord.h.

601 { return(checkTag(tag, 'Z') || checkTag(tag, 'B')); }

References checkTag().

◆ checkTag()

bool SamRecord::checkTag ( const char *  tag,
char  type 
)

Check if the specified tag contains a value of the specified vtype.

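Resets SamStatus to SUCCESS on entry; if the tags must first be parsed from the buffer and that parsing fails, the status is set to the parse error instead.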

Parameters
tag: SAM tag to check contents of.
type: value type to check if the SAM tag matches.
Returns
true if the record contains the tag with a value of the specified type.

Definition at line 2381 of file SamRecord.cpp.

2382{
2383 // Init to success.
2384 myStatus = SamStatus::SUCCESS;
2385 // Parse the buffer if necessary.
2386 if(myNeedToSetTagsFromBuffer)
2387 {
2388 if(!setTagsFromBuffer())
2389 {
2390 // Failed to read the tags from the buffer, so cannot
2391 // get tags. setTagsFromBuffer set the error.
2392 return("");
2393 }
2394 }
2395
2396 int key = MAKEKEY(tag[0], tag[1], type);
2397
2398 return (extras.Find(key) != LH_NOTFOUND);
2399}

References StatGenStatus::SUCCESS.

Referenced by checkFloat(), checkInteger(), and checkString().

◆ clearTags()

void SamRecord::clearTags ( )

Clear the tags in this record.

Does not set SamStatus.

Definition at line 977 of file SamRecord.cpp.

978{
979 if(extras.Entries() != 0)
980 {
981 extras.Clear();
982 }
983 strings.Clear();
984 integers.Clear();
985 intType.clear();
986 floats.clear();
987 myTagBufferSize = 0;
988 resetTagIter();
989}
void resetTagIter()
Reset the tag iterator to the beginning of the tags.

References resetTagIter().

Referenced by resetRecord().

◆ get0BasedAlignmentEnd()

int32_t SamRecord::get0BasedAlignmentEnd ( )

Returns the 0-based inclusive rightmost position of the clipped sequence.

Returns
0-based inclusive rightmost position

Definition at line 1467 of file SamRecord.cpp.

1468{
1469 myStatus = SamStatus::SUCCESS;
1470 if(myAlignmentLength == -1)
1471 {
1472 // Alignment end has not been set, so calculate it.
1473 parseCigar();
1474 }
1475 // If alignment length > 0, subtract 1 from it to get the end.
1476 if(myAlignmentLength == 0)
1477 {
1478 // Length is 0, just return the start position.
1479 return(myRecordPtr->myPosition);
1480 }
1481 return(myRecordPtr->myPosition + myAlignmentLength - 1);
1482}

References StatGenStatus::SUCCESS.

Referenced by get0BasedUnclippedEnd(), get1BasedAlignmentEnd(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion(), and CigarHelper::softClipEndByRefPos().

◆ get0BasedMatePosition()

int32_t SamRecord::get0BasedMatePosition ( )

Get the 0-based(BAM) leftmost mate/next fragment's position.

Returns
0-based leftmost position.

Definition at line 1452 of file SamRecord.cpp.

1453{
1454 myStatus = SamStatus::SUCCESS;
1455 return myRecordPtr->myMatePosition;
1456}

References StatGenStatus::SUCCESS.

◆ get0BasedPosition()

◆ get0BasedUnclippedEnd()

int32_t SamRecord::get0BasedUnclippedEnd ( )

Returns the 0-based inclusive right-most position adjusted for clipped bases.

Returns
0-based inclusive rightmost position including clips.

Definition at line 1526 of file SamRecord.cpp.

1527{
1528 // myUnclippedEndOffset will be set by get0BasedAlignmentEnd if the
1529 // cigar has not yet been parsed, so no need to check it here.
1530 return(get0BasedAlignmentEnd() + myUnclippedEndOffset);
1531}
int32_t get0BasedAlignmentEnd()
Returns the 0-based inclusive rightmost position of the clipped sequence.

References get0BasedAlignmentEnd().

Referenced by get1BasedUnclippedEnd().

◆ get0BasedUnclippedStart()

int32_t SamRecord::get0BasedUnclippedStart ( )

Returns the 0-based inclusive left-most position adjusted for clipped bases.

Returns
0-based inclusive leftmost position including clips.

Definition at line 1506 of file SamRecord.cpp.

1507{
1508 myStatus = SamStatus::SUCCESS;
1509 if(myUnclippedStartOffset == -1)
1510 {
1511 // Unclipped has not yet been calculated, so parse the cigar to get it
1512 parseCigar();
1513 }
1514 return(myRecordPtr->myPosition - myUnclippedStartOffset);
1515}

References StatGenStatus::SUCCESS.

Referenced by get1BasedUnclippedStart().

◆ get1BasedAlignmentEnd()

int32_t SamRecord::get1BasedAlignmentEnd ( )

Returns the 1-based inclusive rightmost position of the clipped sequence.

Returns
1-based inclusive rightmost position

Definition at line 1486 of file SamRecord.cpp.

1487{
1488 return(get0BasedAlignmentEnd() + 1);
1489}

References get0BasedAlignmentEnd().

Referenced by getBin().

◆ get1BasedMatePosition()

int32_t SamRecord::get1BasedMatePosition ( )

Get the 1-based(SAM) leftmost mate/next fragment's position (PNEXT).

Returns
1-based leftmost position.

Definition at line 1445 of file SamRecord.cpp.

1446{
1447 myStatus = SamStatus::SUCCESS;
1448 return (myRecordPtr->myMatePosition + 1);
1449}

References StatGenStatus::SUCCESS.

◆ get1BasedPosition()

int32_t SamRecord::get1BasedPosition ( )

Get the 1-based(SAM) leftmost position (POS) of the record.

Returns
1-based leftmost position.

Definition at line 1312 of file SamRecord.cpp.

1313{
1314 myStatus = SamStatus::SUCCESS;
1315 return (myRecordPtr->myPosition + 1);
1316}

References StatGenStatus::SUCCESS.

Referenced by SamValidator::isValid().

◆ get1BasedUnclippedEnd()

int32_t SamRecord::get1BasedUnclippedEnd ( )

Returns the 1-based inclusive right-most position adjusted for clipped bases.

Returns
1-based inclusive rightmost position including clips.

Definition at line 1535 of file SamRecord.cpp.

1536{
1537 return(get0BasedUnclippedEnd() + 1);
1538}
int32_t get0BasedUnclippedEnd()
Returns the 0-based inclusive right-most position adjusted for clipped bases.

References get0BasedUnclippedEnd().

◆ get1BasedUnclippedStart()

int32_t SamRecord::get1BasedUnclippedStart ( )

Returns the 1-based inclusive left-most position adjusted for clipped bases.

Returns
1-based inclusive leftmost position including clips.

Definition at line 1519 of file SamRecord.cpp.

1520{
1521 return(get0BasedUnclippedStart() + 1);
1522}
int32_t get0BasedUnclippedStart()
Returns the 0-based inclusive left-most position adjusted for clipped bases.

References get0BasedUnclippedStart().

◆ getAlignmentLength()

int32_t SamRecord::getAlignmentLength ( )

Returns the length of the clipped sequence, returning 0 if the cigar is '*'.

Returns
length of the clipped sequence.

Definition at line 1493 of file SamRecord.cpp.

1494{
1495 myStatus = SamStatus::SUCCESS;
1496 if(myAlignmentLength == -1)
1497 {
1498 // Alignment end has not been set, so calculate it.
1499 parseCigar();
1500 }
1501 // Return the alignment length.
1502 return(myAlignmentLength);
1503}

References StatGenStatus::SUCCESS.

◆ getBin()

uint16_t SamRecord::getBin ( )

Get the BAM bin for the record.

Returns
BAM bin

Definition at line 1347 of file SamRecord.cpp.

1348{
1349 myStatus = SamStatus::SUCCESS;
1350 if(!myIsBinValid)
1351 {
1352 // The bin that is set in the record is not valid, so
1353 // reset it.
1354 myRecordPtr->myBin =
1355 bam_reg2bin(myRecordPtr->myPosition, get1BasedAlignmentEnd());
1356 myIsBinValid = true;
1357 }
1358 return(myRecordPtr->myBin);
1359}
int32_t get1BasedAlignmentEnd()
Returns the 1-based inclusive rightmost position of the clipped sequence.

References get1BasedAlignmentEnd(), and StatGenStatus::SUCCESS.

◆ getBlockSize()

int32_t SamRecord::getBlockSize ( )

Get the block size of the record (BAM format).

Returns
BAM block size of the record.

Definition at line 1281 of file SamRecord.cpp.

1282{
1283 myStatus = SamStatus::SUCCESS;
1284 // If the buffer isn't synced, sync the buffer to determine the
1285 // block size.
1286 if(myIsBufferSynced == false)
1287 {
1288 // Since this just returns the block size, the translation of
1289 // the sequence does not matter, so just use the currently set
1290 // value.
1291 fixBuffer(myBufferSequenceTranslation);
1292 }
1293 return myRecordPtr->myBlockSize;
1294}

References StatGenStatus::SUCCESS.

◆ getCigar()

const char * SamRecord::getCigar ( )

Returns the SAM formatted CIGAR string.

Returns
cigar string.

Definition at line 1555 of file SamRecord.cpp.

1556{
1557 myStatus = SamStatus::SUCCESS;
1558 if(myCigar.Length() == 0)
1559 {
1560 // 0 Length, means that it is in the buffer, but has not yet
1561 // been synced to the string, so do the sync.
1562 parseCigarBinary();
1563 }
1564 return myCigar.c_str();
1565}

References StatGenStatus::SUCCESS.

Referenced by getFields(), SamValidator::isValidCigar(), CigarHelper::softClipBeginByRefPos(), and CigarHelper::softClipEndByRefPos().

◆ getCigarInfo()

Cigar * SamRecord::getCigarInfo ( )

Returns a pointer to the Cigar object associated with this record.


The object is essentially read-only, only allowing modifications due to lazy evaluations.

Returns
pointer to the Cigar object.

Definition at line 1836 of file SamRecord.cpp.

1837{
1838 // Check to see whether or not the Cigar has already been
1839 // set - this is determined by checking if alignment length
1840 // is set since alignment length and the cigar are set
1841 // at the same time.
1842 if(myAlignmentLength == -1)
1843 {
1844 // Not been set, so calculate it.
1845 parseCigar();
1846 }
1847 return(&myCigarRoller);
1848}

Referenced by PileupElementBaseQual::addEntry(), SamRecordHelper::checkSequence(), SamTags::createMDTag(), getSequence(), getSequence(), SamQuerySeqWithRefIter::reset(), SamFilter::softClip(), CigarHelper::softClipBeginByRefPos(), and CigarHelper::softClipEndByRefPos().

◆ getCigarLength()

uint16_t SamRecord::getCigarLength ( )

Get the length of the BAM formatted CIGAR.

Returns
length of BAM formatted cigar.

Definition at line 1362 of file SamRecord.cpp.

1363{
1364 myStatus = SamStatus::SUCCESS;
1365 // If the cigar buffer is valid
1366 // then get the length from there.
1367 if(myIsCigarBufferValid)
1368 {
1369 return myRecordPtr->myCigarLength;
1370 }
1371
1372 if(myCigarTempBufferLength == -1)
1373 {
1374 // The cigar buffer is not valid and the cigar temp buffer is not set,
1375 // so parse the string.
1376 parseCigarString();
1377 }
1378
1379 // The temp buffer is now set, so return the size.
1380 return(myCigarTempBufferLength);
1381}

References StatGenStatus::SUCCESS.

◆ getFields() [1/2]

bool SamRecord::getFields ( bamRecordStruct recStruct,
String readName,
String cigar,
String sequence,
String quality 
)

Returns the values of all fields except the tags.

Parameters
recStructstructure containing the contents of all non-variable length fields.
readNameread name from the record (return param)
cigarcigar string from the record (return param)
sequencesequence string from the record (return param)
qualityquality string from the record (return param)
Returns
true if all fields were successfully set, false otherwise.

Definition at line 1866 of file SamRecord.cpp.

1868{
1869 return(getFields(recStruct, readName, cigar, sequence, quality,
1870 mySequenceTranslation));
1871}
bool getFields(bamRecordStruct &recStruct, String &readName, String &cigar, String &sequence, String &quality)
Returns the values of all fields except the tags.

References getFields().

Referenced by getFields().

◆ getFields() [2/2]

bool SamRecord::getFields ( bamRecordStruct recStruct,
String readName,
String cigar,
String sequence,
String quality,
SequenceTranslation  translation 
)

Returns the values of all fields except the tags using the specified sequence translation.

Parameters
recStructstructure containing the contents of all non-variable length fields.
readNameread name from the record (return param)
cigarcigar string from the record (return param)
sequencesequence string from the record (return param)
qualityquality string from the record (return param)
translationtype of sequence translation to use.
Returns
true if all fields were successfully set, false otherwise.

Definition at line 1875 of file SamRecord.cpp.

1878{
1879 myStatus = SamStatus::SUCCESS;
1880 if(myIsBufferSynced == false)
1881 {
1882 if(!fixBuffer(translation))
1883 {
1884 // failed to set the buffer, return false.
1885 return(false);
1886 }
1887 }
1888 memcpy(&recStruct, myRecordPtr, sizeof(bamRecordStruct));
1889
1890 readName = getReadName();
1891 // Check the status.
1892 if(myStatus != SamStatus::SUCCESS)
1893 {
1894 // Failed to set the fields, return false.
1895 return(false);
1896 }
1897 cigar = getCigar();
1898 // Check the status.
1899 if(myStatus != SamStatus::SUCCESS)
1900 {
1901 // Failed to set the fields, return false.
1902 return(false);
1903 }
1904 sequence = getSequence(translation);
1905 // Check the status.
1906 if(myStatus != SamStatus::SUCCESS)
1907 {
1908 // Failed to set the fields, return false.
1909 return(false);
1910 }
1911 quality = getQuality();
1912 // Check the status.
1913 if(myStatus != SamStatus::SUCCESS)
1914 {
1915 // Failed to set the fields, return false.
1916 return(false);
1917 }
1918 return(true);
1919}
const char * getCigar()
Returns the SAM formatted CIGAR string.
const char * getReadName()
Returns the SAM formatted Read Name (QNAME).
const char * getQuality()
Returns the SAM formatted quality string (QUAL).
const char * getSequence()
Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTran...

References getCigar(), getQuality(), getReadName(), getSequence(), and StatGenStatus::SUCCESS.

◆ getFlag()

uint16_t SamRecord::getFlag ( )

Get the flag (FLAG).

Returns
flag.

Definition at line 1384 of file SamRecord.cpp.

1385{
1386 myStatus = SamStatus::SUCCESS;
1387 return myRecordPtr->myFlag;
1388}

References StatGenStatus::SUCCESS.

Referenced by SamFilter::filterRead(), SamQuerySeqWithRefIter::getNextMatchMismatch(), SamValidator::isValid(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processFile(), and SamFile::ReadRecord().

◆ getFloatTag()

bool SamRecord::getFloatTag ( const char *  tag,
float &  tagVal 
)

Get the float value for the specified tag.

Parameters
tagtag to retrieve
tagValreturn parameter with float value for the tag
Returns
bool true if Float tag was found and tagVal was set, false if not.

Definition at line 2281 of file SamRecord.cpp.

2282{
2283 // Init to success.
2284 myStatus = SamStatus::SUCCESS;
2285 // Parse the buffer if necessary.
2286 if(myNeedToSetTagsFromBuffer)
2287 {
2288 if(!setTagsFromBuffer())
2289 {
2290 // Failed to read the tags from the buffer, so cannot
2291 // get tags. setTagsFromBuffer set the errors,
2292 // so just return false.
2293 return(false);
2294 }
2295 }
2296
2297 int key = MAKEKEY(tag[0], tag[1], 'f');
2298 int offset = extras.Find(key);
2299
2300 int value;
2301 if (offset < 0)
2302 {
2303 // Failed to find the tag.
2304 return(false);
2305 }
2306 else
2307 value = extras[offset];
2308
2309 tagVal = floats[value];
2310 return(true);
2311}

References StatGenStatus::SUCCESS.

◆ getInsertSize()

int32_t SamRecord::getInsertSize ( )

Get the inferred insert size of the read pair (ISIZE) or observed template length (TLEN).

Returns
inferred insert size or observed template length.

Definition at line 1459 of file SamRecord.cpp.

1460{
1461 myStatus = SamStatus::SUCCESS;
1462 return myRecordPtr->myInsertSize;
1463}

References StatGenStatus::SUCCESS.

◆ getInteger()

int & SamRecord::getInteger ( const char *  tag)

Get the integer value for the specified tag, DEPRECATED, use getIntegerTag that returns a bool.

Definition at line 2350 of file SamRecord.cpp.

2351{
2352 // Init to success.
2353 myStatus = SamStatus::SUCCESS;
2354 // Parse the buffer if necessary.
2355 if(myNeedToSetTagsFromBuffer)
2356 {
2357 if(!setTagsFromBuffer())
2358 {
2359 // Failed to read the tags from the buffer, so cannot
2360 // get tags. setTagsFromBuffer set the error.
2361 // TODO - what do we want to do on failure?
2362 }
2363 }
2364
2365 int key = MAKEKEY(tag[0], tag[1], 'i');
2366 int offset = extras.Find(key);
2367
2368 int value;
2369 if (offset < 0)
2370 {
2371 // TODO - what do we want to do on failure?
2372 return NOT_FOUND_TAG_INT;
2373 }
2374 else
2375 value = extras[offset];
2376
2377 return integers[value];
2378}

References StatGenStatus::SUCCESS.

◆ getIntegerTag() [1/2]

int * SamRecord::getIntegerTag ( const char *  tag)

Get the integer value for the specified tag, DEPRECATED, use one that returns a bool (success/failure).

Parameters
tagtag to retrieve
Returns
pointer to the tag's integer value if found, NULL if not found.

Definition at line 2216 of file SamRecord.cpp.

2217{
2218 // Init to success.
2219 myStatus = SamStatus::SUCCESS;
2220 // Parse the buffer if necessary.
2221 if(myNeedToSetTagsFromBuffer)
2222 {
2223 if(!setTagsFromBuffer())
2224 {
2225 // Failed to read the tags from the buffer, so cannot
2226 // get tags. setTagsFromBuffer set the errors,
2227 // so just return NULL.
2228 return(NULL);
2229 }
2230 }
2231
2232 int key = MAKEKEY(tag[0], tag[1], 'i');
2233 int offset = extras.Find(key);
2234
2235 int value;
2236 if (offset < 0)
2237 {
2238 // Failed to find the tag.
2239 return(NULL);
2240 }
2241 else
2242 value = extras[offset];
2243
2244 return(&(integers[value]));
2245}

References StatGenStatus::SUCCESS.

◆ getIntegerTag() [2/2]

bool SamRecord::getIntegerTag ( const char *  tag,
int &  tagVal 
)

Get the integer value for the specified tag.

Parameters
tagtag to retrieve
tagValreturn parameter with integer value for the tag
Returns
bool true if Integer tag was found and tagVal was set, false if not.

Definition at line 2248 of file SamRecord.cpp.

2249{
2250 // Init to success.
2251 myStatus = SamStatus::SUCCESS;
2252 // Parse the buffer if necessary.
2253 if(myNeedToSetTagsFromBuffer)
2254 {
2255 if(!setTagsFromBuffer())
2256 {
2257 // Failed to read the tags from the buffer, so cannot
2258 // get tags. setTagsFromBuffer set the errors,
2259 // so just return false.
2260 return(false);
2261 }
2262 }
2263
2264 int key = MAKEKEY(tag[0], tag[1], 'i');
2265 int offset = extras.Find(key);
2266
2267 int value;
2268 if (offset < 0)
2269 {
2270 // Failed to find the tag.
2271 return(false);
2272 }
2273 else
2274 value = extras[offset];
2275
2276 tagVal = integers[value];
2277 return(true);
2278}

References StatGenStatus::SUCCESS.

◆ getMapQuality()

uint8_t SamRecord::getMapQuality ( )

Get the mapping quality (MAPQ) of the record.

Returns
map quality.

Definition at line 1340 of file SamRecord.cpp.

1341{
1342 myStatus = SamStatus::SUCCESS;
1343 return myRecordPtr->myMapQuality;
1344}

References StatGenStatus::SUCCESS.

Referenced by SamValidator::isValid().

◆ getMateReferenceID()

int32_t SamRecord::getMateReferenceID ( )

Get the mate reference id of the record (BAM format: mate_rid/next_refID).

Returns
reference id

Definition at line 1438 of file SamRecord.cpp.

1439{
1440 myStatus = SamStatus::SUCCESS;
1441 return myRecordPtr->myMateReferenceID;
1442}

References StatGenStatus::SUCCESS.

◆ getMateReferenceName()

const char * SamRecord::getMateReferenceName ( )

Get the mate/next fragment's reference sequence name (RNEXT).

If it is equal to the reference name, it still returns the reference name.

Returns
reference sequence name

Definition at line 1410 of file SamRecord.cpp.

1411{
1412 myStatus = SamStatus::SUCCESS;
1413 return myMateReferenceName.c_str();
1414}

References StatGenStatus::SUCCESS.

◆ getMateReferenceNameOrEqual()

const char * SamRecord::getMateReferenceNameOrEqual ( )

Get the mate/next fragment's reference sequence name (RNEXT), returning "=" if it is the same as the reference name, unless they are both "*" in which case "*" is returned.

Returns
reference sequence name or '='

Definition at line 1420 of file SamRecord.cpp.

1421{
1422 myStatus = SamStatus::SUCCESS;
1423 if(myMateReferenceName == "*")
1424 {
1425 return(myMateReferenceName);
1426 }
1427 if(myMateReferenceName == getReferenceName())
1428 {
1429 return(FIELD_ABSENT_STRING);
1430 }
1431 else
1432 {
1433 return(myMateReferenceName);
1434 }
1435}
const char * getReferenceName()
Get the reference sequence name (RNAME) of the record.

References getReferenceName(), and StatGenStatus::SUCCESS.

◆ getNextSamTag()

bool SamRecord::getNextSamTag ( char *  tag,
char &  vtype,
void **  value 
)

Get the next tag from the record.

Sets the Status to SUCCESS when a tag is successfully returned or when there are no more tags. Otherwise the status is set to describe why it failed (parsing, etc).

Parameters
tagset to the tag when a tag is read.
vtypeset to the vtype when a tag is read.
valuepointer to the value of the tag (will need to cast to int, float, char, or string based on vtype).
Returns
true if a tag was read, false if there are no more tags.

Definition at line 1962 of file SamRecord.cpp.

1963{
1964 myStatus = SamStatus::SUCCESS;
1965 if(myNeedToSetTagsFromBuffer)
1966 {
1967 if(!setTagsFromBuffer())
1968 {
1969 // Failed to read the tags from the buffer, so cannot
1970 // get tags.
1971 return(false);
1972 }
1973 }
1974
1975 // Increment the tag index to start looking at the next tag.
1976 // At the beginning, it is set to -1.
1977 myLastTagIndex++;
1978 int maxTagIndex = extras.Capacity();
1979 if(myLastTagIndex >= maxTagIndex)
1980 {
1981 // Hit the end of the tags, return false, no more tags.
1982 // Status is still success since this is not an error,
1983 // it is just the end of the list.
1984 return(false);
1985 }
1986
1987 bool tagFound = false;
1988 // Loop until a tag is found or the end of extras is hit.
1989 while((tagFound == false) && (myLastTagIndex < maxTagIndex))
1990 {
1991 if(extras.SlotInUse(myLastTagIndex))
1992 {
1993 // Found a slot to use.
1994 int key = extras.GetKey(myLastTagIndex);
1995 getTag(key, tag);
1996 getTypeFromKey(key, vtype);
1997 tagFound = true;
1998 // Get the value associated with the key based on the vtype.
1999 switch (vtype)
2000 {
2001 case 'f' :
2002 *value = getFloatPtr(myLastTagIndex);
2003 break;
2004 case 'i' :
2005 *value = getIntegerPtr(myLastTagIndex, vtype);
2006 if(vtype != 'A')
2007 {
2008 // Convert all int types to 'i'
2009 vtype = 'i';
2010 }
2011 break;
2012 case 'Z' :
2013 case 'B' :
2014 *value = getStringPtr(myLastTagIndex);
2015 break;
2016 default:
2017 myStatus.setStatus(SamStatus::FAIL_PARSE,
2018 "Unknown tag type");
2019 tagFound = false;
2020 break;
2021 }
2022 }
2023 if(!tagFound)
2024 {
2025 // Increment the index since a tag was not found.
2026 myLastTagIndex++;
2027 }
2028 }
2029 return(tagFound);
2030}

References StatGenStatus::FAIL_PARSE, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

Referenced by SamRecordHelper::genSamTagsString().

◆ getNumOverlaps()

uint32_t SamRecord::getNumOverlaps ( int32_t  start,
int32_t  end 
)

Return the number of bases in this read that overlap the passed in region.

Matches & mismatches between the read and the reference are counted as overlaps, but insertions, deletions, skips, clips, and pads are not counted.

Parameters
startinclusive 0-based start position (reference position) of the region to check for overlaps in. (-1 indicates to start at the beginning of the reference.)
endexclusive 0-based end position (reference position) of the region to check for overlaps in. (-1 indicates to go to the end of the reference.)
Returns
number of overlapping bases

Definition at line 1853 of file SamRecord.cpp.

1854{
1855 // Determine whether or not the cigar has been parsed, which sets up
1856 // the cigar roller. This is determined by checking the alignment length.
1857 if(myAlignmentLength == -1)
1858 {
1859 parseCigar();
1860 }
1861 return(myCigarRoller.getNumOverlaps(start, end, get0BasedPosition()));
1862}
uint32_t getNumOverlaps(int32_t start, int32_t end, int32_t queryStartPos)
Return the number of bases that overlap the reference and the read associated with this cigar that fa...
Definition Cigar.cpp:334
int32_t get0BasedPosition()
Get the 0-based(BAM) leftmost position of the record.

References get0BasedPosition(), and Cigar::getNumOverlaps().

Referenced by SamFile::GetNumOverlaps().

◆ getQuality() [1/2]

const char * SamRecord::getQuality ( )

Returns the SAM formatted quality string (QUAL).

Returns
quality string.

Definition at line 1638 of file SamRecord.cpp.

1639{
1640 myStatus = SamStatus::SUCCESS;
1641 if(myQuality.Length() == 0)
1642 {
1643 // 0 Length, means that it is in the buffer, but has not yet
1644 // been synced to the string, so do the sync.
1645 setSequenceAndQualityFromBuffer();
1646 }
1647 return myQuality.c_str();
1648}

References StatGenStatus::SUCCESS.

Referenced by PileupElementBaseQual::addEntry(), getFields(), SamValidator::isValidQuality(), and SamFilter::sumMismatchQuality().

◆ getQuality() [2/2]

char SamRecord::getQuality ( int  index)

Get the quality character at the specified index into the quality 0 to readLength - 1.

Throws an exception if index is out of range.

Parameters
indexindex into the quality string (0 to readLength-1).
Returns
the quality character at the specified index into the quality.

Definition at line 1782 of file SamRecord.cpp.

1783{
1784 // Determine the read length.
1785 int32_t readLen = getReadLength();
1786
1787 // If the read length is 0, return ' ' whose ascii code is below
1788 // the minimum ascii code for qualities.
1789 if(readLen == 0)
1790 {
1791 return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
1792 }
1793 else if((index < 0) || (index >= readLen))
1794 {
1795 // Only get here if the index was out of range, so throw an exception.
1796 String exceptionString = "SamRecord::getQuality(";
1797 exceptionString += index;
1798 exceptionString += ") is out of range. Index must be between 0 and ";
1799 exceptionString += (readLen - 1);
1800 throw std::runtime_error(exceptionString.c_str());
1801 }
1802
1803 if(myQuality.Length() == 0)
1804 {
1805 // Parse BAM Quality.
1806 // Know that myPackedQuality is correct since readLen != 0.
1807 return(myPackedQuality[index] + 33);
1808 }
1809 else
1810 {
1811 // Already have string.
1812 if((myQuality.Length() == 1) && (myQuality[0] == '*'))
1813 {
1814 // Return the unknown quality character.
1815 return(BaseUtilities::UNKNOWN_QUALITY_CHAR);
1816 }
1817 else if(index >= myQuality.Length())
1818 {
1819 // Only get here if the index was out of range, so throw an exception.
1820 // Technically the myQuality string is not guaranteed to be the same length
1821 // as the sequence, so this catches that error.
1822 String exceptionString = "SamRecord::getQuality(";
1823 exceptionString += index;
1824 exceptionString += ") is out of range. Index must be between 0 and ";
1825 exceptionString += (myQuality.Length() - 1);
1826 throw std::runtime_error(exceptionString.c_str());
1827 }
1828 else
1829 {
1830 return(myQuality[index]);
1831 }
1832 }
1833}
static const char UNKNOWN_QUALITY_CHAR
Character used when the quality is unknown.
int32_t getReadLength()
Get the length of the read.

References getReadLength(), and BaseUtilities::UNKNOWN_QUALITY_CHAR.

◆ getReadLength()

int32_t SamRecord::getReadLength ( )

Get the length of the read.

Returns
read length.

Definition at line 1391 of file SamRecord.cpp.

1392{
1393 myStatus = SamStatus::SUCCESS;
1394 if(myIsSequenceBufferValid == false)
1395 {
1396 // If the sequence is "*", then return 0.
1397 if((mySequence.Length() == 1) && (mySequence[0] == '*'))
1398 {
1399 return(0);
1400 }
1401 // Do not add 1 since it is not null terminated.
1402 return(mySequence.Length());
1403 }
1404 return(myRecordPtr->myReadLength);
1405}

References StatGenStatus::SUCCESS.

Referenced by SamFilter::clipOnMismatchThreshold(), SamQuerySeqWithRefIter::getNextMatchMismatch(), getQuality(), getSequence(), SamValidator::isValidCigar(), SamValidator::isValidQuality(), SamQuerySeqWithRefIter::reset(), and CigarHelper::softClipEndByRefPos().

◆ getReadName()

const char * SamRecord::getReadName ( )

Returns the SAM formatted Read Name (QNAME).

Returns
read name.

Definition at line 1542 of file SamRecord.cpp.

1543{
1544 myStatus = SamStatus::SUCCESS;
1545 if(myReadName.Length() == 0)
1546 {
1547 // 0 Length, means that it is in the buffer, but has not yet
1548 // been synced to the string, so do the sync.
1549 myReadName = (char*)&(myRecordPtr->myData);
1550 }
1551 return myReadName.c_str();
1552}

References StatGenStatus::SUCCESS.

Referenced by getFields(), SamValidator::isValid(), and SamFile::validateSortOrder().

◆ getReadNameLength()

uint8_t SamRecord::getReadNameLength ( )

Get the length of the readname (QNAME) including the null.

Returns
length of the read name (including null).

Definition at line 1326 of file SamRecord.cpp.

1327{
1328 myStatus = SamStatus::SUCCESS;
1329 // If the buffer is valid, return the size from there, otherwise get the
1330 // size from the string length + 1 (ending null).
1331 if(myIsReadNameBufferValid)
1332 {
1333 return(myRecordPtr->myReadNameLength);
1334 }
1335
1336 return(myReadName.Length() + 1);
1337}

References StatGenStatus::SUCCESS.

Referenced by SamValidator::isValid().

◆ getRecordBuffer() [1/2]

const void * SamRecord::getRecordBuffer ( )

Get a const pointer to the buffer that contains the BAM representation of the record.

Returns
const pointer to the buffer that contains the BAM representation of the record.

Definition at line 1204 of file SamRecord.cpp.

1205{
1206 return(getRecordBuffer(mySequenceTranslation));
1207}
const void * getRecordBuffer()
Get a const pointer to the buffer that contains the BAM representation of the record.

References getRecordBuffer().

Referenced by getRecordBuffer().

◆ getRecordBuffer() [2/2]

const void * SamRecord::getRecordBuffer ( SequenceTranslation  translation)

Get a const pointer to the buffer that contains the BAM representation of the record using the specified translation on the sequence.

Parameters
translationtype of sequence translation to use.
Returns
const pointer to the buffer that contains the BAM representation of the record.

Definition at line 1211 of file SamRecord.cpp.

1212{
1213 myStatus = SamStatus::SUCCESS;
1214 bool status = true;
1215 // If the buffer is not synced or the sequence in the buffer is not
1216 // properly translated, fix the buffer.
1217 if((myIsBufferSynced == false) ||
1218 (myBufferSequenceTranslation != translation))
1219 {
1220 status &= fixBuffer(translation);
1221 }
1222 // If the buffer is synced, check to see if the tags need to be synced.
1223 if(myNeedToSetTagsInBuffer)
1224 {
1225 status &= setTagsInBuffer();
1226 }
1227 if(!status)
1228 {
1229 return(NULL);
1230 }
1231 return (const void *)myRecordPtr;
1232}

References StatGenStatus::SUCCESS.

◆ getReference()

GenomeSequence * SamRecord::getReference ( )

Returns a pointer to the genome sequence object associated with this record if it was set (NULL if it was not set).

Returns
pointer to the GenomeSequence object or NULL if there isn't one.

Definition at line 1923 of file SamRecord.cpp.

1924{
1925 return(myRefPtr);
1926}

Referenced by SamValidator::isValidTags().

◆ getReferenceID()

int32_t SamRecord::getReferenceID ( )

Get the reference sequence id of the record (BAM format rid).

Returns
reference sequence id

Definition at line 1305 of file SamRecord.cpp.

1306{
1307 myStatus = SamStatus::SUCCESS;
1308 return myRecordPtr->myReferenceID;
1309}

References StatGenStatus::SUCCESS.

Referenced by SamCoordOutput::add(), SamValidator::isValid(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignment(), Pileup< PILEUP_TYPE, FUNC_CLASS >::processAlignmentRegion(), and SamFile::validateSortOrder().

◆ getReferenceName()

const char * SamRecord::getReferenceName ( )

Get the reference sequence name (RNAME) of the record.

Returns
reference sequence name

Definition at line 1298 of file SamRecord.cpp.

1299{
1300 myStatus = SamStatus::SUCCESS;
1301 return myReferenceName.c_str();
1302}

References StatGenStatus::SUCCESS.

Referenced by PileupElement::addEntry(), SamTags::createMDTag(), getMateReferenceNameOrEqual(), getSequence(), getSequence(), SamValidator::isValid(), and SamQuerySeqWithRefIter::reset().

◆ getSequence() [1/4]

const char * SamRecord::getSequence ( )

Returns the SAM formatted sequence string (SEQ), translating the base as specified by setSequenceTranslation.

Returns
sequence string.

Definition at line 1568 of file SamRecord.cpp.

1569{
1570 return(getSequence(mySequenceTranslation));
1571}

References getSequence().

Referenced by PileupElementBaseQual::addEntry(), SamRecordHelper::checkSequence(), SamTags::createMDTag(), getFields(), SamQuerySeqWithRefIter::getNextMatchMismatch(), getSequence(), getSequence(), and shiftIndelsLeft().

◆ getSequence() [2/4]

char SamRecord::getSequence ( int  index)

Get the sequence base at the specified index into this sequence 0 to readLength - 1, translating the base as specified by setSequenceTranslation.

Throws an exception if index is out of range.

Parameters
indexindex into the sequence string (0 to readLength-1).
Returns
the sequence base at the specified index into the sequence.

Definition at line 1651 of file SamRecord.cpp.

1652{
1653 return(getSequence(index, mySequenceTranslation));
1654}

References getSequence().

◆ getSequence() [3/4]

char SamRecord::getSequence ( int  index,
SequenceTranslation  translation 
)

Get the sequence base at the specified index into this sequence 0 to readLength - 1 performing the specified sequence translation.

Throws an exception if index is out of range.

Parameters
indexindex into the sequence string (0 to readLength-1).
translationtype of sequence translation to use.
Returns
the sequence base at the specified index into the sequence.

Definition at line 1657 of file SamRecord.cpp.

1658{
1659 static const char * asciiBases = "=AC.G...T......N";
1660
1661 // Determine the read length.
1662 int32_t readLen = getReadLength();
1663
1664 // If the read length is 0, this method should not be called.
1665 if(readLen == 0)
1666 {
1667 String exceptionString = "SamRecord::getSequence(";
1668 exceptionString += index;
1669 exceptionString += ") is not allowed since sequence = '*'";
1670 throw std::runtime_error(exceptionString.c_str());
1671 }
1672 else if((index < 0) || (index >= readLen))
1673 {
1674 // Only get here if the index was out of range, so throw an exception.
1675 String exceptionString = "SamRecord::getSequence(";
1676 exceptionString += index;
1677 exceptionString += ") is out of range. Index must be between 0 and ";
1678 exceptionString += (readLen - 1);
1679 throw std::runtime_error(exceptionString.c_str());
1680 }
1681
1682 // Determine if translation needs to be done.
1683 if((translation == NONE) || (myRefPtr == NULL))
1684 {
1685 // No translation needs to be done.
1686 if(mySequence.Length() == 0)
1687 {
1688 // Parse BAM sequence.
1689 if(myIsSequenceBufferValid)
1690 {
1691 return(index & 1 ?
1692 asciiBases[myPackedSequence[index / 2] & 0xF] :
1693 asciiBases[myPackedSequence[index / 2] >> 4]);
1694 }
1695 else
1696 {
1697 String exceptionString = "SamRecord::getSequence(";
1698 exceptionString += index;
1699 exceptionString += ") called with no sequence set";
1700 throw std::runtime_error(exceptionString.c_str());
1701 }
1702 }
1703 // Already have string.
1704 return(mySequence[index]);
1705 }
1706 else
1707 {
1708 // Need to translate the sequence either to have '=' or to not
1709 // have it.
1710 // First check to see if the sequence has been set.
1711 if(mySequence.Length() == 0)
1712 {
1713 // 0 Length, means that it is in the buffer, but has not yet
1714 // been synced to the string, so do the sync.
1715 setSequenceAndQualityFromBuffer();
1716 }
1717
1718 // Check the type of translation.
1719 if(translation == EQUAL)
1720 {
1721 // Check whether or not the string has already been
1722 // retrieved that has the '=' in it.
1723 if(mySeqWithEq.length() == 0)
1724 {
1725 // The string with '=' has not yet been determined,
1726 // so get the string.
1727 // Check to see if the sequence is defined.
1728 if(mySequence == "*")
1729 {
1730 // Sequence is undefined, so no translation necessary.
1731 mySeqWithEq = '*';
1732 }
1733 else
1734 {
1735 // Sequence defined, so translate it.
1736 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
1737 myRecordPtr->myPosition,
1738 *(getCigarInfo()),
1739 getReferenceName(),
1740 *myRefPtr,
1741 mySeqWithEq);
1742 }
1743 }
1744 // Sequence is set, so return it.
1745 return(mySeqWithEq[index]);
1746 }
1747 else
1748 {
1749 // translation == BASES
1750 // Check whether or not the string has already been
1751 // retrieved that does not have the '=' in it.
1752 if(mySeqWithoutEq.length() == 0)
1753 {
1754 // The string without '=' has not yet been determined,
1755 // so get the string.
1756 // Check to see if the sequence is defined.
1757 if(mySequence == "*")
1758 {
1759 // Sequence is undefined, so no translation necessary.
1760 mySeqWithoutEq = '*';
1761 }
1762 else
1763 {
1764 // Sequence defined, so translate it.
1765 // The string without '=' has not yet been determined,
1766 // so get the string.
1767 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
1768 myRecordPtr->myPosition,
1769 *(getCigarInfo()),
1770 getReferenceName(),
1771 *myRefPtr,
1772 mySeqWithoutEq);
1773 }
1774 }
1775 // Sequence is set, so return it.
1776 return(mySeqWithoutEq[index]);
1777 }
1778 }
1779}
static void seqWithoutEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence converting '=' to the appropriate base using the reference.
static void seqWithEquals(const char *currentSeq, int32_t seq0BasedPos, Cigar &cigar, const char *referenceName, const GenomeSequence &refSequence, std::string &updatedSeq)
Gets the sequence with '=' in any position where the sequence matches the reference.
Cigar * getCigarInfo()
Returns a pointer to the Cigar object associated with this record.

References EQUAL, getCigarInfo(), getReadLength(), getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), and SamQuerySeqWithRef::seqWithoutEquals().

◆ getSequence() [4/4]

const char * SamRecord::getSequence ( SequenceTranslation  translation)

Returns the SAM formatted sequence string (SEQ) performing the specified sequence translation.

Parameters
translationtype of sequence translation to use.
Returns
sequence string.

Definition at line 1574 of file SamRecord.cpp.

1575{
1576 myStatus = SamStatus::SUCCESS;
1577 if(mySequence.Length() == 0)
1578 {
1579 // 0 Length, means that it is in the buffer, but has not yet
1580 // been synced to the string, so do the sync.
1581 setSequenceAndQualityFromBuffer();
1582 }
1583
1584 // Determine if translation needs to be done.
1585 if((translation == NONE) || (myRefPtr == NULL))
1586 {
1587 return mySequence.c_str();
1588 }
1589 else if(translation == EQUAL)
1590 {
1591 if(mySeqWithEq.length() == 0)
1592 {
1593 // Check to see if the sequence is defined.
1594 if(mySequence == "*")
1595 {
1596 // Sequence is undefined, so no translation necessary.
1597 mySeqWithEq = '*';
1598 }
1599 else
1600 {
1601 // Sequence defined, so translate it.
1602 SamQuerySeqWithRef::seqWithEquals(mySequence.c_str(),
1603 myRecordPtr->myPosition,
1604 *(getCigarInfo()),
1605 getReferenceName(),
1606 *myRefPtr,
1607 mySeqWithEq);
1608 }
1609 }
1610 return(mySeqWithEq.c_str());
1611 }
1612 else
1613 {
1614 // translation == BASES
1615 if(mySeqWithoutEq.length() == 0)
1616 {
1617 if(mySequence == "*")
1618 {
1619 // Sequence is undefined, so no translation necessary.
1620 mySeqWithoutEq = '*';
1621 }
1622 else
1623 {
1624 // Sequence defined, so translate it.
1625 SamQuerySeqWithRef::seqWithoutEquals(mySequence.c_str(),
1626 myRecordPtr->myPosition,
1627 *(getCigarInfo()),
1628 getReferenceName(),
1629 *myRefPtr,
1630 mySeqWithoutEq);
1631 }
1632 }
1633 return(mySeqWithoutEq.c_str());
1634 }
1635}

References EQUAL, getCigarInfo(), getReferenceName(), NONE, SamQuerySeqWithRef::seqWithEquals(), SamQuerySeqWithRef::seqWithoutEquals(), and StatGenStatus::SUCCESS.

◆ getStatus()

const SamStatus & SamRecord::getStatus ( )

Returns the status associated with the last method that sets the status.

Returns
SamStatus of the last command that sets status.

Definition at line 2403 of file SamRecord.cpp.

2404{
2405 return(myStatus);
2406}

◆ getString()

const String & SamRecord::getString ( const char *  tag)

Get the string value for the specified tag.

Definition at line 2314 of file SamRecord.cpp.

2315{
2316 // Init to success.
2317 myStatus = SamStatus::SUCCESS;
2318 // Parse the buffer if necessary.
2319 if(myNeedToSetTagsFromBuffer)
2320 {
2321 if(!setTagsFromBuffer())
2322 {
2323 // Failed to read the tags from the buffer, so cannot
2324 // get tags.
2325 // TODO - what do we want to do on failure?
2326 }
2327 }
2328
2329 int key = MAKEKEY(tag[0], tag[1], 'Z');
2330 int offset = extras.Find(key);
2331
2332 int value;
2333 if (offset < 0)
2334 {
2335
2336 key = MAKEKEY(tag[0], tag[1], 'B');
2337 offset = extras.Find(key);
2338 if (offset < 0)
2339 {
2340 // TODO - what do we want to do on failure?
2341 return(NOT_FOUND_TAG_STRING);
2342 }
2343 }
2344 value = extras[offset];
2345
2346 return strings[value];
2347}

References StatGenStatus::SUCCESS.

Referenced by rmTag(), and rmTags().

◆ getStringTag()

const String * SamRecord::getStringTag ( const char *  tag)

Get the string value for the specified tag.

Parameters
tag: tag to retrieve.
Returns
pointer to the tag's string value if found, NULL if not found.

Definition at line 2180 of file SamRecord.cpp.

2181{
2182 // Parse the buffer if necessary.
2183 if(myNeedToSetTagsFromBuffer)
2184 {
2185 if(!setTagsFromBuffer())
2186 {
2187 // Failed to read the tags from the buffer, so cannot
2188 // get tags. setTagsFromBuffer set the errors,
2189 // so just return null.
2190 return(NULL);
2191 }
2192 }
2193
2194 int key = MAKEKEY(tag[0], tag[1], 'Z');
2195 int offset = extras.Find(key);
2196
2197 int value;
2198 if (offset < 0)
2199 {
2200 // Check for 'B' tag.
2201 key = MAKEKEY(tag[0], tag[1], 'B');
2202 offset = extras.Find(key);
2203 if(offset < 0)
2204 {
2205 // Tag not found.
2206 return(NULL);
2207 }
2208 }
2209
2210 // Offset is valid, so return the tag.
2211 value = extras[offset];
2212 return(&(strings[value]));
2213}

Referenced by SamTags::isMDTagCorrect(), and SamValidator::isValidTags().

◆ getTagLength()

uint32_t SamRecord::getTagLength ( )

Returns the length of the BAM formatted tags.

Returns
length of the BAM formatted tags.

Definition at line 1929 of file SamRecord.cpp.

1930{
1931 myStatus = SamStatus::SUCCESS;
1932 if(myNeedToSetTagsFromBuffer)
1933 {
1934 // Tags are only set in the buffer, so the size of the tags is
1935 // the length of the record minus the starting location of the tags.
1936 unsigned char * tagStart =
1937 (unsigned char *)myRecordPtr->myData
1938 + myRecordPtr->myReadNameLength
1939 + myRecordPtr->myCigarLength * sizeof(int)
1940 + (myRecordPtr->myReadLength + 1) / 2 + myRecordPtr->myReadLength;
1941
1942 // The non-tags take up from the start of the record to the tag start.
1943 // Do not include the block size part of the record since it is not
1944 // included in the size.
1945 uint32_t nonTagSize =
1946 tagStart - (unsigned char*)&(myRecordPtr->myReferenceID);
1947 // Tags take up the size of the block minus the non-tag section.
1948 uint32_t tagSize = myRecordPtr->myBlockSize - nonTagSize;
1949 return(tagSize);
1950 }
1951
1952 // Tags are stored outside the buffer, so myTagBufferSize is set.
1953 return(myTagBufferSize);
1954}

References StatGenStatus::SUCCESS.

◆ getTagsString()

bool SamRecord::getTagsString ( const char *  tags,
String returnString,
char  delim = '\t' 
)

Get the string representation of the tags from the record, formatted as TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE... Sets the Status to SUCCESS when the tags are successfully returned or the tags were not found.

If a different error occurred, the status is set appropriately. The delimiter between the tags to retrieve is ',' or ';'. ',' was added since the original delimiter, ';', requires the string to be quoted on the command-line.

Parameters
tags: the tags to retrieve, formatted as TAG:TYPE,TAG:TYPE...
returnString: the String to set (this method first clears returnString) to TAG:TYPE:VALUE<delim>TAG:TYPE:VALUE...
delim: delimiter to use to separate two tags, default is a tab.
Returns
true if there were not any errors even if no tags were found.

Definition at line 2082 of file SamRecord.cpp.

2083{
2084 const char* currentTagPtr = tags;
2085
2086 returnString.Clear();
2087 myStatus = SamStatus::SUCCESS;
2088 if(myNeedToSetTagsFromBuffer)
2089 {
2090 if(!setTagsFromBuffer())
2091 {
2092 // Failed to read the tags from the buffer, so cannot
2093 // get tags.
2094 return(false);
2095 }
2096 }
2097
2098 bool returnStatus = true;
2099
2100 while(*currentTagPtr != '\0')
2101 {
2102 // Tags are formatted as: XY:Z
2103 // Where X is [A-Za-z], Y is [A-Za-z], and
2104 // Z is A,i,f,Z,H (cCsSI are also accepted)
2105 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
2106 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
2107 {
2108 myStatus.setStatus(SamStatus::INVALID,
2109 "getTagsString called with improperly formatted tags.\n");
2110 returnStatus = false;
2111 break;
2112 }
2113
2114 // Construct the key.
2115 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
2116 currentTagPtr[3]);
2117 // Look to see if the key exists in the hash.
2118 int offset = extras.Find(key);
2119
2120 if(offset >= 0)
2121 {
2122 // Offset is set, so the key was found.
2123 if(!returnString.IsEmpty())
2124 {
2125 returnString += delim;
2126 }
2127 returnString += currentTagPtr[0];
2128 returnString += currentTagPtr[1];
2129 returnString += ':';
2130 returnString += currentTagPtr[3];
2131 returnString += ':';
2132
2133 // First if it is an integer, determine the actual type of the int.
2134 char vtype;
2135 getTypeFromKey(key, vtype);
2136
2137 switch(vtype)
2138 {
2139 case 'i':
2140 returnString += *(int*)getIntegerPtr(offset, vtype);
2141 break;
2142 case 'f':
2143 returnString += *(float*)getFloatPtr(offset);
2144 break;
2145 case 'Z':
2146 case 'B':
2147 returnString += *(String*)getStringPtr(offset);
2148 break;
2149 default:
2150 myStatus.setStatus(SamStatus::INVALID,
2151 "rmTag called with unknown type.\n");
2152 returnStatus = false;
2153 break;
2154 };
2155 }
2156 // Increment to the next tag.
2157 if((currentTagPtr[4] == ';') || (currentTagPtr[4] == ','))
2158 {
2159 // Increment once more.
2160 currentTagPtr += 5;
2161 }
2162 else if(currentTagPtr[4] != '\0')
2163 {
2164 // Invalid tag format.
2165 myStatus.setStatus(SamStatus::INVALID,
2166 "rmTags called with improperly formatted tags.\n");
2167 returnStatus = false;
2168 break;
2169 }
2170 else
2171 {
2172 // Last Tag.
2173 currentTagPtr += 4;
2174 }
2175 }
2176 return(returnStatus);
2177}

References StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ isCharType()

bool SamRecord::isCharType ( char  vtype)
static

Returns whether or not the specified vtype is a char type.

Does not set SamStatus.

Parameters
vtype: value type to check.
Returns
true if the passed in vtype is a char ('A'), false otherwise.

Definition at line 2062 of file SamRecord.cpp.

2063{
2064 if(vtype == 'A')
2065 {
2066 return(true);
2067 }
2068 return(false);
2069}

Referenced by SamRecordHelper::genSamTagString().

◆ isFloatType()

bool SamRecord::isFloatType ( char  vtype)
static

Returns whether or not the specified vtype is a float type.

Does not set SamStatus.

Parameters
vtype: value type to check.
Returns
true if the passed in vtype is a float ('f'), false otherwise.

Definition at line 2052 of file SamRecord.cpp.

2053{
2054 if(vtype == 'f')
2055 {
2056 return(true);
2057 }
2058 return(false);
2059}

Referenced by SamRecordHelper::genSamTagString().

◆ isIntegerType()

bool SamRecord::isIntegerType ( char  vtype)
static

Returns whether or not the specified vtype is an integer type.

Does not set SamStatus.

Parameters
vtype: value type to check.
Returns
true if the passed in vtype is an integer ('c', 'C', 's', 'S', 'i', 'I'), false otherwise.

Definition at line 2040 of file SamRecord.cpp.

2041{
2042 if((vtype == 'c') || (vtype == 'C') ||
2043 (vtype == 's') || (vtype == 'S') ||
2044 (vtype == 'i') || (vtype == 'I'))
2045 {
2046 return(true);
2047 }
2048 return(false);
2049}

Referenced by SamRecordHelper::genSamTagString().

◆ isStringType()

bool SamRecord::isStringType ( char  vtype)
static

Returns whether or not the specified vtype is a string type.

Does not set SamStatus.

Parameters
vtype: value type to check.
Returns
true if the passed in vtype is a string ('Z'/'B'), false otherwise.

Definition at line 2072 of file SamRecord.cpp.

2073{
2074 if((vtype == 'Z') || (vtype == 'B'))
2075 {
2076 return(true);
2077 }
2078 return(false);
2079}

Referenced by SamRecordHelper::genSamTagString().

◆ isValid()

bool SamRecord::isValid ( SamFileHeader header)

Returns whether or not the record is valid, setting the status to indicate success or failure.

Parameters
header: SAM Header associated with the record. Used to perform some validation against the header.
Returns
true if the record is valid, false if not.

Definition at line 161 of file SamRecord.cpp.

162{
163 myStatus = SamStatus::SUCCESS;
164 SamValidationErrors invalidSamErrors;
165 if(!SamValidator::isValid(header, *this, invalidSamErrors))
166 {
167 // The record is not valid.
168 std::string errorMessage = "";
169 invalidSamErrors.getErrorString(errorMessage);
170 myStatus.setStatus(SamStatus::INVALID, errorMessage.c_str());
171 return(false);
172 }
173 // The record is valid.
174 return(true);
175}
The SamValidationErrors class is a container class that holds SamValidationError Objects,...
void getErrorString(std::string &errorString) const
Append the error messages contained in this container to the passed in string.
static bool isValid(SamFileHeader &samHeader, SamRecord &samRecord, SamValidationErrors &validationErrors)
Validates whether or not the specified SamRecord is valid, calling all of the other validations.

References SamValidationErrors::getErrorString(), StatGenStatus::INVALID, SamValidator::isValid(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ resetRecord()

void SamRecord::resetRecord ( )

Reset the fields of the record to a default value.

This is not necessary when you are reading a SAM/BAM file, but if you are setting fields, it is a good idea to clean out a record before reusing it. Clearing it allows you to not have to set any empty fields.

Definition at line 91 of file SamRecord.cpp.

92{
93 myIsBufferSynced = true;
94
95 myRecordPtr->myBlockSize = DEFAULT_BLOCK_SIZE;
96 myRecordPtr->myReferenceID = -1;
97 myRecordPtr->myPosition = -1;
98 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
99 myRecordPtr->myMapQuality = 0;
100 myRecordPtr->myBin = DEFAULT_BIN;
101 myRecordPtr->myCigarLength = 0;
102 myRecordPtr->myFlag = 0;
103 myRecordPtr->myReadLength = 0;
104 myRecordPtr->myMateReferenceID = -1;
105 myRecordPtr->myMatePosition = -1;
106 myRecordPtr->myInsertSize = 0;
107
108 // Set the sam values for the variable length fields.
109 // TODO - one way to speed this up might be to not set to "*" and just
110 // clear them, and write out a '*' for SAM if it is empty.
111 myReadName = DEFAULT_READ_NAME;
112 myReferenceName = "*";
113 myMateReferenceName = "*";
114 myCigar = "*";
115 mySequence = "*";
116 mySeqWithEq.clear();
117 mySeqWithoutEq.clear();
118 myQuality = "*";
119 myNeedToSetTagsFromBuffer = false;
120 myNeedToSetTagsInBuffer = false;
121
122 // Initialize the calculated alignment info to the uncalculated value.
123 myAlignmentLength = -1;
124 myUnclippedStartOffset = -1;
125 myUnclippedEndOffset = -1;
126
127 clearTags();
128
129 // Set the bam values for the variable length fields.
130 // Only the read name needs to be set, the others are a length of 0.
131 // Set the read name. The min size of myRecordPtr includes the size for
132 // the default read name.
133 memcpy(&(myRecordPtr->myData), myReadName.c_str(),
134 myRecordPtr->myReadNameLength);
135
136 // Set that the variable length buffer fields are valid.
137 myIsReadNameBufferValid = true;
138 myIsCigarBufferValid = true;
139 myPackedSequence =
140 (unsigned char *)myRecordPtr->myData + myRecordPtr->myReadNameLength +
141 myRecordPtr->myCigarLength * sizeof(int);
142 myIsSequenceBufferValid = true;
143 myBufferSequenceTranslation = NONE;
144
145 myPackedQuality = myPackedSequence;
146 myIsQualityBufferValid = true;
147 myIsTagsBufferValid = true;
148 myIsBinValid = true;
149
150 myCigarTempBufferLength = -1;
151
152 myStatus = SamStatus::SUCCESS;
153
154 NOT_FOUND_TAG_STRING = "";
155 NOT_FOUND_TAG_INT = -1; // TODO - deprecate
156}
void clearTags()
Clear the tags in this record.

References clearTags(), NONE, and StatGenStatus::SUCCESS.

Referenced by SamRecord(), SamRecord(), ~SamRecord(), setBuffer(), and setBufferFromFile().

◆ resetTagIter()

void SamRecord::resetTagIter ( )

Reset the tag iterator to the beginning of the tags.

Definition at line 2034 of file SamRecord.cpp.

2035{
2036 myLastTagIndex = -1;
2037}

Referenced by clearTags(), and SamRecordHelper::genSamTagsString().

◆ rmTag()

bool SamRecord::rmTag ( const char *  tag,
char  type 
)

Remove a tag.

Parameters
tag: tag to remove.
type: type of the tag to be removed.
Returns
true if the tag no longer exists in the record, false if it could not be removed (Returns true if the tag was not found in the record).

Definition at line 992 of file SamRecord.cpp.

993{
994 // Check the length of tag.
995 if(strlen(tag) != 2)
996 {
997 // Tag is the wrong length.
998 myStatus.setStatus(SamStatus::INVALID,
999 "rmTag called with tag that is not 2 characters\n");
1000 return(false);
1001 }
1002
1003 myStatus = SamStatus::SUCCESS;
1004 if(myNeedToSetTagsFromBuffer)
1005 {
1006 if(!setTagsFromBuffer())
1007 {
1008 // Failed to read the tags from the buffer, so cannot
1009 // get tags.
1010 return(false);
1011 }
1012 }
1013
1014 // Construct the key.
1015 int key = MAKEKEY(tag[0], tag[1], type);
1016 // Look to see if the key exists in the hash.
1017 int offset = extras.Find(key);
1018
1019 if(offset < 0)
1020 {
1021 // Not found, so return true, successfully removed since
1022 // it is not in tag.
1023 return(true);
1024 }
1025
1026 // Offset is set, so the key was found.
1027 // First if it is an integer, determine the actual type of the int.
1028 char vtype;
1029 getTypeFromKey(key, vtype);
1030 if(vtype == 'i')
1031 {
1032 vtype = getIntegerType(offset);
1033 }
1034
1035 // Offset is set, so recalculate the buffer size without this entry.
1036 // Do NOT remove from strings, integers, or floats because then
1037 // extras would need to be updated for all entries with the new indexes
1038 // into those variables.
1039 int rmBuffSize = 0;
1040 switch(vtype)
1041 {
1042 case 'A':
1043 case 'c':
1044 case 'C':
1045 rmBuffSize = 4;
1046 break;
1047 case 's':
1048 case 'S':
1049 rmBuffSize = 5;
1050 break;
1051 case 'i':
1052 case 'I':
1053 rmBuffSize = 7;
1054 break;
1055 case 'f':
1056 rmBuffSize = 7;
1057 break;
1058 case 'Z':
1059 rmBuffSize = 4 + getString(offset).Length();
1060 break;
1061 case 'B':
1062 rmBuffSize = 3 + getBtagBufferSize(getString(offset));
1063 break;
1064 default:
1065 myStatus.setStatus(SamStatus::INVALID,
1066 "rmTag called with unknown type.\n");
1067 return(false);
1068 break;
1069 };
1070
1071 // The buffer tags are now out of sync.
1072 myNeedToSetTagsInBuffer = true;
1073 myIsTagsBufferValid = false;
1074 myIsBufferSynced = false;
1075 myTagBufferSize -= rmBuffSize;
1076
1077 // Remove from the hash.
1078 extras.Delete(offset);
1079 return(true);
1080}
const String & getString(const char *tag)
Get the string value for the specified tag.

References getString(), StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ rmTags()

bool SamRecord::rmTags ( const char *  tags)

Remove tags.

The delimiter between the tags is ',' or ';'. ',' was added since the original delimiter, ';', requires the string to be quoted on the command-line.

Parameters
tags: tags to remove, formatted as Tag:Type,Tag:Type,Tag:Type...
Returns
true if all tags no longer exist in the record, false if any could not be removed (Returns true if the tags were not found in the record). SamStatus is set to INVALID if the tags are incorrectly formatted.

Definition at line 1083 of file SamRecord.cpp.

1084{
1085 const char* currentTagPtr = tags;
1086
1087 myStatus = SamStatus::SUCCESS;
1088 if(myNeedToSetTagsFromBuffer)
1089 {
1090 if(!setTagsFromBuffer())
1091 {
1092 // Failed to read the tags from the buffer, so cannot
1093 // get tags.
1094 return(false);
1095 }
1096 }
1097
1098 bool returnStatus = true;
1099
1100 int rmBuffSize = 0;
1101 while(*currentTagPtr != '\0')
1102 {
1103
1104 // Tags are formatted as: XY:Z
1105 // Where X is [A-Za-z], Y is [A-Za-z], and
1106 // Z is A,i,f,Z,H (cCsSI are also accepted)
1107 if((currentTagPtr[0] == '\0') || (currentTagPtr[1] == '\0') ||
1108 (currentTagPtr[2] != ':') || (currentTagPtr[3] == '\0'))
1109 {
1110 myStatus.setStatus(SamStatus::INVALID,
1111 "rmTags called with improperly formatted tags.\n");
1112 returnStatus = false;
1113 break;
1114 }
1115
1116 // Construct the key.
1117 int key = MAKEKEY(currentTagPtr[0], currentTagPtr[1],
1118 currentTagPtr[3]);
1119 // Look to see if the key exists in the hash.
1120 int offset = extras.Find(key);
1121
1122 if(offset >= 0)
1123 {
1124 // Offset is set, so the key was found.
1125 // First if it is an integer, determine the actual type of the int.
1126 char vtype;
1127 getTypeFromKey(key, vtype);
1128 if(vtype == 'i')
1129 {
1130 vtype = getIntegerType(offset);
1131 }
1132
1133 // Offset is set, so recalculate the buffer size without this entry.
1134 // Do NOT remove from strings, integers, or floats because then
1135 // extras would need to be updated for all entries with the new indexes
1136 // into those variables.
1137 switch(vtype)
1138 {
1139 case 'A':
1140 case 'c':
1141 case 'C':
1142 rmBuffSize += 4;
1143 break;
1144 case 's':
1145 case 'S':
1146 rmBuffSize += 5;
1147 break;
1148 case 'i':
1149 case 'I':
1150 rmBuffSize += 7;
1151 break;
1152 case 'f':
1153 rmBuffSize += 7;
1154 break;
1155 case 'Z':
1156 rmBuffSize += 4 + getString(offset).Length();
1157 break;
1158 case 'B':
1159 rmBuffSize += 3 + getBtagBufferSize(getString(offset));
1160 break;
1161 default:
1162 myStatus.setStatus(SamStatus::INVALID,
1163 "rmTag called with unknown type.\n");
1164 returnStatus = false;
1165 break;
1166 };
1167
1168 // Remove from the hash.
1169 extras.Delete(offset);
1170 }
1171 // Increment to the next tag.
1172 if((currentTagPtr[4] == ';') || (currentTagPtr[4] == ','))
1173 {
1174 // Increment once more.
1175 currentTagPtr += 5;
1176 }
1177 else if(currentTagPtr[4] != '\0')
1178 {
1179 // Invalid tag format.
1180 myStatus.setStatus(SamStatus::INVALID,
1181 "rmTags called with improperly formatted tags.\n");
1182 returnStatus = false;
1183 break;
1184 }
1185 else
1186 {
1187 // Last Tag.
1188 currentTagPtr += 4;
1189 }
1190 }
1191
1192 // The buffer tags are now out of sync.
1193 myNeedToSetTagsInBuffer = true;
1194 myIsTagsBufferValid = false;
1195 myIsBufferSynced = false;
1196 myTagBufferSize -= rmBuffSize;
1197
1198
1199 return(returnStatus);
1200}

References getString(), StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ set0BasedMatePosition()

bool SamRecord::set0BasedMatePosition ( int32_t  matePosition)

Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value.

Internal processing handles the switching between SAM/BAM formats when read/written.

Parameters
matePosition: 0-based start position of the mate/next fragment.
Returns
true if successfully set, false if not.

Definition at line 328 of file SamRecord.cpp.

329{
330 myStatus = SamStatus::SUCCESS;
331 myRecordPtr->myMatePosition = matePosition;
332 return true;
333}

References StatGenStatus::SUCCESS.

Referenced by set1BasedMatePosition().

◆ set0BasedPosition()

bool SamRecord::set0BasedPosition ( int32_t  position)

Set the leftmost position using the specified 0-based (BAM format) value.

Internal processing handles the switching between SAM/BAM formats when read/written.

Parameters
position: 0-based start position.
Returns
true if successfully set, false if not.

Definition at line 242 of file SamRecord.cpp.

243{
244 myStatus = SamStatus::SUCCESS;
245 myRecordPtr->myPosition = position;
246 myIsBinValid = false;
247 return true;
248}

References StatGenStatus::SUCCESS.

Referenced by set1BasedPosition(), and SamFilter::softClip().

◆ set1BasedMatePosition()

bool SamRecord::set1BasedMatePosition ( int32_t  matePosition)

Set the mate/next fragment's leftmost position (PNEXT) using the specified 1-based (SAM format) value.

Internal processing handles the switching between SAM/BAM formats when read/written.

Parameters
matePosition: 1-based start position of the mate/next fragment.
Returns
true if successfully set, false if not.

Definition at line 322 of file SamRecord.cpp.

323{
324 return(set0BasedMatePosition(matePosition - 1));
325}
bool set0BasedMatePosition(int32_t matePosition)
Set the mate/next fragment's leftmost position using the specified 0-based (BAM format) value.

References set0BasedMatePosition().

◆ set1BasedPosition()

bool SamRecord::set1BasedPosition ( int32_t  position)

Set the leftmost position (POS) using the specified 1-based (SAM format) value.

Internal processing handles the switching between SAM/BAM formats when read/written.

Parameters
position: 1-based start position.
Returns
true if successfully set, false if not.

Definition at line 236 of file SamRecord.cpp.

237{
238 return(set0BasedPosition(position - 1));
239}
bool set0BasedPosition(int32_t position)
Set the leftmost position using the specified 0-based (BAM format) value.

References set0BasedPosition().

◆ setBuffer()

SamStatus::Status SamRecord::setBuffer ( const char *  fromBuffer,
uint32_t  fromBufferSize,
SamFileHeader header 
)

Sets the SamRecord to contain the information in the BAM formatted fromBuffer.

Parameters
fromBuffer: buffer to read the BAM record from.
fromBufferSize: size of the buffer containing the BAM record.
header: BAM header for the record.
Returns
status of reading the BAM record from the buffer.

Definition at line 525 of file SamRecord.cpp.

528{
529 myStatus = SamStatus::SUCCESS;
530 if((fromBuffer == NULL) || (fromBufferSize == 0))
531 {
532 // Buffer is empty.
533 myStatus.setStatus(SamStatus::FAIL_PARSE,
534 "Cannot parse an empty file.");
535 return(SamStatus::FAIL_PARSE);
536 }
537
538 // Clear the record.
539 resetRecord();
540
541 // allocate space for the record size.
542 if(!allocateRecordStructure(fromBufferSize))
543 {
544 // Failed to allocate space.
545 return(SamStatus::FAIL_MEM);
546 }
547
548 memcpy(myRecordPtr, fromBuffer, fromBufferSize);
549
550 setVariablesForNewBuffer(header);
551
552 // Return the status of the record.
553 return(SamStatus::SUCCESS);
554}
@ FAIL_MEM
fail a memory allocation.

References StatGenStatus::FAIL_MEM, StatGenStatus::FAIL_PARSE, resetRecord(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ setBufferFromFile()

SamStatus::Status SamRecord::setBufferFromFile ( IFILE  filePtr,
SamFileHeader header 
)

Read the BAM record from a file.

Parameters
filePtr: file to read the buffer from.
header: BAM header for the record.
Returns
status of reading the BAM record from the file.

Definition at line 558 of file SamRecord.cpp.

560{
561 myStatus = SamStatus::SUCCESS;
562 if((filePtr == NULL) || (filePtr->isOpen() == false))
563 {
564 // File is not open, return failure.
565 myStatus.setStatus(SamStatus::FAIL_ORDER,
566 "Can't read from an unopened file.");
567 return(SamStatus::FAIL_ORDER);
568 }
569
570 // Clear the record.
571 resetRecord();
572
573 // read the record size.
574 int numBytes =
575 ifread(filePtr, &(myRecordPtr->myBlockSize), sizeof(int32_t));
576
577 // Check to see if the end of the file was hit and no bytes were read.
578 if(ifeof(filePtr) && (numBytes == 0))
579 {
580 // End of file, nothing was read, no more records.
581 std::string statusMsg = "No more records left to read, ";
582 statusMsg += filePtr->getFileName();
583 statusMsg += ".";
584 myStatus.setStatus(SamStatus::NO_MORE_RECS,
585 statusMsg.c_str());
586 return(SamStatus::NO_MORE_RECS);
587 }
588
589 if(numBytes != sizeof(int32_t))
590 {
591 // Failed to read the entire block size. Either the end of the file
592 // was reached early or there was an error.
593 if(ifeof(filePtr))
594 {
595 // Error: end of the file reached prior to reading the rest of the
596 // record.
597 std::string statusMsg = "EOF reached in the middle of a record, ";
598 statusMsg += filePtr->getFileName();
599 statusMsg += ".";
600 myStatus.setStatus(SamStatus::FAIL_PARSE,
601 statusMsg.c_str());
602 return(SamStatus::FAIL_PARSE);
603 }
604 else
605 {
606 // Error reading.
607 std::string statusMsg = "Failed to read the record size, ";
608 statusMsg += filePtr->getFileName();
609 statusMsg += ".";
610 myStatus.setStatus(SamStatus::FAIL_IO,
611 statusMsg.c_str());
612 return(SamStatus::FAIL_IO);
613 }
614 }
615
616 // allocate space for the record size.
617 if(!allocateRecordStructure(myRecordPtr->myBlockSize + sizeof(int32_t)))
618 {
619 // Failed to allocate space.
620 // Status is set by allocateRecordStructure.
621 return(SamStatus::FAIL_MEM);
622 }
623
624 // Read the rest of the alignment block, starting at the reference id.
625 if(ifread(filePtr, &(myRecordPtr->myReferenceID), myRecordPtr->myBlockSize)
626 != (unsigned int)myRecordPtr->myBlockSize)
627 {
628 // Error reading the record. Reset it and return failure.
629 resetRecord();
630 std::string statusMsg = "Failed to read the record, ";
631 statusMsg += filePtr->getFileName();
632 statusMsg += ".";
633 myStatus.setStatus(SamStatus::FAIL_IO,
634 statusMsg.c_str());
635 return(SamStatus::FAIL_IO);
636 }
637
638 setVariablesForNewBuffer(header);
639
640 // Return the status of the record.
641 return(SamStatus::SUCCESS);
642}
int ifeof(IFILE file)
Check to see if we have reached the EOF (returns 0 if not EOF).
Definition InputFile.h:654
unsigned int ifread(IFILE file, void *buffer, unsigned int size)
Read up to size bytes from the file into the buffer.
Definition InputFile.h:600
bool isOpen() const
Returns whether or not the file was successfully opened.
Definition InputFile.h:423
const char * getFileName() const
Get the filename that is currently opened.
Definition InputFile.h:473
@ NO_MORE_RECS
NO_MORE_RECS: failed to read a record since there are no more to read either in the file or section i...
@ FAIL_IO
method failed due to an I/O issue.
@ FAIL_ORDER
FAIL_ORDER: method failed because it was called out of order, like trying to read a file without open...

References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_MEM, StatGenStatus::FAIL_ORDER, StatGenStatus::FAIL_PARSE, InputFile::getFileName(), ifeof(), ifread(), InputFile::isOpen(), StatGenStatus::NO_MORE_RECS, resetRecord(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ setCigar() [1/2]

bool SamRecord::setCigar ( const char *  cigar)

Set the CIGAR to the specified SAM formatted cigar string.

Internal processing handles the switching between SAM/BAM formats when read/written.

Parameters
cigar: string containing the SAM formatted cigar.
Returns
true if successfully set, false if not.

Definition at line 259 of file SamRecord.cpp.

260{
261 myStatus = SamStatus::SUCCESS;
262 myCigar = cigar;
263
264 myIsBufferSynced = false;
265 myIsCigarBufferValid = false;
266 myCigarTempBufferLength = -1;
267 myIsBinValid = false;
268
269 // Initialize the calculated alignment info to the uncalculated value.
270 myAlignmentLength = -1;
271 myUnclippedStartOffset = -1;
272 myUnclippedEndOffset = -1;
273
274 return true;
275}

References StatGenStatus::SUCCESS.

Referenced by SamFilter::filterRead(), shiftIndelsLeft(), and SamFilter::softClip().

◆ setCigar() [2/2]

bool SamRecord::setCigar ( const Cigar cigar)

Set the CIGAR to the specified Cigar object.

Internal processing handles the switching between SAM/BAM formats when read/written.

Parameters
cigar: object to set this record's cigar to have.
Returns
true if successfully set, false if not.

Definition at line 278 of file SamRecord.cpp.

279{
280 myStatus = SamStatus::SUCCESS;
281 cigar.getCigarString(myCigar);
282
283 myIsBufferSynced = false;
284 myIsCigarBufferValid = false;
285 myCigarTempBufferLength = -1;
286 myIsBinValid = false;
287
288 // Initialize the calculated alignment info to the uncalculated value.
289 myAlignmentLength = -1;
290 myUnclippedStartOffset = -1;
291 myUnclippedEndOffset = -1;
292
293 return true;
294}
void getCigarString(String &cigarString) const
Set the passed in String to the string representation of the Cigar operations in this object.
Definition Cigar.cpp:52

References Cigar::getCigarString(), and StatGenStatus::SUCCESS.

◆ setFlag()

bool SamRecord::setFlag ( uint16_t  flag)

Set the bitwise FLAG to the specified value.

Parameters
flag: integer flag to use.
Returns
true if successfully set, false if not.

Definition at line 215 of file SamRecord.cpp.

216{
217 myStatus = SamStatus::SUCCESS;
218 myRecordPtr->myFlag = flag;
219 return true;
220}

References StatGenStatus::SUCCESS.

Referenced by SamFilter::filterRead().

◆ setInsertSize()

bool SamRecord::setInsertSize ( int32_t  insertSize)

Sets the inferred insert size (ISIZE)/observed template length (TLEN).

Parameters
insertSize: inferred insert size/observed template length.
Returns
true if successfully set, false if not.

Definition at line 336 of file SamRecord.cpp.

337{
338 myStatus = SamStatus::SUCCESS;
339 myRecordPtr->myInsertSize = insertSize;
340 return true;
341}

References StatGenStatus::SUCCESS.

◆ setMapQuality()

bool SamRecord::setMapQuality ( uint8_t  mapQuality)

Set the mapping quality (MAPQ).

Parameters
mapQuality: map quality to set in the record.
Returns
true if successfully set, false if not.

Definition at line 251 of file SamRecord.cpp.

252{
253 myStatus = SamStatus::SUCCESS;
254 myRecordPtr->myMapQuality = mapQuality;
255 return true;
256}

References StatGenStatus::SUCCESS.

Referenced by SamFilter::filterRead().

◆ setMateReferenceName()

bool SamRecord::setMateReferenceName ( SamFileHeader header,
const char *  mateReferenceName 
)

Set the mate/next fragment's reference sequence name (RNEXT) to the specified name, using the header to determine the mate reference id.

Parameters
header: SAM/BAM header to use to determine the mate reference id.
mateReferenceName: mate reference name to use.
Returns
true if successfully set, false if not

Definition at line 297 of file SamRecord.cpp.

299{
300 myStatus = SamStatus::SUCCESS;
301 // Set the mate reference, if it is "=", set it to be equal
302 // to myReferenceName. This assumes that myReferenceName has already
303 // been set.
304 if(strcmp(mateReferenceName, FIELD_ABSENT_STRING) == 0)
305 {
306 myMateReferenceName = myReferenceName;
307 }
308 else
309 {
310 myMateReferenceName = mateReferenceName;
311 }
312
313 // Set the Mate Reference ID.
314 // If the reference ID does not already exist, add it (pass true)
315 myRecordPtr->myMateReferenceID =
316 header.getReferenceID(myMateReferenceName, true);
317
318 return true;
319}
int getReferenceID(const String &referenceName, bool addID=false)
Get the reference ID for the specified reference name (chromosome).

References SamFileHeader::getReferenceID(), and StatGenStatus::SUCCESS.

◆ setQuality()

bool SamRecord::setQuality ( const char *  quality)

Sets the quality (QUAL) to the specified SAM formatted quality string.

Internal processing handles switching between SAM/BAM formats when read/written.

Parameters
quality: SAM quality string.
Returns
true if successfully set, false if not.

Definition at line 357 of file SamRecord.cpp.

358{
359 myStatus = SamStatus::SUCCESS;
360 myQuality = quality;
361 myIsBufferSynced = false;
362 myIsQualityBufferValid = false;
363 return true;
364}

References StatGenStatus::SUCCESS.

◆ setReadName()

bool SamRecord::setReadName ( const char *  readName)

Set QNAME to the passed in name.

Parameters
readName: the read name to set the QNAME to.
Returns
true if successfully set, false if not.

Definition at line 193 of file SamRecord.cpp.

194{
195 myReadName = readName;
196 myIsBufferSynced = false;
197 myIsReadNameBufferValid = false;
198 myStatus = SamStatus::SUCCESS;
199
200 // The read name must at least have some length, otherwise this is a parsing
201 // error.
202 if(myReadName.Length() == 0)
203 {
204 // Invalid - reset ReadName return false.
205 myReadName = DEFAULT_READ_NAME;
206 myRecordPtr->myReadNameLength = DEFAULT_READ_NAME_LENGTH;
207 myStatus.setStatus(SamStatus::INVALID, "0 length Query Name.");
208 return(false);
209 }
210
211 return true;
212}

References StatGenStatus::INVALID, StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.

◆ setReference()

void SamRecord::setReference ( GenomeSequence reference)

Set the reference to the specified genome sequence object.

Parameters
reference: pointer to the GenomeSequence object.

Definition at line 178 of file SamRecord.cpp.

179{
180 myRefPtr = reference;
181}

Referenced by SamFile::GetNumOverlaps(), SamFile::ReadRecord(), SamFile::validateSortOrder(), and SamFile::WriteRecord().

◆ setReferenceName()

bool SamRecord::setReferenceName ( SamFileHeader header,
const char *  referenceName 
)

Set the reference sequence name (RNAME) to the specified name, using the header to determine the reference id.

Parameters
header: SAM/BAM header to use to determine the reference id.
referenceName: reference name to use.
Returns
true if successfully set, false if not

Definition at line 223 of file SamRecord.cpp.

225{
226 myStatus = SamStatus::SUCCESS;
227
228 myReferenceName = referenceName;
229 // If the reference ID does not already exist, add it (pass true)
230 myRecordPtr->myReferenceID = header.getReferenceID(referenceName, true);
231
232 return true;
233}

References SamFileHeader::getReferenceID(), and StatGenStatus::SUCCESS.

◆ setSequence()

bool SamRecord::setSequence ( const char *  seq)

Sets the sequence (SEQ) to the specified SAM formatted sequence string.

Internal processing handles switching between SAM/BAM formats when read/written.

Parameters
seq: SAM sequence string. May contain '='.
Returns
true if successfully set, false if not.

Definition at line 344 of file SamRecord.cpp.

345{
346 myStatus = SamStatus::SUCCESS;
347 mySequence = seq;
348 mySeqWithEq.clear();
349 mySeqWithoutEq.clear();
350
351 myIsBufferSynced = false;
352 myIsSequenceBufferValid = false;
353 return true;
354}

References StatGenStatus::SUCCESS.

◆ setSequenceTranslation()

void SamRecord::setSequenceTranslation ( SequenceTranslation  translation)

Set the type of sequence translation to use when getting the sequence.

The default type (if this method is never called) is NONE (the sequence is left as-is). This can be overridden by using the accessors that take a SequenceTranslation parameter.

Parameters
translation: type of sequence translation to use.

Definition at line 187 of file SamRecord.cpp.

188{
189 mySequenceTranslation = translation;
190}

Referenced by SamFile::GetNumOverlaps(), SamFile::ReadRecord(), and SamFile::validateSortOrder().

◆ shiftIndelsLeft()

bool SamRecord::shiftIndelsLeft ( )

Shift the indels (if any) to the left by updating the CIGAR.

Returns
true if the cigar was shifted, false if not.

Definition at line 368 of file SamRecord.cpp.

369{
370 // Check to see whether or not the Cigar has already been
371 // set - this is determined by checking if alignment length
372 // is set since alignment length and the cigar are set
373 // at the same time.
374 if(myAlignmentLength == -1)
375 {
376 // Not been set, so calculate it.
377 parseCigar();
378 }
379
380 // Track whether or not there was a shift.
381 bool shifted = false;
382
383 // Cigar is set, so now myCigarRoller can be used.
384 // Track where in the read we are.
385 uint32_t currentPos = 0;
386
387 // Since the loop starts at 1 because the first operation can't be shifted,
388 // increment the currentPos past the first operation.
389 if(Cigar::foundInQuery(myCigarRoller[0]))
390 {
391 // This op was found in the read, increment the current position.
392 currentPos += myCigarRoller[0].count;
393 }
394
395 int numOps = myCigarRoller.size();
396
397 // Loop through the cigar operations from the 2nd operation since
398 // the first operation is already on the end and can't shift.
399 for(int currentOp = 1; currentOp < numOps; currentOp++)
400 {
401 if(myCigarRoller[currentOp].operation == Cigar::insert)
402 {
403 // For now, only shift a max of 1 operation.
404 int prevOpIndex = currentOp-1;
405 // Track the next op for seeing if it is the same as the
406 // previous for merging reasons.
407 int nextOpIndex = currentOp+1;
408 if(nextOpIndex == numOps)
409 {
410 // There is no next op, so set it equal to the current one.
411 nextOpIndex = currentOp;
412 }
413 // The start of the previous operation, so we know when we hit it
414 // so we don't shift past it.
415 uint32_t prevOpStart =
416 currentPos - myCigarRoller[prevOpIndex].count;
417
418 // We can only shift if the previous operation is a match or mismatch.
419 if(!Cigar::isMatchOrMismatch(myCigarRoller[prevOpIndex]))
420 {
421 // TODO - shift past pads
422 // An insert is in the read, so increment the position.
423 currentPos += myCigarRoller[currentOp].count;
424 // Not a match/mismatch, so can't shift into it.
425 continue;
426 }
427
428 // It is a match or mismatch, so check to see if we can
429 // shift into it.
430
431 // The end of the insert is calculated by adding the size
432 // of this insert minus 1 to the start of the insert.
433 uint32_t insertEndPos =
434 currentPos + myCigarRoller[currentOp].count - 1;
435
436 // The insert starts at the current position.
437 uint32_t insertStartPos = currentPos;
438
439 // Loop as long as the position before the insert start
440 // matches the last character in the insert. If they match,
441 // the insert can be shifted one index left because the
442 // implied reference will not change. If they do not match,
443 // we can't shift because the implied reference would change.
444 // Stop loop when insertStartPos = prevOpStart, because we
445 // don't want to move past that.
446 while((insertStartPos > prevOpStart) &&
447 (getSequence(insertEndPos,BASES) ==
448 getSequence(insertStartPos - 1, BASES)))
449 {
450 // We can shift, so move the insert start & end one left.
451 --insertEndPos;
452 --insertStartPos;
453 }
454
455 // Determine if a shift has occurred.
456 int shiftLen = currentPos - insertStartPos;
457 if(shiftLen > 0)
458 {
459 // Shift occurred, so adjust the cigar if the cigar will
460 // not become more operations.
461 // If the next operation is the same as the previous or
462 // if the insert and the previous operation switch positions
463 // then the cigar has the same number of operations.
464 // If the next operation is different, and the shift splits
465 // the previous operation in 2, then the cigar would
466 // become longer, so we do not want to shift.
467 if(myCigarRoller[nextOpIndex].operation ==
468 myCigarRoller[prevOpIndex].operation)
469 {
470 // The operations are the same, so merge them by adding
471 // the length of the shift to the next operation.
472 myCigarRoller.IncrementCount(nextOpIndex, shiftLen);
473 myCigarRoller.IncrementCount(prevOpIndex, -shiftLen);
474
475 // If the previous op length is 0, just remove that
476 // operation.
477 if(myCigarRoller[prevOpIndex].count == 0)
478 {
479 myCigarRoller.Remove(prevOpIndex);
480 }
481 shifted = true;
482 }
483 else
484 {
485 // Can only shift if the insert shifts past the
486 // entire previous operation, otherwise an operation
487 // would need to be added.
488 if(insertStartPos == prevOpStart)
489 {
490 // Swap the positions of the insert and the
491 // previous operation.
492 myCigarRoller.Update(currentOp,
493 myCigarRoller[prevOpIndex].operation,
494 myCigarRoller[prevOpIndex].count);
495 // Size of the previous op is the entire
496 // shift length.
497 myCigarRoller.Update(prevOpIndex,
498 Cigar::insert,
499 shiftLen);
500 shifted = true;
501 }
502 }
503 }
504 // An insert is in the read, so increment the position.
505 currentPos += myCigarRoller[currentOp].count;
506 }
507 else if(Cigar::foundInQuery(myCigarRoller[currentOp]))
508 {
509 // This op was found in the read, increment the current position.
510 currentPos += myCigarRoller[currentOp].count;
511 }
512 }
513 if(shifted)
514 {
515 // TODO - setCigar is currently inefficient because later the cigar
516 // roller will be recalculated, but for now it will work.
517 setCigar(myCigarRoller);
518 }
519 return(shifted);
520}
bool Remove(int index)
Remove the operation at the specified index.
bool IncrementCount(int index, int increment)
Increments the count for the operation at the specified index by the specified value,...
bool Update(int index, Operation op, int count)
Updates the operation at the specified index to be the specified operation and have the specified cou...
int size() const
Return the number of cigar operations.
Definition Cigar.h:364
static bool isMatchOrMismatch(Operation op)
Return true if the specified operation is a match/mismatch operation, false if not.
Definition Cigar.h:298
static bool foundInQuery(Operation op)
Return true if the specified operation is found in the query sequence, false if not.
Definition Cigar.h:219
@ insert
insertion to the reference (the query sequence contains bases that have no corresponding base in the ...
Definition Cigar.h:91
bool setCigar(const char *cigar)
Set the CIGAR to the specified SAM formatted cigar string.

References BASES, Cigar::foundInQuery(), getSequence(), CigarRoller::IncrementCount(), Cigar::insert, Cigar::isMatchOrMismatch(), CigarRoller::Remove(), setCigar(), Cigar::size(), and CigarRoller::Update().

◆ writeRecordBuffer() [1/2]

SamStatus::Status SamRecord::writeRecordBuffer ( IFILE  filePtr)

Write the record as a BAM into the specified already opened file.

Parameters
filePtr: file to write the BAM record into.
Returns
status of the write.

Definition at line 1237 of file SamRecord.cpp.

1238{
1239 return(writeRecordBuffer(filePtr, mySequenceTranslation));
1240}
SamStatus::Status writeRecordBuffer(IFILE filePtr)
Write the record as a BAM into the specified already opened file.

References writeRecordBuffer().

Referenced by writeRecordBuffer().

◆ writeRecordBuffer() [2/2]

SamStatus::Status SamRecord::writeRecordBuffer ( IFILE  filePtr,
SequenceTranslation  translation 
)

Write the record as a BAM into the specified already opened file using the specified translation on the sequence.

Parameters
filePtr: file to write the BAM record into.
translation: type of sequence translation to use.
Returns
status of the write.

Definition at line 1244 of file SamRecord.cpp.

1246{
1247 myStatus = SamStatus::SUCCESS;
1248 if((filePtr == NULL) || (filePtr->isOpen() == false))
1249 {
1250 // File is not open, return failure.
1251 myStatus.setStatus(SamStatus::FAIL_ORDER,
1252 "Can't write to an unopened file.");
1253 return(SamStatus::FAIL_ORDER);
1254 }
1255
1256 if((myIsBufferSynced == false) ||
1257 (myBufferSequenceTranslation != translation))
1258 {
1259 if(!fixBuffer(translation))
1260 {
1261 return(myStatus.getStatus());
1262 }
1263 }
1264
1265 // Write the record.
1266 unsigned int numBytesToWrite = myRecordPtr->myBlockSize + sizeof(int32_t);
1267 unsigned int numBytesWritten =
1268 ifwrite(filePtr, myRecordPtr, numBytesToWrite);
1269
1270 // Return status based on if the correct number of bytes were written.
1271 if(numBytesToWrite == numBytesWritten)
1272 {
1273 return(SamStatus::SUCCESS);
1274 }
1275 // The correct number of bytes were not written.
1276 myStatus.setStatus(SamStatus::FAIL_IO, "Failed to write the entire record.");
1277 return(SamStatus::FAIL_IO);
1278}
unsigned int ifwrite(IFILE file, const void *buffer, unsigned int size)
Write the specified number of bytes from the specified buffer into the file.
Definition InputFile.h:669
Status getStatus() const
Return the enum for this status object.

References StatGenStatus::FAIL_IO, StatGenStatus::FAIL_ORDER, StatGenStatus::getStatus(), ifwrite(), InputFile::isOpen(), StatGenStatus::setStatus(), and StatGenStatus::SUCCESS.


The documentation for this class was generated from the following files: