libStatGen Software 1
Loading...
Searching...
No Matches
WriteFiles.cpp
1/*
2 * Copyright (C) 2010 Regents of the University of Michigan
3 *
4 * This program is free software: you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation, either version 3 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program. If not, see <http://www.gnu.org/licenses/>.
16 */
17
18#include "WriteFiles.h"
19#include "Validate.h"
20
21#include <assert.h>
22
23void testWrite()
24{
25 TestWrite writeTest;
26 writeTest.testWrite();
27}
28
29const std::string TestWrite::HEADER_TEXT1 = "This is my 1st test header.";
30const std::string TestWrite::SEC1_REFNAME = "This is my 1st RefName";
31const std::string TestWrite::SEC1REC2_INDELSEQ1 = "AC";
32const std::string TestWrite::SEC1REC2_INDELSEQ2 = "TCA";
33const std::string TestWrite::SEC2_REFNAME = "This is my 2nd RefName";
34const std::string TestWrite::HEADER_TEXT2 = "This is my 2nd test header.";
35const std::string TestWrite::HEADER_TEXT3 = "This is my 3rd test header.";
36
37void TestWrite::testWrite()
38{
39 GlfFile glfOut;
40
41 std::string testFile = "results/MyTestOut1.glf";
42
43 assert(glfOut.openForWrite(testFile.c_str(), false));
44
45 // Create a glf header.
46 GlfHeader glfHeader;
47 GlfRefSection glfSection;
48 GlfRecord record;
49
50 // Test writing refsection with no header - exception
51 bool caughtException = false;
52 try
53 {
54 assert(glfOut.writeRefSection(glfSection) == false);
55 }
56 catch (std::exception& e)
57 {
58 caughtException = true;
59 }
60 assert(caughtException);
61
62 // Test writing record with no header - exception.
63 caughtException = false;
64 try
65 {
66 assert(glfOut.writeRecord(record) == false);
67 }
68 catch (std::exception& e)
69 {
70 caughtException = true;
71 }
72 assert(caughtException);
73
74 // Write the header.
75 writeHeader(glfOut, 1);
76
77 // Test writing record with no refsection - exception.
78 caughtException = false;
79 try
80 {
81 assert(glfOut.writeRecord(record) == false);
82 }
83 catch (std::exception& e)
84 {
85 caughtException = true;
86 }
87 assert(caughtException);
88
89
90 //////////////////////////////////////////////
91 writeRefSection1(glfOut);
92
93 // Test writing header after refSection - exception
94 caughtException = false;
95 try
96 {
97 assert(glfOut.writeHeader(glfHeader) == false);
98 }
99 catch (std::exception& e)
100 {
101 caughtException = true;
102 }
103 assert(caughtException);
104
105 writeSec1Record1(glfOut);
106 // Test writing header after record - exception
107 caughtException = false;
108 try
109 {
110 assert(glfOut.writeHeader(glfHeader) == false);
111 }
112 catch (std::exception& e)
113 {
114 caughtException = true;
115 }
116 assert(caughtException);
117
118 writeSec1Record2(glfOut);
119 writeEndMarker(glfOut);
120
121 writeRefSection2(glfOut);
122 writeSec2Record1(glfOut);
123 writeEndMarker(glfOut);
124
125 ////////////////////
126 // Close the file.
127 glfOut.close();
128
129 //////////////////////////////////////////////
130 // Validate the just written file.
131 GlfFile glfIn;
132 assert(glfIn.openForRead(testFile.c_str()));
133
134 readHeader(glfIn, 1);
135 readRefSection1(glfIn);
136 readSec1Record1(glfIn);
137 readSec1Record2(glfIn);
138 readEndMarker(glfIn);
139 readRefSection2(glfIn);
140 readSec2Record1(glfIn);
141 readEndMarker(glfIn);
142 checkEOF(glfIn);
143
144 ////////////////////////////////
145 // NEW FILE
146 testFile = "results/MyTestOut2.glf";
147 assert(glfOut.openForWrite(testFile.c_str()));
148
149 writeHeader(glfOut, 2);
150 writeRefSection1(glfOut);
151 writeSec1Record1(glfOut);
152 writeSec1Record2(glfOut);
153 // Test writing new section without end of section marker - auto-added.
154 writeRefSection2(glfOut);
155 writeSec2Record1(glfOut);
156 // Test closing file with no end of section marker - auto-added.
157 glfOut.close();
158
159 //////////////////////////////////////////////
160 // Validate the just written file.
161 assert(glfIn.openForRead(testFile.c_str()));
162
163 readHeader(glfIn, 2);
164 readRefSection1(glfIn);
165 readSec1Record1(glfIn);
166 readSec1Record2(glfIn);
167 readEndMarker(glfIn);
168 readRefSection2(glfIn);
169 readSec2Record1(glfIn);
170 readEndMarker(glfIn);
171 checkEOF(glfIn);
172
173
174 ////////////////////////////////
175 // NEW FILE
176 testFile = "results/MyTestOut3.glf";
177 {
178 GlfFile glfOutScoped;
179 assert(glfOutScoped.openForWrite(testFile.c_str()));
180
181 writeHeader(glfOutScoped, 3);
182 writeRefSection1(glfOutScoped);
183 writeSec1Record1(glfOutScoped);
184 writeSec1Record2(glfOutScoped);
185 // Test writing new section without end of section marker - auto-added.
186 writeRefSection2(glfOutScoped);
187 writeSec2Record1(glfOutScoped);
188 // Test just letting the file go out of scope with no end
189 // of section marker - auto added.
190 }
191 //////////////////////////////////////////////
192 // Validate the just written file.
193 assert(glfIn.openForRead(testFile.c_str()));
194
195 // Test reading refsection with no header - exception.
196 caughtException = false;
197 try
198 {
199 assert(glfIn.getNextRefSection(glfSection) == false);
200 }
201 catch (std::exception& e)
202 {
203 caughtException = true;
204 }
205 assert(caughtException);
206
207 // Test reading record with no header - exception.
208 caughtException = false;
209 try
210 {
211 assert(glfIn.getNextRecord(record) == false);
212 }
213 catch (std::exception& e)
214 {
215 caughtException = true;
216 }
217 assert(caughtException);
218
219 readHeader(glfIn, 3);
220
221 // Test reading record with no reference section - exception.
222 caughtException = false;
223 try
224 {
225 assert(glfIn.getNextRecord(record) == false);
226 }
227 catch (std::exception& e)
228 {
229 caughtException = true;
230 }
231 assert(caughtException);
232
233 // Test reading header after already read - exception
234 caughtException = false;
235 try
236 {
237 assert(glfIn.readHeader(glfHeader) == false);
238 }
239 catch (std::exception& e)
240 {
241 caughtException = true;
242 }
243 assert(caughtException);
244
245 readRefSection1(glfIn);
246 readSec1Record1(glfIn);
247 readSec1Record2(glfIn);
248 readEndMarker(glfIn);
249 readRefSection2(glfIn);
250 readSec2Record1(glfIn);
251 readEndMarker(glfIn);
252 checkEOF(glfIn);
253
254
255 // Read again, but text reading next refsection before
256 //end of current section - consumes the rest of the records.
257 assert(glfIn.openForRead(testFile.c_str()));
258
259 readHeader(glfIn, 3);
260 readRefSection1(glfIn);
261 readRefSection2(glfIn);
262 readSec2Record1(glfIn);
263 readEndMarker(glfIn);
264 checkEOF(glfIn);
265
266
267}
268
269
270void TestWrite::writeHeader(GlfFile& glfOut, int headerNum)
271{
272 GlfHeader glfHeader;
273 std::string headerString = "t";
274 std::string expectedHeader = "";
275 if(headerNum == 1)
276 {
277 expectedHeader = HEADER_TEXT1;
278 }
279 else if(headerNum == 2)
280 {
281 expectedHeader = HEADER_TEXT2;
282 }
283 else if(headerNum == 3)
284 {
285 expectedHeader = HEADER_TEXT3;
286 }
287
288 assert(glfHeader.getHeaderTextString(headerString));
289 assert(headerString == "");
290 assert(glfHeader.setHeaderTextString(expectedHeader));
291 assert(glfHeader.getHeaderTextString(headerString));
292 assert(headerString == expectedHeader);
293 assert(glfOut.writeHeader(glfHeader));
294}
295
296
297void TestWrite::writeRefSection1(GlfFile& glfOut)
298{
299 GlfRefSection glfSection;
300
301 ////////////////////////////////
302 // Write the reference section.
303 std::string refNameString = "";
304 // Check the default settings (no data has been set yet).
305 assert(glfSection.getName(refNameString));
306 assert(refNameString == "");
307 assert(glfSection.getRefLen() == 0);
308
309 // Set the reference name.
310 assert(glfSection.setName(SEC1_REFNAME));
311 // Check properly set.
312 assert(glfSection.getName(refNameString));
313 assert(refNameString == SEC1_REFNAME);
314 assert(glfSection.getRefLen() == 0);
315
316 // Set the reference sequence length.
317 assert(glfSection.setRefLen(SEC1_REFLEN));
318 // Check properly set.
319 assert(glfSection.getRefLen() == SEC1_REFLEN);
320 assert(glfSection.getName(refNameString));
321 assert(refNameString == SEC1_REFNAME);
322
323 // Write the reference section
324 assert(glfOut.writeRefSection(glfSection));
325}
326
327
328void TestWrite::writeSec1Record1(GlfFile& glfOut)
329{
330 GlfRecord record;
331 assert(record.setRecordType(SEC1REC1_RECTYPE));
332 assert(record.setRefBaseInt(SEC1REC1_REFBASE));
333 assert(record.setOffset(SEC1REC1_OFFSET));
334 assert(record.setMinLk(SEC1REC1_MINLK));
335 assert(record.setReadDepth(SEC1REC1_READDEPTH));
336 assert(record.setRmsMapQ(SEC1REC1_RMSMAPQ));
337 assert(glfOut.writeRecord(record));
338
339 // Verify the settings of record 1.
340 assert(record.getRecordType() == SEC1REC1_RECTYPE);
341 assert(record.getRefBase() == SEC1REC1_REFBASE);
342 assert(record.getOffset() == SEC1REC1_OFFSET);
343 assert(record.getMinLk() == SEC1REC1_MINLK);
344 assert(record.getReadDepth() == SEC1REC1_READDEPTH);
345 assert(record.getRmsMapQ() == SEC1REC1_RMSMAPQ);
346}
347
348
349void TestWrite::writeSec1Record2(GlfFile& glfOut)
350{
351 //////////////////////////////////////////////
352 // Write a record of type 2.
353 GlfRecord record;
354
355 assert(record.setRecordType(SEC1REC2_RECTYPE));
356 assert(record.setRefBaseInt(SEC1REC2_REFBASE));
357 assert(record.setOffset(SEC1REC2_OFFSET));
358 assert(record.setMinLk(SEC1REC2_MINLK));
359 assert(record.setReadDepth(SEC1REC2_READDEPTH));
360 assert(record.setRmsMapQ(SEC1REC2_RMSMAPQ));
361 assert(record.setLkHom1(SEC1REC2_LKHOM1));
362 assert(record.setLkHom2(SEC1REC2_LKHOM2));
363 assert(record.setLkHet(SEC1REC2_LKHET));
364 assert(record.setInsertionIndel1(SEC1REC2_INDELSEQ1));
365 assert(record.setDeletionIndel2(SEC1REC2_INDELSEQ2));
366 assert(glfOut.writeRecord(record));
367
368 // Verify the settings of record 2.
369 std::string indelSeq = "";
370 assert(record.getRecordType() == SEC1REC2_RECTYPE);
371 assert(record.getRefBase() == SEC1REC2_REFBASE);
372 assert(record.getOffset() == SEC1REC2_OFFSET);
373 assert(record.getMinLk() == SEC1REC2_MINLK);
374 assert(record.getReadDepth() == SEC1REC2_READDEPTH);
375 assert(record.getRmsMapQ() == SEC1REC2_RMSMAPQ);
376 assert(record.getLkHom1() == SEC1REC2_LKHOM1);
377 assert(record.getLkHom2() == SEC1REC2_LKHOM2);
378 assert(record.getLkHet() == SEC1REC2_LKHET);
379 assert(record.getIndel1(indelSeq) == SEC1REC2_INDELLEN1);
380 assert(indelSeq == SEC1REC2_INDELSEQ1);
381 assert(record.getIndel2(indelSeq) == SEC1REC2_INDELLEN2);
382 assert(indelSeq == SEC1REC2_INDELSEQ2);
383}
384
385
386void TestWrite::writeEndMarker(GlfFile& glfOut)
387{
388 //////////////////////////////////////////////
389 // Write a record of type 0.
390 GlfRecord record;
391 assert(glfOut.writeRecord(record));
392
393 // Verify the settings of the types.
394 assert(record.getRecordType() == 0);
395 assert(record.getRefBase() == 0);
396}
397
398
399void TestWrite::writeRefSection2(GlfFile& glfOut)
400{
401 GlfRefSection glfSection;
402
403 ////////////////////////////////
404 // Write the reference section.
405 std::string refNameString = "";
406 // Check the default settings (no data has been set yet).
407 assert(glfSection.getName(refNameString));
408 assert(refNameString == "");
409 assert(glfSection.getRefLen() == 0);
410
411 // Set the reference name.
412 assert(glfSection.setName(SEC2_REFNAME));
413 // Check properly set.
414 assert(glfSection.getName(refNameString));
415 assert(refNameString == SEC2_REFNAME);
416 assert(glfSection.getRefLen() == 0);
417
418 // Set the reference sequence length.
419 assert(glfSection.setRefLen(SEC2_REFLEN));
420 // Check properly set.
421 assert(glfSection.getRefLen() == SEC2_REFLEN);
422 assert(glfSection.getName(refNameString));
423 assert(refNameString == SEC2_REFNAME);
424
425 // Write the reference section
426 assert(glfOut.writeRefSection(glfSection));
427}
428
429
430void TestWrite::writeSec2Record1(GlfFile& glfOut)
431{
432 GlfRecord record;
433 assert(record.setRecordType(SEC2REC1_RECTYPE));
434 assert(record.setRefBaseInt(SEC2REC1_REFBASE));
435 assert(record.setOffset(SEC2REC1_OFFSET));
436 assert(record.setMinLk(SEC2REC1_MINLK));
437 assert(record.setReadDepth(SEC2REC1_READDEPTH));
438 assert(record.setRmsMapQ(SEC2REC1_RMSMAPQ));
439 assert(glfOut.writeRecord(record));
440
441 // Verify the settings of record 1.
442 assert(record.getRecordType() == SEC2REC1_RECTYPE);
443 assert(record.getRefBase() == SEC2REC1_REFBASE);
444 assert(record.getOffset() == SEC2REC1_OFFSET);
445 assert(record.getMinLk() == SEC2REC1_MINLK);
446 assert(record.getReadDepth() == SEC2REC1_READDEPTH);
447 assert(record.getRmsMapQ() == SEC2REC1_RMSMAPQ);
448}
449
450
451void TestWrite::readHeader(GlfFile& glfIn, int headerNum)
452{
453 GlfHeader glfHeader;
454 std::string expectedHeader = "";
455 std::string headerString;
456 if(headerNum == 1)
457 {
458 expectedHeader = HEADER_TEXT1;
459 }
460 else if(headerNum == 2)
461 {
462 expectedHeader = HEADER_TEXT2;
463 }
464 else if(headerNum == 3)
465 {
466 expectedHeader = HEADER_TEXT3;
467 }
468 // Check the header string.
469 assert(glfIn.readHeader(glfHeader));
470 assert(glfHeader.getHeaderTextString(headerString));
471 assert(headerString == expectedHeader);
472}
473
474void TestWrite::readRefSection1(GlfFile& glfIn)
475{
476 GlfRefSection glfSection;
477 std::string refNameString;
478 // Check the reference section.
479 assert(glfIn.getNextRefSection(glfSection));
480 assert(glfSection.getName(refNameString));
481 assert(refNameString == SEC1_REFNAME);
482 assert(glfSection.getRefLen() == SEC1_REFLEN);
483}
484
485void TestWrite::readSec1Record1(GlfFile& glfIn)
486{
487 GlfRecord record;
488 // Check the record of type 1.
489 assert(glfIn.getNextRecord(record));
490 assert(record.getRecordType() == SEC1REC1_RECTYPE);
491 assert(record.getRefBase() == SEC1REC1_REFBASE);
492 assert(record.getOffset() == SEC1REC1_OFFSET);
493 assert(record.getMinLk() == SEC1REC1_MINLK);
494 assert(record.getReadDepth() == SEC1REC1_READDEPTH);
495 assert(record.getRmsMapQ() == SEC1REC1_RMSMAPQ);
496}
497
498void TestWrite::readSec1Record2(GlfFile& glfIn)
499{
500 GlfRecord record;
501 std::string indelSeq;
502 //Check the record of type 2.
503 assert(glfIn.getNextRecord(record));
504 assert(record.getRecordType() == SEC1REC2_RECTYPE);
505 assert(record.getRefBase() == SEC1REC2_REFBASE);
506 assert(record.getOffset() == SEC1REC2_OFFSET);
507 assert(record.getMinLk() == SEC1REC2_MINLK);
508 assert(record.getReadDepth() == SEC1REC2_READDEPTH);
509 assert(record.getRmsMapQ() == SEC1REC2_RMSMAPQ);
510 assert(record.getLkHom1() == SEC1REC2_LKHOM1);
511 assert(record.getLkHom2() == SEC1REC2_LKHOM2);
512 assert(record.getLkHet() == SEC1REC2_LKHET);
513 assert(record.getIndel1(indelSeq) == SEC1REC2_INDELLEN1);
514 assert(indelSeq == SEC1REC2_INDELSEQ1);
515 assert(record.getIndel2(indelSeq) == SEC1REC2_INDELLEN2);
516 assert(indelSeq == SEC1REC2_INDELSEQ2);
517}
518
519void TestWrite::readEndMarker(GlfFile& glfIn)
520{
521 GlfRecord record;
522 // Check the record of type 0.
523 // False, since there are no more records in this section.
524 assert(glfIn.getNextRecord(record) == false);
525 assert(record.getRecordType() == 0);
526 assert(record.getRefBase() == 0);
527}
528
529void TestWrite::readRefSection2(GlfFile& glfIn)
530{
531 GlfRefSection glfSection;
532 std::string refNameString;
533 // Check the reference section.
534 assert(glfIn.getNextRefSection(glfSection));
535 assert(glfSection.getName(refNameString));
536 assert(refNameString == SEC2_REFNAME);
537 assert(glfSection.getRefLen() == SEC2_REFLEN);
538}
539
540
541void TestWrite::readSec2Record1(GlfFile& glfIn)
542{
543 GlfRecord record;
544 // Check the record of type 1.
545 assert(glfIn.getNextRecord(record));
546 assert(record.getRecordType() == SEC2REC1_RECTYPE);
547 assert(record.getRefBase() == SEC2REC1_REFBASE);
548 assert(record.getOffset() == SEC2REC1_OFFSET);
549 assert(record.getMinLk() == SEC2REC1_MINLK);
550 assert(record.getReadDepth() == SEC2REC1_READDEPTH);
551 assert(record.getRmsMapQ() == SEC2REC1_RMSMAPQ);
552}
553
554void TestWrite::checkEOF(GlfFile& glfIn)
555{
556 GlfHeader glfHeader;
557 GlfRefSection glfSection;
558 GlfRecord record;
559 // Check end of file - no more refsections
560 assert(glfIn.getNextRefSection(glfSection) == false);
561 assert(glfIn.isEOF());
562}
563
This class allows a user to easily read/write a GLF file.
Definition GlfFile.h:29
bool getNextRefSection(GlfRefSection &refSection)
Gets the next reference section from the file & stores it in the passed in section,...
Definition GlfFile.cpp:240
bool writeRefSection(const GlfRefSection &refSection)
Write the reference section to the file, adding an end marker record if there is a previous section a...
Definition GlfFile.cpp:308
bool openForWrite(const char *filename, bool compressed=true)
Open a glf file for writing with the specified filename.
Definition GlfFile.cpp:109
bool getNextRecord(GlfRecord &record)
Gets the next record from the file & stores it in the passed in record.
Definition GlfFile.cpp:368
bool openForRead(const char *filename)
Open a glf file for reading with the specified filename.
Definition GlfFile.cpp:66
void close()
Close the file if there is one open, adding an end marker record if there is a previous section and o...
Definition GlfFile.cpp:142
bool writeHeader(GlfHeader &header)
Writes the specified header into the file.
Definition GlfFile.cpp:200
bool readHeader(GlfHeader &header)
Reads the header section from the file and stores it in the passed in header.
Definition GlfFile.cpp:165
bool isEOF()
Returns whether or not the end of the file has been reached.
Definition GlfFile.cpp:152
bool writeRecord(const GlfRecord &record)
Writes the specified record into the file.
Definition GlfFile.cpp:429
This class allows a user to easily get/set the fields in a GLF header.
Definition GlfHeader.h:30
bool setHeaderTextString(const std::string &text)
Set the header to the passed in string.
bool getHeaderTextString(std::string &text)
Set the passed in string to the text string stored in this header.
This class allows a user to easily get/set the fields in a GLF record.
Definition GlfRecord.h:29
bool setDeletionIndel2(const std::string &indelSeq)
Set the sequence of the 2nd indel allele if the 2nd indel is a deletion.
int getRecordType() const
Return the record type.
Definition GlfRecord.h:126
uint8_t getLkHet()
Return the likelihood of a heterozygote.
uint8_t getLkHom1()
Return the likelihood of the 1st homozygous indel allele.
bool setLkHom1(uint8_t lk)
Set the likelihood of the first homozygous indel allele.
int getRefBase() const
Return the reference base as an integer.
Definition GlfRecord.h:134
bool setRmsMapQ(uint8_t rmsMapQ)
Set the RMS of mapping qualities of reads covering the site.
uint8_t getRmsMapQ() const
Return the RMS of mapping qualities of reads covering the site.
uint32_t getOffset() const
Return the offset from the precedent record.
bool setRecordType(uint8_t recType)
Set the record type.
bool setRefBaseInt(uint8_t refBase)
Set the reference base from an integer value.
bool setReadDepth(uint32_t readDepth)
Set the read depth.
bool setLkHet(uint8_t lk)
Set the likelihood of a heterozygote.
uint8_t getMinLk() const
Return the minimum likelihood.
bool setLkHom2(uint8_t lk)
Set the likelihood of the 2nd homozygous indel allele.
int16_t getIndel1(std::string &indelSeq)
Get the sequence and length (+:ins, -:del) of the 1st indel allele.
int16_t getIndel2(std::string &indelSeq)
Get the sequence and length (+:ins, -:del) of the 2nd indel allele.
uint32_t getReadDepth() const
Return the read depth.
bool setMinLk(uint8_t minLk)
Set the minimum likelihood.
uint8_t getLkHom2()
Return the likelihood of the 2nd homozygous indel allele.
bool setOffset(uint32_t offset)
Set the offset from the precedent record.
bool setInsertionIndel1(const std::string &indelSeq)
Set the sequence of the first indel allele if the first indel is an insertion.
This class allows a user to easily get/set the fields in a GLF section/chromosome header.
bool setName(const std::string &name)
Set the reference name.
bool setRefLen(uint32_t refLen)
Set the length of the reference sequence.
uint32_t getRefLen() const
Get the length of the reference sequence.
bool getName(std::string &name) const
Get the reference name.