-
Notifications
You must be signed in to change notification settings - Fork 34
/
Copy pathwindow.cpp
151 lines (123 loc) · 4.7 KB
/
window.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
/*!
* @file window.cpp
*
* @brief Window class source file
*/
#include <algorithm>
#include "window.hpp"
#include "spoa/spoa.hpp"
namespace racon {
std::shared_ptr<Window> createWindow(uint64_t id, uint32_t rank, WindowType type,
const char* backbone, uint32_t backbone_length, const char* quality,
uint32_t quality_length) {
if (backbone_length == 0 || backbone_length != quality_length) {
fprintf(stderr, "[racon::createWindow] error: "
"empty backbone sequence/unequal quality length!\n");
exit(1);
}
return std::shared_ptr<Window>(new Window(id, rank, type, backbone,
backbone_length, quality, quality_length));
}
Window::Window(uint64_t id, uint32_t rank, WindowType type, const char* backbone,
uint32_t backbone_length, const char* quality, uint32_t quality_length)
: id_(id), rank_(rank), type_(type), consensus_(), sequences_(),
qualities_(), positions_() {
sequences_.emplace_back(backbone, backbone_length);
qualities_.emplace_back(quality, quality_length);
positions_.emplace_back(0, 0);
}
Window::~Window() {
}
void Window::add_layer(const char* sequence, uint32_t sequence_length,
const char* quality, uint32_t quality_length, uint32_t begin, uint32_t end) {
if (sequence_length == 0 || begin == end) {
return;
}
if (quality != nullptr && sequence_length != quality_length) {
fprintf(stderr, "[racon::Window::add_layer] error: "
"unequal quality size!\n");
exit(1);
}
if (begin >= end || begin > sequences_.front().second || end > sequences_.front().second) {
fprintf(stderr, "[racon::Window::add_layer] error: "
"layer begin and end positions are invalid!\n");
exit(1);
}
sequences_.emplace_back(sequence, sequence_length);
qualities_.emplace_back(quality, quality_length);
positions_.emplace_back(begin, end);
}
bool Window::generate_consensus(std::shared_ptr<spoa::AlignmentEngine> alignment_engine,
bool trim) {
if (sequences_.size() < 3) {
consensus_ = std::string(sequences_.front().first, sequences_.front().second);
return false;
}
spoa::Graph graph{};
graph.AddAlignment(
spoa::Alignment(),
sequences_.front().first, sequences_.front().second,
qualities_.front().first, qualities_.front().second);
std::vector<uint32_t> rank;
rank.reserve(sequences_.size());
for (uint32_t i = 0; i < sequences_.size(); ++i) {
rank.emplace_back(i);
}
std::sort(rank.begin() + 1, rank.end(), [&](uint32_t lhs, uint32_t rhs) {
return positions_[lhs].first < positions_[rhs].first; });
uint32_t offset = 0.01 * sequences_.front().second;
for (uint32_t j = 1; j < sequences_.size(); ++j) {
uint32_t i = rank[j];
spoa::Alignment alignment;
if (positions_[i].first < offset && positions_[i].second >
sequences_.front().second - offset) {
alignment = alignment_engine->Align(
sequences_[i].first, sequences_[i].second,
graph);
} else {
std::vector<const spoa::Graph::Node*> mapping;
auto subgraph = graph.Subgraph(
positions_[i].first,
positions_[i].second,
&mapping);
alignment = alignment_engine->Align(
sequences_[i].first, sequences_[i].second,
subgraph);
subgraph.UpdateAlignment(mapping, &alignment);
}
if (qualities_[i].first == nullptr) {
graph.AddAlignment(
alignment,
sequences_[i].first, sequences_[i].second);
} else {
graph.AddAlignment(
alignment,
sequences_[i].first, sequences_[i].second,
qualities_[i].first, qualities_[i].second);
}
}
std::vector<uint32_t> coverages;
consensus_ = graph.GenerateConsensus(&coverages);
if (type_ == WindowType::kTGS && trim) {
uint32_t average_coverage = (sequences_.size() - 1) / 2;
int32_t begin = 0, end = consensus_.size() - 1;
for (; begin < static_cast<int32_t>(consensus_.size()); ++begin) {
if (coverages[begin] >= average_coverage) {
break;
}
}
for (; end >= 0; --end) {
if (coverages[end] >= average_coverage) {
break;
}
}
if (begin >= end) {
fprintf(stderr, "[racon::Window::generate_consensus] warning: "
"contig %lu might be chimeric in window %u!\n", id_, rank_);
} else {
consensus_ = consensus_.substr(begin, end - begin + 1);
}
}
return true;
}
}