SeqAn3  3.2.0
The Modern C++ library for sequence analysis.
wuss.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <cmath>
16 #include <vector>
17 
21 
22 // ------------------------------------------------------------------
23 // wuss
24 // ------------------------------------------------------------------
25 
26 namespace seqan3
27 {
28 
58 template <uint8_t SIZE = 51>
59 class wuss : public alphabet_base<wuss<SIZE>, SIZE>
60 {
61  static_assert(SIZE >= 15 && SIZE <= 67 && SIZE % 2 == 1,
62  "The wuss<> alphabet size must be an odd number in range 15..67.");
63 
64 private:
66  using base_t = alphabet_base<wuss<SIZE>, SIZE>;
67 
69  friend base_t;
70 
71 protected:
72  using typename base_t::char_type;
73  using typename base_t::rank_type;
74 
75 public:
77  using base_t::to_char;
78  using base_t::to_rank;
79 
83  constexpr wuss() noexcept = default;
84  constexpr wuss(wuss const &) noexcept = default;
85  constexpr wuss(wuss &&) noexcept = default;
86  constexpr wuss & operator=(wuss const &) noexcept = default;
87  constexpr wuss & operator=(wuss &&) noexcept = default;
88  ~wuss() noexcept = default;
89 
91 
100  constexpr bool is_pair_open() const noexcept
101  {
102  return interaction_tab[to_rank()] < 0;
103  }
104 
110  constexpr bool is_pair_close() const noexcept
111  {
112  return interaction_tab[to_rank()] > 0;
113  }
114 
120  constexpr bool is_unpaired() const noexcept
121  {
122  return interaction_tab[to_rank()] == 0;
123  }
124 
130  // formula: (alphabet size - 7 unpaired characters) / 2, as every bracket exists as opening/closing pair
131  static constexpr uint8_t max_pseudoknot_depth{static_cast<uint8_t>((alphabet_size - 7) / 2)};
132 
141  constexpr std::optional<uint8_t> pseudoknot_id() const noexcept
142  {
143  if (interaction_tab[to_rank()] != 0)
144  return std::abs(interaction_tab[to_rank()]) - 1;
145  else
146  return std::nullopt; // unpaired
147  }
149 
150 private:
152  static constexpr std::array<char_type, alphabet_size> rank_to_char_table{
154  chars{'.', ':', ',', '-', '_', '~', ';', '<', '(', '[', '{', '>', ')', ']', '}'};
155 
156  // pseudoknot letters
157  for (rank_type rnk = 15u; rnk + 1u < alphabet_size; rnk += 2u)
158  {
159  char_type const off = static_cast<char_type>((rnk - 15u) / 2u);
160  chars[rnk] = 'A' + off;
161  chars[rnk + 1u] = 'a' + off;
162  }
163 
164  return chars;
165 }()
166 }; // namespace seqan3
167 
169 static constexpr std::array<rank_type, 256> char_to_rank_table{[]() constexpr {std::array<rank_type, 256> rank_table{};
170 
171 // initialize with unpaired (std::array::fill unfortunately not constexpr)
172 for (rank_type & rnk : rank_table)
173  rnk = 6u;
174 
175 // set alphabet values
176 for (rank_type rnk = 0u; rnk < alphabet_size; ++rnk)
177  rank_table[rank_to_char_table[rnk]] = rnk;
178 return rank_table;
179 }
180 ()
181 }
182 ;
183 
185 static constexpr char_type rank_to_char(rank_type const rank)
186 {
187  return rank_to_char_table[rank];
188 }
189 
191 static constexpr rank_type char_to_rank(char_type const chr)
192 {
193  using index_t = std::make_unsigned_t<char_type>;
194  return char_to_rank_table[static_cast<index_t>(chr)];
195 }
196 
200 static std::array<int8_t, SIZE> const interaction_tab;
201 }
202 ;
203 
204 template <uint8_t SIZE>
205 constexpr std::array<int8_t, SIZE> wuss<SIZE>::interaction_tab = []() constexpr
206 {
207  std::array<int8_t, alphabet_size> interaction_table{};
208  int cnt_open = 0;
209  int cnt_close = 0;
210 
211  for (rank_type rnk = 0u; rnk <= 6u; ++rnk)
212  {
213  interaction_table[rnk] = 0;
214  }
215 
216  for (rank_type rnk = 7u; rnk <= 10u; ++rnk)
217  {
218  interaction_table[rnk] = --cnt_open;
219  }
220 
221  for (rank_type rnk = 11u; rnk <= 14u; ++rnk)
222  {
223  interaction_table[rnk] = ++cnt_close;
224  }
225 
226  for (rank_type rnk = 15u; rnk + 1u < alphabet_size; rnk += 2u)
227  {
228  interaction_table[rnk] = --cnt_open;
229  interaction_table[rnk + 1u] = ++cnt_close;
230  }
231 
232  return interaction_table;
233 }
234 ();
235 
239 using wuss51 = wuss<51>;
240 
241 inline namespace literals
242 {
243 
257 constexpr wuss51 operator""_wuss51(char const ch) noexcept
258 {
259  return wuss51{}.assign_char(ch);
260 }
261 
273 inline std::vector<wuss51> operator""_wuss51(char const * str, std::size_t len)
274 {
276  vec.resize(len);
277 
278  for (size_t idx = 0ul; idx < len; ++idx)
279  vec[idx].assign_char(str[idx]);
280 
281  return vec;
282 }
284 
285 } // namespace literals
286 
287 } // namespace seqan3
Provides seqan3::rna_structure_alphabet.
Provides seqan3::alphabet_base.
A CRTP-base that makes defining a custom alphabet easier.
Definition: alphabet_base.hpp:57
constexpr char_type to_char() const noexcept requires (!std::same_as<char_t, void>)
Return the letter as a character of char_type.
Definition: alphabet_base.hpp:115
constexpr derived_type & assign_char(char_type const chr) noexcept requires (!std::same_as<char_t, void>)
Assign from a character, implicitly converts invalid characters.
Definition: alphabet_base.hpp:163
constexpr rank_type to_rank() const noexcept
Return the letter's numeric value (rank in the alphabet).
Definition: alphabet_base.hpp:137
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:80
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:199
std::conditional_t< std::same_as< char_t, void >, char, char_t > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition: alphabet_base.hpp:72
The WUSS structure alphabet of the characters .<>:,-_~;()[]{}AaBbCcDd...
Definition: wuss.hpp:60
static constexpr uint8_t max_pseudoknot_depth
The ability of this alphabet to represent pseudoknots, i.e. crossing interactions, up to a certain depth.
Definition: wuss.hpp:131
constexpr bool is_pair_close() const noexcept
Check whether the character represents a leftward interaction in an RNA structure.
Definition: wuss.hpp:110
constexpr bool is_pair_open() const noexcept
Check whether the character represents a rightward interaction in an RNA structure.
Definition: wuss.hpp:100
constexpr std::optional< uint8_t > pseudoknot_id() const noexcept
Get an identifier for a pseudoknotted interaction, where opening and closing brackets of the same type have the same id.
Definition: wuss.hpp:141
constexpr rank_type to_rank() const noexcept
Return the letter's numeric value (rank in the alphabet).
Definition: alphabet_base.hpp:137
detail::min_viable_uint_t< size - 1 > rank_type
The type of the alphabet when represented as a number (e.g. via to_rank()).
Definition: alphabet_base.hpp:80
static constexpr detail::min_viable_uint_t< size > alphabet_size
The size of the alphabet, i.e. the number of different values it can take.
Definition: alphabet_base.hpp:199
constexpr wuss() noexcept=default
Defaulted.
constexpr bool is_unpaired() const noexcept
Check whether the character represents an unpaired position in an RNA structure.
Definition: wuss.hpp:120
std::conditional_t< std::same_as< char_t, void >, char, char_t > char_type
The char representation; conditional needed to make semi alphabet definitions legal.
Definition: alphabet_base.hpp:72
The main SeqAn3 namespace.
Definition: aligned_sequence_concept.hpp:29
T resize(T... args)
Provides utilities for modifying characters.