SeqAn3  3.2.0
The Modern C++ library for sequence analysis.
alignment_algorithm.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
15 #include <iterator>
16 #include <memory>
17 #include <optional>
18 #include <ranges>
19 #include <type_traits>
20 
38 
39 namespace seqan3::detail
40 {
41 
75 template <typename config_t, typename... algorithm_policies_t>
76 class alignment_algorithm :
77  public invoke_deferred_crtp_base<algorithm_policies_t, alignment_algorithm<config_t, algorithm_policies_t...>>...
78 {
79 private:
81  using traits_t = alignment_configuration_traits<config_t>;
82 
93  template <typename alignment_algorithm_t = alignment_algorithm>
94  static auto _alignment_column_t() -> decltype(std::declval<alignment_algorithm_t>().current_alignment_column());
95 
97  using alignment_column_t = decltype(_alignment_column_t());
99  using alignment_column_iterator_t = std::ranges::iterator_t<alignment_column_t>;
101  using alignment_result_t = typename traits_t::alignment_result_type;
102 
103  static_assert(!std::same_as<alignment_result_t, empty_type>, "Alignment result type was not configured.");
104 
106  using score_debug_matrix_t =
107  std::conditional_t<traits_t::is_debug,
108  two_dimensional_matrix<std::optional<typename traits_t::original_score_type>,
110  matrix_major_order::column>,
111  empty_type>;
113  using trace_debug_matrix_t =
114  std::conditional_t<traits_t::is_debug,
115  two_dimensional_matrix<std::optional<trace_directions>,
117  matrix_major_order::column>,
118  empty_type>;
119 
120 public:
124  constexpr alignment_algorithm() = default;
125  constexpr alignment_algorithm(alignment_algorithm const &) = default;
126  constexpr alignment_algorithm(alignment_algorithm &&) = default;
127  constexpr alignment_algorithm & operator=(alignment_algorithm const &) = default;
128  constexpr alignment_algorithm & operator=(alignment_algorithm &&) = default;
129  ~alignment_algorithm() = default;
130 
139  explicit constexpr alignment_algorithm(config_t const & cfg) :
140  invoke_deferred_crtp_base<algorithm_policies_t, alignment_algorithm<config_t, algorithm_policies_t...>>{cfg}...,
141  cfg_ptr{std::make_shared<config_t>(cfg)}
142  {
143  this->scoring_scheme = seqan3::get<align_cfg::scoring_scheme>(*cfg_ptr).scheme;
144  this->initialise_alignment_state(*cfg_ptr);
145  }
147 
193  template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
194  requires (!traits_t::is_vectorised) && std::invocable<callback_t, alignment_result_t>
195  void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
196  {
197  using std::get;
198 
199  for (auto && [sequence_pair, idx] : indexed_sequence_pairs)
200  compute_single_pair(idx, get<0>(sequence_pair), get<1>(sequence_pair), callback);
201  }
202 
204  template <indexed_sequence_pair_range indexed_sequence_pairs_t, typename callback_t>
205  requires traits_t::is_vectorised && std::invocable<callback_t, alignment_result_t>
206  void operator()(indexed_sequence_pairs_t && indexed_sequence_pairs, callback_t && callback)
207  {
208  assert(cfg_ptr != nullptr);
209 
210  static_assert(simd_concept<typename traits_t::score_type>, "Expected simd score type.");
211  static_assert(simd_concept<typename traits_t::trace_type>, "Expected simd trace type.");
212 
213  // Extract the batch of sequences for the first and the second sequence.
214  auto sequence1_range = indexed_sequence_pairs | views::elements<0> | views::elements<0>;
215  auto sequence2_range = indexed_sequence_pairs | views::elements<0> | views::elements<1>;
216 
217  // Initialise the find_optimum policy in the simd case.
218  this->initialise_find_optimum_policy(sequence1_range,
219  sequence2_range,
220  this->scoring_scheme.padding_match_score());
221 
222  // Convert batch of sequences to sequence of simd vectors.
223  auto simd_sequences1 = convert_batch_of_sequences_to_simd_vector(sequence1_range);
224  auto simd_sequences2 = convert_batch_of_sequences_to_simd_vector(sequence2_range);
225 
226  max_size_in_collection = std::pair{simd_sequences1.size(), simd_sequences2.size()};
227  // Reset the alignment state's optimum between executions of the alignment algorithm.
228  this->alignment_state.reset_optimum();
229 
230  compute_matrix(simd_sequences1, simd_sequences2);
231 
232  make_alignment_result(indexed_sequence_pairs, callback);
233  }
235 
236 private:
250  template <typename sequence_range_t>
251  constexpr auto convert_batch_of_sequences_to_simd_vector(sequence_range_t & sequences)
252  {
253  assert(static_cast<size_t>(std::ranges::distance(sequences)) <= traits_t::alignments_per_vector);
254 
255  using simd_score_t = typename traits_t::score_type;
256 
257  std::vector<simd_score_t, aligned_allocator<simd_score_t, alignof(simd_score_t)>> simd_sequence{};
258 
259  for (auto && simd_vector_chunk : sequences | views::to_simd<simd_score_t>(this->scoring_scheme.padding_symbol))
260  for (auto && simd_vector : simd_vector_chunk)
261  simd_sequence.push_back(std::move(simd_vector));
262 
263  return simd_sequence;
264  }
265 
283  template <std::ranges::forward_range sequence1_t, std::ranges::forward_range sequence2_t, typename callback_t>
284  constexpr void
285  compute_single_pair(size_t const idx, sequence1_t && sequence1, sequence2_t && sequence2, callback_t & callback)
286  {
287  assert(cfg_ptr != nullptr);
288 
289  if constexpr (traits_t::is_debug)
290  initialise_debug_matrices(sequence1, sequence2);
291 
292  // Reset the alignment state's optimum between executions of the alignment algorithm.
293  this->alignment_state.reset_optimum();
294 
295  if constexpr (traits_t::is_banded)
296  {
297  using seqan3::get;
298  // Get the band and check if band configuration is valid.
299  auto const & band = get<align_cfg::band_fixed_size>(*cfg_ptr);
300  check_valid_band_parameter(sequence1, sequence2, band);
301  auto && [subsequence1, subsequence2] = this->slice_sequences(sequence1, sequence2, band);
302  // It would be great to use this interface here instead
303  compute_matrix(subsequence1, subsequence2, band);
304  make_alignment_result(idx, subsequence1, subsequence2, callback);
305  }
306  else
307  {
308  compute_matrix(sequence1, sequence2);
309  make_alignment_result(idx, sequence1, sequence2, callback);
310  }
311  }
312 
329  template <typename sequence1_t, typename sequence2_t>
330  constexpr void check_valid_band_parameter(sequence1_t && sequence1,
331  sequence2_t && sequence2,
332  align_cfg::band_fixed_size const & band)
333  {
334  static_assert(config_t::template exists<align_cfg::band_fixed_size>(),
335  "The band configuration is required for the banded alignment algorithm.");
336 
337  using diff_type = std::iter_difference_t<std::ranges::iterator_t<sequence1_t>>;
338  static_assert(std::is_signed_v<diff_type>, "Only signed types can be used to test the band parameters.");
339 
340  if (static_cast<diff_type>(band.lower_diagonal) > std::ranges::distance(sequence1))
341  {
342  throw invalid_alignment_configuration{
343  "Invalid band error: The lower diagonal excludes the whole alignment matrix."};
344  }
345 
346  if (static_cast<diff_type>(band.upper_diagonal) < -std::ranges::distance(sequence2))
347  {
348  throw invalid_alignment_configuration{
349  "Invalid band error: The upper diagonal excludes the whole alignment matrix."};
350  }
351  }
352 
365  template <typename sequence1_t, typename sequence2_t>
366  constexpr void initialise_debug_matrices(sequence1_t & sequence1, sequence2_t & sequence2)
367  {
368  size_t rows = std::ranges::distance(sequence2) + 1;
369  size_t cols = std::ranges::distance(sequence1) + 1;
370 
371  score_debug_matrix = score_debug_matrix_t{number_rows{rows}, number_cols{cols}};
372  trace_debug_matrix = trace_debug_matrix_t{number_rows{rows}, number_cols{cols}};
373  }
374 
382  template <typename sequence1_t, typename sequence2_t>
383  void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2)
384  requires (!traits_t::is_banded)
385  {
386  // ----------------------------------------------------------------------------
387  // Initialisation phase: allocate memory and initialise first column.
388  // ----------------------------------------------------------------------------
389 
390  this->allocate_matrix(sequence1, sequence2);
391  initialise_first_alignment_column(sequence2);
392 
393  // ----------------------------------------------------------------------------
394  // Recursion phase: compute column-wise the alignment matrix.
395  // ----------------------------------------------------------------------------
396 
397  for (auto const & alphabet1 : sequence1)
398  {
399  compute_alignment_column<true>(this->scoring_scheme_profile_column(alphabet1), sequence2);
400  finalise_last_cell_in_column(true);
401  }
402 
403  // ----------------------------------------------------------------------------
404  // Wrap up phase: track score in last column and prepare the alignment result.
405  // ----------------------------------------------------------------------------
406 
407  finalise_alignment();
408  }
409 
411  template <typename sequence1_t, typename sequence2_t>
412  void compute_matrix(sequence1_t & sequence1, sequence2_t & sequence2, align_cfg::band_fixed_size const & band)
413  requires (traits_t::is_banded)
414  {
415  // ----------------------------------------------------------------------------
416  // Initialisation phase: allocate memory and initialise first column.
417  // ----------------------------------------------------------------------------
418 
419  // Allocate and initialise first column.
420  this->allocate_matrix(sequence1, sequence2, band, this->alignment_state);
421  using row_index_t = std::ranges::range_difference_t<sequence2_t>;
422  row_index_t last_row_index = this->score_matrix.band_row_index;
423  initialise_first_alignment_column(std::views::take(sequence2, last_row_index));
424 
425  // ----------------------------------------------------------------------------
426  // 1st recursion phase: iterate as long as the band intersects with the first row.
427  // ----------------------------------------------------------------------------
428 
429  row_index_t sequence2_size = std::ranges::distance(sequence2);
430  for (auto const & seq1_value : std::views::take(sequence1, this->score_matrix.band_col_index))
431  {
432  compute_alignment_column<true>(seq1_value, std::views::take(sequence2, ++last_row_index));
433  // Only if band reached last row of matrix the last cell might be tracked.
434  finalise_last_cell_in_column(last_row_index >= sequence2_size);
435  }
436 
437  // ----------------------------------------------------------------------------
438  // 2nd recursion phase: iterate until the end of the matrix.
439  // ----------------------------------------------------------------------------
440 
441  size_t first_row_index = 0;
442  for (auto const & seq1_value : std::views::drop(sequence1, this->score_matrix.band_col_index))
443  {
444  // In the second phase the band moves in every column one base down on the second sequence.
445  compute_alignment_column<false>(seq1_value, sequence2 | views::slice(first_row_index++, ++last_row_index));
446  // Only if band reached last row of matrix the last cell might be tracked.
447  finalise_last_cell_in_column(last_row_index >= sequence2_size);
448  }
449 
450  // ----------------------------------------------------------------------------
451  // Wrap up phase: track score in last column and prepare the alignment result.
452  // ----------------------------------------------------------------------------
453 
454  finalise_alignment();
455  }
456 
469  template <typename sequence2_t>
470  auto initialise_first_alignment_column(sequence2_t && sequence2)
471  {
472  // Get the initial column.
473  alignment_column = this->current_alignment_column();
474  assert(!alignment_column.empty()); // Must contain at least one element.
475 
476  // Initialise first cell.
477  alignment_column_it = alignment_column.begin();
478  this->init_origin_cell(*alignment_column_it, this->alignment_state);
479 
480  // Initialise the remaining cells of this column.
481  for (auto it = std::ranges::begin(sequence2); it != std::ranges::end(sequence2); ++it)
482  this->init_column_cell(*++alignment_column_it, this->alignment_state);
483 
484  // Finalise the last cell of the initial column.
485  bool at_last_row = true;
486  if constexpr (traits_t::is_banded) // If the band reaches until the last row of the matrix.
487  at_last_row = static_cast<size_t>(this->score_matrix.band_row_index) == this->score_matrix.num_rows - 1;
488 
489  finalise_last_cell_in_column(at_last_row);
490  }
491 
507  template <bool initialise_first_cell, typename sequence1_value_t, typename sequence2_t>
508  void compute_alignment_column(sequence1_value_t const & seq1_value, sequence2_t && sequence2)
509  {
510  this->next_alignment_column(); // move to next column and set alignment column iterator accordingly.
511  alignment_column = this->current_alignment_column();
512  alignment_column_it = alignment_column.begin();
513 
514  auto seq2_it = std::ranges::begin(sequence2);
515 
516  if constexpr (initialise_first_cell) // Initialise first cell if it intersects with the first row of the matrix.
517  {
518  this->init_row_cell(*alignment_column_it, this->alignment_state);
519  }
520  else // Compute first cell of banded column if it does not intersect with the first row of the matrix.
521  {
522  this->compute_first_band_cell(*alignment_column_it,
523  this->alignment_state,
524  this->scoring_scheme.score(seq1_value, *seq2_it));
525  ++seq2_it;
526  }
527 
528  for (; seq2_it != std::ranges::end(sequence2); ++seq2_it)
529  this->compute_cell(*++alignment_column_it,
530  this->alignment_state,
531  this->scoring_scheme.score(seq1_value, *seq2_it));
532  }
533 
544  constexpr void finalise_last_cell_in_column(bool const at_last_row) noexcept
545  {
546  if (at_last_row)
547  this->check_score_of_last_row_cell(*alignment_column_it, this->alignment_state);
548 
549  if constexpr (traits_t::is_debug)
550  dump_alignment_column();
551  }
552 
554  constexpr void finalise_alignment() noexcept
555  {
556  // ----------------------------------------------------------------------------
557  // Check for the optimum in last cell/column.
558  // ----------------------------------------------------------------------------
559 
560  this->check_score_of_cells_in_last_column(alignment_column, this->alignment_state);
561  this->check_score_of_last_cell(*alignment_column_it, this->alignment_state);
562  }
563 
590  template <typename index_t, typename sequence1_t, typename sequence2_t, typename callback_t>
591  requires (!traits_t::is_vectorised)
592  constexpr void make_alignment_result([[maybe_unused]] index_t const idx,
593  [[maybe_unused]] sequence1_t & sequence1,
594  [[maybe_unused]] sequence2_t & sequence2,
595  callback_t & callback)
596  {
597  using result_value_t = typename alignment_result_value_type_accessor<alignment_result_t>::type;
598 
599  // ----------------------------------------------------------------------------
600  // Build the alignment result
601  // ----------------------------------------------------------------------------
602 
603  static_assert(seqan3::detail::alignment_configuration_traits<config_t>::has_output_configuration,
604  "The configuration must contain at least one align_cfg::output_* element.");
605 
606  result_value_t res{};
607 
608  if constexpr (traits_t::output_sequence1_id)
609  res.sequence1_id = idx;
610 
611  if constexpr (traits_t::output_sequence2_id)
612  res.sequence2_id = idx;
613 
614  // Choose what needs to be computed.
615  if constexpr (traits_t::compute_score)
616  res.score = this->alignment_state.optimum.score;
617 
618  if constexpr (traits_t::compute_end_positions)
619  {
620  using alignment_coordinate_t = detail::advanceable_alignment_coordinate<>;
621  res.end_positions = alignment_coordinate_t{column_index_type{this->alignment_state.optimum.column_index},
622  row_index_type{this->alignment_state.optimum.row_index}};
623  // At some point this needs to be refactored so that it is not necessary to adapt the coordinate.
624  if constexpr (traits_t::is_banded)
625  res.end_positions.second += res.end_positions.first - this->trace_matrix.band_col_index;
626  }
627 
628  if constexpr (traits_t::compute_begin_positions)
629  {
630  // Get a aligned sequence builder for banded or un-banded case.
631  aligned_sequence_builder builder{sequence1, sequence2};
632 
633  detail::matrix_coordinate const optimum_coordinate{
634  detail::row_index_type{this->alignment_state.optimum.row_index},
635  detail::column_index_type{this->alignment_state.optimum.column_index}};
636  auto trace_res = builder(this->trace_matrix.trace_path(optimum_coordinate));
637  res.begin_positions.first = trace_res.first_sequence_slice_positions.first;
638  res.begin_positions.second = trace_res.second_sequence_slice_positions.first;
639 
640  if constexpr (traits_t::compute_sequence_alignment)
641  res.alignment = std::move(trace_res.alignment);
642  }
643 
644  // Store the matrices in debug mode.
645  if constexpr (traits_t::is_debug)
646  {
647  res.score_debug_matrix = std::move(score_debug_matrix);
648  if constexpr (traits_t::compute_sequence_alignment) // compute alignment
649  res.trace_debug_matrix = std::move(trace_debug_matrix);
650  }
651 
652  callback(std::move(res));
653  }
654 
680  template <typename indexed_sequence_pair_range_t, typename callback_t>
681  requires traits_t::is_vectorised
682  constexpr auto make_alignment_result(indexed_sequence_pair_range_t && index_sequence_pairs, callback_t & callback)
683  {
684  using result_value_t = typename alignment_result_value_type_accessor<alignment_result_t>::type;
685 
686  size_t simd_index = 0;
687  for (auto && [sequence_pairs, alignment_index] : index_sequence_pairs)
688  {
689  (void)sequence_pairs;
690  result_value_t res{};
691 
692  if constexpr (traits_t::output_sequence1_id)
693  res.sequence1_id = alignment_index;
694 
695  if constexpr (traits_t::output_sequence2_id)
696  res.sequence2_id = alignment_index;
697 
698  if constexpr (traits_t::compute_score)
699  res.score = this->alignment_state.optimum.score[simd_index]; // Just take this
700 
701  if constexpr (traits_t::compute_end_positions)
702  {
703  res.end_positions.first = this->alignment_state.optimum.column_index[simd_index];
704  res.end_positions.second = this->alignment_state.optimum.row_index[simd_index];
705  }
706 
707  callback(std::move(res));
708  ++simd_index;
709  }
710  }
711 
720  void dump_alignment_column()
721  {
722  using std::get;
723 
724  auto column = this->current_alignment_column();
725 
726  auto coord = get<1>(column.front()).coordinate;
727  if constexpr (traits_t::is_banded)
728  coord.second += coord.first - this->score_matrix.band_col_index;
729 
730  matrix_offset offset{row_index_type{static_cast<std::ptrdiff_t>(coord.second)},
731  column_index_type{static_cast<std::ptrdiff_t>(coord.first)}};
732 
733  std::ranges::copy(column
735  [](auto const & tpl)
736  {
737  using std::get;
738  return get<0>(tpl).current;
739  }),
740  score_debug_matrix.begin() + offset);
741 
742  // if traceback is enabled.
743  if constexpr (traits_t::compute_sequence_alignment)
744  {
745  auto trace_matrix_it = trace_debug_matrix.begin() + offset;
746  std::ranges::copy(column
748  [](auto const & tpl)
749  {
750  using std::get;
751  return get<1>(tpl).current;
752  }),
753  trace_debug_matrix.begin() + offset);
754  }
755  }
756 
758  std::shared_ptr<config_t> cfg_ptr{};
760  alignment_column_t alignment_column{};
762  alignment_column_iterator_t alignment_column_it{};
764  score_debug_matrix_t score_debug_matrix{};
766  trace_debug_matrix_t trace_debug_matrix{};
768  std::pair<size_t, size_t> max_size_in_collection{};
769 };
770 
771 } // namespace seqan3::detail
Provides seqan3::detail::align_config_band.
Provides seqan3::align_cfg::scoring_scheme.
Provides seqan3::detail::align_result_selector.
Provides seqan3::aligned_allocator.
Provides seqan3::detail::aligned_sequence_builder.
Includes customized exception types for the alignment module .
Provides concepts needed internally for the alignment algorithms.
Provides helper type traits for the configuration and execution of the alignment algorithm.
Provides seqan3::detail::deferred_crtp_base.
Provides seqan3::views::elements.
Provides seqan3::detail::empty_type.
Provides various type traits for use on functions.
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
@ offset
Sequence (seqan3::field::seq) relative start position (0-based), unsigned value.
decltype(detail::transform< trait_t >(list_t{})) transform
Apply a transformation trait to every type in the list and return a seqan3::type_list of the results.
Definition: type_list/traits.hpp:469
constexpr auto slice
A view adaptor that returns a half-open interval on the underlying range.
Definition: slice.hpp:178
matrix_index< size_t > matrix_coordinate
A coordinate type to access an element inside of a two-dimensional matrix.
Definition: matrix_coordinate.hpp:178
constexpr auto const & get(configuration< configs_t... > const &config) noexcept
This is an overloaded member function, provided for convenience. It differs from the above function o...
Definition: configuration.hpp:415
T push_back(T... args)
The <ranges> header from C++20's standard library.
Provides seqan3::simd::simd_type.
Provides seqan3::simd::simd_traits.
Provides seqan3::detail::to_simd view.
Provides the declaration of seqan3::detail::trace_directions.
Provides seqan3::simd::simd_concept.