SeqAn3  3.2.0
The Modern C++ library for sequence analysis.
kmer_hash.hpp
Go to the documentation of this file.
1 // -----------------------------------------------------------------------------------------------------
2 // Copyright (c) 2006-2022, Knut Reinert & Freie Universität Berlin
3 // Copyright (c) 2016-2022, Knut Reinert & MPI für molekulare Genetik
4 // This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
5 // shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
6 // -----------------------------------------------------------------------------------------------------
7 
13 #pragma once
14 
18 #include <seqan3/utility/math.hpp>
19 
20 namespace seqan3::detail
21 {
22 // ---------------------------------------------------------------------------------------------------------------------
23 // kmer_hash_view class
24 // ---------------------------------------------------------------------------------------------------------------------
25 
38 template <std::ranges::view urng_t>
39 class kmer_hash_view : public std::ranges::view_interface<kmer_hash_view<urng_t>>
40 {
41 private:
42  static_assert(std::ranges::forward_range<urng_t>, "The kmer_hash_view only works on forward_ranges");
43  static_assert(semialphabet<std::ranges::range_reference_t<urng_t>>,
44  "The reference type of the underlying range must model seqan3::semialphabet.");
45 
47  urng_t urange;
48 
50  shape shape_;
51 
52  template <bool const_range>
53  class basic_iterator;
54 
55 public:
59  kmer_hash_view()
60  requires std::default_initializable<urng_t>
61  = default;
62  kmer_hash_view(kmer_hash_view const & rhs) = default;
63  kmer_hash_view(kmer_hash_view && rhs) = default;
64  kmer_hash_view & operator=(kmer_hash_view const & rhs) = default;
65  kmer_hash_view & operator=(kmer_hash_view && rhs) = default;
66  ~kmer_hash_view() = default;
67 
72  kmer_hash_view(urng_t urange_, shape const & s_) : urange{std::move(urange_)}, shape_{s_}
73  {
74  if (shape_.count() > (64 / std::log2(alphabet_size<std::ranges::range_reference_t<urng_t>>)))
75  {
76  throw std::invalid_argument{"The chosen shape/alphabet combination is not valid. "
77  "The alphabet or shape size must be reduced."};
78  }
79  }
80 
85  template <typename rng_t>
86  requires (!std::same_as<std::remove_cvref_t<rng_t>, kmer_hash_view>) && std::ranges::viewable_range<rng_t>
87  && std::constructible_from<urng_t, std::ranges::ref_view<std::remove_reference_t<rng_t>>>
88  kmer_hash_view(rng_t && urange_, shape const & s_) :
89  urange{std::views::all(std::forward<rng_t>(urange_))},
90  shape_{s_}
91  {
92  if (shape_.count() > (64 / std::log2(alphabet_size<std::ranges::range_reference_t<urng_t>>)))
93  {
94  throw std::invalid_argument{"The chosen shape/alphabet combination is not valid. "
95  "The alphabet or shape size must be reduced."};
96  }
97  }
99 
116  auto begin() noexcept
117  {
118  return basic_iterator<false>{std::ranges::begin(urange), std::ranges::end(urange), shape_};
119  }
120 
122  auto begin() const noexcept
124  {
125  return basic_iterator<true>{std::ranges::cbegin(urange), std::ranges::cend(urange), shape_};
126  }
127 
143  auto end() noexcept
144  {
145  // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
146  if constexpr (std::ranges::common_range<urng_t>)
147  return basic_iterator<false>{std::ranges::begin(urange), std::ranges::end(urange), shape_, true};
148  else
149  return std::ranges::end(urange);
150  }
151 
153  auto end() const noexcept
155  {
156  // Assigning the end iterator to the text_right iterator of the basic_iterator only works for common ranges.
157  if constexpr (std::ranges::common_range<urng_t const>)
158  return basic_iterator<true>{std::ranges::cbegin(urange), std::ranges::cend(urange), shape_, true};
159  else
160  return std::ranges::cend(urange);
161  }
163 
167  auto size()
168  requires std::ranges::sized_range<urng_t>
169  {
170  using size_type = std::ranges::range_size_t<urng_t>;
171  return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
172  }
173 
175  auto size() const
176  requires std::ranges::sized_range<urng_t const>
177  {
178  using size_type = std::ranges::range_size_t<urng_t const>;
179  return std::max<size_type>(std::ranges::size(urange) + 1, shape_.size()) - shape_.size();
180  }
181 };
182 
210 template <std::ranges::view urng_t>
211 template <bool const_range>
212 class kmer_hash_view<urng_t>::basic_iterator :
213  public maybe_iterator_category<maybe_const_iterator_t<const_range, urng_t>>
214 {
215 private:
217  using it_t = maybe_const_iterator_t<const_range, urng_t>;
219  using sentinel_t = maybe_const_sentinel_t<const_range, urng_t>;
220 
221  template <bool other_const_range>
222  friend class basic_iterator;
223 
224 public:
229  using difference_type = typename std::iter_difference_t<it_t>;
231  using value_type = size_t;
233  using pointer = void;
235  using reference = value_type;
237  using iterator_concept = std::conditional_t<std::contiguous_iterator<it_t>,
239  detail::iterator_concept_tag_t<it_t>>;
241 
245  constexpr basic_iterator() = default;
246  constexpr basic_iterator(basic_iterator const &) = default;
247  constexpr basic_iterator(basic_iterator &&) = default;
248  constexpr basic_iterator & operator=(basic_iterator const &) = default;
249  constexpr basic_iterator & operator=(basic_iterator &&) = default;
250  ~basic_iterator() = default;
251 
253  constexpr basic_iterator(basic_iterator<!const_range> const & it) noexcept
254  requires const_range
255  :
256  hash_value{std::move(it.hash_value)},
257  roll_factor{std::move(it.roll_factor)},
258  shape_{std::move(it.shape_)},
259  text_left{std::move(it.text_left)},
260  text_right{std::move(it.text_right)}
261  {}
262 
274  basic_iterator(it_t it_start, sentinel_t it_end, shape s_) :
275  shape_{s_},
276  text_left{it_start},
277  text_right{std::ranges::next(text_left, shape_.size() - 1, it_end)}
278  {
279  assert(std::ranges::size(shape_) > 0);
280 
281  // shape size = 3
282  // Text: 1 2 3 4 5 6 7 8 9
283  // text_left: ^
284  // text_right: ^
285  // distance(text_left, text_right) = 2
286  if (shape_.size() <= std::ranges::distance(text_left, text_right) + 1)
287  {
288  roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
289  hash_full();
290  }
291  }
292 
316  basic_iterator(it_t it_start, sentinel_t it_end, shape s_, bool SEQAN3_DOXYGEN_ONLY(is_end)) : shape_{s_}
317  {
318  assert(std::ranges::size(shape_) > 0);
319 
320  auto urange_size = std::ranges::distance(it_start, it_end);
321  auto step = (shape_.size() > urange_size + 1) ? 0 : urange_size - shape_.size() + 1;
322  text_left = std::ranges::next(it_start, step, it_end);
323 
324  // shape size = 3
325  // Text: 1 2 3 4 5 6 7 8 9
326  // text_left: ^
327  // text_right: ^
328  // distance(text_left, text_right) = 2
329  if (shape_.size() <= std::ranges::distance(text_left, it_end) + 1)
330  {
331  roll_factor = pow(sigma, static_cast<size_t>(std::ranges::size(shape_) - 1));
332  hash_full();
333  }
334 
335  text_right = it_end;
336  }
338 
342 
344  friend bool operator==(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
345  {
346  return lhs.text_right == rhs;
347  }
348 
350  friend bool operator==(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
351  {
352  return lhs == rhs.text_right;
353  }
354 
356  friend bool operator==(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
357  {
358  return std::tie(lhs.text_right, lhs.shape_) == std::tie(rhs.text_right, rhs.shape_);
359  }
360 
362  friend bool operator!=(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
363  {
364  return !(lhs == rhs);
365  }
366 
368  friend bool operator!=(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
369  {
370  return !(lhs == rhs);
371  }
372 
374  friend bool operator!=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
375  {
376  return !(lhs == rhs);
377  }
378 
380  friend bool operator<(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
381  {
382  return (lhs.shape_ <= rhs.shape_) && (lhs.text_right < rhs.text_right);
383  }
384 
386  friend bool operator>(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
387  {
388  return (lhs.shape_ >= rhs.shape_) && (lhs.text_right > rhs.text_right);
389  }
390 
392  friend bool operator<=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
393  {
394  return (lhs.shape_ <= rhs.shape_) && (lhs.text_right <= rhs.text_right);
395  }
396 
398  friend bool operator>=(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
399  {
400  return (lhs.shape_ >= rhs.shape_) && (lhs.text_right >= rhs.text_right);
401  }
402 
404 
406  basic_iterator & operator++() noexcept
407  {
408  hash_forward();
409  return *this;
410  }
411 
413  basic_iterator operator++(int) noexcept
414  {
415  basic_iterator tmp{*this};
416  hash_forward();
417  return tmp;
418  }
419 
423  basic_iterator & operator--() noexcept
424  requires std::bidirectional_iterator<it_t>
425  {
426  hash_backward();
427  return *this;
428  }
429 
433  basic_iterator operator--(int) noexcept
434  requires std::bidirectional_iterator<it_t>
435  {
436  basic_iterator tmp{*this};
437  hash_backward();
438  return tmp;
439  }
440 
444  basic_iterator & operator+=(difference_type const skip) noexcept
445  requires std::random_access_iterator<it_t>
446  {
447  hash_forward(skip);
448  return *this;
449  }
450 
454  basic_iterator operator+(difference_type const skip) const noexcept
455  requires std::random_access_iterator<it_t>
456  {
457  basic_iterator tmp{*this};
458  return tmp += skip;
459  }
460 
464  friend basic_iterator operator+(difference_type const skip, basic_iterator const & it) noexcept
465  requires std::random_access_iterator<it_t>
466  {
467  return it + skip;
468  }
469 
473  basic_iterator & operator-=(difference_type const skip) noexcept
474  requires std::random_access_iterator<it_t>
475  {
476  hash_backward(skip);
477  return *this;
478  }
479 
484  basic_iterator operator-(difference_type const skip) const noexcept
485  requires std::random_access_iterator<it_t>
486  {
487  basic_iterator tmp{*this};
488  return tmp -= skip;
489  }
490 
494  friend basic_iterator operator-(difference_type const skip, basic_iterator const & it) noexcept
495  requires std::random_access_iterator<it_t>
496  {
497  return it - skip;
498  }
499 
504  friend difference_type operator-(basic_iterator const & lhs, basic_iterator const & rhs) noexcept
505  requires std::sized_sentinel_for<it_t, it_t>
506  {
507  return static_cast<difference_type>(lhs.text_right - rhs.text_right);
508  }
509 
513  friend difference_type operator-(sentinel_t const & lhs, basic_iterator const & rhs) noexcept
514  requires std::sized_sentinel_for<sentinel_t, it_t>
515  {
516  return static_cast<difference_type>(lhs - rhs.text_right);
517  }
518 
522  friend difference_type operator-(basic_iterator const & lhs, sentinel_t const & rhs) noexcept
523  requires std::sized_sentinel_for<it_t, sentinel_t>
524  {
525  return static_cast<difference_type>(lhs.text_right - rhs);
526  }
527 
531  reference operator[](difference_type const n) const
532  requires std::random_access_iterator<it_t>
533  {
534  return *(*this + n);
535  }
536 
538  value_type operator*() const noexcept
539  {
540  return hash_value + to_rank(*text_right);
541  }
542 
543 private:
545  using alphabet_t = std::iter_value_t<it_t>;
546 
548  static constexpr auto const sigma{alphabet_size<alphabet_t>};
549 
551  size_t hash_value{0};
552 
554  size_t roll_factor{0};
555 
557  shape shape_;
558 
560  it_t text_left;
561 
563  it_t text_right;
564 
566  void hash_forward()
567  {
568  if (shape_.all())
569  {
570  hash_roll_forward();
571  }
572  else
573  {
574  std::ranges::advance(text_left, 1);
575  hash_full();
576  }
577  }
578 
583  void hash_forward(difference_type const skip)
584  requires std::random_access_iterator<it_t>
585  {
586  std::ranges::advance(text_left, skip);
587  hash_full();
588  }
589 
593  void hash_backward()
594  requires std::bidirectional_iterator<it_t>
595  {
596  if (shape_.all())
597  {
598  hash_roll_backward();
599  }
600  else
601  {
602  std::ranges::advance(text_left, -1);
603  hash_full();
604  }
605  }
606 
611  void hash_backward(difference_type const skip)
612  {
613  std::ranges::advance(text_left, -skip);
614  hash_full();
615  }
616 
618  void hash_full()
619  {
620  text_right = text_left;
621  hash_value = 0;
622 
623  for (size_t i{0}; i < shape_.size() - 1u; ++i)
624  {
625  hash_value += shape_[i] * to_rank(*text_right);
626  hash_value *= shape_[i] ? sigma : 1;
627  std::ranges::advance(text_right, 1);
628  }
629  }
630 
632  void hash_roll_forward()
633  {
634  hash_value -= to_rank(*(text_left)) * roll_factor;
635  hash_value += to_rank(*(text_right));
636  hash_value *= sigma;
637 
638  std::ranges::advance(text_left, 1);
639  std::ranges::advance(text_right, 1);
640  }
641 
645  void hash_roll_backward()
646  requires std::bidirectional_iterator<it_t>
647  {
648  std::ranges::advance(text_left, -1);
649  std::ranges::advance(text_right, -1);
650 
651  hash_value /= sigma;
652  hash_value -= to_rank(*(text_right));
653  hash_value += to_rank(*(text_left)) * roll_factor;
654  }
655 };
656 
658 template <std::ranges::viewable_range rng_t>
659 kmer_hash_view(rng_t &&, shape const & shape_) -> kmer_hash_view<std::views::all_t<rng_t>>;
660 
661 // ---------------------------------------------------------------------------------------------------------------------
662 // kmer_hash_fn (adaptor definition)
663 // ---------------------------------------------------------------------------------------------------------------------
664 
668 struct kmer_hash_fn
669 {
671  constexpr auto operator()(shape const & shape_) const
672  {
673  return adaptor_from_functor{*this, shape_};
674  }
675 
683  template <std::ranges::range urng_t>
684  constexpr auto operator()(urng_t && urange, shape const & shape_) const
685  {
686  static_assert(std::ranges::viewable_range<urng_t>,
687  "The range parameter to views::kmer_hash cannot be a temporary of a non-view range.");
688  static_assert(std::ranges::forward_range<urng_t>,
689  "The range parameter to views::kmer_hash must model std::ranges::forward_range.");
690  static_assert(semialphabet<std::ranges::range_reference_t<urng_t>>,
691  "The range parameter to views::kmer_hash must be over elements of seqan3::semialphabet.");
692 
693  return kmer_hash_view{std::forward<urng_t>(urange), shape_};
694  }
695 };
697 
698 } // namespace seqan3::detail
699 
700 namespace seqan3::views
701 {
750 inline constexpr auto kmer_hash = detail::kmer_hash_fn{};
751 
752 } // namespace seqan3::views
Core alphabet concept and free function/type trait wrappers.
T begin(T... args)
T end(T... args)
T forward(T... args)
requires requires
The rank_type of the semi-alphabet; defined as the return type of seqan3::to_rank....
Definition: alphabet/concept.hpp:164
constexpr auto to_rank
Return the rank representation of a (semi-)alphabet object.
Definition: alphabet/concept.hpp:155
constexpr auto kmer_hash
Computes hash values for each position of a range via a given shape.
Definition: kmer_hash.hpp:750
constexpr size_t size
The size of a type pack.
Definition: type_pack/traits.hpp:146
Specifies requirements of an input range type for which the const version of that type satisfies the ...
The basis for seqan3::alphabet, but requires only rank interface (not char).
Provides math related functionality.
The SeqAn namespace for views.
Definition: char_strictly_to.hpp:22
SeqAn specific customisations in the standard namespace.
T next(T... args)
T operator!=(T... args)
T pow(T... args)
Provides overloads for std::hash.
Provides seqan3::shape.
T tie(T... args)