Rosalind: Finding a Motif in DNA

Python

from typing import Generator

def kmers(sequence: str, k: int) -> Generator[str, None, None]:
    """
    Generates every overlapping k-mer of a DNA sequence, from left to right.

    Args:
        sequence (str): The DNA sequence.
        k (int): The length of the k-mers to generate. Must not be negative.

    Yields:
        str: Each k-mer, in order of its start position. Nothing is yielded
            when ``k`` is larger than ``len(sequence)``.

    Raises:
        ValueError: If ``k`` is negative. Because this is a generator, the
            error surfaces when iteration starts, not when ``kmers`` is called.

    Examples:
        >>> list(kmers("ATCGATCG", 3))
        ['ATC', 'TCG', 'CGA', 'GAT', 'ATC', 'TCG']
        >>> list(kmers("AT", 3))
        []

    """
    if k < 0:
        # A negative k would turn the slices below into end-relative slices
        # and silently yield meaningless fragments instead of failing.
        raise ValueError(f"k must be non-negative, got {k}")
    # k == 0 is intentionally left as before: it yields one empty string per
    # slice position (len(sequence) + 1 of them).
    for i in range(len(sequence) - k + 1):
        yield sequence[i:i + k]


def motif_finder(sequence: str, motif: str) -> Generator[int, None, None]:
    """
    Finds every occurrence of a motif within a DNA sequence.

    Overlapping occurrences are all reported. An empty motif matches at every
    position, including the one just past the end of the sequence.

    Args:
        sequence (str): The DNA sequence.
        motif (str): The motif to search for.

    Yields:
        int: The start position of each occurrence of the motif within the
            sequence (1-based index), in ascending order.

    Examples:
        >>> list(motif_finder("ATCGATCG", "ATC"))
        [1, 5]
        >>> list(motif_finder("GATATATGCATATACTT", "ATAT"))
        [2, 4, 10]
        >>> list(motif_finder("ATCG", "GG"))
        []

    """
    # str.find returns -1 once no further occurrence exists.
    position = sequence.find(motif)
    while position != -1:
        # Convert the 0-based offset to the 1-based position callers expect.
        yield position + 1
        # Resume just one character past the last hit (not past the whole
        # match) so that overlapping occurrences are still found.
        position = sequence.find(motif, position + 1)
0%