Skip to content

search_string

Search string generation and formulation module.

InvalidPubyearBoundariesError

Bases: ValueError

The provided pubyear boundaries are invalid.

Source code in src/sesg/search_string/formulation.py
6
7
class InvalidPubyearBoundariesError(ValueError):
    """Signals that the given publication year boundaries are invalid.

    Raised by `set_pub_year_boundaries` when both boundaries are given and
    `min_year` is greater than or equal to `max_year`.
    """

generate_search_string(topics, n_words_per_topic, *, n_similar_words_per_word=0, similar_words_generator=None)

Generates a search string that will be enriched with the desired number of similar words.

Parameters:

Name Type Description Default
topics list[list[str]]

List of topics to use.

required
n_words_per_topic int

Number of words to keep in each topic.

required
n_similar_words_per_word int

Number of similar words to generate for each word in each topic.

0
similar_words_generator SimilarWordsGenerator | None

Instance of SimilarWordsGenerator. Must be provided when n_similar_words_per_word is not 0.

None

Returns:

Type Description
str

A search string.

Source code in src/sesg/search_string/generation.py
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
def generate_search_string(
    topics: list[list[str]],
    n_words_per_topic: int,
    *,
    n_similar_words_per_word: int = 0,
    similar_words_generator: SimilarWordsGenerator | None = None,
) -> str:
    """Builds a search string, optionally enriched with similar words.

    Dispatches to `generate_search_string_without_similar_words` when
    `n_similar_words_per_word` is 0, and to
    `generate_search_string_with_similar_words` otherwise.

    Args:
        topics (list[list[str]]): List of topics to use.
        n_words_per_topic (int): Number of words to keep in each topic.
        n_similar_words_per_word (int): Number of similar words to generate
            for each word in each topic. Defaults to 0.
        similar_words_generator (SimilarWordsGenerator | None): Generator
            forwarded to `generate_search_string_with_similar_words`. Must be
            provided when `n_similar_words_per_word` is not 0.

    Returns:
        A search string.

    Raises:
        ValueError: If `n_similar_words_per_word` is not 0 and
            `similar_words_generator` is None.
    """
    if n_similar_words_per_word != 0:
        # Enrichment was requested, so a generator is mandatory.
        if similar_words_generator is None:
            raise ValueError(
                "similar_words_generator must be provided "
                "if n_similar_words_per_word > 0"
            )

        return generate_search_string_with_similar_words(
            topics=topics,
            n_words_per_topic=n_words_per_topic,
            n_similar_words_per_word=n_similar_words_per_word,
            similar_words_generator=similar_words_generator,
        )

    # No enrichment requested: the generator (if any) is ignored.
    return generate_search_string_without_similar_words(
        topics=topics,
        n_words_per_topic=n_words_per_topic,
    )

set_pub_year_boundaries(string, *, min_year=None, max_year=None)

Given a search string, will append PUBYEAR > and PUBYEAR < boundaries as needed.

Parameters:

Name Type Description Default
string str

A search string.

required
min_year int | None

Minimum year of publication. Defaults to None.

None
max_year int | None

Maximum year of publication. Defaults to None.

None

Returns:

Type Description
str

A search string with PUBYEAR boundaries.

Examples:

>>> set_pub_year_boundaries(string='title("machine" and "learning")', max_year=2018)
'title("machine" and "learning") AND PUBYEAR < 2018'
Source code in src/sesg/search_string/formulation.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
def set_pub_year_boundaries(
    string: str,
    *,
    min_year: int | None = None,
    max_year: int | None = None,
) -> str:
    """Given a search string, will append `PUBYEAR >` and `PUBYEAR <` boundaries as needed.

    Args:
        string (str): A search string.
        min_year (int | None, optional): Minimum year of publication. Defaults to None.
        max_year (int | None, optional): Maximum year of publication. Defaults to None.

    Returns:
        A search string with PUBYEAR boundaries.

    Examples:
        >>> set_pub_year_boundaries(string='title("machine" and "learning")', max_year=2018)
        'title("machine" and "learning") AND PUBYEAR < 2018'
    """  # noqa: E501
    if min_year is not None and max_year is not None and min_year >= max_year:
        raise InvalidPubyearBoundariesError("Max year must be greater than min year")

    if min_year is not None:
        string += f" AND PUBYEAR > {min_year}"

    if max_year is not None:
        string += f" AND PUBYEAR < {max_year}"

    return string