Library Reference

This page documents how to include CiteURL in your Python programming projects.

The first step is to instantiate a Citator, which by default contains all of CiteURL's built-in Templates:

# Citator is the object that holds the templates and does the matching
from citeurl import Citator
# called with no arguments, it loads all of CiteURL's built-in templates
citator = Citator()

After that, you can feed it text to return a list of Citations it finds:

# sample passage containing several legal citations
text = """
Federal law provides that courts should award prevailing civil rights plaintiffs reasonable attorneys fees, 42 USC § 1988(b), and, by discretion, expert fees, id. at (c). This is because the importance of civil rights litigation cannot be measured by a damages judgment. See Riverside v. Rivera, 477 U.S. 561 (1986). But Evans v. Jeff D. upheld a settlement where the plaintiffs got everything they wanted, on condition that they waive attorneys' fees. 475 U.S. 717 (1986). This ruling lets savvy defendants create a wedge between plaintiffs and their attorneys, discouraging civil rights suits and undermining the court's logic in Riverside, 477 U.S. at 574-78.
"""
# list_cites() returns the citations it finds, in order of appearance
citations = citator.list_cites(text)

Once you have a list of citations, you can get information about each one:

# each '#' line below shows what the print() call directly above it outputs
print(citations[0].text)
# 42 USC § 1988(b)
print(citations[0].tokens)
# {'Title': '42', 'Section': '1988', 'subsection': '(b)'}
print(citations[0].URL)
# https://www.law.cornell.edu/uscode/text/42/1988#b

You can also compare citations to one another, to determine whether they reference the same material or a subsection thereof:

# cite() returns the first citation detected in the given string
art_I = citator.cite('U.S. Const. Art. I')
also_art_I = citator.cite('Article I of the U.S. Constitution')
art_I_sec_3 = citator.cite('U.S. Const. Art. I, § 3')

# `==` holds for two citations to the same material, however they are worded
assert art_I == also_art_I
# `in` holds when the left-hand citation is a subsection of the right-hand one
assert art_I_sec_3 in art_I

If you don't want to bother with all the details, you can also just use insert_links() to turn all the citations in a text into hyperlinks:

# insert_links can be imported and called directly, without a Citator object
from citeurl import insert_links

# the second citation contains inline HTML markup (<i>...</i>)
text = "42 USC § 1988. <i>Id.</i> at (b)."
output = insert_links(text)

# both citations become <a> elements; the "Id." one links to subsection (b)
assert output == '<a class="citation" href="https://www.law.cornell.edu/uscode/text/42/1988" title="42 U.S.C. § 1988">42 USC § 1988</a>. <a class="citation" href="https://www.law.cornell.edu/uscode/text/42/1988#b" title="42 U.S.C. § 1988(b)"><i>Id.</i> at (b)</a>.'

Citator

A collection of citation templates, and the tools to match text against them en masse.

Attributes:

Name	Type	Description
`templates`		a dictionary of citation templates that this citator will try to match against

Source code in citeurl/citator.py

class Citator:
    """
    A collection of citation templates, and the tools to match text
    against them en masse.

    Attributes:
        templates: a dictionary of citation templates that this citator
            will try to match against
    """

    def __init__(
        self,
        defaults = [
            'caselaw',
            'general federal law',
            'specific federal laws',
            'state law',
            'secondary sources',
        ],
        yaml_paths: list[str] = [],
        templates: dict[str, Template] = {},
    ):
        """
        Create a citator from any combination of CiteURL's default
        template sets (by default, all of them), plus any custom
        templates you want, either by pointing to custom YAML files or
        making Template objects at runtime.

        Arguments:
            defaults: names of files to load from the citeurl/templates
                folder. Each file contains one or more of CiteURL's
                built-in templates relevant to the given topic. Pass
                None (or an empty list) to load no built-in templates.
            yaml_paths: paths to custom YAML files to load templates
                from. These are loaded after the defaults, so they can
                inherit and/or overwrite them.
            templates: optional dictionary of Template objects to load
                directly. These are loaded last, after the defaults and
                any yaml_paths.
        """
        # NOTE: the mutable default arguments above are only ever read
        # in this method, never modified, so sharing them is harmless.
        self.templates = {}

        # built-in template files live next to this module
        yamls_path = Path(__file__).parent.absolute() / 'templates'
        for name in defaults or []:
            yaml_file = yamls_path / f'{name}.yaml'
            self.load_yaml(yaml_file.read_text())

        for path in yaml_paths:
            self.load_yaml(Path(path).read_text())
        self.templates.update(templates)

    @classmethod
    def from_yaml(cls, yaml: str):
        """
        Create a citator from scratch (i.e. without the default
        templates) by loading templates from the specified YAML string.
        """
        citator = cls(defaults=None)
        citator.load_yaml(yaml)
        return citator

    def to_yaml(self):
        "Save this citator to a YAML string to load later"
        yamls = [t.to_yaml() for t in self.templates.values()]
        return '\n\n'.join(yamls)

    def load_yaml(self, yaml: str):
        """
        Load templates from the given YAML, overwriting any existing
        templates with the same name. A YAML string that contains no
        document (e.g. an empty string) loads nothing.
        """
        # assumes safe_load is PyYAML's, which returns None for an empty
        # document; fall back to an empty mapping instead of crashing
        for name, data in (safe_load(yaml) or {}).items():
            # templates loaded so far are offered as inheritables, so
            # later templates can build on earlier ones
            self.templates[name] = Template.from_dict(
                name, data, inheritables=self.templates
            )

    def cite(self, text: str, broad: bool=True) -> Citation:
        """
        Check the given text against each of the citator's templates and
        return the first citation detected, or None.

        If broad is true, matching is case-insensitive and each
        template's broad regexes are used in addition to its normal
        regexes.
        """
        for template in self.templates.values():
            cite = template.cite(text, broad=broad)
            if cite:
                return cite
        return None

    def list_cites(
        self,
        text: str,
        id_breaks: re.Pattern = None,
    ) -> list[Citation]:
        """
        Find all citations in the given text, whether longform,
        shortform, or idform. They will be listed in order of
        appearance. If any two citations overlap, the shorter one will
        be deleted.

        Wherever the id_breaks pattern appears, it will interrupt chains
        of id-form citations. This is helpful for handling unrecognized
        citations that would otherwise cause CiteURL's notion of "id."
        to get out of sync with what the text is talking about.
        """
        # first get a list of all long and shortform (not id.) citations
        longforms = []
        for template in self.templates.values():
            longforms += template.list_longform_cites(text)

        shortforms = []
        for citation in longforms:
            shortforms += citation.get_shortform_cites()

        citations = longforms + shortforms
        _sort_and_remove_overlaps(citations)

        # Figure out where to interrupt chains of idform citations,
        # i.e. anywhere a longform or shortform citation starts, plus
        # the start of any substring that matches the id_breaks pattern
        stops = {c.span[0] for c in citations}
        if id_breaks:
            stops.update(
                match.span()[0] for match in id_breaks.finditer(text)
            )
        stops = sorted(stops)
        stops.append(len(text))

        # for each cite, look for idform citations until the next cite
        # or until the next breakpoint
        idforms = []
        for cite in citations:
            # find the next relevant breakpoint, and drop any
            # breakpoints that are already behind the current citation
            first_ahead = next(
                (i for i, stop in enumerate(stops) if stop >= cite.span[1]),
                None,
            )
            if first_ahead is not None:
                stops = stops[first_ahead:]
            limit = stops[0] if stops else None

            # find the first idform reference to the citation, then the
            # first idform reference to that idform, and so on, until
            # the breakpoint
            idform = cite.get_idform_cite(until_index=limit)
            while idform:
                idforms.append(idform)
                idform = idform.get_idform_cite(until_index=limit)

        citations += idforms
        _sort_and_remove_overlaps(citations)
        return citations

    def list_authorities(
        self,
        text: str,
        ignored_tokens = ['subsection', 'clause', 'pincite', 'paragraph'],
        known_authorities: list = [],
        sort_by_cites: bool = True,
        id_breaks: re.Pattern = None,
    ) -> list[Authority]:
        """
        Find each distinct authority mentioned in the given text, and
        return Authority objects whose `citations` attribute lists the
        references to each.

        Arguments:
            text: The string to be scanned for citations
            ignored_tokens: the names of tokens whose values are
                irrelevant to whether the citation matches an authority,
                because they just designate portions within a single
                authority
            known_authorities: passed through unchanged to the
                module-level list_authorities() function. Presumably
                authorities that are already known -- confirm against
                that function.
            sort_by_cites: Whether to sort the resulting list of
                authorities by the number of citations to each one
            id_breaks: passed to list_cites(); wherever this regex
                appears, chains of id-form citations are interrupted
        """
        cites = self.list_cites(text, id_breaks=id_breaks)
        # this resolves to the module-level list_authorities function,
        # not to this method
        return list_authorities(
            cites,
            ignored_tokens = ignored_tokens,
            known_authorities = known_authorities,
            sort_by_cites = sort_by_cites,
        )

    def insert_links(
        self,
        text: str,
        attrs: dict = {'class': 'citation'},
        add_title: bool = True,
        URL_optional: bool = False,
        redundant_links: bool = True,
        id_breaks: re.Pattern = None,
        ignore_markup: bool = True,
        markup_format = 'html',
    ) -> str:
        """
        Scan a text for citations, and return a text with each citation
        converted to a hyperlink.

        Arguments:
            text: the string to scan for citations.
            attrs: various HTML link attributes to give to each link.
                Only relevant when markup_format is html. The dictionary
                is copied, never modified.
            add_title: whether to use citation.name for link titles.
                Only relevant when markup_format is html
            URL_optional: whether to insert an <a> element even when the
                citation does not have an associated URL. Only relevant
                when markup_format is html; in markdown a link needs a
                URL, so citations without one are left as plain text.
            redundant_links: whether to insert a hyperlink if it would
                point to the same URL as the previous link. Only
                applied when markup_format is html.
            id_breaks: wherever this regex appears, interrupt chains of
                "Id."-style citations.
            ignore_markup: whether to preprocess and postprocess the
                text so that CiteURL can detect citations even when
                they contain inline markup, like "<i>Id.</i> at 32"
            markup_format: Either 'html' or 'markdown'. Determines
                what markup to ignore, and also what format to use
                for inserted links.

        Returns:
            text, with a link (an HTML `a` element, or a markdown link)
            for each citation.

        Raises:
            NotImplementedError: if a citation is found and
                markup_format is neither 'html' nor 'markdown'.
        """
        # Work on a private copy: writing href/title into the argument
        # itself would modify the shared default dict (leaking a stale
        # title or href into later calls) or the caller's own dict.
        link_attrs = dict(attrs or {})

        # pull out all the inline HTML tags, e.g. <b>,
        # so they don't interfere with citation matching
        if ignore_markup:
            text, stored_tags = _strip_inline_tags(text, markup_format)

        cite_offsets = []
        running_offset = 0

        last_URL = None
        for cite in self.list_cites(text, id_breaks = id_breaks):
            if markup_format == 'html':
                if not cite.URL and not URL_optional:
                    continue
                if not redundant_links and cite.URL == last_URL:
                    continue
                link_attrs['href'] = cite.URL
                if add_title:
                    link_attrs['title'] = cite.name

                # attributes with empty values (e.g. a missing href
                # when URL_optional is set) are left out entirely
                attr_str = ''.join([
                    f' {k}="{v}"'
                    for k, v in link_attrs.items() if v
                ])
                link = f'<a{attr_str}>{cite.text}</a>'
            elif markup_format == 'markdown':
                # a markdown link needs a URL; without one, leave the
                # citation as plain text rather than link to "None"
                if not cite.URL:
                    continue
                link = f'[{cite.text}]({cite.URL})'
            else:
                raise NotImplementedError()

            cite_offset = len(link) - len(cite.text)
            cite_offsets.append((
                cite.span[0], # beginning of citation
                cite_offset,  # length the citation markup adds
                cite.text,    # the text that was picked up as citation
            ))

            # cite.span refers to the text before any links were
            # inserted, so shift it by what has been added so far
            span = (
                cite.span[0] + running_offset,
                cite.span[1] + running_offset
            )

            text = text[:span[0]] + link + text[span[1]:]

            running_offset += cite_offset
            last_URL = cite.URL

        # put the stripped inline tags back, shifting each one by the
        # length of the links and tags inserted before it
        if ignore_markup:
            running_offset = 0
            for tag in stored_tags:
                temp_offset = 0
                while len(cite_offsets) > 0:
                    # only offset by a cite if the tag
                    # is after the cite start
                    if tag[1] >= cite_offsets[0][0]:
                        offset = cite_offsets[0]
                        # check if the tag is after the cite end
                        tag_start = tag[1]
                        cite_end = offset[0] + len(offset[2])

                        if tag_start >= cite_end:
                            running_offset += offset[1]
                            cite_offsets.pop(0)
                        else:
                            # the tag sits inside the linked text, so
                            # skip only the link's opening markup:
                            # everything the link adds minus the
                            # 4-character "</a>" in html, or the
                            # 1-character "[" in markdown
                            if markup_format == 'html':
                                temp_offset = offset[1] - 4
                            elif markup_format == 'markdown':
                                temp_offset = 1
                            break
                    else:
                        break
                tag_pos = tag[1] + running_offset + temp_offset

                text = text[:tag_pos] + tag[0] + text[tag_pos:]

                running_offset += tag[2]

        return text

    def __iter__(self):
        "Iterate over this citator's templates"
        return iter(self.templates.values())

    def __getitem__(self, key):
        "Look up a template by its key in the templates dictionary"
        return self.templates[key]

    def __setitem__(self, key, value):
        "Add or replace a template under the given key"
        self.templates[key] = value

    def __eq__(self, other_citator):
        "Two citators are equal when their templates dictionaries are"
        # let Python handle comparison with unrelated types instead of
        # failing with an AttributeError
        if not isinstance(other_citator, Citator):
            return NotImplemented
        return self.templates == other_citator.templates

`__init__(defaults=['caselaw', 'general federal law', 'specific federal laws', 'state law', 'secondary sources'], yaml_paths=[], templates={})`

Create a citator from any combination of CiteURL's default template sets (by default, all of them), plus any custom templates you want, either by pointing to custom YAML files or making Template objects at runtime.

Parameters:

Name	Type	Description	Default
`defaults`		names of files to load from the citeurl/templates folder. Each file contains one or more of CiteURL's built-in templates relevant to the given topic.	`['caselaw', 'general federal law', 'specific federal laws', 'state law', 'secondary sources']`
`yaml_paths`	`list[str]`	paths to custom YAML files to load templates from. These are loaded after the defaults, so they can inherit and/or overwrite them.	`[]`
`templates`	`dict[str, Template]`	optional dictionary of Template objects to load directly. These are loaded last, after the defaults and any yaml_paths.	`{}`

Source code in citeurl/citator.py

def __init__(
    self,
    defaults = [
        'caselaw',
        'general federal law',
        'specific federal laws',
        'state law',
        'secondary sources',
    ],
    yaml_paths: list[str] = [],
    templates: dict[str, Template] = {},
):
    """
    Build a citator out of CiteURL's built-in template sets (all of
    them unless told otherwise), custom YAML files, and Template
    objects created at runtime, in that order.

    Arguments:
        defaults: names of files to load from the citeurl/templates
            folder. Each file contains one or more of CiteURL's
            built-in templates relevant to the given topic. A falsy
            value loads none of them.
        yaml_paths: paths to custom YAML files to load templates
            from. These are loaded after the defaults, so they can
            inherit and/or overwrite them.
        templates: optional dictionary of Template objects to load
            directly. These are loaded last, after the defaults and
            any yaml_paths.
    """
    self.templates = {}

    # built-in template files sit in a folder next to this module
    builtin_dir = Path(__file__).parent.absolute() / 'templates'
    builtin_files = [
        builtin_dir / f'{name}.yaml' for name in (defaults or [])
    ]
    custom_files = [Path(path) for path in yaml_paths]

    # built-ins first, then custom files, so the latter can override
    for yaml_file in builtin_files + custom_files:
        self.load_yaml(yaml_file.read_text())

    self.templates.update(templates)

`cite(text, broad=True)`

Check the given text against each of the citator's templates and return the first citation detected, or None.

If broad is true, matching is case-insensitive and each template's broad regexes are used in addition to its normal regexes.

Source code in citeurl/citator.py

def cite(self, text: str, broad: bool=True) -> Citation:
    """
    Try each of the citator's templates against the text, in order,
    and return the first citation any of them detects. Returns None
    when no template matches.

    If broad is true, matching is case-insensitive and each
    template's broad regexes are used in addition to its normal
    regexes.
    """
    # lazy generator: templates after the first hit are never tried
    attempts = (
        template.cite(text, broad=broad)
        for template in self.templates.values()
    )
    return next((found for found in attempts if found), None)

`from_yaml(yaml)` `classmethod`

Create a citator from scratch (i.e. without the default templates) by loading templates from the specified YAML string.

Source code in citeurl/citator.py

@classmethod
def from_yaml(cls, yaml: str):
    """
    Alternate constructor: start with no built-in templates at all,
    load the templates described by the given YAML string, and
    return the resulting citator.
    """
    # defaults=None suppresses loading of the built-in template files
    blank = cls(defaults=None)
    blank.load_yaml(yaml)
    return blank

`insert_links(text, attrs={'class': 'citation'}, add_title=True, URL_optional=False, redundant_links=True, id_breaks=None, ignore_markup=True, markup_format='html')`

Scan a text for citations, and return a text with each citation converted to a hyperlink.

Parameters:

Name	Type	Description	Default
`text`	`str`	the string to scan for citations.	required
`attrs`	`dict`	various HTML link attributes to give to each link. Only relevant when markup_format is html	`{'class': 'citation'}`
`add_title`	`bool`	whether to use citation.name for link titles. Only relevant when markup_format is html	`True`
`URL_optional`	`bool`	whether to insert an `<a>` element even when the citation does not have an associated URL. Only relevant when markup_format is html; in markdown a link needs a URL.	`False`
`redundant_links`	`bool`	whether to insert a hyperlink if it would point to the same URL as the previous link	`True`
`id_breaks`	`Pattern`	wherever this regex appears, interrupt chains of "Id."-style citations.	`None`
`ignore_markup`	`bool`	whether to preprocess and postprocess the text so that CiteURL can detect citations even when they contain inline markup, like `<i>Id.</i> at 32`	`True`
`markup_format`		Either 'html' or 'markdown'. Determines what markup to ignore, and also what format to use for inserted links.	`'html'`

Returns:

Type	Description
`str`	text, with an HTML `a` element for each citation.

Source code in citeurl/citator.py

def insert_links(
    self,
    text: str,
    attrs: dict = {'class': 'citation'},
    add_title: bool = True,
    URL_optional: bool = False,
    redundant_links: bool = True,
    id_breaks: re.Pattern = None,
    ignore_markup: bool = True,
    markup_format = 'html',
) -> str:
    """
    Scan a text for citations, and return a text with each citation
    converted to a hyperlink.

    Arguments:
        text: the string to scan for citations.
        attrs: various HTML link attributes to give to each link.
            Only relevant when markup_format is html. The dictionary
            is copied, never modified.
        add_title: whether to use citation.name for link titles.
            Only relevant when markup_format is html
        URL_optional: whether to insert an <a> element even when the
            citation does not have an associated URL. Only relevant
            when markup_format is html; in markdown a link needs a
            URL, so citations without one are left as plain text.
        redundant_links: whether to insert a hyperlink if it would
            point to the same URL as the previous link. Only applied
            when markup_format is html.
        id_breaks: wherever this regex appears, interrupt chains of
            "Id."-style citations.
        ignore_markup: whether to preprocess and postprocess the
            text so that CiteURL can detect citations even when
            they contain inline markup, like "<i>Id.</i> at 32"
        markup_format: Either 'html' or 'markdown'. Determines
            what markup to ignore, and also what format to use
            for inserted links.

    Returns:
        text, with a link (an HTML `a` element, or a markdown link)
        for each citation.

    Raises:
        NotImplementedError: if a citation is found and markup_format
            is neither 'html' nor 'markdown'.
    """
    # Work on a private copy: writing href/title into the argument
    # itself would modify the shared default dict (leaking a stale
    # title or href into later calls) or the caller's own dict.
    link_attrs = dict(attrs or {})

    # pull out all the inline HTML tags, e.g. <b>,
    # so they don't interfere with citation matching
    if ignore_markup:
        text, stored_tags = _strip_inline_tags(text, markup_format)

    cite_offsets = []
    running_offset = 0

    last_URL = None
    for cite in self.list_cites(text, id_breaks = id_breaks):
        if markup_format == 'html':
            if not cite.URL and not URL_optional:
                continue
            if not redundant_links and cite.URL == last_URL:
                continue
            link_attrs['href'] = cite.URL
            if add_title:
                link_attrs['title'] = cite.name

            # attributes with empty values (e.g. a missing href when
            # URL_optional is set) are left out entirely
            attr_str = ''.join([
                f' {k}="{v}"'
                for k, v in link_attrs.items() if v
            ])
            link = f'<a{attr_str}>{cite.text}</a>'
        elif markup_format == 'markdown':
            # a markdown link needs a URL; without one, leave the
            # citation as plain text rather than link to "None"
            if not cite.URL:
                continue
            link = f'[{cite.text}]({cite.URL})'
        else:
            raise NotImplementedError()

        cite_offset = len(link) - len(cite.text)
        cite_offsets.append((
            cite.span[0], # beginning of citation
            cite_offset,  # length the citation markup adds
            cite.text,    # the text that was picked up as citation
        ))

        # cite.span refers to the text before any links were
        # inserted, so shift it by what has been added so far
        span = (
            cite.span[0] + running_offset,
            cite.span[1] + running_offset
        )

        text = text[:span[0]] + link + text[span[1]:]

        running_offset += cite_offset
        last_URL = cite.URL

    # put the stripped inline tags back, shifting each one by the
    # length of the links and tags inserted before it
    if ignore_markup:
        running_offset = 0
        for tag in stored_tags:
            temp_offset = 0
            while len(cite_offsets) > 0:
                # only offset by a cite if the tag
                # is after the cite start
                if tag[1] >= cite_offsets[0][0]:
                    offset = cite_offsets[0]
                    # check if the tag is after the cite end
                    tag_start = tag[1]
                    cite_end = offset[0] + len(offset[2])

                    if tag_start >= cite_end:
                        running_offset += offset[1]
                        cite_offsets.pop(0)
                    else:
                        # the tag sits inside the linked text, so skip
                        # only the link's opening markup: everything
                        # the link adds minus the 4-character "</a>"
                        # in html, or the 1-character "[" in markdown
                        if markup_format == 'html':
                            temp_offset = offset[1] - 4
                        elif markup_format == 'markdown':
                            temp_offset = 1
                        break
                else:
                    break
            tag_pos = tag[1] + running_offset + temp_offset

            text = text[:tag_pos] + tag[0] + text[tag_pos:]

            running_offset += tag[2]

    return text

`list_authorities(text, ignored_tokens=['subsection', 'clause', 'pincite', 'paragraph'], known_authorities=[], sort_by_cites=True, id_breaks=None)`

Find each distinct authority mentioned in the given text, and return Authority objects whose citations attribute lists the references to each.

Parameters:

Name	Type	Description	Default
`text`	`str`	The string to be scanned for citations	required
`ignored_tokens`		the names of tokens whose values are irrelevant to whether the citation matches an authority, because they just designate portions within a single authority	`['subsection', 'clause', 'pincite', 'paragraph']`
`sort_by_cites`	`bool`	Whether to sort the resulting list of authorities by the number of citations to each one	`True`

Source code in citeurl/citator.py

def list_authorities(
    self,
    text: str,
    ignored_tokens = ['subsection', 'clause', 'pincite', 'paragraph'],
    known_authorities: list = [],
    sort_by_cites: bool = True,
    id_breaks: re.Pattern = None,
) -> list[Authority]:
    """
    Collect the citations in the given text and group them by the
    distinct authority each one refers to. The result is a list of
    Authority objects whose `citations` attribute lists the
    references to each.

    Arguments:
        text: The string to be scanned for citations
        ignored_tokens: the names of tokens whose values are
            irrelevant to whether the citation matches an authority,
            because they just designate portions within a single
            authority
        known_authorities: handed on unchanged to the module-level
            list_authorities() function
        sort_by_cites: Whether to sort the resulting list of
            authorities by the number of citations to each one
        id_breaks: handed on to list_cites()
    """
    found = self.list_cites(text, id_breaks=id_breaks)
    grouping_options = {
        'ignored_tokens': ignored_tokens,
        'known_authorities': known_authorities,
        'sort_by_cites': sort_by_cites,
    }
    # inside the class this name resolves to the module-level
    # function, not to this method
    return list_authorities(found, **grouping_options)

`list_cites(text, id_breaks=None)`

Find all citations in the given text, whether longform, shortform, or idform. They will be listed in order of appearance. If any two citations overlap, the shorter one will be deleted.

Wherever the id_breaks pattern appears, it will interrupt chains of id-form citations. This is helpful for handling unrecognized citations that would otherwise cause CiteURL's notion of "id." to get out of sync with what the text is talking about.

Source code in citeurl/citator.py

def list_cites(
    self,
    text: str,
    id_breaks: re.Pattern = None,
) -> list[Citation]:
    """
    Return every citation in the given text -- longform, shortform,
    and idform alike -- in order of appearance. Where two citations
    overlap, the shorter one is dropped.

    The optional id_breaks pattern cuts chains of id-form citations
    wherever it matches. That helps when an unrecognized citation
    would otherwise leave CiteURL's notion of "id." out of sync with
    what the text is actually talking about.
    """
    # longform matches from every template, followed by the shortform
    # references that each longform match gives rise to
    full_cites = [
        found
        for template in self.templates.values()
        for found in template.list_longform_cites(text)
    ]
    short_cites = [
        found
        for parent in full_cites
        for found in parent.get_shortform_cites()
    ]
    citations = full_cites + short_cites
    _sort_and_remove_overlaps(citations)

    # An id-form chain ends wherever a longform or shortform citation
    # starts, wherever id_breaks matches, and at the end of the text.
    stops = {known.span[0] for known in citations}
    if id_breaks:
        stops.update(hit.span()[0] for hit in id_breaks.finditer(text))
    stops = sorted(stops)
    stops.append(len(text))

    idforms = []
    for cite in citations:
        # discard stops that lie before the end of this citation; the
        # first remaining one bounds the search for its id-forms
        first_ahead = next(
            (i for i, stop in enumerate(stops) if stop >= cite.span[1]),
            None,
        )
        if first_ahead is not None:
            stops = stops[first_ahead:]
        limit = stops[0] if stops else None

        # follow the chain: the id-form that refers to this citation,
        # then the id-form that refers to that one, and so on
        link = cite.get_idform_cite(until_index=limit)
        while link:
            idforms.append(link)
            link = link.get_idform_cite(until_index=limit)

    citations += idforms
    _sort_and_remove_overlaps(citations)
    return citations

`load_yaml(yaml)`

Load templates from the given YAML, overwriting any existing templates with the same name.

Source code in citeurl/citator.py

def load_yaml(self, yaml: str):
    """
    Load templates from the given YAML, overwriting any existing
    templates with the same name. A YAML string that contains no
    document (e.g. an empty string) loads nothing.
    """
    # assumes safe_load is PyYAML's, which returns None for an empty
    # document; fall back to an empty mapping instead of crashing
    for name, data in (safe_load(yaml) or {}).items():
        # templates loaded so far are offered as inheritables, so
        # later templates can build on earlier ones
        self.templates[name] = Template.from_dict(
            name, data, inheritables=self.templates
        )

`to_yaml()`

Save this citator to a YAML string to load later

Source code in citeurl/citator.py

def to_yaml(self):
    """
    Serialize every template in this citator to YAML and return the
    pieces joined by blank lines, as one string that can be loaded
    again later.
    """
    return '\n\n'.join(
        template.to_yaml() for template in self.templates.values()
    )

Citation

A legal reference found in text.

Attributes:

Name	Type	Description
`tokens`		dictionary of the values that define this citation, such as its volume and page number, or its title, section, and subsection, etc
`URL`	`str`	the location, if any, where this citation can be found online, defined by the template's URL_builder
`name`	`str`	a uniform, human-readable representation of this citation, written by the template's name_builder
`text`		the actual text of this citation as found in the source text
`source_text`		the full text that this citation was found in
`template`		the template whose regexes found this citation or its parent
`parent`		the earlier citation, if any, that this citation is a shortform or idform child of
`raw_tokens`		dictionary of tokens as captured in the original regex match, before normalization. Note that for child citations, raw_tokens will include any raw_tokens inferred from the parent citation.
`idform_regexes`		list of regex pattern objects to find child citations later in the text, valid until the next different citation appears.
`shortform_regexes`		list of regex pattern objects to find child citations anywhere in the subsequent text

Source code in citeurl/citation.py

class Citation:
    """
    A legal reference found in text.

    Attributes:
        tokens: dictionary of the values that define this citation, such
            as its volume and page number, or its title, section, and
            subsection, etc

        URL: the location, if any, where this citation can be found
            online, defined by the template's URL_builder

        name: a uniform, human-readable representation of this citation,
            written by the template's name_builder

        text: the actual text of this citation as found in the source
            text

        source_text: the full text that this citation was found in

        template: the template whose regexes found this citation or its
            parent

        parent: the earlier citation, if any, that this citation is a
            shortform or idform child of

        raw_tokens: dictionary of tokens as captured in the original
            regex match, before normalization. Note that for child
            citations, raw_tokens will include any raw_tokens inferred
            from the parent citation.

        idform_regexes: list of regex pattern objects to find child
            citations later in the text, valid until the next different
            citation appears.

        shortform_regexes: list of regex pattern objects to find
            child citations anywhere in the subsequent text

        match: the regex match object this citation was built from

        span: the (start, end) position of the match in source_text
    """

    def __init__(
        self,
        match: re.Match,
        template,
        parent = None,
    ):
        """
        Arguments:
            match: the regex match for this citation's text. Its named
                groups become the citation's raw_tokens.

            template: the template this citation belongs to

            parent: the earlier citation, if any, that this one is a
                shortform or idform child of

        Raises:
            SyntaxError: if a mandatory edit fails while a token is
                being normalized
        """
        self.match = match
        self.text = match.group(0)
        self.source_text = match.string
        self.span = match.span()
        self.template = template
        self.parent = parent
        self.tokens = {}
        self.raw_tokens = match.groupdict()

        # NOTE: parent is compared against None explicitly because
        # Citation defines __len__, so plain truthiness would depend on
        # the length of the parent's text.
        has_parent = parent is not None

        # copy raw_tokens (in order) from the parent citation, but
        # stop at the first one that the child citation overwrites
        if has_parent:
            merged_tokens = {}
            for k in template.tokens.keys():
                if self.raw_tokens.get(k):
                    merged_tokens.update(self.raw_tokens)
                    break
                else:
                    merged_tokens[k] = parent.raw_tokens.get(k)
            self.raw_tokens = merged_tokens

        # normalize raw_tokens to get consistent token values across
        # differently-formatted citations to the same source.
        # This will raise a SyntaxError if a mandatory edit fails
        for name, ttype in template.tokens.items():
            value = self.raw_tokens.get(name)
            self.tokens[name] = ttype.normalize(value)

        # Finally, compile the citation's idform and shortform regexes.
        # To avoid unnecessary work, first try to copy regexes from the
        # parent citation if applicable.

        if has_parent and parent.raw_tokens == self.raw_tokens:
            # then we can safely copy the parent's regexes to the child.
            # The parent's idform list already ends with BASIC_ID_REGEX,
            # so nothing needs to be appended here.
            self.idform_regexes = parent.idform_regexes
            self.shortform_regexes = parent.shortform_regexes
            return

        # otherwise we'll need to compile new shortform regexes,
        # but we can still copy some of them from the parent

        kwargs = {
            'replacements': self.raw_tokens,
            'token_prefix': 'same',
        }
        if has_parent:
            # we can copy regexes, but only if they do not reference a
            # specific value from the citation, e.g. {same volume}.
            self.shortform_regexes = [
                (
                    re.compile(process_pattern(pattern, **kwargs))
                    if '{same ' in pattern else parent.shortform_regexes[i]
                )
                for i, pattern in enumerate(template._processed_shortforms)
            ]

            self.idform_regexes = [
                (
                    re.compile(process_pattern(pattern, **kwargs))
                    if '{same ' in pattern else parent.idform_regexes[i]
                )
                for i, pattern in enumerate(template._processed_idforms)
            ]

        else: # compile all-new idforms and shortforms
            self.shortform_regexes = [
                re.compile(process_pattern(pattern, **kwargs))
                for pattern in self.template._processed_shortforms
            ]
            self.idform_regexes = [
                re.compile(process_pattern(pattern, **kwargs))
                for pattern in self.template._processed_idforms
            ]
        self.idform_regexes.append(BASIC_ID_REGEX)

    @property
    def URL(self) -> str:
        """
        The URL built by the template's URL_builder from this
        citation's tokens, with spaces replaced by '%20'. None if the
        template has no URL_builder; a falsy builder result is returned
        as-is.
        """
        if not self.template.URL_builder:
            return None
        url = self.template.URL_builder(self.tokens)
        if url:
            url = url.replace(' ', '%20')
        return url

    @property
    def name(self) -> str:
        """
        The name built by the template's name_builder from this
        citation's tokens, or None if the template has no name_builder.
        """
        if not self.template.name_builder:
            return None
        return self.template.name_builder(self.tokens)

    def get_shortform_cites(self) -> Iterable:
        """
        Yield each child citation matched by this citation's shortform
        regexes in the source text after the end of this citation.
        Matches that turn out to be invalid citations are skipped.
        """
        keep_trying = True
        span_start = self.span[1]
        while keep_trying:
            try:
                # restart the search just past the previous match
                match = next(match_regexes(
                    regexes=self.shortform_regexes,
                    text=self.source_text,
                    span=(span_start,),
                ))
                span_start = match.span()[1]
                try:
                    yield Citation(
                        match=match,
                        template=self.template,
                        parent=self,
                    )
                except SyntaxError: # it's an invalid citation
                    pass
            except StopIteration: # no more matches
                keep_trying = False

    def get_idform_cite(self, until_index: int=None):
        """
        Return the first child citation matched by this citation's
        idform regexes between the end of this citation and
        until_index. Return None if there is no match, or if the match
        is not a valid citation.
        """
        try:
            match = next(match_regexes(
                regexes = self.idform_regexes,
                text = self.source_text,
                span = (self.span[1], until_index)
            ))
            return Citation(match=match, template=self.template, parent=self)
        except StopIteration:
            return None
        except SyntaxError:
            return None

    def get_next_child(self, span: tuple=None):
        """
        Return the first child citation matched by any of this
        citation's shortform or idform regexes within the given span,
        which defaults to everything after the end of this citation.
        Return None if nothing matches.

        NOTE: unlike get_idform_cite, a SyntaxError raised while
        building the child citation is not caught here.
        """
        try:
            match = next(match_regexes(
                regexes = self.shortform_regexes + self.idform_regexes,
                text = self.source_text,
                span = span if span else (self.span[1], ),
            ))
            return Citation(match=match, template=self.template, parent=self)
        except StopIteration:
            return None

    def __str__(self):
        return str(self.text)

    def __repr__(self):
        # the citation's text doubles as its repr
        return str(self.text)

    def __contains__(self, other_cite):
        """
        Returns True if both citations are from templates with the same
        name, and the only difference between their tokens is that the
        other one has a more specific (i.e. higher-indexed) token than
        any of this one's. Severable tokens are considered a match if
        the other token's value *starts with* this one's.
        """
        if other_cite.template.name != self.template.name:
            return False
        if other_cite.tokens == self.tokens:
            # identical citations do not contain one another
            return False
        for key, value in self.tokens.items():
            # tokens this citation lacks, or that match exactly, never
            # rule out containment
            if not value or other_cite.tokens.get(key) == value:
                continue
            if not self.template.tokens[key].severable:
                return False
            other_value = other_cite.tokens[key]
            if not (other_value and other_value.startswith(value)):
                return False
        return True

    def __eq__(self, other_cite):
        """
        Returns True if both citations are from templates with the same
        name, and they have the exact same token values. Comparing with
        anything that is not a citation is never equal.
        """
        if not isinstance(other_cite, type(self)):
            # let Python fall back to its default comparison instead of
            # failing on a missing .template attribute
            return NotImplemented
        return (
            other_cite.template.name == self.template.name
            and other_cite.tokens == self.tokens
        )

    def __len__(self):
        return len(self.text)

`__contains__(other_cite)`

Returns True if both citations are from templates with the same name, and the only difference between their tokens is that the other one has a more specific (i.e. higher-indexed) token than any of this one's. Severable tokens are considered a match if the other token's value starts with this one's.

Source code in citeurl/citation.py

def __contains__(self, other_cite):
    """
    Tell whether the other citation is a more specific reference
    within this one.

    Returns True if both citations are from templates with the same
    name, and the only difference between their tokens is that the
    other one has a more specific (i.e. higher-indexed) token than
    any of this one's. Severable tokens are considered a match if
    the other token's value *starts with* this one's. Two citations
    with identical tokens do not contain one another.
    """
    if other_cite.template.name != self.template.name:
        return False
    if other_cite.tokens == self.tokens:
        return False
    for token_name, own_value in self.tokens.items():
        # tokens this citation lacks, or that match exactly, never
        # rule out containment
        if not own_value or other_cite.tokens.get(token_name) == own_value:
            continue
        if not self.template.tokens[token_name].severable:
            return False
        their_value = other_cite.tokens[token_name]
        if not (their_value and their_value.startswith(own_value)):
            return False
    return True

`__eq__(other_cite)`

Returns True if both citations are from templates with the same name, and they have the exact same token values.

Source code in citeurl/citation.py

def __eq__(self, other_cite):
    """
    Returns True if both citations are from templates with the same
    name, and they have the exact same token values. Comparing with
    anything that is not a citation is never equal.
    """
    if not isinstance(other_cite, type(self)):
        # let Python fall back to its default comparison instead of
        # failing on a missing .template attribute
        return NotImplemented
    return (
        other_cite.template.name == self.template.name
        and other_cite.tokens == self.tokens
    )

Template

A pattern to recognize a single kind of citation and extract information from it.

Source code in citeurl/citator.py

class Template:
    """
    A pattern to recognize a single kind of citation and extract
    information from it.
    """
    def __init__(
        self,
        name: str,
        tokens: dict[str, TokenType] = {},
        meta: dict[str, str] = {},
        patterns: list[str] = [],
        broad_patterns: list[str] = [],
        shortform_patterns: list[str] = [],
        idform_patterns: list[str] = [],
        name_builder: StringBuilder = None,
        URL_builder: StringBuilder = None,
        inherit_template = None,
    ):
        """
        Arguments:
            name: the name of this template

            tokens: The full dictionary of TokenTypes that citations from
                this template can contain. These must be listed in order
                from least-specific to most. For instance, the U.S.
                Constitution's template puts 'article' before 'section'
                before 'clause', because articles contain sections, and
                sections contain clauses.

            patterns: Patterns are essentially regexes to recognize
                long-form citations to this template. However,
                wherever a token would appear in the regex, it should be
                replaced by the name of the token, enclosed in curly
                braces.

                Patterns are matched in the order that they are listed,
                so if there is a pattern that can only find a subset of
                tokens, it should be listed after the more-complete
                pattern so that the better match won't be precluded.

            broad_patterns: Same as `patterns`, except that they will
                only be used in contexts like search engines, where
                convenience is more important than avoiding false
                positive matches. When used, they will be used in
                addition to the normal patterns.

            shortform_patterns: Same as `patterns`, but these will only
                go into effect after a longform citation has been
                recognized. If a shortform pattern includes "same
                TOKEN_NAME" in curly braces, e.g. "{same volume}", the
                bracketed portion will be replaced with the exact text
                of the corresponding `raw_token` from the long-form
                citation.

            idform_patterns: Same as `shortform_patterns`, except that
                they will only be used to scan text until the next
                different citation occurs.

            URL_builder: `StringBuilder` to construct URLs for found
                citations

            name_builder: `StringBuilder` to construct canonical names
                of found citations

            meta: Optional metadata relating to this template. Patterns
                and StringBuilders can access metadata fields as if they
                were tokens, though fields can be overridden by tokens
                with the same name.

            inherit_template: another `Template` whose values this one
                should copy unless expressly overwritten.

        Raises:
            re.error: if a pattern or broad pattern does not compile
                into a valid regex
        """
        kwargs = locals()
        for attr, default in {
            'name':               None,
            'tokens':             {},
            'patterns':           [],
            'broad_patterns':     [],
            'shortform_patterns': [],
            'idform_patterns':    [],
            'URL_builder':        None,
            'name_builder':       None,
            'meta':               {},
        }.items():
            supplied = kwargs[attr]
            if inherit_template and supplied == default:
                # nothing was specified, so take the parent's value
                value = inherit_template.__dict__.get(attr)
            elif not supplied and isinstance(default, (list, dict)):
                # empty or None: store a fresh container rather than
                # None or the signature's shared mutable default
                value = type(default)()
            else:
                value = supplied
            self.__dict__[attr] = value

        # update inherited StringBuilders with the correct metadata
        if inherit_template and self.meta:
            if self.URL_builder:
                self.URL_builder = copy(self.URL_builder)
                self.URL_builder.defaults = self.meta
            if self.name_builder:
                self.name_builder = copy(self.name_builder)
                self.name_builder.defaults = self.meta

        # use the template's metadata and tokens to make a dictionary
        # of replacements to insert into the regexes before compilation.
        # Tokens are added last, so they override same-named metadata.
        replacements = {k:str(v) for (k, v) in self.meta.items()}
        replacements.update({
            k:fr'(?P<{k}>{v.regex})(?!\w)'
            for (k,v) in self.tokens.items()
        })

        # compile the template's regexes and broad_regexes
        self.regexes = []
        self.broad_regexes = []
        for kind in ['regexes', 'broad_regexes']:
            if kind == 'broad_regexes':
                # broad matching is case-insensitive and also uses the
                # normal patterns
                pattern_list = self.patterns + self.broad_patterns
                flags = re.I
            else:
                pattern_list = self.patterns
                flags = 0

            for p in pattern_list:
                pattern = process_pattern(
                    p,
                    replacements,
                    add_word_breaks=True
                )
                try:
                    regex = re.compile(pattern, flags)
                    self.__dict__[kind].append(regex)
                except re.error as e:
                    i = 'broad ' if kind == 'broad_regexes' else ''
                    raise re.error(
                        f'{self} template\'s {i}pattern "{pattern}" has '
                        f'an error: {e}'
                    ) from e

        # shortforms and idforms are only pre-processed here; each
        # Citation compiles them with its own token values filled in
        self._processed_shortforms = [
            process_pattern(p, replacements, add_word_breaks=True)
            for p in self.shortform_patterns
        ]
        self._processed_idforms = [
            process_pattern(p, replacements, add_word_breaks=True)
            for p in self.idform_patterns
        ]

    @classmethod
    def from_dict(cls, name: str, values: dict, inheritables: dict={}):
        """
        Return a template from a dictionary of values, like a dictionary
        created by parsing a template from YAML format.

        Arguments:
            name: the name of the new template

            values: the template's settings. Keys may be written with
                spaces instead of underscores, and any pattern key may
                be given in singular form with a single pattern. This
                dictionary and its contents are not modified.

            inheritables: existing templates, by name, that the
                'inherit' key is allowed to reference

        Raises:
            KeyError: if 'inherit' names a template that is not in
                inheritables
        """
        normalized = {}
        for key, value in values.items():
            key = key.replace(' ', '_')

            # when pattern is listed in singular form,
            # replace it with a one-item list
            if key.endswith('pattern'):
                key, value = key + 's', [value]

            # unrelated: when a single pattern is split
            # into a list (likely to take advantage of
            # YAML anchors), join it into one string.
            # A new list is built so the caller's data is untouched.
            if key.endswith('patterns') and isinstance(value, list):
                value = [
                    ''.join(p) if isinstance(p, list) else p
                    for p in value
                ]
            normalized[key] = value

        # 'inherit' is not a constructor argument, so always remove it
        inherit = normalized.pop('inherit', None)
        if inherit:
            try:
                normalized['inherit_template'] = inheritables[inherit]
            except KeyError:
                raise KeyError(
                    f'The {name} template tried to reference template '
                    f'"{inherit}" but could not find it. Note that '
                    f'templates can only reference others that are '
                    f'defined higher up in the list, not lower.'
                ) from None

        for key in ['name_builder', 'URL_builder']:
            data = normalized.get(key)
            if data:
                # copy before adding defaults, so that the caller's
                # builder dictionary is left as it was
                data = {**data, 'defaults': normalized.get('meta') or {}}
                normalized[key] = StringBuilder.from_dict(data)
        normalized['tokens'] = {
            k: TokenType.from_dict(k, v)
            for k,v in (normalized.get('tokens') or {}).items()
        }
        return cls(name=name, **normalized)

    def to_dict(self) -> dict:
        """
        Save this Template to a dictionary of values.

        Keys are written with spaces instead of underscores. Empty
        metadata, empty pattern lists, and missing builders are left
        out, and a pattern list with only one item is saved under the
        singular form of its key.
        """
        output = {}
        if self.meta:
            output['meta'] = self.meta
        output['tokens'] = {
            k:v.to_dict() for k, v in self.tokens.items()
        }
        for key in [
            'patterns',
            'broad_patterns',
            'shortform_patterns',
            'idform_patterns',
        ]:
            value = self.__dict__.get(key)
            if not value:
                continue
            elif len(value) > 1:
                output[key] = value
            else: # de-pluralize lists that contain only one pattern
                output[key[:-1]] = value[0]
        for key in ['name_builder', 'URL_builder']:
            if self.__dict__.get(key):
                output[key] = self.__dict__[key].to_dict()

        spaced_output = {k.replace('_', ' '):v for k, v in output.items()}

        return spaced_output

    def to_yaml(self) -> str:
        "save this Template to a YAML string"
        return safe_dump(
            {self.name: self.to_dict()},
            sort_keys = False,
            allow_unicode = True,
        )

    def cite(self, text, broad: bool=True, span: tuple=(0,)) -> Citation:
        """
        Return the first citation that matches this template. If 'broad'
        is True, case-insensitive matching and broad regex patterns will
        be used. If no matches are found, return None.
        """
        regexes = self.broad_regexes if broad else self.regexes
        for match in match_regexes(text, regexes, span=span):
            try:
                return Citation(match, self)
            except SyntaxError: # invalid citation
                continue
        return None

    def list_longform_cites(self, text, broad: bool=False, span: tuple=(0,)):
        """
        Get a list of all long-form citations to this template found in
        the given text. Matches that are not valid citations are
        skipped.
        """
        cites = []
        regexes = self.broad_regexes if broad else self.regexes
        for match in match_regexes(text, regexes, span=span):
            try:
                cites.append(Citation(match, self))
            except SyntaxError:
                continue
        return cites

    def __str__(self):
        return self.name

    def __repr__(self):
        # Only non-empty settings are shown. Note that __eq__ compares
        # templates by this string.
        parts = [f'name="{self.name}"']
        for attr in [
            'tokens',
            'meta',
            'patterns',
            'broad_patterns',
            'shortform_patterns',
            'idform_patterns',
            'name_builder',
            'URL_builder',
        ]:
            value = getattr(self, attr)
            if value:
                parts.append(f'{attr}={value}')
        return 'Template(' + ', '.join(parts) + ')'

    def __contains__(self, citation: Citation):
        # a citation belongs to a template if the names match
        return citation.template.name == self.name

    def __eq__(self, other_template):
        return repr(self) == repr(other_template)

`__init__(name, tokens={}, meta={}, patterns=[], broad_patterns=[], shortform_patterns=[], idform_patterns=[], name_builder=None, URL_builder=None, inherit_template=None)`

Parameters:

Name	Type	Description	Default
`name`	`str`	the name of this template	required
`tokens`	`dict[str, TokenType]`	The full dictionary of TokenTypes that citations from this template can contain. These must be listed in order from least-specific to most. For instance, the U.S. Constitution's template puts 'article' before 'section' before 'clause', because articles contain sections, and sections contain clauses.	`{}`
`patterns`	`list[str]`	Patterns are essentially regexes to recognize long-form citations to this template. However, wherever a token would appear in the regex, it should be replaced by the name of the token, enclosed in curly braces. Patterns are matched in the order that they are listed, so if there is a pattern that can only find a subset of tokens, it should be listed after the more-complete pattern so that the better match won't be precluded.	`[]`
`broad_patterns`	`list[str]`	Same as `patterns`, except that they will only be used in contexts like search engines, where convenience is more important than avoiding false positive matches. When used, they will be used in addition to the normal patterns.	`[]`
`shortform_patterns`	`list[str]`	Same as `patterns`, but these will only go into effect after a longform citation has been recognized. If a shortform pattern includes "same TOKEN_NAME" in curly braces, e.g. "{same volume}", the bracketed portion will be replaced with the exact text of the corresponding `raw_token` from the long-form citation.	`[]`
`idform_patterns`	`list[str]`	Same as `shortform_patterns`, except that they will only be used to scan text until the next different citation occurs.	`[]`
`URL_builder`	`StringBuilder`	`StringBuilder` to construct URLs for found citations	`None`
`name_builder`	`StringBuilder`	`StringBuilder` to construct canonical names of found citations	`None`
`meta`	`dict[str, str]`	Optional metadata relating to this template. Patterns and StringBuilders can access metadata fields as if they were tokens, though fields can be overridden by tokens with the same name.	`{}`
`inherit_template`		another `Template` whose values this one should copy unless expressly overwritten.	`None`

Source code in citeurl/citator.py

def __init__(
    self,
    name: str,
    tokens: dict[str, TokenType] = {},
    meta: dict[str, str] = {},
    patterns: list[str] = [],
    broad_patterns: list[str] = [],
    shortform_patterns: list[str] = [],
    idform_patterns: list[str] = [],
    name_builder: StringBuilder = None,
    URL_builder: StringBuilder = None,
    inherit_template = None,
):
    """
    Arguments:
        name: the name of this template

        tokens: The full dictionary of TokenTypes that citations from
            this template can contain. These must be listed in order
            from least-specific to most. For instance, the U.S.
            Constitution's template puts 'article' before 'section'
            before 'clause', because articles contain sections, and
            sections contain clauses.

        patterns: Patterns are essentially regexes to recognize
            long-form citations to this template. However,
            wherever a token would appear in the regex, it should be
            replaced by the name of the token, enclosed in curly
            braces.

            Patterns are matched in the order that they are listed,
            so if there is a pattern that can only find a subset of
            tokens, it should be listed after the more-complete
            pattern so that the better match won't be precluded.

        broad_patterns: Same as `patterns`, except that they will
            only be used in contexts like search engines, where
            convenience is more important than avoiding false
            positive matches. When used, they will be used in
            addition to the normal patterns.

        shortform_patterns: Same as `patterns`, but these will only
            go into effect after a longform citation has been
            recognized. If a shortform pattern includes "same
            TOKEN_NAME" in curly braces, e.g. "{same volume}", the
            bracketed portion will be replaced with the exact text
            of the corresponding `raw_token` from the long-form
            citation.

        idform_patterns: Same as `shortform_patterns`, except that
            they will only be used to scan text until the next
            different citation occurs.

        URL_builder: `StringBuilder` to construct URLs for found
            citations

        name_builder: `StringBuilder` to construct canonical names
            of found citations

        meta: Optional metadata relating to this template. Patterns
            and StringBuilders can access metadata fields as if they
            were tokens, though fields can be overridden by tokens
            with the same name.

        inherit_template: another `Template` whose values this one
            should copy unless expressly overwritten.

    Raises:
        re.error: if a pattern or broad pattern does not compile
            into a valid regex
    """
    kwargs = locals()
    for attr, default in {
        'name':               None,
        'tokens':             {},
        'patterns':           [],
        'broad_patterns':     [],
        'shortform_patterns': [],
        'idform_patterns':    [],
        'URL_builder':        None,
        'name_builder':       None,
        'meta':               {},
    }.items():
        supplied = kwargs[attr]
        if inherit_template and supplied == default:
            # nothing was specified, so take the parent's value
            value = inherit_template.__dict__.get(attr)
        elif not supplied and isinstance(default, (list, dict)):
            # empty or None: store a fresh container rather than
            # None or the signature's shared mutable default
            value = type(default)()
        else:
            value = supplied
        self.__dict__[attr] = value

    # update inherited StringBuilders with the correct metadata
    if inherit_template and self.meta:
        if self.URL_builder:
            self.URL_builder = copy(self.URL_builder)
            self.URL_builder.defaults = self.meta
        if self.name_builder:
            self.name_builder = copy(self.name_builder)
            self.name_builder.defaults = self.meta

    # use the template's metadata and tokens to make a dictionary
    # of replacements to insert into the regexes before compilation.
    # Tokens are added last, so they override same-named metadata.
    replacements = {k:str(v) for (k, v) in self.meta.items()}
    replacements.update({
        k:fr'(?P<{k}>{v.regex})(?!\w)'
        for (k,v) in self.tokens.items()
    })

    # compile the template's regexes and broad_regexes
    self.regexes = []
    self.broad_regexes = []
    for kind in ['regexes', 'broad_regexes']:
        if kind == 'broad_regexes':
            # broad matching is case-insensitive and also uses the
            # normal patterns
            pattern_list = self.patterns + self.broad_patterns
            flags = re.I
        else:
            pattern_list = self.patterns
            flags = 0

        for p in pattern_list:
            pattern = process_pattern(
                p,
                replacements,
                add_word_breaks=True
            )
            try:
                regex = re.compile(pattern, flags)
                self.__dict__[kind].append(regex)
            except re.error as e:
                i = 'broad ' if kind == 'broad_regexes' else ''
                raise re.error(
                    f'{self} template\'s {i}pattern "{pattern}" has '
                    f'an error: {e}'
                ) from e

    # shortforms and idforms are only pre-processed here; they are
    # stored as strings rather than compiled regexes
    self._processed_shortforms = [
        process_pattern(p, replacements, add_word_breaks=True)
        for p in self.shortform_patterns
    ]
    self._processed_idforms = [
        process_pattern(p, replacements, add_word_breaks=True)
        for p in self.idform_patterns
    ]

`cite(text, broad=True, span=(0,))`

Return the first citation that matches this template. If 'broad' is True, case-insensitive matching and broad regex patterns will be used. If no matches are found, return None.

Source code in citeurl/citator.py

def cite(self, text, broad: bool=True, span: tuple=(0,)) -> Citation:
    """
    Find the first valid citation to this template in the text.

    When 'broad' is True, the template's broad regexes are used;
    otherwise only its normal regexes are. Matches that do not make
    a valid citation are passed over. Returns None when nothing
    usable is found.
    """
    candidates = self.broad_regexes if broad else self.regexes
    for found in match_regexes(text, candidates, span=span):
        try:
            citation = Citation(found, self)
        except SyntaxError:
            # not a valid citation; try the next match
            continue
        return citation
    return None

`from_dict(name, values, inheritables={})` `classmethod`

Return a template from a dictionary of values, like a dictionary created by parsing a template from YAML format.

Source code in citeurl/citator.py

@classmethod
def from_dict(cls, name: str, values: dict, inheritables: dict={}):
    """
    Return a template from a dictionary of values, like a dictionary
    created by parsing a template from YAML format.

    Arguments:
        name: the name of the new template

        values: the template's settings. Keys may be written with
            spaces instead of underscores, and any pattern key may
            be given in singular form with a single pattern. This
            dictionary and its contents are not modified.

        inheritables: existing templates, by name, that the
            'inherit' key is allowed to reference

    Raises:
        KeyError: if 'inherit' names a template that is not in
            inheritables
    """
    normalized = {}
    for key, value in values.items():
        key = key.replace(' ', '_')

        # when pattern is listed in singular form,
        # replace it with a one-item list
        if key.endswith('pattern'):
            key, value = key + 's', [value]

        # unrelated: when a single pattern is split
        # into a list (likely to take advantage of
        # YAML anchors), join it into one string.
        # A new list is built so the caller's data is untouched.
        if key.endswith('patterns') and isinstance(value, list):
            value = [
                ''.join(p) if isinstance(p, list) else p
                for p in value
            ]
        normalized[key] = value

    # 'inherit' is not a constructor argument, so always remove it
    inherit = normalized.pop('inherit', None)
    if inherit:
        try:
            normalized['inherit_template'] = inheritables[inherit]
        except KeyError:
            raise KeyError(
                f'The {name} template tried to reference template '
                f'"{inherit}" but could not find it. Note that '
                f'templates can only reference others that are '
                f'defined higher up in the list, not lower.'
            ) from None

    for key in ['name_builder', 'URL_builder']:
        data = normalized.get(key)
        if data:
            # copy before adding defaults, so that the caller's
            # builder dictionary is left as it was
            data = {**data, 'defaults': normalized.get('meta') or {}}
            normalized[key] = StringBuilder.from_dict(data)
    normalized['tokens'] = {
        k: TokenType.from_dict(k, v)
        for k,v in (normalized.get('tokens') or {}).items()
    }
    return cls(name=name, **normalized)

`list_longform_cites(text, broad=False, span=(0,))`

Get a list of all long-form citations to this template found in the given text.

Source code in citeurl/citator.py

def list_longform_cites(self, text, broad: bool=False, span: tuple=(0,)):
    """
    Find every long-form citation to this template in the given text.

    The template's broad regexes are used when `broad` is True, and
    its regular regexes otherwise. A match whose Citation raises a
    SyntaxError while being constructed is left out of the result.
    """
    if broad:
        patterns = self.broad_regexes
    else:
        patterns = self.regexes

    found = []
    for hit in match_regexes(text, patterns, span=span):
        try:
            citation = Citation(hit, self)
        except SyntaxError:
            continue
        found.append(citation)
    return found

`to_dict()`

save this Template to a dictionary of values

Source code in citeurl/citator.py

def to_dict(self) -> dict:
    """
    save this Template to a dictionary of values

    Empty fields are left out, a pattern list that holds a single
    pattern is stored under the singular key (e.g. 'pattern'), and
    underscores in the top-level keys are replaced with spaces.
    """
    output = {}
    if self.meta:
        output['meta'] = self.meta
    output['tokens'] = {
        k:v.to_dict() for k, v in self.tokens.items()
    }
    # broad patterns are included so that they survive being saved
    # and then loaded again
    pattern_keys = [
        'patterns',
        'broad_patterns',
        'shortform_patterns',
        'idform_patterns',
    ]
    for key in pattern_keys:
        value = self.__dict__.get(key)
        if not value:
            continue
        elif len(value) > 1:
            output[key] = value
        else: # de-pluralize lists that contain only one pattern
            output[key[:-1]] = value[0]
    for key in ['name_builder', 'URL_builder']:
        if self.__dict__.get(key):
            output[key] = self.__dict__[key].to_dict()

    spaced_output = {k.replace('_', ' '):v for k, v in output.items()}

    return spaced_output

`to_yaml()`

save this Template to a YAML string

Source code in citeurl/citator.py

def to_yaml(self) -> str:
    "serialize this Template, keyed by its name, to a YAML string"
    # key order is kept as-is and non-ASCII characters are written
    # out directly
    named_template = {self.name: self.to_dict()}
    return safe_dump(named_template, sort_keys=False, allow_unicode=True)

TokenType

These objects represent categories of tokens that might be found in a citation.

Attributes:

Name	Type	Description
`regex`		A regular expression that matches the actual text of the token as found in any document, like the "42" in "42 USC § 1983" or the "Fourteenth" in "The Fourteenth Amendment". This regex will automatically be enclosed in a named capture group and inserted into any of the template's match patterns wherever the token's name appears in curly braces.
`edits`		Steps to normalize the token as captured in the regex into a value that is consistent across multiple styles.
`default`		Set the token to this value if it is not found in the citation.
`severable`		If two citations only differ based on this token, and only because one of the tokens extends longer than the other, e.g. "(b)(2)" and "(b)(2)(A)", then `severable` means that the former citation is thought to encompass the latter.

Source code in citeurl/tokens.py

class TokenType:
    """
    These objects represent categories of tokens that might be found in
    a citation.

    Attributes:
        regex: A regular expression that matches the actual text of the
            token as found in any document, like the "42" in "42 USC §
            1983" or the "Fourteenth" in "The Fourteenth Amendment".
            This regex will automatically be enclosed in a named capture
            group and inserted into any of the template's match patterns
            wherever the token's name appears in curly braces.
        edits: Steps to normalize the token as captured in the regex
            into a value that is consistent across multiple styles.
        default: Set the token to this value if it is not found in the
            citation.
        severable: If two citations only differ based on this token,
            and only because one of the tokens extends longer than the
            other, e.g. "(b)(2)" and "(b)(2)(A)", then `severable` means
            that the former citation is thought to encompass the latter.
    """
    def __init__(
        self,
        regex: str = r'\d+',
        edits: 'list[TokenOperation]' = None,
        default: str = None,
        severable: bool = False,
    ):
        self.regex = regex
        # each instance gets its own list; a shared `[]` default would
        # let an append on one TokenType show up in every other one
        self.edits = [] if edits is None else edits
        self.default = default
        self.severable = severable

    @classmethod
    def from_dict(cls, name: str, data: dict):
        """
        load a TokenType from a dictionary of values

        Arguments:
            name: the token's name (not used by this method)
            data: must contain 'regex'; may contain 'default',
                'edits' (a list of TokenOperation dictionaries),
                and 'severable'
        """
        return cls(
            regex = data['regex'],
            default = data.get('default'),
            edits = [
                TokenOperation.from_dict(v)
                for v in data.get('edits', [])
            ],
            severable=data.get('severable', False)
        )

    def to_dict(self) -> dict:
        "save this TokenType to a dictionary for storage in YAML format"
        output = {'regex': self.regex}
        if self.edits:
            output['edits'] = [
                e.to_dict() for e in self.edits
            ]
        if self.default:
            output['default'] = self.default
        if self.severable:
            output['severable'] = True
        return output

    def normalize(self, token: str) -> str:
        """
        Return the default if the token is empty or missing; otherwise
        run the token through each of the edits, in order.
        """
        if not token:
            return self.default
        for op in self.edits:
            token = op(token)
        return token

    def __str__(self):
        return self.regex

    def __repr__(self):
        norms = '[' + ', '.join([
            repr(n) for n in self.edits or []
        ]) + ']'
        return (
            f"TokenType(regex='{self.regex}'"
            + (f", default='{self.default}'" if self.default else '')
            + (f', edits={norms}' if self.edits else '')
            + (', severable=True' if self.severable else '')
            + ')'
        )

`from_dict(name, data)` `classmethod`

load a TokenType from a dictionary of values

Source code in citeurl/tokens.py

@classmethod
def from_dict(cls, name: str, data: dict):
    "build a TokenType out of a dictionary of values"
    # 'regex' is the only required key, so it is read first
    regex = data['regex']
    default = data.get('default')
    operations = []
    for spec in data.get('edits', []):
        operations.append(TokenOperation.from_dict(spec))
    is_severable = data.get('severable', False)
    return cls(
        regex=regex,
        default=default,
        edits=operations,
        severable=is_severable,
    )

`to_dict()`

save this TokenType to a dictionary for storage in YAML format

Source code in citeurl/tokens.py

def to_dict(self) -> dict:
    "convert this TokenType to a plain dictionary, e.g. for YAML storage"
    serialized = dict(regex=self.regex)
    # optional fields are only written when they hold something
    if self.edits:
        serialized['edits'] = [edit.to_dict() for edit in self.edits]
    if self.default:
        serialized['default'] = self.default
    if self.severable:
        serialized['severable'] = True
    return serialized

TokenOperation

A function to perform a predefined string manipulation

Source code in citeurl/tokens.py

class TokenOperation:
    """A function to perform a predefined string manipulation"""
    def __init__(
        self,
        action: str,
        data,
        mandatory: bool = True,
        token: str = None,
        output: str = None,
    ):
        """
        Arguments:
            action: The kind of string manipulation that this operation
                will perform, using the given data. There are a few
                different options:

                'sub': Regex substitution to perform on the text. Needs
                    a list of two values: [PATTERN, REPLACEMENT]

                'lookup': Check if the token matches any of the given
                    regexes (via case-insensitive matching), and if so,
                    replace it with the corresponding value. Needs a
                    dictionary of `regex`: `replacement` pairs.

                'case': Capitalize the token in the specified way.
                    Options are 'upper', 'lower', and 'title'.

                'lpad': Left pad the token with zeros until it is the
                    specified number of characters long. Requires an
                    int specifying the number of characters. You can
                    also specify the padding character by providing a
                    list or tuple:
                    (MINIMUM_LENGTH, PADDING_CHARACTER).

                'number_style': Assume that the token is a number,
                    either in the form of digits, Roman numerals, or
                    number words like "thirty-seven". Convert it into
                    the specified number format, which must be one of
                    these:

                    'cardinal', e.g. "twenty-seven"

                    'ordinal', e.g. "twenty-seventh"

                    'roman', e.g. 'XXVII'

                    'digit', e.g. '27'

                    Note that number formatting only works for positive
                    whole numbers that do not exceed 40.

            data: any data that a given action needs specified, as
                described above

            mandatory: whether a failed lookup or format action should
                invalidate the entire citation

            token: Necessary for operations in StringBuilders. This
                value lets you provide the name of input token to use,
                allowing you to then use the modify_dict() method.

            output: If this value is set, modify_dict() will save the
                operation's output to the dictionary key with this name
                instead of modifying the input token in place.

        Raises:
            SyntaxError: if the action is not a defined operation, or
                if the data is not a valid option for a 'case' or
                'number_style' action
        """
        if action == 'sub':
            self.func = lambda x: re.sub(data[0], data[1], x)
        elif action == 'lookup':
            table = {
                re.compile(k, flags=re.I):v
                for k, v in data.items()
            }
            self.func = lambda x: self._lookup(x, table, mandatory)
        elif action == 'case':
            case_options = ['upper', 'lower', 'title']
            if data not in case_options:
                raise SyntaxError(
                    f'{data} is not a valid case. Valid options: '
                    f'{case_options}'
                )
            self.func = lambda x: self._set_case(x, data)
        elif action == 'lpad':
            # data is either a bare length or a (length, pad_char)
            # pair; YAML delivers the pair as a list
            if isinstance(data, (list, tuple)):
                pad_args = tuple(data)
            else:
                pad_args = (data,)
            self.func = lambda x: self._left_pad(x, *pad_args)
        elif action == 'number_style':
            action_options = ['cardinal', 'ordinal', 'roman', 'digit']
            if data not in action_options:
                raise SyntaxError(
                    f'{data} is not a valid number style. Valid options: '
                    f'{action_options}'
                )
            self.func = lambda x: self._number_style(x, data, mandatory)
        else:
            raise SyntaxError(
                f'{action} is not a defined token operation.'
            )

        self.action = action
        self.data = data
        self.mandatory = mandatory
        self.token = token
        self.output = output

    @classmethod
    def from_dict(cls, data: dict):
        """
        load a TokenOperation from a dictionary of values

        The first of the keys 'sub', 'lookup', 'case', 'lpad', or
        'number style' that is present selects the action, and its
        value becomes the action's data.

        Raises:
            SyntaxError: if none of those keys is present
        """
        for key in ['sub', 'lookup', 'case', 'lpad', 'number style']:
            action_data = data.get(key)
            # compare against None so that falsy data such as
            # `lpad: 0` is still recognized
            if action_data is not None:
                action = key.replace(' ', '_')
                break
        else:
            raise SyntaxError(
                f'{data} does not specify a defined token operation.'
            )
        mandatory = data.get('mandatory', True)
        token = data.get('token')
        output = data.get('output')
        return cls(action, action_data, mandatory, token, output)

    def to_dict(self) -> dict:
        "save this TokenOperation to a dictionary of values"
        output = {}
        for key in ['token', 'output']:
            if self.__dict__.get(key):
                output[key] = self.__dict__[key]
        output[self.action] = self.data
        if not self.mandatory:
            output['mandatory'] = False

        # 'number_style' is written as 'number style'
        spaced_output = {k.replace('_', ' '):v for k, v in output.items()}

        return spaced_output

    def modify_dict(self, tokens: dict):
        """
        apply this operation to a dictionary of tokens,
        editing them as appropriate
        """
        if not tokens.get(self.token):
            return
        if self.output:
            tokens[self.output] = self.func(tokens[self.token])
        else:
            tokens[self.token] = self.func(tokens[self.token])

    def __call__(self, input_value):
        return self.func(input_value)

    def __repr__(self):
        return (
            f'TokenOperation(action="{self.action}", data="{self.data}"'
            + (f', mandatory=False' if not self.mandatory else '')
            + (f', token="{self.token}"' if self.token else '')
            + (f', output="{self.output}"' if self.output else '')
            + ')'
        )

    # ================ Token Processing Operations =================== #

    def _lookup(
        self,
        input: str,
        table: dict[re.Pattern, str],
        mandatory: bool=False,
    ) -> str:
        """
        Return the replacement for the first regex in the table that
        matches the whole input. When nothing matches, raise a
        SyntaxError if mandatory, or else return the input unchanged.
        """
        for pattern, repl in table.items():
            if pattern.fullmatch(input):
                return repl
        if mandatory:
            # show the regex source text, not the compiled objects
            regexes = [r.pattern for r in table.keys()]
            raise SyntaxError(f'{input} could not be found in {regexes}')
        else:
            return input

    def _set_case(self, input: str, case: str) -> str:
        "Return the input in 'upper', 'lower', or 'title' case"
        if case == 'upper':
            return input.upper()
        elif case == 'lower':
            return input.lower()
        elif case == 'title':
            return input.title()
        # __init__ rejects any other option; leave the token alone
        # rather than turning it into None
        return input

    def _left_pad(self, input: str, min_length: int, pad_char='0'):
        "Prepend pad_char until the input is min_length characters long"
        diff = min_length - len(input)
        if diff > 0:
            return (pad_char*diff + input)
        return input

    def _number_style(self, input: str, form: str, throw_error: bool=False):
        """
        Convert a number written as digits (optionally followed by a
        two-letter suffix, as in "2nd") or as one of the entries of
        number_words into the given form.

        When the input cannot be converted, raise a SyntaxError if
        throw_error is set, or else return the input unchanged.
        """
        # isdecimal() only accepts characters that int() can parse,
        # unlike isnumeric(), which also accepts e.g. fractions
        if input.isdecimal():
            value = int(input)
        elif input[:-2].isdecimal(): # e.g. "2nd"
            value = int(input[:-2])
        else:
            lowered = input.lower()
            for i, row in enumerate(number_words):
                if lowered in row:
                    value = i + 1
                    break
            else:
                if throw_error:
                    raise SyntaxError(
                        f'{lowered} cannot be recognized as a number'
                    )
                return input
        if form == 'digit':
            return str(value)
        # guard both ends: a value of 0 would otherwise wrap around to
        # the last row of number_words
        if not 1 <= value <= len(number_words):
            if throw_error:
                raise SyntaxError(
                    f'CiteURL cannot process the number {value}'
                )
            return input
        forms = ['roman', 'cardinal', 'ordinal']
        output = number_words[value - 1][forms.index(form)]
        if form == 'roman':
            return output.upper()
        return output

`__init__(action, data, mandatory=True, token=None, output=None)`

Parameters:

Name	Type	Description	Default
`action`	`str`	The kind of string manipulation that this operation will perform, using the given data. There are a few different options: 'sub': Regex substitution to perform on the text. Needs a list of two values: [PATTERN, REPLACEMENT] 'lookup': Check if the token matches any of the given regexes (via case-insensitive matching), and if so, replace it with the corresponding value. Needs a dictionary of `regex`: `replacement` pairs. 'case': Capitalize the token in the specified way. Options are 'upper', 'lower', and 'title'. 'lpad': Left pad the token with zeros until it is the specified number of characters long. Requires an int specifying the number of characters. You can also specify the padding character by providing a tuple: (MINIMUM_LENGTH, PADDING_CHARACTER). 'number_style': Assume that the token is a number, either in the form of digits, Roman numerals, or number words like "thirty-seven". Convert it into the specified number format, which must be one of these: `'cardinal', e.g. "twenty-seven" 'ordinal', e.g. "twenty-seventh" 'roman', e.g. 'XXVII' 'digit', e.g. '27' Note that number formatting only works for positive whole numbers that do not exceed 40.`	required
`data`		any data that a given action needs specified, as described above	required
`mandatory`	`bool`	whether a failed lookup or format action should invalidate the entire citation	`True`
`token`	`str`	Necessary for operations in StringBuilders. This value lets you provide the name of input token to use, allowing you to then use the modify_dict() method.	`None`
`output`	`str`	If this value is set, modify_dict() will save the operation's output to the dictionary key with this name instead of modifying the input token in place.	`None`

Source code in citeurl/tokens.py

def __init__(
    self,
    action: str,
    data,
    mandatory: bool = True,
    token: str = None,
    output: str = None,
):
    """
    Arguments:
        action: The kind of string manipulation that this operation
            will perform, using the given data. There are a few
            different options:

            'sub': Regex substitution to perform on the text. Needs
                a list of two values: [PATTERN, REPLACEMENT]

            'lookup': Check if the token matches any of the given
                regexes (via case-insensitive matching), and if so,
                replace it with the corresponding value. Needs a
                dictionary of `regex`: `replacement` pairs.

            'case': Capitalize the token in the specified way.
                Options are 'upper', 'lower', and 'title'.

            'lpad': Left pad the token with zeros until it is the
                specified number of characters long. Requires an
                int specifying the number of characters. You can
                also specify the padding character by providing a
                list or tuple:
                (MINIMUM_LENGTH, PADDING_CHARACTER).

            'number_style': Assume that the token is a number,
                either in the form of digits, Roman numerals, or
                number words like "thirty-seven". Convert it into
                the specified number format, which must be one of
                these:

                'cardinal', e.g. "twenty-seven"

                'ordinal', e.g. "twenty-seventh"

                'roman', e.g. 'XXVII'

                'digit', e.g. '27'

                Note that number formatting only works for positive
                whole numbers that do not exceed 40.

        data: any data that a given action needs specified, as
            described above

        mandatory: whether a failed lookup or format action should
            invalidate the entire citation

        token: Necessary for operations in StringBuilders. This
            value lets you provide the name of input token to use,
            allowing you to then use the modify_dict() method.

        output: If this value is set, modify_dict() will save the
            operation's output to the dictionary key with this name
            instead of modifying the input token in place.

    Raises:
        SyntaxError: if the action is not a defined operation, or
            if the data is not a valid option for a 'case' or
            'number_style' action
    """
    if action == 'sub':
        self.func = lambda x: re.sub(data[0], data[1], x)
    elif action == 'lookup':
        table = {
            re.compile(k, flags=re.I):v
            for k, v in data.items()
        }
        self.func = lambda x: self._lookup(x, table, mandatory)
    elif action == 'case':
        case_options = ['upper', 'lower', 'title']
        if data not in case_options:
            raise SyntaxError(
                f'{data} is not a valid case. Valid options: '
                f'{case_options}'
            )
        self.func = lambda x: self._set_case(x, data)
    elif action == 'lpad':
        # data is either a bare length or a (length, pad_char)
        # pair; YAML delivers the pair as a list
        if isinstance(data, (list, tuple)):
            pad_args = tuple(data)
        else:
            pad_args = (data,)
        self.func = lambda x: self._left_pad(x, *pad_args)
    elif action == 'number_style':
        action_options = ['cardinal', 'ordinal', 'roman', 'digit']
        if data not in action_options:
            raise SyntaxError(
                f'{data} is not a valid number style. Valid options: '
                f'{action_options}'
            )
        self.func = lambda x: self._number_style(x, data, mandatory)
    else:
        raise SyntaxError(
            f'{action} is not a defined token operation.'
        )

    self.action = action
    self.data = data
    self.mandatory = mandatory
    self.token = token
    self.output = output

`from_dict(data)` `classmethod`

load a TokenOperation from a dictionary of values

Source code in citeurl/tokens.py

@classmethod
def from_dict(cls, data: dict):
    """
    load a TokenOperation from a dictionary of values

    The first of the keys 'sub', 'lookup', 'case', 'lpad', or
    'number style' that is present selects the action, and its value
    becomes the action's data.

    Raises:
        SyntaxError: if none of those keys is present
    """
    for key in ['sub', 'lookup', 'case', 'lpad', 'number style']:
        action_data = data.get(key)
        # compare against None so that falsy data such as `lpad: 0`
        # is still recognized
        if action_data is not None:
            action = key.replace(' ', '_')
            break
    else:
        raise SyntaxError(
            f'{data} does not specify a defined token operation.'
        )
    mandatory = data.get('mandatory', True)
    token = data.get('token')
    output = data.get('output')
    return cls(action, action_data, mandatory, token, output)

`modify_dict(tokens)`

apply this operation to a dictionary of tokens, editing them as appropriate

Source code in citeurl/tokens.py

def modify_dict(self, tokens: dict):
    """
    run this operation on its input token inside the given dictionary

    Nothing happens when the input token is missing or empty.
    Otherwise the result is stored under the operation's output name
    if it has one, and over the input token if it does not.
    """
    current = tokens.get(self.token)
    if not current:
        return
    destination = self.output if self.output else self.token
    tokens[destination] = self.func(current)

`to_dict()`

save this TokenOperation to a dictionary of values

Source code in citeurl/tokens.py

def to_dict(self) -> dict:
    "convert this TokenOperation to a plain dictionary of values"
    fields = {}
    # token and output are only recorded when they are set
    for attr in ('token', 'output'):
        stored = self.__dict__.get(attr)
        if stored:
            fields[attr] = self.__dict__[attr]
    fields[self.action] = self.data
    if not self.mandatory:
        fields['mandatory'] = False

    # keys are written with spaces in place of underscores
    return {key.replace('_', ' '): val for key, val in fields.items()}

StringBuilder

A function to take a dictionary of values and use it to construct a piece of text from them. This is used for citation templates' name builders and URL builders.

Attributes:

Name	Type	Description
`parts`		A list of strings that will be concatenated to create the string. Parts may contain bracketed references to citations' token values as well as templates' metadata. If a part references a token whose value is not set, the part will be omitted from the created string.
`edits`		A list of TokenOperations that will be performed on the provided tokens before the string is constructed. If the edits have `output` values, it is possible for them to define entirely new tokens for the sole purpose of building the string.
`defaults`		A dictionary of default token values to use when not overwritten by the citation. Generally these are provided by the template's meta attribute.

Source code in citeurl/tokens.py

class StringBuilder:
    """
    A function to take a dictionary of values and use it to construct a
    piece of text from them. This is used for citation templates' name
    builders and URL builders.

    Attributes:
        parts: A list of strings that will be concatenated to create the
            string. Parts may contain bracketed references to citations'
            token values as well as templates' metadata. If a part
            references a token whose value is not set, the part will be
            omitted from the created string.
        edits: A list of TokenOperations that will be performed on the
            provided tokens before the string is constructed. If the
            edits have `output` values, it is possible for them to
            define entirely new tokens for the sole purpose of building
            the string.
        defaults: A dictionary of default token values to use when not
            overwritten by the citation. Generally these are provided by
            the template's meta attribute.
    """
    def __init__(
        self,
        parts: list[str],
        edits: 'list[TokenOperation]' = None,
        defaults: dict[str, str] = None,
    ):
        self.parts = parts
        # each instance gets its own containers; shared `[]` / `{}`
        # defaults would let a change made through one StringBuilder
        # show up in every other one
        self.edits = [] if edits is None else edits
        self.defaults = {} if defaults is None else defaults

    @classmethod
    def from_dict(cls, data: dict):
        """
        load StringBuilder from dictionary of values

        The dictionary must contain 'parts'; it may contain 'edits' (a
        list of TokenOperation dictionaries) and 'defaults'.
        """
        edits = [
            TokenOperation.from_dict(o)
            for o in data.get('edits', [])
        ]
        parts = data['parts']
        defaults = data.get('defaults') or {}
        return cls(parts, edits, defaults)

    def to_dict(self) -> dict:
        "save StringBuilder to a dictionary of values"
        # defaults are not written out
        output = {'parts': self.parts}
        if self.edits:
            output['edits'] = [op.to_dict() for op in self.edits]
        return output

    def __call__(
        self,
        tokens: dict[str, str],
    ) -> str:
        """
        Build the string from the given tokens, or return None if no
        part could be filled in. The given dictionary is not modified.
        """
        # values from the citation take precedence over the defaults
        merged = dict(self.defaults)
        merged.update(tokens)
        # tokens with empty values count as not being set at all
        tokens = {k:v for k,v in merged.items() if v}
        for op in self.edits:
            try:
                op.modify_dict(tokens)
            except SyntaxError: # token operation failed; just skip it
                pass
        string_parts = []
        for part in self.parts:
            try:
                string_parts.append(part.format(**tokens))
            except KeyError: # skip parts that reference a nonexistent token
                pass
            # if a mandatory TokenOperation failed, don't return a URL
            except SyntaxError:
                string_parts = []
                break
        return ''.join(string_parts) or None

    def __repr__(self):
        return (
            f'StringBuilder(parts={self.parts}'
            + (f', edits={self.edits}' if self.edits else '')
            + (f', defaults={self.defaults}' if self.defaults else '')
            + ')'
        )

`from_dict(data)` `classmethod`

load StringBuilder from dictionary of values

Source code in citeurl/tokens.py

@classmethod
def from_dict(cls, data: dict):
    "build a StringBuilder out of a dictionary of values"
    operations = []
    for spec in data.get('edits', []):
        operations.append(TokenOperation.from_dict(spec))
    # 'parts' is required; an absent or empty 'defaults' becomes {}
    return cls(data['parts'], operations, data.get('defaults') or {})

`to_dict()`

save StringBuilder to a dictionary of values

Source code in citeurl/tokens.py

def to_dict(self) -> dict:
    "convert this StringBuilder to a plain dictionary of values"
    serialized = dict(parts=self.parts)
    if not self.edits:
        return serialized
    # edits are only recorded when there are any
    serialized['edits'] = [edit.to_dict() for edit in self.edits]
    return serialized

insert_links()

Convenience function to hyperlink all citations in a text. For more info, see Citator.insert_links().

Source code in citeurl/citator.py

def insert_links(
    text: str,
    attrs: dict = {'class': 'citation'},
    add_title: bool = True,
    URL_optional: bool = False,
    redundant_links: bool = True,
    id_breaks: re.Pattern = None,
    ignore_markup: bool = True,
    markup_format: str = 'html',
    citator: 'Citator' = None,
):
    """
    Convenience function to hyperlink all citations in a text. For more
    info, see Citator.insert_links().

    Every argument except `citator` is handed to the citator's
    insert_links() method unchanged. When no citator is given, the
    default citator is used.
    """
    citator = citator or _get_default_citator()
    # note: attrs is a shared default dictionary; it is only passed
    # along here, never modified
    return citator.insert_links(
        text = text,
        attrs = attrs,
        add_title = add_title,
        URL_optional = URL_optional,
        redundant_links = redundant_links,
        id_breaks = id_breaks,
        ignore_markup = ignore_markup,
        markup_format = markup_format,
    )

cite()

Convenience function to find a single citation in text, or None. See Citator.cite() for more info.

Source code in citeurl/citator.py

def cite(
    text: str,
    broad: bool = True,
    citator: Citator = None,
) -> Citation:
    """
    Convenience function that looks for a single citation in the text
    and returns it, or None. The default citator is used unless one is
    given. See Citator.cite() for more info.
    """
    if not citator:
        citator = _get_default_citator()
    found = citator.cite(text, broad=broad)
    return found

list_cites()

Convenience function to list all citations in a text. For more info, see Citator.list_cites().

Source code in citeurl/citator.py

def list_cites(text, citator: Citator = None, id_breaks=None):
    """
    Convenience function that returns every citation found in a text.
    The default citator is used unless one is given. For more info,
    see Citator.list_cites().
    """
    if not citator:
        citator = _get_default_citator()
    found = citator.list_cites(text, id_breaks=id_breaks)
    return found

DEFAULT_CITATOR

The insert_links, cite, and list_cites functions all make use of a built-in citator that is not defined by the library user. By default, this is the citator that is returned when you run Citator(). However, it is possible to add additional templates to this default citator, by installing the wonderful AppDirs library and placing the templates in one of the following directories:

Linux: ~/.config/citeurl

Mac: ~/Library/Preferences/citeurl

Windows 7+: C:\Users\<username>\AppData\Local\raindrum\citeurl

Library Reference

Citator

__init__(defaults=['caselaw', 'general federal law', 'specific federal laws', 'state law', 'secondary sources'], yaml_paths=[], templates={})

cite(text, broad=True)

from_yaml(yaml) classmethod

insert_links(text, attrs={'class': 'citation'}, add_title=True, URL_optional=False, redundant_links=True, id_breaks=None, ignore_markup=True, markup_format='html')

list_authorities(text, ignored_tokens=['subsection', 'clause', 'pincite', 'paragraph'], known_authorities=[], sort_by_cites=True, id_breaks=None)

list_cites(text, id_breaks=None)

load_yaml(yaml)

to_yaml()

Citation

__contains__(other_cite)

__eq__(other_cite)

Template

__init__(name, tokens={}, meta={}, patterns=[], broad_patterns=[], shortform_patterns=[], idform_patterns=[], name_builder=None, URL_builder=None, inherit_template=None)

cite(text, broad=True, span=(0))

from_dict(name, values, inheritables={}) classmethod

list_longform_cites(text, broad=False, span=(0,))

to_dict()

to_yaml()

TokenType

from_dict(name, data) classmethod

to_dict()

TokenOperation

__init__(action, data, mandatory=True, token=None, output=None)

from_dict(data) classmethod

modify_dict(tokens)

to_dict()

StringBuilder

from_dict(data) classmethod

to_dict()

insert_links()

cite()

list_cites()

DEFAULT_CITATOR

`__init__(defaults=['caselaw', 'general federal law', 'specific federal laws', 'state law', 'secondary sources'], yaml_paths=[], templates={})`

`cite(text, broad=True)`

`from_yaml(yaml)` `classmethod`

`insert_links(text, attrs={'class': 'citation'}, add_title=True, URL_optional=False, redundant_links=True, id_breaks=None, ignore_markup=True, markup_format='html')`

`list_authorities(text, ignored_tokens=['subsection', 'clause', 'pincite', 'paragraph'], known_authorities=[], sort_by_cites=True, id_breaks=None)`

`list_cites(text, id_breaks=None)`

`load_yaml(yaml)`

`to_yaml()`

`__contains__(other_cite)`

`__eq__(other_cite)`

`__init__(name, tokens={}, meta={}, patterns=[], broad_patterns=[], shortform_patterns=[], idform_patterns=[], name_builder=None, URL_builder=None, inherit_template=None)`

`cite(text, broad=True, span=(0))`

`from_dict(name, values, inheritables={})` `classmethod`

`list_longform_cites(text, broad=False, span=(0,))`

`to_dict()`

`to_yaml()`

`from_dict(name, data)` `classmethod`

`to_dict()`

`__init__(action, data, mandatory=True, token=None, output=None)`

`from_dict(data)` `classmethod`

`modify_dict(tokens)`

`to_dict()`

`from_dict(data)` `classmethod`

`to_dict()`