helper.arxiv

Functions for downloading (the source code of) articles from arXiv

import glob
import tempfile
from unittest.mock import patch, MagicMock
import shutil


from fastcore.test import *
from fastcore.test import test_is
from nbdev.showdoc import show_doc

from trouver.helper.tests import _test_directory

source

arxiv_id

 arxiv_id (arxiv_id_or_url:str)

*Return the arXiv ID from a str which is either the arXiv ID itself or the URL of the arXiv article.

Raises - ValueError - If the input does not contain a valid arXiv ID.*

# Each pair is (input given to `arxiv_id`, arXiv ID expected back). The inputs
# cover bare IDs and `abs` URLs, with and without a version suffix, as well as
# the category-prefixed form (`math/0512085`).
id_cases = [
    ("1234.5678", "1234.5678"),
    ("https://arxiv.org/abs/1234.5678", "1234.5678"),
    ("1234.5678v1", "1234.5678v1"),
    ("https://arxiv.org/abs/1234.5678v1", "1234.5678v1"),
    ("math/0512085", "math/0512085"),
    ("https://arxiv.org/abs/math/0512085", "math/0512085"),
]
for given, expected in id_cases:
    test_eq(arxiv_id(given), expected)

The arxiv_search function can be used to obtain an arxiv.Search object, which is used for downloading arxiv files.

source

arxiv_search

 arxiv_search (arxiv_ids:Union[str,list[str]],
               client:Optional[arxiv.Client]=None, results:bool=True)

	Type	Default	Details
arxiv_ids	Union		The ID of a single arXiv article or multiple arxiv articles
client	Optional	None	an arxiv API Client. If `None`, create one on the spot.
results	bool	True	If `True`, return the search results (`Result` objects); otherwise, return a `Search` object.
Returns	Union

# Specify the arXiv ID of the paper you want to download.
# NOTE: the variable is deliberately not named `arxiv_id`, because that would
# rebind the `arxiv_id` function documented above and break later calls to it.
# article_id = "2106.10586"  # Replace with your desired arXiv ID
article_id = "math/0512085"  # Replace with your desired arXiv ID

# Create a search object with the specified arXiv ID
# client = Client()
# search = Search(id_list=[article_id])
# results = client.results(search)
results = arxiv_search(article_id, results=True)
# Bare expression so that the notebook displays the returned object.
results

<itertools.islice>

listy = list(results)
print(listy)

[arxiv.Result(entry_id='http://arxiv.org/abs/math/0512085v2', updated=datetime.datetime(2006, 9, 2, 22, 10, 49, tzinfo=datetime.timezone.utc), published=datetime.datetime(2005, 12, 5, 16, 13, 53, tzinfo=datetime.timezone.utc), title='Finding large Selmer rank via an arithmetic theory of local constants', authors=[arxiv.Result.Author('Barry Mazur'), arxiv.Result.Author('Karl Rubin')], summary='We obtain lower bounds for Selmer ranks of elliptic curves over dihedral\nextensions of number fields.\n  Suppose $K/k$ is a quadratic extension of number fields, $E$ is an elliptic\ncurve defined over $k$, and $p$ is an odd prime. Let $F$ denote the maximal\nabelian $p$-extension of $K$ that is unramified at all primes where $E$ has bad\nreduction and that is Galois over $k$ with dihedral Galois group (i.e., the\ngenerator $c$ of $Gal(K/k)$ acts on $Gal(F/K)$ by -1). We prove (under mild\nhypotheses on $p$) that if the rank of the pro-$p$ Selmer group $S_p(E/K)$ is\nodd, then the rank of $S_p(E/L)$ is at least $[L:K]$ for every finite extension\n$L$ of $K$ in $F$.', comment='Revised and improved. To appear in Annals of Mathematics', journal_ref=None, doi=None, primary_category='math.NT', categories=['math.NT', '11G05, 11R20 (Primary) 11G10, 11R23, 14G05 (Secondary)'], links=[arxiv.Result.Link('http://arxiv.org/abs/math/0512085v2', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/math/0512085v2', title='pdf', rel='related', content_type=None)])]

listy[0].entry_id

'http://arxiv.org/abs/math/0512085v2'

mock_result_2 = arxiv.Result(
    entry_id='http://arxiv.org/abs/2106.10586v4',
    updated=datetime.datetime(2024, 6, 28, 1, 36, 47, tzinfo=datetime.timezone.utc),
    published=datetime.datetime(2021, 6, 19, 23, 50, 56, tzinfo=datetime.timezone.utc),
    title='Global $\\mathbb{A}^1$ degrees of covering maps between modular curves',
    authors=[arxiv.Result.Author('Hyun Jong Kim'), arxiv.Result.Author('Sun Woo Park')],
    summary="Given a projective smooth curve $X$ over any field $k$, we discuss two\nnotions of global $\\mathbb{A}^1$ degree of a finite morphism of smooth curves\n$f: X \\to \\mathbb{P}^1_k$ satisfying certain conditions. One originates from\ncomputing the Euler number of the pullback of the line bundle\n$\\mathscr{O}_{\\mathbb{P}^1}(1)$ as a generalization of Kass and Wickelgren's\nconstruction of Euler numbers. The other originates from the construction of\nglobal $\\mathbb{A}^1$ degree of morphisms of projective curves by Kass, Levine,\nSolomon, and Wickelgren as a generalization of Morel's construction of\n$\\mathbb{A}^1$-Brouwer degree of a morphism $f: \\mathbb{P}^1_k \\to\n\\mathbb{P}^1_k$. We prove that under certain conditions on $N$, both notions of\nglobal $\\mathbb{A}^1$ degrees of covering maps between modular curves $X_0(N)\n\\to X(1)$, $X_1(N) \\to X(1)$, and $X(N) \\to X(1)$ agree to be equal to sums of\nhyperbolic elements $\\langle 1 \\rangle + \\langle -1 \\rangle$ in the\nGrothendieck-Witt ring $\\mathrm{GW}(k)$ for any field $k$ whose characteristic\nis coprime to $N$ and the pullback of $\\mathscr{O}_{\\mathbb{P}^1}(1)$ is\nrelatively oriented.",
    comment='35 pages. Modified various statements to more precisely speak of\n  "relatively oriented" maps or vector bundles instead of "relatively\n  orientable" maps or vector bundles where appropriate --- the former phrasing\n  suggests that a relative orientation is fixed. Additional minor edits',
    journal_ref=None,
    doi=None,
    primary_category='math.AG',
    categories=['math.AG', 'math.NT', '14F42, 14G35'],
    links=[arxiv.Result.Link('http://arxiv.org/abs/2106.10586v4', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/2106.10586v4', title='pdf', rel='related', content_type=None)])

mock_result_3 = arxiv.Result(
    entry_id='http://arxiv.org/abs/math/0512085v2',
    updated=datetime.datetime(2006, 9, 2, 22, 10, 49, tzinfo=datetime.timezone.utc),
    published=datetime.datetime(2005, 12, 5, 16, 13, 53, tzinfo=datetime.timezone.utc),
    title='Finding large Selmer rank via an arithmetic theory of local constants',
    authors=[arxiv.Result.Author('Barry Mazur'), arxiv.Result.Author('Karl Rubin')],
    summary='We obtain lower bounds for Selmer ranks of elliptic curves over dihedral\nextensions of number fields.\n  Suppose $K/k$ is a quadratic extension of number fields, $E$ is an elliptic\ncurve defined over $k$, and $p$ is an odd prime. Let $F$ denote the maximal\nabelian $p$-extension of $K$ that is unramified at all primes where $E$ has bad\nreduction and that is Galois over $k$ with dihedral Galois group (i.e., the\ngenerator $c$ of $Gal(K/k)$ acts on $Gal(F/K)$ by -1). We prove (under mild\nhypotheses on $p$) that if the rank of the pro-$p$ Selmer group $S_p(E/K)$ is\nodd, then the rank of $S_p(E/L)$ is at least $[L:K]$ for every finite extension\n$L$ of $K$ in $F$.',
    comment='Revised and improved. To appear in Annals of Mathematics',
    journal_ref=None,
    doi=None,
    primary_category='math.NT',
    categories=['math.NT', '11G05, 11R20 (Primary) 11G10, 11R23, 14G05 (Secondary)'],
    links=[arxiv.Result.Link('http://arxiv.org/abs/math/0512085v2', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/math/0512085v2', title='pdf', rel='related', content_type=None)])

Metadata extraction

source

extract_metadata

 extract_metadata (results:Union[list[arxiv.Result],arxiv.Result])

Return the metadata from the arxiv search results

	Type	Details
results	Union
Returns	list	Each dict corresponds to the metadata for each result.

mock_result = arxiv.Result(
    entry_id='http://arxiv.org/abs/1605.08386v1',
    updated=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
    published=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
    title='Heat-bath random walks with Markov bases',
    authors=[arxiv.Result.Author('Caprice Stanley'), arxiv.Result.Author('Tobias Windisch')],
    summary='Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
    comment='20 pages, 3 figures',
    journal_ref=None,
    doi=None,
    primary_category='math.CO',
    categories=['math.CO', 'math.ST', 'stat.TH', 'Primary: 05C81, Secondary: 37A25, 11P21'],
    links=[arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1',
                             title=None, rel='alternate', content_type=None),
           arxiv.Result.Link('http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related',
                             content_type=None),])
extract_metadata(mock_result)

[{'arxiv_id': '1605.08386v1',
  'authors': ['Caprice Stanley', 'Tobias Windisch'],
  'title': 'Heat-bath random walks with Markov bases',
  'summary': 'Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
  'primary_category': 'math.CO',
  'categories': ['math.CO',
   'math.ST',
   'stat.TH',
   'Primary: 05C81, Secondary: 37A25, 11P21'],
  'published': datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
  'updated': datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
  'doi': None,
  'comment': '20 pages, 3 figures',
  'journal_ref': None,
  'links': [arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1', title=None, rel='alternate', content_type=None),
   arxiv.Result.Link('http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related', content_type=None)]}]

source

ArxivMetadataEncoder

 ArxivMetadataEncoder (skipkeys=False, ensure_ascii=True,
                       check_circular=True, allow_nan=True,
                       sort_keys=False, indent=None, separators=None,
                       default=None)

JSON encoder to accompany the extract_metadata function when using json.dump.

# Your dictionary with datetime and arxiv.Result.Link objects
data = {
    "timestamp": datetime.datetime.now(),
    "link": arxiv.Result.Link(href="https://example.com", title="Example")
}

# Convert the dictionary to JSON
json_data = json.dumps(data, cls=ArxivMetadataEncoder, indent=4)
print(json_data)

{
    "timestamp": "2024-12-26T15:31:58.139186",
    "link": "https://example.com"
}

Downloading arxiv files

source

extract_last_names

 extract_last_names (authors:list[str])

The extract_last_names function is a convenient helper function for naming downloaded arxiv files.

# Example usage
authors = ["John Smith", "Maria Garcia-Lopez", "Pieter de Jong", "Xin Li"]
last_names = extract_last_names(authors)
test_eq(last_names, ['Smith', 'Garcia-Lopez', 'de Jong', 'Li'])

test_eq(extract_last_names([author.name for author in mock_result.authors]), ['Stanley', 'Windisch'])

source

create_acronym

 create_acronym (title)

source

folder_name_for_source

 folder_name_for_source (result:arxiv.Result, lowercase:bool=True)

folder_name_for_source and create_acronym are convenient helper functions for naming folders newly created when downloading source code for arxiv files; the author of trouver roughly uses these conventions for organizing source code files.

# Sample titles whose acronyms (as produced by `create_acronym`) are printed below
titles = [
    "Lectures on K3 surfaces",
    "Positivity in Algebraic Geometry I",
    "On the Cohomology of Finite Groups",
    "An Introduction to A-infinity Algebras",
    "Quantum Field Theory and the Standard Model",
    "Category O for gl(n,C) and the Cohomology of Flag Varieties"
]

for paper_title in titles:
    # Three lines per title: the title, its acronym, then an empty separator line.
    print(
        f"Title: {paper_title}",
        f"Acronym: {create_acronym(paper_title)}",
        "",
        sep="\n",
    )

Title: Lectures on K3 surfaces
Acronym: lks

Title: Positivity in Algebraic Geometry I
Acronym: pagI

Title: On the Cohomology of Finite Groups
Acronym: cfg

Title: An Introduction to A-infinity Algebras
Acronym: iAia

Title: Quantum Field Theory and the Standard Model
Acronym: qftsm

Title: Category O for gl(n,C) and the Cohomology of Flag Varieties
Acronym: cOgnCcfv

mock_result = arxiv.Result(
    entry_id='http://arxiv.org/abs/1605.08386v1',
    updated=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
    published=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
    title='Heat-bath random walks with Markov bases',
    authors=[arxiv.Result.Author('Caprice Stanley'), arxiv.Result.Author('Tobias Windisch')],
    summary='Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
    comment='20 pages, 3 figures',
    journal_ref=None,
    doi=None,
    primary_category='math.CO',
    categories=['math.CO', 'math.ST', 'stat.TH', 'Primary: 05C81, Secondary: 37A25, 11P21'],
    links=[arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1',
                             title=None, rel='alternate', content_type=None),
           arxiv.Result.Link('http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related',
                             content_type=None),])
extract_metadata(mock_result)
print(mock_result.title)
output = folder_name_for_source(mock_result)
print(output)
assert ' ' not in output
assert output.startswith('stanley_windisch')

Heat-bath random walks with Markov bases
stanley_windisch_hbrwmb

source

file_name_for_pdf

 file_name_for_pdf (result:arxiv.Result)

file_name_for_pdf could be a good convention for naming downloaded pdf files of arxiv articles. Pass this as the file_or_folder_names parameter for download_from_results.

mock_result = arxiv.Result(
    entry_id='http://arxiv.org/abs/1605.08386v1',
    updated=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
    published=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
    title='Heat-bath random walks with Markov bases',
    authors=[arxiv.Result.Author('Caprice Stanley'), arxiv.Result.Author('Tobias Windisch')],
    summary='Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
    comment='20 pages, 3 figures',
    journal_ref=None,
    doi=None,
    primary_category='math.CO',
    categories=['math.CO', 'math.ST', 'stat.TH', 'Primary: 05C81, Secondary: 37A25, 11P21'],
    links=[arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1',
                             title=None, rel='alternate', content_type=None),
           arxiv.Result.Link('http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related',
                             content_type=None),])
file_name_for_pdf(mock_result)

'Stanley, Windisch - Heat-bath random walks with Markov bases'

source

extract_tex_from_gz

 extract_tex_from_gz (filepath)

source

get_tex_filename_from_gz

 get_tex_filename_from_gz (filepath)

source

read_gz_file

 read_gz_file (filepath)

source

analyze_arxiv_tarfile

 analyze_arxiv_tarfile (filepath:os.PathLike)

*Analyzes the contents of an arXiv download file, which can be either a tar.gz archive or a plain .gz file.

This function attempts to determine the structure of the file downloaded from arXiv. It can identify several different types of content structures commonly found in arXiv downloads.

Parameters: filepath (Union[str, Path]): The path to the file to be analyzed. Can be a string or a Path object.

Returns: Literal[“nested_archive”, “direct_tex”, “unknown_tar_structure”, “plain_gz”, “invalid_file”]: - “nested_archive”: If the tar.gz contains another compressed file - “direct_tex”: If the tar.gz contains .tex files directly - “unknown_tar_structure”: If the tar.gz structure doesn’t match known patterns - “plain_gz”: If the file is a plain .gz file (not a tar.gz) - “invalid_file”: If the file is neither a valid tar.gz nor a valid .gz file

Raises: No exceptions are raised; all errors are handled internally and returned as “invalid_file”. Determine what kind of contents the file has.*

	Type	Details
filepath	PathLike	The path to the tar file.
Returns	Literal

# Usage
# file = _test_directory() / 'arxiv_file_download_example_folder' / 'math_0512085v2.Finding_large_Selmer_rank_via_an_arithmetic_theory_of_local_constants.tar.gz'
# result = analyze_arxiv_tarfile(file)
# print(f"The tar.gz file contains: {result}")

# # Usage
# # content = read_gz_file(file)
# # print(content[:100])  # Print the first 100 characters

# filename = get_tex_filename_from_gz(file)
# if filename:
#     print(f"The .tex file inside the .gz archive is: {filename}")
# else:
#     print("No .tex file found in the archive.")

# extracted_file = extract_tex_from_gz(file)
# print(f"Extracted .tex file: {extracted_file}")

The tar.gz file contains: plain_gz
No .tex file found in the archive.
Extracted .tex file: math_0512085v2.Finding_large_Selmer_rank_via_an_arithmetic_theory_of_local_constants.tar.tex

source

download_from_results

 download_from_results (results:arxiv.Result|list[arxiv.Result],
                        dir:os.PathLike, source:bool=True,
                        decompress_compressed_file:bool=True, file_or_fold
                        er_names:Union[NoneType,str,list[str],Callable[[ar
                        xiv.Result],str]]=<function
                        folder_name_for_source>,
                        delete_compressed_file:bool=True,
                        download_metadata:bool=True, verbose:bool=False)

*Download either the source files or pdfs of the arxiv article encoded in the results.

If source = True and decompress_compressed_file = True, then
- Download the source file/folder into a newly created folder (whose name is specified by file_or_folder_names) within dir and decompress the source (if applicable) in this newly created folder.
- If delete_compressed_file = True, then delete the compressed file.
If source = False, then just download a pdf.

For file_or_folder_names, the recommended Callable arguments are folder_name_for_source for downloading source files and file_name_for_pdf for downloading pdf files.*

	Type	Default	Details
results	arxiv.Result \| list[arxiv.Result]
dir	PathLike		The directory into which to download the files
source	bool	True	If `True`, download the source file. Otherwise, download a pdf file.
decompress_compressed_file	bool	True	If `True` and if `source` is `True`, then decompress the source file after downloading it.
file_or_folder_names	Union	folder_name_for_source	If `None`, then the file/folder is named the arxiv id. If a `str` (in which case `results` must be a single `Result` or a `list[Result]` of length 1) or `list[str]` (whose length must equal that of `results`), then each file/folder is named by the specified corresponding `str`. If `Callable[Result, str]`, then each file/folder is named using the specified `Callable`
delete_compressed_file	bool	True	If `True` and if `source` and `decompress_compressed_file` are `True`, then delete the compressed source file after downloading and then uncompressing it.
download_metadata	bool	True	If `True`, and if `source` is `True`, then create a file called `metadata.json` and put it into the newly created folder, unless a file called `metadata.json` already exists, in which case, a unique file name is created
verbose	bool	False
Returns	list		Each `Path` is the folder in which the source files are newly downloaded or the path to the pdf file that is newly downloaded.

download_from_results downloads an arxiv article (the source or a pdf).

mock_result_1 = arxiv.Result(
    entry_id='http://arxiv.org/abs/1605.08386v1',
    updated=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
    published=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
    title='Heat-bath random walks with Markov bases',
    authors=[arxiv.Result.Author('Caprice Stanley'), arxiv.Result.Author('Tobias Windisch')],
    summary='Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
    comment='20 pages, 3 figures',
    journal_ref=None,
    doi=None,
    primary_category='math.CO',
    categories=['math.CO', 'math.ST', 'stat.TH', 'Primary: 05C81, Secondary: 37A25, 11P21'],
    links=[arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1',
                            title=None, rel='alternate', content_type=None),
        arxiv.Result.Link('http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related',
                            content_type=None),])
mock_result_2 = arxiv.Result(
    entry_id='http://arxiv.org/abs/2106.10586v4',
    updated=datetime.datetime(2024, 6, 28, 1, 36, 47, tzinfo=datetime.timezone.utc),
    published=datetime.datetime(2021, 6, 19, 23, 50, 56, tzinfo=datetime.timezone.utc),
    title='Global $\\mathbb{A}^1$ degrees of covering maps between modular curves',
    authors=[arxiv.Result.Author('Hyun Jong Kim'), arxiv.Result.Author('Sun Woo Park')],
    summary="Given a projective smooth curve $X$ over any field $k$, we discuss two\nnotions of global $\\mathbb{A}^1$ degree of a finite morphism of smooth curves\n$f: X \\to \\mathbb{P}^1_k$ satisfying certain conditions. One originates from\ncomputing the Euler number of the pullback of the line bundle\n$\\mathscr{O}_{\\mathbb{P}^1}(1)$ as a generalization of Kass and Wickelgren's\nconstruction of Euler numbers. The other originates from the construction of\nglobal $\\mathbb{A}^1$ degree of morphisms of projective curves by Kass, Levine,\nSolomon, and Wickelgren as a generalization of Morel's construction of\n$\\mathbb{A}^1$-Brouwer degree of a morphism $f: \\mathbb{P}^1_k \\to\n\\mathbb{P}^1_k$. We prove that under certain conditions on $N$, both notions of\nglobal $\\mathbb{A}^1$ degrees of covering maps between modular curves $X_0(N)\n\\to X(1)$, $X_1(N) \\to X(1)$, and $X(N) \\to X(1)$ agree to be equal to sums of\nhyperbolic elements $\\langle 1 \\rangle + \\langle -1 \\rangle$ in the\nGrothendieck-Witt ring $\\mathrm{GW}(k)$ for any field $k$ whose characteristic\nis coprime to $N$ and the pullback of $\\mathscr{O}_{\\mathbb{P}^1}(1)$ is\nrelatively oriented.",
    comment='35 pages. Modified various statements to more precisely speak of\n  "relatively oriented" maps or vector bundles instead of "relatively\n  orientable" maps or vector bundles where appropriate --- the former phrasing\n  suggests that a relative orientation is fixed. Additional minor edits',
    journal_ref=None,
    doi=None,
    primary_category='math.AG',
    categories=['math.AG', 'math.NT', '14F42, 14G35'],
    links=[arxiv.Result.Link('http://arxiv.org/abs/2106.10586v4', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/2106.10586v4', title='pdf', rel='related', content_type=None)])


mock_result_3 = arxiv.Result(
    entry_id='http://arxiv.org/abs/math/0512085v2',
    updated=datetime.datetime(2006, 9, 2, 22, 10, 49, tzinfo=datetime.timezone.utc),
    published=datetime.datetime(2005, 12, 5, 16, 13, 53, tzinfo=datetime.timezone.utc),
    title='Finding large Selmer rank via an arithmetic theory of local constants',
    authors=[arxiv.Result.Author('Barry Mazur'), arxiv.Result.Author('Karl Rubin')],
    summary='We obtain lower bounds for Selmer ranks of elliptic curves over dihedral\nextensions of number fields.\n  Suppose $K/k$ is a quadratic extension of number fields, $E$ is an elliptic\ncurve defined over $k$, and $p$ is an odd prime. Let $F$ denote the maximal\nabelian $p$-extension of $K$ that is unramified at all primes where $E$ has bad\nreduction and that is Galois over $k$ with dihedral Galois group (i.e., the\ngenerator $c$ of $Gal(K/k)$ acts on $Gal(F/K)$ by -1). We prove (under mild\nhypotheses on $p$) that if the rank of the pro-$p$ Selmer group $S_p(E/K)$ is\nodd, then the rank of $S_p(E/L)$ is at least $[L:K]$ for every finite extension\n$L$ of $K$ in $F$.',
    comment='Revised and improved. To appear in Annals of Mathematics',
    journal_ref=None,
    doi=None,
    primary_category='math.NT',
    categories=['math.NT', '11G05, 11R20 (Primary) 11G10, 11R23, 14G05 (Secondary)'],
    links=[arxiv.Result.Link('http://arxiv.org/abs/math/0512085v2', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/math/0512085v2', title='pdf', rel='related', content_type=None)])


# Shared fixtures for the `download_from_results` examples in this section.
single_result = mock_result_1
multiple_results = [mock_result_1, mock_result_2] 
# Folder names that the default `file_or_folder_names` callable
# (`folder_name_for_source`) produces for the two mock results.
folder_name_1 = folder_name_for_source(mock_result_1)
folder_name_2 = folder_name_for_source(mock_result_2)

# Work inside a throwaway directory created under the current working
# directory; it is removed when the `with` block exits.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
    temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
    # Start from a copy of the example folder shipped with the test data.
    shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
    # 1. Single Result vs. List of Results
    # Test with single Result
    downloaded_paths = download_from_results(mock_result_1, temp_vault, source=True)
    # The source must land in a folder named by `folder_name_for_source`,
    # and the call must report at least one downloaded path.
    assert (temp_vault / folder_name_1).exists()
    assert downloaded_paths

    # Uncomment the two debugging lines below to inspect the folder by hand.
    # os.startfile(temp_vault)
    # NOTE(review): mock_result_3 (math/0512085) presumably exercises a source
    # download that is not a tar archive - confirm; nothing is asserted here.
    download_from_results(mock_result_3, temp_vault, source=True)
    # input()

We can also pass multiple results to download_from_results.

with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
    temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
    shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
    # A list of results is downloaded in a single call.
    download_from_results(multiple_results, temp_vault, source=True)
    # One folder, named by the default naming callable, is expected per result.
    for expected_folder in (folder_name_1, folder_name_2):
        assert (temp_vault / expected_folder).exists()

Specifying source=False downloads the pdf instead of the source files.

with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
    temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
    shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
    # 2. Source vs. PDF download
    # With the default naming callable the pdf is expected at `<folder name>.pdf`.
    expected_pdf = temp_vault / f'{folder_name_1}.pdf'
    download_from_results(single_result, temp_vault, source=False)
    assert expected_pdf.exists()

By specifying source=True and decompress_compressed_file=False, we can just download the compressed file.

with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
    temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
    shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
    # 3. Decompression options
    download_from_results(single_result, temp_vault, source=True, decompress_compressed_file=False)
    # The compressed archive must still be present in the download folder.
    archive_pattern = str(temp_vault / folder_name_1 / '*.tar.gz')
    assert glob.glob(archive_pattern)

The folder or pdf file can be given a custom name.

with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
    temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
    shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
    # 4. File/folder naming
    # A single `str` names the folder of a single result.
    download_from_results(single_result, temp_vault, file_or_folder_names='custom_name')
    assert (temp_vault / 'custom_name').exists()

    # A `list[str]` supplies one name per result.
    download_from_results(multiple_results, temp_vault, file_or_folder_names=['name1', 'name2'])
    for custom_name in ('name1', 'name2'):
        assert (temp_vault / custom_name).exists()

delete_compressed_file can be set to False to preserve the compressed file after decompressing.

with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
    temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
    shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
    # 5. Compressed file handling
    download_from_results(single_result, temp_vault, delete_compressed_file=False)
    source_folder = temp_vault / folder_name_1
    # The kept archive may be either a `.tar.gz` or a plain `.gz` file.
    kept_archives = (
        glob.glob(str(source_folder / '*.tar.gz'))
        + glob.glob(str(source_folder / '*.gz'))
    )
    assert kept_archives

By default, if the source is downloaded into a folder, then the metadata of the arxiv article is stored in a json file.

# Throwaway working directory under the current working directory; it is
# removed when the `with` block exits.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
    temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
    shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)

    # 6. Metadata file
    # `download_metadata=True` is also the default; it is spelled out here
    # to make the example explicit.
    download_from_results(single_result, temp_vault, download_metadata=True)
    assert (temp_vault / folder_name_1 / 'metadata.json').exists()

    # 7. Edge cases
    # An empty list must be accepted without raising; nothing else is checked.
    download_from_results([], temp_vault)  # Empty list
    # Not exercised (kept for reference): a lookup of a non-existent arXiv ID.
    # Test with non-existent arxiv ID (should handle gracefully)
    # non_existent = next(arxiv.Search(id_list=['0000.00000']).results())
    # download_from_results(non_existent, temp_vault)

# Throwaway working directory under the current working directory; it is
# removed when the `with` block exits.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
    temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
    shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
    # 8. Folder creation (duplicate handling)
    # Download the same result twice into the same directory.
    download_from_results(single_result, temp_vault)
    download_from_results(single_result, temp_vault)  # Should create a duplicate folder
    # NOTE(review): only the first folder is asserted; the existence/name of
    # the duplicate folder is not verified here.
    assert (temp_vault / folder_name_1).exists()

    # 9. File types (if you have examples of different source types)
    # This would require specific known arxiv IDs with different source types

    # 10. Error handling
    # Downloading into a directory that does not exist is expected to raise;
    # `ExceptionExpected` fails the cell if no exception occurs.
    with ExceptionExpected(Exception):
    # with pytest.raises(Exception):  # Replace with specific exception
        download_from_results(single_result, '/non/existent/path')