import glob
import tempfile
from unittest.mock import patch, MagicMock
import shutil
from fastcore.test import *
from fastcore.test import test_is
from nbdev.showdoc import show_doc
from trouver.helper.tests import _test_directory
helper.arxiv
arxiv_id
arxiv_id (arxiv_id_or_url:str)
*Return the arxiv id from a str which is either the arxiv id itself or the url to the arxiv article.
Raises - ValueError - If the input does not contain a valid arXiv ID.*
test_eq(arxiv_id("1234.5678"), "1234.5678")
test_eq(arxiv_id("https://arxiv.org/abs/1234.5678"), "1234.5678")
test_eq(arxiv_id("1234.5678v1"), "1234.5678v1")
test_eq(arxiv_id("https://arxiv.org/abs/1234.5678v1"), "1234.5678v1")
test_eq(arxiv_id("math/0512085"), "math/0512085")
test_eq(arxiv_id("https://arxiv.org/abs/math/0512085"), "math/0512085")
The arxiv_search function can be used to obtain an arxiv.Search object, which is used for downloading arxiv files.
arxiv_search
arxiv_search (arxiv_ids:Union[str,list[str]], client:Optional[arxiv.Client]=None, results:bool=True)
| Type | Default | Details | |
|---|---|---|---|
| arxiv_ids | Union | The ID of a single arXiv article or multiple arxiv articles | |
| client | Optional | None | an arxiv API Client. If None, create one on the spot. |
| results | bool | True | If True, return a Result object. Otherwise, return a Search object. |
| Returns | Union |
# Specify the arXiv ID of the paper you want to download
# arxiv_id = "2106.10586" # Replace with your desired arXiv ID
arxiv_id = "math/0512085" # Replace with your desired arXiv ID
# Create a search object with the specified arXiv ID
# client = Client()
# search = Search(id_list=[arxiv_id])
# results = client.results(search)
results = arxiv_search(arxiv_id, results=True)
results<itertools.islice>
listy = list(results)
print(listy)[arxiv.Result(entry_id='http://arxiv.org/abs/math/0512085v2', updated=datetime.datetime(2006, 9, 2, 22, 10, 49, tzinfo=datetime.timezone.utc), published=datetime.datetime(2005, 12, 5, 16, 13, 53, tzinfo=datetime.timezone.utc), title='Finding large Selmer rank via an arithmetic theory of local constants', authors=[arxiv.Result.Author('Barry Mazur'), arxiv.Result.Author('Karl Rubin')], summary='We obtain lower bounds for Selmer ranks of elliptic curves over dihedral\nextensions of number fields.\n Suppose $K/k$ is a quadratic extension of number fields, $E$ is an elliptic\ncurve defined over $k$, and $p$ is an odd prime. Let $F$ denote the maximal\nabelian $p$-extension of $K$ that is unramified at all primes where $E$ has bad\nreduction and that is Galois over $k$ with dihedral Galois group (i.e., the\ngenerator $c$ of $Gal(K/k)$ acts on $Gal(F/K)$ by -1). We prove (under mild\nhypotheses on $p$) that if the rank of the pro-$p$ Selmer group $S_p(E/K)$ is\nodd, then the rank of $S_p(E/L)$ is at least $[L:K]$ for every finite extension\n$L$ of $K$ in $F$.', comment='Revised and improved. To appear in Annals of Mathematics', journal_ref=None, doi=None, primary_category='math.NT', categories=['math.NT', '11G05, 11R20 (Primary) 11G10, 11R23, 14G05 (Secondary)'], links=[arxiv.Result.Link('http://arxiv.org/abs/math/0512085v2', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/math/0512085v2', title='pdf', rel='related', content_type=None)])]
listy[0].entry_id'http://arxiv.org/abs/math/0512085v2'
mock_result_2 = arxiv.Result(
entry_id='http://arxiv.org/abs/2106.10586v4',
updated=datetime.datetime(2024, 6, 28, 1, 36, 47, tzinfo=datetime.timezone.utc),
published=datetime.datetime(2021, 6, 19, 23, 50, 56, tzinfo=datetime.timezone.utc),
title='Global $\\mathbb{A}^1$ degrees of covering maps between modular curves',
authors=[arxiv.Result.Author('Hyun Jong Kim'), arxiv.Result.Author('Sun Woo Park')],
summary="Given a projective smooth curve $X$ over any field $k$, we discuss two\nnotions of global $\\mathbb{A}^1$ degree of a finite morphism of smooth curves\n$f: X \\to \\mathbb{P}^1_k$ satisfying certain conditions. One originates from\ncomputing the Euler number of the pullback of the line bundle\n$\\mathscr{O}_{\\mathbb{P}^1}(1)$ as a generalization of Kass and Wickelgren's\nconstruction of Euler numbers. The other originates from the construction of\nglobal $\\mathbb{A}^1$ degree of morphisms of projective curves by Kass, Levine,\nSolomon, and Wickelgren as a generalization of Morel's construction of\n$\\mathbb{A}^1$-Brouwer degree of a morphism $f: \\mathbb{P}^1_k \\to\n\\mathbb{P}^1_k$. We prove that under certain conditions on $N$, both notions of\nglobal $\\mathbb{A}^1$ degrees of covering maps between modular curves $X_0(N)\n\\to X(1)$, $X_1(N) \\to X(1)$, and $X(N) \\to X(1)$ agree to be equal to sums of\nhyperbolic elements $\\langle 1 \\rangle + \\langle -1 \\rangle$ in the\nGrothendieck-Witt ring $\\mathrm{GW}(k)$ for any field $k$ whose characteristic\nis coprime to $N$ and the pullback of $\\mathscr{O}_{\\mathbb{P}^1}(1)$ is\nrelatively oriented.",
comment='35 pages. Modified various statements to more precisely speak of\n "relatively oriented" maps or vector bundles instead of "relatively\n orientable" maps or vector bundles where appropriate --- the former phrasing\n suggests that a relative orientation is fixed. Additional minor edits',
journal_ref=None,
doi=None,
primary_category='math.AG',
categories=['math.AG', 'math.NT', '14F42, 14G35'],
links=[arxiv.Result.Link('http://arxiv.org/abs/2106.10586v4', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/2106.10586v4', title='pdf', rel='related', content_type=None)])
mock_result_3 = arxiv.Result(
entry_id='http://arxiv.org/abs/math/0512085v2',
updated=datetime.datetime(2006, 9, 2, 22, 10, 49, tzinfo=datetime.timezone.utc),
published=datetime.datetime(2005, 12, 5, 16, 13, 53, tzinfo=datetime.timezone.utc),
title='Finding large Selmer rank via an arithmetic theory of local constants',
authors=[arxiv.Result.Author('Barry Mazur'), arxiv.Result.Author('Karl Rubin')],
summary='We obtain lower bounds for Selmer ranks of elliptic curves over dihedral\nextensions of number fields.\n Suppose $K/k$ is a quadratic extension of number fields, $E$ is an elliptic\ncurve defined over $k$, and $p$ is an odd prime. Let $F$ denote the maximal\nabelian $p$-extension of $K$ that is unramified at all primes where $E$ has bad\nreduction and that is Galois over $k$ with dihedral Galois group (i.e., the\ngenerator $c$ of $Gal(K/k)$ acts on $Gal(F/K)$ by -1). We prove (under mild\nhypotheses on $p$) that if the rank of the pro-$p$ Selmer group $S_p(E/K)$ is\nodd, then the rank of $S_p(E/L)$ is at least $[L:K]$ for every finite extension\n$L$ of $K$ in $F$.',
comment='Revised and improved. To appear in Annals of Mathematics',
journal_ref=None,
doi=None,
primary_category='math.NT',
categories=['math.NT', '11G05, 11R20 (Primary) 11G10, 11R23, 14G05 (Secondary)'],
links=[arxiv.Result.Link('http://arxiv.org/abs/math/0512085v2', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/math/0512085v2', title='pdf', rel='related', content_type=None)])Metadata extraction
extract_metadata
extract_metadata (results:Union[list[arxiv.Result],arxiv.Result])
Return the metadata from the arxiv search results
| Type | Details | |
|---|---|---|
| results | Union | |
| Returns | list | Each dict corresponds to the metadata for each result. |
mock_result = arxiv.Result(
entry_id='http://arxiv.org/abs/1605.08386v1',
updated=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
published=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
title='Heat-bath random walks with Markov bases',
authors=[arxiv.Result.Author('Caprice Stanley'), arxiv.Result.Author('Tobias Windisch')],
summary='Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
comment='20 pages, 3 figures',
journal_ref=None,
doi=None,
primary_category='math.CO',
categories=['math.CO', 'math.ST', 'stat.TH', 'Primary: 05C81, Secondary: 37A25, 11P21'],
links=[arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1',
title=None, rel='alternate', content_type=None),
arxiv.Result.Link('http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related',
content_type=None),])
extract_metadata(mock_result)[{'arxiv_id': '1605.08386v1',
'authors': ['Caprice Stanley', 'Tobias Windisch'],
'title': 'Heat-bath random walks with Markov bases',
'summary': 'Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
'primary_category': 'math.CO',
'categories': ['math.CO',
'math.ST',
'stat.TH',
'Primary: 05C81, Secondary: 37A25, 11P21'],
'published': datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
'updated': datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
'doi': None,
'comment': '20 pages, 3 figures',
'journal_ref': None,
'links': [arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1', title=None, rel='alternate', content_type=None),
arxiv.Result.Link('http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related', content_type=None)]}]
ArxivMetadataEncoder
ArxivMetadataEncoder (skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)
JSON encoder to accompany the extract_metadata function when using json.dump.
# Your dictionary with datetime and arxiv.Result.Link objects
data = {
"timestamp": datetime.datetime.now(),
"link": arxiv.Result.Link(href="https://example.com", title="Example")
}
# Convert the dictionary to JSON
json_data = json.dumps(data, cls=ArxivMetadataEncoder, indent=4)
print(json_data){
"timestamp": "2024-12-26T15:31:58.139186",
"link": "https://example.com"
}
Downloading arxiv files
extract_last_names
extract_last_names (authors:list[str])
The extract_last_names function is a convenient helper function for naming downloaded arxiv files.
# Example usage
authors = ["John Smith", "Maria Garcia-Lopez", "Pieter de Jong", "Xin Li"]
last_names = extract_last_names(authors)
test_eq(last_names, ['Smith', 'Garcia-Lopez', 'de Jong', 'Li'])
test_eq(extract_last_names([author.name for author in mock_result.authors]), ['Stanley', 'Windisch'])
create_acronym
create_acronym (title)
folder_name_for_source
folder_name_for_source (result:arxiv.Result, lowercase:bool=True)
folder_name_for_source and create_acronym are convenient helper functions for naming folders newly created when downloading source code for arxiv files; the author of trouver roughly uses these conventions for organizing source code files.
# Test cases
titles = [
"Lectures on K3 surfaces",
"Positivity in Algebraic Geometry I",
"On the Cohomology of Finite Groups",
"An Introduction to A-infinity Algebras",
"Quantum Field Theory and the Standard Model",
"Category O for gl(n,C) and the Cohomology of Flag Varieties"
]
for title in titles:
print(f"Title: {title}")
print(f"Acronym: {create_acronym(title)}")
print()Title: Lectures on K3 surfaces
Acronym: lks
Title: Positivity in Algebraic Geometry I
Acronym: pagI
Title: On the Cohomology of Finite Groups
Acronym: cfg
Title: An Introduction to A-infinity Algebras
Acronym: iAia
Title: Quantum Field Theory and the Standard Model
Acronym: qftsm
Title: Category O for gl(n,C) and the Cohomology of Flag Varieties
Acronym: cOgnCcfv
mock_result = arxiv.Result(
entry_id='http://arxiv.org/abs/1605.08386v1',
updated=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
published=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
title='Heat-bath random walks with Markov bases',
authors=[arxiv.Result.Author('Caprice Stanley'), arxiv.Result.Author('Tobias Windisch')],
summary='Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
comment='20 pages, 3 figures',
journal_ref=None,
doi=None,
primary_category='math.CO',
categories=['math.CO', 'math.ST', 'stat.TH', 'Primary: 05C81, Secondary: 37A25, 11P21'],
links=[arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1',
title=None, rel='alternate', content_type=None),
arxiv.Result.Link('http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related',
content_type=None),])
extract_metadata(mock_result)
print(mock_result.title)
output = folder_name_for_source(mock_result)
print(output)
assert ' ' not in output
assert output.startswith('stanley_windisch')Heat-bath random walks with Markov bases
stanley_windisch_hbrwmb
file_name_for_pdf
file_name_for_pdf (result:arxiv.Result)
file_name_for_pdf could be a good convention for naming downloaded pdf files of arxiv articles. Pass this as the file_or_folder_names parameter for download_from_results.
mock_result = arxiv.Result(
entry_id='http://arxiv.org/abs/1605.08386v1',
updated=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
published=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
title='Heat-bath random walks with Markov bases',
authors=[arxiv.Result.Author('Caprice Stanley'), arxiv.Result.Author('Tobias Windisch')],
summary='Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
comment='20 pages, 3 figures',
journal_ref=None,
doi=None,
primary_category='math.CO',
categories=['math.CO', 'math.ST', 'stat.TH', 'Primary: 05C81, Secondary: 37A25, 11P21'],
links=[arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1',
title=None, rel='alternate', content_type=None),
arxiv.Result.Link('http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related',
content_type=None),])
file_name_for_pdf(mock_result)'Stanley, Windisch - Heat-bath random walks with Markov bases'
extract_tex_from_gz
extract_tex_from_gz (filepath)
get_tex_filename_from_gz
get_tex_filename_from_gz (filepath)
read_gz_file
read_gz_file (filepath)
analyze_arxiv_tarfile
analyze_arxiv_tarfile (filepath:os.PathLike)
*Analyzes the contents of an arXiv download file, which can be either a tar.gz archive or a plain .gz file.
This function attempts to determine the structure of the file downloaded from arXiv. It can identify several different types of content structures commonly found in arXiv downloads.
Parameters: filepath (Union[str, Path]): The path to the file to be analyzed. Can be a string or a Path object.
Returns: Literal[“nested_archive”, “direct_tex”, “unknown_tar_structure”, “plain_gz”, “invalid_file”]: - “nested_archive”: If the tar.gz contains another compressed file - “direct_tex”: If the tar.gz contains .tex files directly - “unknown_tar_structure”: If the tar.gz structure doesn’t match known patterns - “plain_gz”: If the file is a plain .gz file (not a tar.gz) - “invalid_file”: If the file is neither a valid tar.gz nor a valid .gz file
Raises: No exceptions are raised; all errors are handled internally and returned as “invalid_file”. Determine what kind of contents the*
| Type | Details | |
|---|---|---|
| filepath | PathLike | The path to the tar file. |
| Returns | Literal |
# Usage
# file = _test_directory() / 'arxiv_file_download_example_folder' / 'math_0512085v2.Finding_large_Selmer_rank_via_an_arithmetic_theory_of_local_constants.tar.gz'
# result = analyze_arxiv_tarfile(file)
# print(f"The tar.gz file contains: {result}")
# # Usage
# # content = read_gz_file(file)
# # print(content[:100]) # Print the first 100 characters
# filename = get_tex_filename_from_gz(file)
# if filename:
# print(f"The .tex file inside the .gz archive is: {filename}")
# else:
# print("No .tex file found in the archive.")
# extracted_file = extract_tex_from_gz(file)
# print(f"Extracted .tex file: {extracted_file}")The tar.gz file contains: plain_gz
No .tex file found in the archive.
Extracted .tex file: math_0512085v2.Finding_large_Selmer_rank_via_an_arithmetic_theory_of_local_constants.tar.tex
download_from_results
download_from_results (results:arxiv.Result|list[arxiv.Result], dir:os.PathLike, source:bool=True, decompress_compressed_file:bool=True, file_or_folder_names:Union[NoneType,str,list[str],Callable[[arxiv.Result],str]]=<function folder_name_for_source>, delete_compressed_file:bool=True, download_metadata:bool=True, verbose:bool=False)
*Download either the source files or pdfs of the arxiv article encoded in the results.
- If source = True and decompress_compressed_file = True, then
  - Download the source file/folder into a newly created folder (whose name is specified by file_or_folder_names) within dir and decompress the source (if applicable) in this newly created folder.
  - If delete_compressed_file = True, then delete the compressed file.
- If source = False, then just download a pdf.
For file_or_folder_names, the recommended Callable arguments are folder_name_for_source for downloading source files and file_name_for_pdf for downloading pdf files.*
| Type | Default | Details | |
|---|---|---|---|
| results | arxiv.Result | list[arxiv.Result] | ||
| dir | PathLike | The directory into which to download the files | |
| source | bool | True | If True, download the source file. Otherwise, download a pdf file. |
| decompress_compressed_file | bool | True | If True and if source is True, then decompress the source file after downloading it. |
| file_or_folder_names | Union | folder_name_for_source | If None, then the file/folder is named the arxiv id. If a str (in which case results must be a single Result or a list[Result] of length 1) or list[str] (whose length must equal that of results), then each file/folder is named by the specified corresponding str. If Callable[Result, str], then each file/folder is named using the specified Callable |
| delete_compressed_file | bool | True | If True and if source and decompress_compressed_file are True, then delete the compressed source file after downloading and then uncompressing it. |
| download_metadata | bool | True | If True, and if source is True, then create a file called metadata.json and put it into the newly created folder, unless a file called metadata.json already exists, in which case, a unique file name is created |
| verbose | bool | False | |
| Returns | list | Each Path is the folder in which the source files are newly downloaded or the path to the pdf file that is newly downloaded. |
download_from_results downloads an arxiv article (the source or a pdf).
mock_result_1 = arxiv.Result(
entry_id='http://arxiv.org/abs/1605.08386v1',
updated=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
published=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
title='Heat-bath random walks with Markov bases',
authors=[arxiv.Result.Author('Caprice Stanley'), arxiv.Result.Author('Tobias Windisch')],
summary='Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
comment='20 pages, 3 figures',
journal_ref=None,
doi=None,
primary_category='math.CO',
categories=['math.CO', 'math.ST', 'stat.TH', 'Primary: 05C81, Secondary: 37A25, 11P21'],
links=[arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1',
title=None, rel='alternate', content_type=None),
arxiv.Result.Link('http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related',
content_type=None),])
mock_result_2 = arxiv.Result(
entry_id='http://arxiv.org/abs/2106.10586v4',
updated=datetime.datetime(2024, 6, 28, 1, 36, 47, tzinfo=datetime.timezone.utc),
published=datetime.datetime(2021, 6, 19, 23, 50, 56, tzinfo=datetime.timezone.utc),
title='Global $\\mathbb{A}^1$ degrees of covering maps between modular curves',
authors=[arxiv.Result.Author('Hyun Jong Kim'), arxiv.Result.Author('Sun Woo Park')],
summary="Given a projective smooth curve $X$ over any field $k$, we discuss two\nnotions of global $\\mathbb{A}^1$ degree of a finite morphism of smooth curves\n$f: X \\to \\mathbb{P}^1_k$ satisfying certain conditions. One originates from\ncomputing the Euler number of the pullback of the line bundle\n$\\mathscr{O}_{\\mathbb{P}^1}(1)$ as a generalization of Kass and Wickelgren's\nconstruction of Euler numbers. The other originates from the construction of\nglobal $\\mathbb{A}^1$ degree of morphisms of projective curves by Kass, Levine,\nSolomon, and Wickelgren as a generalization of Morel's construction of\n$\\mathbb{A}^1$-Brouwer degree of a morphism $f: \\mathbb{P}^1_k \\to\n\\mathbb{P}^1_k$. We prove that under certain conditions on $N$, both notions of\nglobal $\\mathbb{A}^1$ degrees of covering maps between modular curves $X_0(N)\n\\to X(1)$, $X_1(N) \\to X(1)$, and $X(N) \\to X(1)$ agree to be equal to sums of\nhyperbolic elements $\\langle 1 \\rangle + \\langle -1 \\rangle$ in the\nGrothendieck-Witt ring $\\mathrm{GW}(k)$ for any field $k$ whose characteristic\nis coprime to $N$ and the pullback of $\\mathscr{O}_{\\mathbb{P}^1}(1)$ is\nrelatively oriented.",
comment='35 pages. Modified various statements to more precisely speak of\n "relatively oriented" maps or vector bundles instead of "relatively\n orientable" maps or vector bundles where appropriate --- the former phrasing\n suggests that a relative orientation is fixed. Additional minor edits',
journal_ref=None,
doi=None,
primary_category='math.AG',
categories=['math.AG', 'math.NT', '14F42, 14G35'],
links=[arxiv.Result.Link('http://arxiv.org/abs/2106.10586v4', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/2106.10586v4', title='pdf', rel='related', content_type=None)])
mock_result_3 = arxiv.Result(
entry_id='http://arxiv.org/abs/math/0512085v2',
updated=datetime.datetime(2006, 9, 2, 22, 10, 49, tzinfo=datetime.timezone.utc),
published=datetime.datetime(2005, 12, 5, 16, 13, 53, tzinfo=datetime.timezone.utc),
title='Finding large Selmer rank via an arithmetic theory of local constants',
authors=[arxiv.Result.Author('Barry Mazur'), arxiv.Result.Author('Karl Rubin')],
summary='We obtain lower bounds for Selmer ranks of elliptic curves over dihedral\nextensions of number fields.\n Suppose $K/k$ is a quadratic extension of number fields, $E$ is an elliptic\ncurve defined over $k$, and $p$ is an odd prime. Let $F$ denote the maximal\nabelian $p$-extension of $K$ that is unramified at all primes where $E$ has bad\nreduction and that is Galois over $k$ with dihedral Galois group (i.e., the\ngenerator $c$ of $Gal(K/k)$ acts on $Gal(F/K)$ by -1). We prove (under mild\nhypotheses on $p$) that if the rank of the pro-$p$ Selmer group $S_p(E/K)$ is\nodd, then the rank of $S_p(E/L)$ is at least $[L:K]$ for every finite extension\n$L$ of $K$ in $F$.',
comment='Revised and improved. To appear in Annals of Mathematics',
journal_ref=None,
doi=None,
primary_category='math.NT',
categories=['math.NT', '11G05, 11R20 (Primary) 11G10, 11R23, 14G05 (Secondary)'],
links=[arxiv.Result.Link('http://arxiv.org/abs/math/0512085v2', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/math/0512085v2', title='pdf', rel='related', content_type=None)])
single_result = mock_result_1
multiple_results = [mock_result_1, mock_result_2]
folder_name_1 = folder_name_for_source(mock_result_1)
folder_name_2 = folder_name_for_source(mock_result_2)with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
# 1. Single Result vs. List of Results
# Test with single Result
downloaded_paths = download_from_results(mock_result_1, temp_vault, source=True)
assert (temp_vault / folder_name_1).exists()
assert downloaded_paths
# os.startfile(temp_vault)
download_from_results(mock_result_3, temp_vault, source=True)
# input()
We can also pass multiple results to download_from_results.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
# Test with multiple Results
download_from_results(multiple_results, temp_vault, source=True)
# os.startfile(temp_vault)
# input()
assert (temp_vault / folder_name_1).exists()
assert (temp_vault / folder_name_2).exists()
Specifying source=False downloads the pdf instead of the source files.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
# 2. Source vs. PDF download
download_from_results(single_result, temp_vault, source=False)
assert (temp_vault / f'{folder_name_1}.pdf').exists()
By specifying source=True and decompress_compressed_file=False, we can just download the compressed file.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
# 3. Decompression options
download_from_results(single_result, temp_vault, source=True, decompress_compressed_file=False)
tar_gz_files = glob.glob(str(temp_vault / folder_name_1 / '*.tar.gz'))
assert len(tar_gz_files) > 0
The folder or pdf file can be given a custom name.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
# 4. File/folder naming
download_from_results(single_result, temp_vault, file_or_folder_names='custom_name')
assert (temp_vault / 'custom_name').exists()
download_from_results(multiple_results, temp_vault, file_or_folder_names=['name1', 'name2'])
assert (temp_vault / 'name1').exists()
assert (temp_vault / 'name2').exists()
delete_compressed_file can be set to False to preserve the compressed file after decompressing.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
# 5. Compressed file handling
download_from_results(single_result, temp_vault, delete_compressed_file=False)
tar_gz_files = glob.glob(str(temp_vault / folder_name_1 / '*.tar.gz'))
gz_files = glob.glob(str(temp_vault / folder_name_1 / '*.gz'))
# os.startfile(temp_vault)
# input()
assert len(tar_gz_files) > 0 or len(gz_files) > 0
By default, if the source is downloaded into a folder, then the metadata of the arxiv article is stored in a json file.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
# 6. Metadata file
download_from_results(single_result, temp_vault, download_metadata=True)
assert (temp_vault / folder_name_1 / 'metadata.json').exists()
# 7. Edge cases
download_from_results([], temp_vault) # Empty list
# Test with non-existent arxiv ID (should handle gracefully)
# non_existent = next(arxiv.Search(id_list=['0000.00000']).results())
# download_from_results(non_existent, temp_vault)with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
temp_vault = Path(temp_dir) / 'arxiv_file_download_example_folder'
shutil.copytree(_test_directory() / 'arxiv_file_download_example_folder', temp_vault)
# 8. Folder creation (duplicate handling)
download_from_results(single_result, temp_vault)
download_from_results(single_result, temp_vault) # Should create a duplicate folder
assert (temp_vault / folder_name_1).exists()
# 9. File types (if you have examples of different source types)
# This would require specific known arxiv IDs with different source types
# 10. Error handling
with ExceptionExpected(Exception):
# with pytest.raises(Exception): # Replace with specific exception
download_from_results(single_result, '/non/existent/path')