import glob
import tempfile
from unittest.mock import patch, MagicMock
import shutil
from fastcore.test import *
from fastcore.test import test_is
from nbdev.showdoc import show_doc
from trouver.helper.tests import _test_directory
helper.arxiv
arxiv_id
arxiv_id (arxiv_id_or_url:str)
*Return the arxiv id from a str which is either the arxiv id itself or the url to the arxiv article.
Raises - ValueError
- If the input does not contain a valid arXiv ID.*
"1234.5678"), "1234.5678")
test_eq(arxiv_id("https://arxiv.org/abs/1234.5678"), "1234.5678")
test_eq(arxiv_id("1234.5678v1"), "1234.5678v1")
test_eq(arxiv_id("https://arxiv.org/abs/1234.5678v1"), "1234.5678v1")
test_eq(arxiv_id("math/0512085"), "math/0512085")
test_eq(arxiv_id("https://arxiv.org/abs/math/0512085"), "math/0512085") test_eq(arxiv_id(
The arxiv_search
function can be used to obtain an arxiv.Search
object, which is used for downloading arxiv files.
arxiv_search
arxiv_search (arxiv_ids:Union[str,list[str]], client:Optional[arxiv.Client]=None, results:bool=True)
Type | Default | Details | |
---|---|---|---|
arxiv_ids | Union | The ID of a single arXiv article or multiple arxiv articles | |
client | Optional | None | an arxiv API Client. If None , create one on the spot. |
results | bool | True | If True , return a Result object. Otherwise, return a Search object. |
Returns | Union |
# Specify the arXiv ID of the paper you want to download
# arxiv_id = "2106.10586" # Replace with your desired arXiv ID
= "math/0512085" # Replace with your desired arXiv ID
arxiv_id
# Create a search object with the specified arXiv ID
# client = Client()
# search = Search(id_list=[arxiv_id])
# results = client.results(search)
= arxiv_search(arxiv_id, results=True)
results results
<itertools.islice>
= list(results)
listy print(listy)
[arxiv.Result(entry_id='http://arxiv.org/abs/math/0512085v2', updated=datetime.datetime(2006, 9, 2, 22, 10, 49, tzinfo=datetime.timezone.utc), published=datetime.datetime(2005, 12, 5, 16, 13, 53, tzinfo=datetime.timezone.utc), title='Finding large Selmer rank via an arithmetic theory of local constants', authors=[arxiv.Result.Author('Barry Mazur'), arxiv.Result.Author('Karl Rubin')], summary='We obtain lower bounds for Selmer ranks of elliptic curves over dihedral\nextensions of number fields.\n Suppose $K/k$ is a quadratic extension of number fields, $E$ is an elliptic\ncurve defined over $k$, and $p$ is an odd prime. Let $F$ denote the maximal\nabelian $p$-extension of $K$ that is unramified at all primes where $E$ has bad\nreduction and that is Galois over $k$ with dihedral Galois group (i.e., the\ngenerator $c$ of $Gal(K/k)$ acts on $Gal(F/K)$ by -1). We prove (under mild\nhypotheses on $p$) that if the rank of the pro-$p$ Selmer group $S_p(E/K)$ is\nodd, then the rank of $S_p(E/L)$ is at least $[L:K]$ for every finite extension\n$L$ of $K$ in $F$.', comment='Revised and improved. To appear in Annals of Mathematics', journal_ref=None, doi=None, primary_category='math.NT', categories=['math.NT', '11G05, 11R20 (Primary) 11G10, 11R23, 14G05 (Secondary)'], links=[arxiv.Result.Link('http://arxiv.org/abs/math/0512085v2', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/math/0512085v2', title='pdf', rel='related', content_type=None)])]
0].entry_id listy[
'http://arxiv.org/abs/math/0512085v2'
= arxiv.Result(
mock_result_2 ='http://arxiv.org/abs/2106.10586v4',
entry_id=datetime.datetime(2024, 6, 28, 1, 36, 47, tzinfo=datetime.timezone.utc),
updated=datetime.datetime(2021, 6, 19, 23, 50, 56, tzinfo=datetime.timezone.utc),
published='Global $\\mathbb{A}^1$ degrees of covering maps between modular curves',
title=[arxiv.Result.Author('Hyun Jong Kim'), arxiv.Result.Author('Sun Woo Park')],
authors="Given a projective smooth curve $X$ over any field $k$, we discuss two\nnotions of global $\\mathbb{A}^1$ degree of a finite morphism of smooth curves\n$f: X \\to \\mathbb{P}^1_k$ satisfying certain conditions. One originates from\ncomputing the Euler number of the pullback of the line bundle\n$\\mathscr{O}_{\\mathbb{P}^1}(1)$ as a generalization of Kass and Wickelgren's\nconstruction of Euler numbers. The other originates from the construction of\nglobal $\\mathbb{A}^1$ degree of morphisms of projective curves by Kass, Levine,\nSolomon, and Wickelgren as a generalization of Morel's construction of\n$\\mathbb{A}^1$-Brouwer degree of a morphism $f: \\mathbb{P}^1_k \\to\n\\mathbb{P}^1_k$. We prove that under certain conditions on $N$, both notions of\nglobal $\\mathbb{A}^1$ degrees of covering maps between modular curves $X_0(N)\n\\to X(1)$, $X_1(N) \\to X(1)$, and $X(N) \\to X(1)$ agree to be equal to sums of\nhyperbolic elements $\\langle 1 \\rangle + \\langle -1 \\rangle$ in the\nGrothendieck-Witt ring $\\mathrm{GW}(k)$ for any field $k$ whose characteristic\nis coprime to $N$ and the pullback of $\\mathscr{O}_{\\mathbb{P}^1}(1)$ is\nrelatively oriented.",
summary='35 pages. Modified various statements to more precisely speak of\n "relatively oriented" maps or vector bundles instead of "relatively\n orientable" maps or vector bundles where appropriate --- the former phrasing\n suggests that a relative orientation is fixed. Additional minor edits',
comment=None,
journal_ref=None,
doi='math.AG',
primary_category=['math.AG', 'math.NT', '14F42, 14G35'],
categories=[arxiv.Result.Link('http://arxiv.org/abs/2106.10586v4', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/2106.10586v4', title='pdf', rel='related', content_type=None)])
links
= arxiv.Result(
mock_result_3 ='http://arxiv.org/abs/math/0512085v2',
entry_id=datetime.datetime(2006, 9, 2, 22, 10, 49, tzinfo=datetime.timezone.utc),
updated=datetime.datetime(2005, 12, 5, 16, 13, 53, tzinfo=datetime.timezone.utc),
published='Finding large Selmer rank via an arithmetic theory of local constants',
title=[arxiv.Result.Author('Barry Mazur'), arxiv.Result.Author('Karl Rubin')],
authors='We obtain lower bounds for Selmer ranks of elliptic curves over dihedral\nextensions of number fields.\n Suppose $K/k$ is a quadratic extension of number fields, $E$ is an elliptic\ncurve defined over $k$, and $p$ is an odd prime. Let $F$ denote the maximal\nabelian $p$-extension of $K$ that is unramified at all primes where $E$ has bad\nreduction and that is Galois over $k$ with dihedral Galois group (i.e., the\ngenerator $c$ of $Gal(K/k)$ acts on $Gal(F/K)$ by -1). We prove (under mild\nhypotheses on $p$) that if the rank of the pro-$p$ Selmer group $S_p(E/K)$ is\nodd, then the rank of $S_p(E/L)$ is at least $[L:K]$ for every finite extension\n$L$ of $K$ in $F$.',
summary='Revised and improved. To appear in Annals of Mathematics',
comment=None,
journal_ref=None,
doi='math.NT',
primary_category=['math.NT', '11G05, 11R20 (Primary) 11G10, 11R23, 14G05 (Secondary)'],
categories=[arxiv.Result.Link('http://arxiv.org/abs/math/0512085v2', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/math/0512085v2', title='pdf', rel='related', content_type=None)]) links
Metadata extraction
extract_metadata
extract_metadata (results:Union[list[arxiv.Result],arxiv.Result])
Return the metadata from the arxiv search results
Type | Details | |
---|---|---|
results | Union | |
Returns | list | Each dict corresponds to the metadata for each result. |
= arxiv.Result(
mock_result ='http://arxiv.org/abs/1605.08386v1',
entry_id=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
updated=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
published='Heat-bath random walks with Markov bases',
title=[arxiv.Result.Author('Caprice Stanley'), arxiv.Result.Author('Tobias Windisch')],
authors='Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
summary='20 pages, 3 figures',
comment=None,
journal_ref=None,
doi='math.CO',
primary_category=['math.CO', 'math.ST', 'stat.TH', 'Primary: 05C81, Secondary: 37A25, 11P21'],
categories=[arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1',
links=None, rel='alternate', content_type=None),
title'http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related',
arxiv.Result.Link(=None),])
content_type extract_metadata(mock_result)
[{'arxiv_id': '1605.08386v1',
'authors': ['Caprice Stanley', 'Tobias Windisch'],
'title': 'Heat-bath random walks with Markov bases',
'summary': 'Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
'primary_category': 'math.CO',
'categories': ['math.CO',
'math.ST',
'stat.TH',
'Primary: 05C81, Secondary: 37A25, 11P21'],
'published': datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
'updated': datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
'doi': None,
'comment': '20 pages, 3 figures',
'journal_ref': None,
'links': [arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1', title=None, rel='alternate', content_type=None),
arxiv.Result.Link('http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related', content_type=None)]}]
ArxivMetadataEncoder
ArxivMetadataEncoder (skipkeys=False, ensure_ascii=True, check_circular=True, allow_nan=True, sort_keys=False, indent=None, separators=None, default=None)
json
encoder to accompany the extract_metadata
function when using json.dump
.
# Your dictionary with datetime and arxiv.Result.Link objects
= {
data "timestamp": datetime.datetime.now(),
"link": arxiv.Result.Link(href="https://example.com", title="Example")
}
# Convert the dictionary to JSON
= json.dumps(data, cls=ArxivMetadataEncoder, indent=4)
json_data print(json_data)
{
"timestamp": "2024-12-26T15:31:58.139186",
"link": "https://example.com"
}
Downloading arxiv files
extract_last_names
extract_last_names (authors:list[str])
The extract_last_names
function is a convenient helper function for naming downloaded arxiv files.
# Example usage
= ["John Smith", "Maria Garcia-Lopez", "Pieter de Jong", "Xin Li"]
authors = extract_last_names(authors)
last_names 'Smith', 'Garcia-Lopez', 'de Jong', 'Li'])
test_eq(last_names, [
for author in mock_result.authors]), ['Stanley', 'Windisch']) test_eq(extract_last_names([author.name
create_acronym
create_acronym (title)
folder_name_for_source
folder_name_for_source (result:arxiv.Result, lowercase:bool=True)
folder_name_for_source
and create_acronym
are convenient helper functions for naming folders newly created when downloading source code for arxiv files; the author of trouver
roughly uses these conventions for organizing source code files.
# Test cases
= [
titles "Lectures on K3 surfaces",
"Positivity in Algebraic Geometry I",
"On the Cohomology of Finite Groups",
"An Introduction to A-infinity Algebras",
"Quantum Field Theory and the Standard Model",
"Category O for gl(n,C) and the Cohomology of Flag Varieties"
]
for title in titles:
print(f"Title: {title}")
print(f"Acronym: {create_acronym(title)}")
print()
Title: Lectures on K3 surfaces
Acronym: lks
Title: Positivity in Algebraic Geometry I
Acronym: pagI
Title: On the Cohomology of Finite Groups
Acronym: cfg
Title: An Introduction to A-infinity Algebras
Acronym: iAia
Title: Quantum Field Theory and the Standard Model
Acronym: qftsm
Title: Category O for gl(n,C) and the Cohomology of Flag Varieties
Acronym: cOgnCcfv
= arxiv.Result(
mock_result ='http://arxiv.org/abs/1605.08386v1',
entry_id=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
updated=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
published='Heat-bath random walks with Markov bases',
title=[arxiv.Result.Author('Caprice Stanley'), arxiv.Result.Author('Tobias Windisch')],
authors='Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
summary='20 pages, 3 figures',
comment=None,
journal_ref=None,
doi='math.CO',
primary_category=['math.CO', 'math.ST', 'stat.TH', 'Primary: 05C81, Secondary: 37A25, 11P21'],
categories=[arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1',
links=None, rel='alternate', content_type=None),
title'http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related',
arxiv.Result.Link(=None),])
content_type
extract_metadata(mock_result)print(mock_result.title)
= folder_name_for_source(mock_result)
output print(output)
assert ' ' not in output
assert output.startswith('stanley_windisch')
Heat-bath random walks with Markov bases
stanley_windisch_hbrwmb
file_name_for_pdf
file_name_for_pdf (result:arxiv.Result)
file_name_for_pdf
could be a good convention for naming downloaded pdf files of arxiv articles. Pass this as the file_or_folder_names
parameter for download_from_results
.
= arxiv.Result(
mock_result ='http://arxiv.org/abs/1605.08386v1',
entry_id=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
updated=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
published='Heat-bath random walks with Markov bases',
title=[arxiv.Result.Author('Caprice Stanley'), arxiv.Result.Author('Tobias Windisch')],
authors='Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
summary='20 pages, 3 figures',
comment=None,
journal_ref=None,
doi='math.CO',
primary_category=['math.CO', 'math.ST', 'stat.TH', 'Primary: 05C81, Secondary: 37A25, 11P21'],
categories=[arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1',
links=None, rel='alternate', content_type=None),
title'http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related',
arxiv.Result.Link(=None),])
content_type file_name_for_pdf(mock_result)
'Stanley, Windisch - Heat-bath random walks with Markov bases'
extract_tex_from_gz
extract_tex_from_gz (filepath)
get_tex_filename_from_gz
get_tex_filename_from_gz (filepath)
read_gz_file
read_gz_file (filepath)
analyze_arxiv_tarfile
analyze_arxiv_tarfile (filepath:os.PathLike)
*Analyzes the contents of an arXiv download file, which can be either a tar.gz archive or a plain .gz file.
This function attempts to determine the structure of the file downloaded from arXiv. It can identify several different types of content structures commonly found in arXiv downloads.
Parameters: filepath (Union[str, Path]): The path to the file to be analyzed. Can be a string or a Path object.
Returns: Literal[“nested_archive”, “direct_tex”, “unknown_tar_structure”, “plain_gz”, “invalid_file”]: - “nested_archive”: If the tar.gz contains another compressed file - “direct_tex”: If the tar.gz contains .tex files directly - “unknown_tar_structure”: If the tar.gz structure doesn’t match known patterns - “plain_gz”: If the file is a plain .gz file (not a tar.gz) - “invalid_file”: If the file is neither a valid tar.gz nor a valid .gz file
Raises: No exceptions are raised; all errors are handled internally and returned as “invalid_file”. Determine what kind of contents the*
Type | Details | |
---|---|---|
filepath | PathLike | The path to the tar file. |
Returns | Literal |
# Usage
# file = _test_directory() / 'arxiv_file_download_example_folder' / 'math_0512085v2.Finding_large_Selmer_rank_via_an_arithmetic_theory_of_local_constants.tar.gz'
# result = analyze_arxiv_tarfile(file)
# print(f"The tar.gz file contains: {result}")
# # Usage
# # content = read_gz_file(file)
# # print(content[:100]) # Print the first 100 characters
# filename = get_tex_filename_from_gz(file)
# if filename:
# print(f"The .tex file inside the .gz archive is: {filename}")
# else:
# print("No .tex file found in the archive.")
# extracted_file = extract_tex_from_gz(file)
# print(f"Extracted .tex file: {extracted_file}")
The tar.gz file contains: plain_gz
No .tex file found in the archive.
Extracted .tex file: math_0512085v2.Finding_large_Selmer_rank_via_an_arithmetic_theory_of_local_constants.tar.tex
download_from_results
download_from_results (results:arxiv.Result|list[arxiv.Result], dir:os.PathLike, source:bool=True, decompress_compressed_file:bool=True, file_or_folder_names:Union[NoneType,str,list[str],Callable[[arxiv.Result],str]]=<function folder_name_for_source>, delete_compressed_file:bool=True, download_metadata:bool=True, verbose:bool=False)
*Download either the source files or pdfs of the arxiv article encoded in the results.
- If `source = True` and `decompress_compressed_file = True`, then
  - Download the source file/folder into a newly created folder (whose name is specified by `file_or_folder_names`) within `dir` and decompress the source (if applicable) in this newly created folder.
  - If `delete_compressed_file = True`, then delete the compressed file.
- If `source = False`, then just download a pdf.
For file_or_folder_names
, the recommended Callable
arguments are folder_name_for_source
for downloading source files and file_name_for_pdf
for downloading pdf files.*
Type | Default | Details | |
---|---|---|---|
results | arxiv.Result | list[arxiv.Result] | ||
dir | PathLike | The directory into which to download the files | |
source | bool | True | If True , download the source file. Otherwise, download a pdf file. |
decompress_compressed_file | bool | True | If True and if source is True , then decompress the source file after downloading it. |
file_or_folder_names | Union | folder_name_for_source | If None , then the file/folder is named the arxiv id. If a str (in which case results must be a single Result or a list[Result] of length 1) or list[str] (whose length must equal that of results ), then each file/folder is named by the specified corresponding str . If Callable[Result, str] , then each file/folder is named using the specified Callable |
delete_compressed_file | bool | True | If True and if source and decompress_compressed_file are True , then delete the compressed source file after downloading and then uncompressing it. |
download_metadata | bool | True | If True , and if source is True , then create a file called metadata.json and put it into the newly created folder, unless a file called metadata.json already exists, in which case, a unique file name is created |
verbose | bool | False | |
Returns | list | Each Path is the folder in which the source files are newly downloaded or the path to the pdf file that is newly downloaded. |
download_from_results
downloads an arxiv article (the source or a pdf).
= arxiv.Result(
mock_result_1 ='http://arxiv.org/abs/1605.08386v1',
entry_id=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
updated=datetime.datetime(2016, 5, 26, 17, 59, 46, tzinfo=datetime.timezone.utc),
published='Heat-bath random walks with Markov bases',
title=[arxiv.Result.Author('Caprice Stanley'), arxiv.Result.Author('Tobias Windisch')],
authors='Graphs on lattice points are studied whose edges come from a finite set of\nallowed moves of arbitrary length. We show that the diameter of these graphs on\nfibers of a fixed integer matrix can be bounded from above by a constant. We\nthen study the mixing behaviour of heat-bath random walks on these graphs. We\nalso state explicit conditions on the set of moves so that the heat-bath random\nwalk, a generalization of the Glauber dynamics, is an expander in fixed\ndimension.',
summary='20 pages, 3 figures',
comment=None,
journal_ref=None,
doi='math.CO',
primary_category=['math.CO', 'math.ST', 'stat.TH', 'Primary: 05C81, Secondary: 37A25, 11P21'],
categories=[arxiv.Result.Link('http://arxiv.org/abs/1605.08386v1',
links=None, rel='alternate', content_type=None),
title'http://arxiv.org/pdf/1605.08386v1', title='pdf', rel='related',
arxiv.Result.Link(=None),])
content_type= arxiv.Result(
mock_result_2 ='http://arxiv.org/abs/2106.10586v4',
entry_id=datetime.datetime(2024, 6, 28, 1, 36, 47, tzinfo=datetime.timezone.utc),
updated=datetime.datetime(2021, 6, 19, 23, 50, 56, tzinfo=datetime.timezone.utc),
published='Global $\\mathbb{A}^1$ degrees of covering maps between modular curves',
title=[arxiv.Result.Author('Hyun Jong Kim'), arxiv.Result.Author('Sun Woo Park')],
authors="Given a projective smooth curve $X$ over any field $k$, we discuss two\nnotions of global $\\mathbb{A}^1$ degree of a finite morphism of smooth curves\n$f: X \\to \\mathbb{P}^1_k$ satisfying certain conditions. One originates from\ncomputing the Euler number of the pullback of the line bundle\n$\\mathscr{O}_{\\mathbb{P}^1}(1)$ as a generalization of Kass and Wickelgren's\nconstruction of Euler numbers. The other originates from the construction of\nglobal $\\mathbb{A}^1$ degree of morphisms of projective curves by Kass, Levine,\nSolomon, and Wickelgren as a generalization of Morel's construction of\n$\\mathbb{A}^1$-Brouwer degree of a morphism $f: \\mathbb{P}^1_k \\to\n\\mathbb{P}^1_k$. We prove that under certain conditions on $N$, both notions of\nglobal $\\mathbb{A}^1$ degrees of covering maps between modular curves $X_0(N)\n\\to X(1)$, $X_1(N) \\to X(1)$, and $X(N) \\to X(1)$ agree to be equal to sums of\nhyperbolic elements $\\langle 1 \\rangle + \\langle -1 \\rangle$ in the\nGrothendieck-Witt ring $\\mathrm{GW}(k)$ for any field $k$ whose characteristic\nis coprime to $N$ and the pullback of $\\mathscr{O}_{\\mathbb{P}^1}(1)$ is\nrelatively oriented.",
summary='35 pages. Modified various statements to more precisely speak of\n "relatively oriented" maps or vector bundles instead of "relatively\n orientable" maps or vector bundles where appropriate --- the former phrasing\n suggests that a relative orientation is fixed. Additional minor edits',
comment=None,
journal_ref=None,
doi='math.AG',
primary_category=['math.AG', 'math.NT', '14F42, 14G35'],
categories=[arxiv.Result.Link('http://arxiv.org/abs/2106.10586v4', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/2106.10586v4', title='pdf', rel='related', content_type=None)])
links
= arxiv.Result(
mock_result_3 ='http://arxiv.org/abs/math/0512085v2',
entry_id=datetime.datetime(2006, 9, 2, 22, 10, 49, tzinfo=datetime.timezone.utc),
updated=datetime.datetime(2005, 12, 5, 16, 13, 53, tzinfo=datetime.timezone.utc),
published='Finding large Selmer rank via an arithmetic theory of local constants',
title=[arxiv.Result.Author('Barry Mazur'), arxiv.Result.Author('Karl Rubin')],
authors='We obtain lower bounds for Selmer ranks of elliptic curves over dihedral\nextensions of number fields.\n Suppose $K/k$ is a quadratic extension of number fields, $E$ is an elliptic\ncurve defined over $k$, and $p$ is an odd prime. Let $F$ denote the maximal\nabelian $p$-extension of $K$ that is unramified at all primes where $E$ has bad\nreduction and that is Galois over $k$ with dihedral Galois group (i.e., the\ngenerator $c$ of $Gal(K/k)$ acts on $Gal(F/K)$ by -1). We prove (under mild\nhypotheses on $p$) that if the rank of the pro-$p$ Selmer group $S_p(E/K)$ is\nodd, then the rank of $S_p(E/L)$ is at least $[L:K]$ for every finite extension\n$L$ of $K$ in $F$.',
summary='Revised and improved. To appear in Annals of Mathematics',
comment=None,
journal_ref=None,
doi='math.NT',
primary_category=['math.NT', '11G05, 11R20 (Primary) 11G10, 11R23, 14G05 (Secondary)'],
categories=[arxiv.Result.Link('http://arxiv.org/abs/math/0512085v2', title=None, rel='alternate', content_type=None), arxiv.Result.Link('http://arxiv.org/pdf/math/0512085v2', title='pdf', rel='related', content_type=None)])
links
= mock_result_1
single_result = [mock_result_1, mock_result_2]
multiple_results = folder_name_for_source(mock_result_1)
folder_name_1 = folder_name_for_source(mock_result_2) folder_name_2
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
= Path(temp_dir) / 'arxiv_file_download_example_folder'
temp_vault / 'arxiv_file_download_example_folder', temp_vault)
shutil.copytree(_test_directory() # 1. Single Result vs. List of Results
# Test with single Result
= download_from_results(mock_result_1, temp_vault, source=True)
downloaded_paths assert (temp_vault / folder_name_1).exists()
assert downloaded_paths
# os.startfile(temp_vault)
=True)
download_from_results(mock_result_3, temp_vault, source# input()
We can also pass multiple results to download_from_results
.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
= Path(temp_dir) / 'arxiv_file_download_example_folder'
temp_vault / 'arxiv_file_download_example_folder', temp_vault)
shutil.copytree(_test_directory() # Test with multiple Results
=True)
download_from_results(multiple_results, temp_vault, source# os.startfile(temp_vault)
# input()
assert (temp_vault / folder_name_1).exists()
assert (temp_vault / folder_name_2).exists()
Specifying source=False
downloads the pdf instead of the source files.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
= Path(temp_dir) / 'arxiv_file_download_example_folder'
temp_vault / 'arxiv_file_download_example_folder', temp_vault)
shutil.copytree(_test_directory() # 2. Source vs. PDF download
=False)
download_from_results(single_result, temp_vault, sourceassert (temp_vault / f'{folder_name_1}.pdf').exists()
By specifying source=True
and decompress_compressed_file=False
, we can just download the compressed file.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
= Path(temp_dir) / 'arxiv_file_download_example_folder'
temp_vault / 'arxiv_file_download_example_folder', temp_vault)
shutil.copytree(_test_directory() # 3. Decompression options
=True, decompress_compressed_file=False)
download_from_results(single_result, temp_vault, source= glob.glob(str(temp_vault / folder_name_1 / '*.tar.gz'))
tar_gz_files assert len(tar_gz_files) > 0
The folder or pdf file can be given a custom name.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
= Path(temp_dir) / 'arxiv_file_download_example_folder'
temp_vault / 'arxiv_file_download_example_folder', temp_vault)
shutil.copytree(_test_directory() # 4. File/folder naming
='custom_name')
download_from_results(single_result, temp_vault, file_or_folder_namesassert (temp_vault / 'custom_name').exists()
=['name1', 'name2'])
download_from_results(multiple_results, temp_vault, file_or_folder_namesassert (temp_vault / 'name1').exists()
assert (temp_vault / 'name2').exists()
delete_compressed_file
can be set to False
to preserve the compressed file after decompressing.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
= Path(temp_dir) / 'arxiv_file_download_example_folder'
temp_vault / 'arxiv_file_download_example_folder', temp_vault)
shutil.copytree(_test_directory() # 5. Compressed file handling
=False)
download_from_results(single_result, temp_vault, delete_compressed_file= glob.glob(str(temp_vault / folder_name_1 / '*.tar.gz'))
tar_gz_files = glob.glob(str(temp_vault / folder_name_1 / '*.gz'))
gz_files # os.startfile(temp_vault)
# input()
assert len(tar_gz_files) > 0 or len(gz_files) > 0
By default, if the source is downloaded into a folder, then the metadata of the arxiv article is stored in a json
file.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
= Path(temp_dir) / 'arxiv_file_download_example_folder'
temp_vault / 'arxiv_file_download_example_folder', temp_vault)
shutil.copytree(_test_directory()
# 6. Metadata file
=True)
download_from_results(single_result, temp_vault, download_metadataassert (temp_vault / folder_name_1 / 'metadata.json').exists()
# 7. Edge cases
# Empty list
download_from_results([], temp_vault) # Test with non-existent arxiv ID (should handle gracefully)
# non_existent = next(arxiv.Search(id_list=['0000.00000']).results())
# download_from_results(non_existent, temp_vault)
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
= Path(temp_dir) / 'arxiv_file_download_example_folder'
temp_vault / 'arxiv_file_download_example_folder', temp_vault)
shutil.copytree(_test_directory() # 8. Folder creation (duplicate handling)
download_from_results(single_result, temp_vault)# Should create a duplicate folder
download_from_results(single_result, temp_vault) assert (temp_vault / folder_name_1).exists()
# 9. File types (if you have examples of different source types)
# This would require specific known arxiv IDs with different source types
# 10. Error handling
with ExceptionExpected(Exception):
# with pytest.raises(Exception): # Replace with specific exception
'/non/existent/path') download_from_results(single_result,