# Download the arXiv source files for one article and locate its main LaTeX file.
url = "https://arxiv.org/abs/2106.10586"  # Replace this with the URL of the arXiv article
results = list(arxiv_search(arxiv_id(url)))
if not results:
    # Fail with a clear message instead of an IndexError on `results[0]` below.
    raise ValueError(f"No arXiv search results for {url!r}")
# Replace this with the directory that you want to download the source files
# into; e.g. r'C:\Users\<your user name>' or r'/home/usr'
latex_dir = Path(get_download_path())
downloaded_path = download_from_results(results, latex_dir, source=True)[0]
print(downloaded_path)
# The name of the downloaded folder doubles as the name of the reference.
reference = downloaded_path.name
author_full_names = [author.name for author in results[0].authors]
author_names = extract_last_names(author_full_names)
latex_file = find_main_latex_file(downloaded_path)
print(latex_file)

# tutorial.concise_code
A concise version of the code used in tutorial.walkthrough
Import statements
Download arXiv source code
Divide arXiv source document into parts for an Obsidian.md (sub)vault.
# Divide the downloaded LaTeX document into parts and set up a reference
# folder ("subvault") for it in an Obsidian.md vault.

# Change this as desired.
# The name of the reference as well as the name of the folder that contains
# the latex file
reference = reference
# Change this as desired.
latex_file = latex_file
# latex_file = Path(r'C:\Users\<user>') / reference / 'main.tex'

latex_text = text_from_file(latex_file)
preamble, _ = divide_preamble(latex_text)
preamble = replace_inclusion_of_style_file_with_code(preamble, latex_dir)
parts = divide_latex_text(latex_text, downloaded_path)
cust_comms = custom_commands(preamble)

# Replace the below as needed;
# The path to the Obsidian vault in which to set up the "subvault".
# For convenience, we currently set this as the folder where the
# arXiv source file got downloaded into,
# but you may change this to wherever your Obsidian.md vault
# is actually located.
vault = Path(downloaded_path)
# Replace the below as needed;
# The path relative to the vault of the directory in which to make
# the new folder containing the new notes.
location = Path('.')
# Replace the below as needed
# The (family) names of the authors;
author_names = author_names

setup_reference_from_latex_parts(
    parts, cust_comms, vault, location,
    reference,
    author_names,
    # You may set this to `True` if you set up a `_references` folder
    # in your Obsidian.md vault.
    create_reference_file_in_references_folder=False,
    # You may set this to `True` if you set up a `_templates` folder
    # in your Obsidian.md vault.
    create_template_file_in_templates_folder=False,
    adjust_common_latex_syntax_to_markdown=True,
    repeat_replacing_custom_commands=-1,
    copy_obsidian_configs=None
)

# Load ML models
# Load the model that categorizes the type(s) of standard information notes
repo_id = 'hyunjongkimmath/information_note_type'
if platform.system() == 'Windows':
    # Temporarily alias PosixPath to WindowsPath while loading on Windows.
    # See https://stackoverflow.com/questions/57286486/i-cant-load-my-model-because-i-cant-put-a-posixpath
    temp = pathlib.PosixPath
    pathlib.PosixPath = pathlib.WindowsPath
    try:
        information_note_type_model = from_pretrained_fastai(repo_id)
    finally:
        # Always undo the monkeypatch, even when loading the model fails,
        # so that the rest of the process sees the real PosixPath.
        pathlib.PosixPath = temp
else:
    information_note_type_model = from_pretrained_fastai(repo_id)

# Load the model that finds definitions and notations introduced in standard information notes
model = AutoModelForTokenClassification.from_pretrained('hyunjongkimmath/def_and_notat_token_classification_model')
tokenizer = AutoTokenizer.from_pretrained('hyunjongkimmath/def_and_notat_token_classification_model')
def_notat_classifier = pipeline('ner', model=model, tokenizer=tokenizer)

# Load the models that name definitions and notations.
model = AutoModelForSeq2SeqLM.from_pretrained('hyunjongkimmath/definition_naming_model')
tokenizer = AutoTokenizer.from_pretrained('hyunjongkimmath/definition_naming_model')
definition_naming_pipeline = pipeline('summarization', model=model, tokenizer=tokenizer)
model = AutoModelForSeq2SeqLM.from_pretrained('hyunjongkimmath/notation_naming_model')
tokenizer = AutoTokenizer.from_pretrained('hyunjongkimmath/notation_naming_model')
notation_naming_pipeline = pipeline('summarization', model=model, tokenizer=tokenizer)

# Load the model that summarizes what notations denote
model = AutoModelForSeq2SeqLM.from_pretrained('hyunjongkimmath/notation_summarizations_model')
tokenizer = AutoTokenizer.from_pretrained('hyunjongkimmath/notation_summarizations_model')
summarizer = pipeline('summarization', model=model, tokenizer=tokenizer)

# Make ML predictions
Tagging note types
# Tag every note linked (two levels deep) from the reference's index note
# with its predicted note type(s).
# Change `vault` and `reference` if necessary. These variables were defined in previous code.
# vault = Path(r'C:\Users\<user>\...')  # The path to the Obsidian vault
# reference = 'kim_park_ga1dcmmc'
index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)
# Stop before tagging if any linked note is missing; the exception
# message is the name of the missing note.
for note in notes:
    if not note.exists():
        raise Exception(note.name)
print("Tagging notes")
automatically_add_note_type_tags(information_note_type_model, vault, notes)

# Locating definitions and notations
# Mark definitions and notations in the notes that are tagged as
# definition or notation notes.
warnings.filterwarnings("ignore")
index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)
for note in notes:
    # Raise explicitly rather than `assert`: asserts are stripped under
    # `python -O`, and this matches how the other cells report a missing note.
    if not note.exists():
        raise Exception(note.name)

print("Finding notations")
# Both the automatically assigned (`_auto/...`) and the manually assigned
# tags qualify a note.
qualifying_tags = (
    '_auto/_meta/definition', '_auto/_meta/notation',
    '_meta/definition', '_meta/notation',
)
note_mfs = [MarkdownFile.from_vault_note(note) for note in notes]
notation_notes = [
    note for note, mf in zip(notes, note_mfs)
    if any(mf.has_tag(tag) for tag in qualifying_tags)
]
for note in notation_notes:
    auto_mark_def_and_notats(note, def_notat_classifier, excessive_space_threshold=2)
    # automatically_mark_notations(note, notation_identification_model, reference_name=reference)

# Naming definitions and notations
# Add predicted names to the definition/notation HTML tags in each note.
index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)
for note in notes:
    # Best effort: a failure on one note is reported and the loop moves on
    # to the next note.
    try:
        # NOTE(review): `mf` is not used below; presumably kept so that a note
        # which cannot be read fails here -- confirm before removing.
        mf = MarkdownFile.from_vault_note(note)
        add_names_to_html_tags_in_info_note(
            note, def_pipeline=definition_naming_pipeline,
            notat_pipeline=notation_naming_pipeline, overwrite=False)
    except Exception as e:
        print(f'{note.name} raised an exception')
        print(e)

# Creating notation notes
# Create notation notes from the HTML tags marked in each note.
index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)
for note in notes:
    try:
        new_notes = make_notation_notes_from_HTML_tags(note, vault, reference_name=reference)
    except Exception:
        # Print which note failed, then re-raise the same exception with
        # its original traceback.
        print(note.name)
        raise
    # assert len(new_notes) == 0

# Summarizing notations
# Append a generated summary to every notation note that is linked from a
# note of this reference.
index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)

# Abort before summarizing if any linked note is missing.
for note in notes:
    if note.exists():
        continue
    print(f"note does not exist: {note.name}")
    raise Exception()

print("Summarizing notations")
for note in notes:
    notation_notes_linked_in_note = notation_notes_linked_in_see_also_section(
        note, vault)
    for notation_note in notation_notes_linked_in_note:
        append_summary_to_notation_note(notation_note, vault, summarizer)