# Look up the arXiv article and download its LaTeX source.
url = "https://arxiv.org/abs/2106.10586"  # Replace this with the url
results = list(arxiv_search(arxiv_id(url)))

# Replace this with the path that you want to download the source file in;
# e.g. r'C:\Users\<your user name>' or r'/home/usr'
latex_dir = Path(get_download_path())

# `download_from_results` returns a sequence; only the first entry is used
# here because a single article was searched for.
downloaded_path = download_from_results(results, latex_dir, source=True)[0]
print(downloaded_path)

# The name of the downloaded folder doubles as the name of the reference.
reference = downloaded_path.name
author_full_names = [author.name for author in results[0].authors]
author_names = extract_last_names(author_full_names)

latex_file = find_main_latex_file(downloaded_path)
print(latex_file)
tutorial.concise_code
A concise version of the code used in tutorial.walkthrough
Import statements
Download arXiv source code
Divide arXiv source document into parts for an Obsidian.md (sub)vault.
# Change this as desired.
# The name of the reference as well as the name of the folder that contains
# the latex file
reference = reference
# Change this as desired.
latex_file = latex_file
# latex_file = Path(r'C:\Users\<user>') / reference / 'main.tex'

# Read the LaTeX source and split it into the preamble and the parts that
# will become notes.
latex_text = text_from_file(latex_file)
preamble, _ = divide_preamble(latex_text)
preamble = replace_inclusion_of_style_file_with_code(preamble, latex_dir)
parts = divide_latex_text(latex_text, downloaded_path)
cust_comms = custom_commands(preamble)

# Replace the below as needed;
# The path to the Obsidian vault in which to setup the "subvault"
# For convenience, we currently set this as the folder where the
# arXiv source file got downloaded into,
# But you may change this to wherever your Obsidian.md vault
# actually is located at.
vault = Path(downloaded_path)
# Replace the below as needed;
# The path relative to the vault of the directory in which to make
# the new folder containing the new notes.
location = Path('.')
# Replace the below as needed
# The (family) names of the authors;
author_names = author_names

setup_reference_from_latex_parts(
    parts, cust_comms, vault, location,
    reference,
    author_names,
    # You may set this to `True` if you set up a `_references` folder
    # in your Obsidian.md vault.
    create_reference_file_in_references_folder=False,
    # You may set this to `True` if you set up a `_templates` folder
    # in your Obsidian.md vault.
    create_template_file_in_templates_folder=False,
    adjust_common_latex_syntax_to_markdown=True,
    repeat_replacing_custom_commands=-1,
    copy_obsidian_configs=None,
)
Load ML models
# Load the model that categorizes the type(s) of standard information notes
repo_id = 'hyunjongkimmath/information_note_type'
if platform.system() == 'Windows':
    # The model is loaded with `pathlib.PosixPath` temporarily aliased to
    # `pathlib.WindowsPath` on Windows.
    # See https://stackoverflow.com/questions/57286486/i-cant-load-my-model-because-i-cant-put-a-posixpath
    temp = pathlib.PosixPath
    pathlib.PosixPath = pathlib.WindowsPath
    try:
        information_note_type_model = from_pretrained_fastai(repo_id)
    finally:
        # Always undo the monkey-patch, even if loading the model fails.
        pathlib.PosixPath = temp
else:
    information_note_type_model = from_pretrained_fastai(repo_id)

# Load the model that finds definitions and notations introduced in standard information notes
model = AutoModelForTokenClassification.from_pretrained('hyunjongkimmath/def_and_notat_token_classification_model')
tokenizer = AutoTokenizer.from_pretrained('hyunjongkimmath/def_and_notat_token_classification_model')
def_notat_classifier = pipeline('ner', model=model, tokenizer=tokenizer)

# Load the models that name definitions and notations.
model = AutoModelForSeq2SeqLM.from_pretrained('hyunjongkimmath/definition_naming_model')
tokenizer = AutoTokenizer.from_pretrained('hyunjongkimmath/definition_naming_model')
definition_naming_pipeline = pipeline('summarization', model=model, tokenizer=tokenizer)

model = AutoModelForSeq2SeqLM.from_pretrained('hyunjongkimmath/notation_naming_model')
tokenizer = AutoTokenizer.from_pretrained('hyunjongkimmath/notation_naming_model')
notation_naming_pipeline = pipeline('summarization', model=model, tokenizer=tokenizer)

# Load the model that summarizes what notations denote
model = AutoModelForSeq2SeqLM.from_pretrained('hyunjongkimmath/notation_summarizations_model')
tokenizer = AutoTokenizer.from_pretrained('hyunjongkimmath/notation_summarizations_model')
summarizer = pipeline('summarization', model=model, tokenizer=tokenizer)
Make ML predictions
Tagging note types
# Change `vault` and `reference` if necessary. These variables were defined in previous code.
# vault = Path(r'C:\Users\<user>\...') # The path to the Obsidian vault
# `reference` = 'kim_park_ga1dcmmc`
index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)

# Stop before tagging if any linked note is missing, reporting its name.
for note in notes:
    if not note.exists():
        raise Exception(note.name)

print("Tagging notes")
automatically_add_note_type_tags(information_note_type_model, vault, notes)
Locating definitions and notations
# Silence all warnings for the rest of the session.
warnings.filterwarnings("ignore")

index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)

for note in notes:
    assert note.exists()

print("Finding notations")
note_mfs = [MarkdownFile.from_vault_note(note) for note in notes]
# Keep only the notes tagged (automatically or manually) as introducing a
# definition or a notation.
notation_notes = [
    note for note, mf in zip(notes, note_mfs)
    if mf.has_tag('_auto/_meta/definition') or mf.has_tag('_auto/_meta/notation')
    or mf.has_tag('_meta/definition') or mf.has_tag('_meta/notation')]
for note in notation_notes:
    auto_mark_def_and_notats(note, def_notat_classifier, excessive_space_threshold=2)
    # automatically_mark_notations(note, notation_identification_model, reference_name=reference)
Naming definitions and notations
index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)

for note in notes:
    # Best effort: a failure on one note is reported and the loop continues
    # with the remaining notes.
    try:
        # `mf` is not used afterwards; the call is kept so that a note which
        # cannot be parsed is still reported by the handler below.
        mf = MarkdownFile.from_vault_note(note)
        add_names_to_html_tags_in_info_note(
            note, def_pipeline=definition_naming_pipeline,
            notat_pipeline=notation_naming_pipeline, overwrite=False)
    except Exception as e:
        print(f'{note.name} raised an exception')
        print(e)
Creating notation notes
index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)

for note in notes:
    try:
        new_notes = make_notation_notes_from_HTML_tags(note, vault, reference_name=reference)
    except Exception:
        # Report which note failed, then re-raise the same exception with
        # its original traceback.
        print(note.name)
        raise
    # assert len(new_notes) == 0
Summarizing notations
index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)

# Stop before summarizing if any linked note is missing.
for note in notes:
    if not note.exists():
        print(f"note does not exist: {note.name}")
        raise Exception()

print("Summarizing notations")
for note in notes:
    notation_notes_linked_in_note = notation_notes_linked_in_see_also_section(note, vault)
    for notation_note in notation_notes_linked_in_note:
        append_summary_to_notation_note(notation_note, vault, summarizer)