tutorial.concise_code

A concise version of the code used in tutorial.walkthrough

Import statements

Download arXiv source code

# Replace this with the URL of the arXiv abstract page of the desired paper.
url = "https://arxiv.org/abs/2106.10586"
results = list(arxiv_search(arxiv_id(url)))
if not results:
    # Fail early with a descriptive message; the indexing of `results`
    # below would otherwise raise an uninformative `IndexError`.
    raise ValueError(f"No arXiv search results were found for {url}")

# Replace this with the path of the directory that you want to download the
# source files into; e.g. r'C:\Users\<your user name>' or r'/home/usr'
latex_dir = Path(get_download_path())

# Only the first downloaded item is used in the rest of this script.
downloaded_path = download_from_results(results, latex_dir, source=True)[0]
print(downloaded_path)
# The name of the downloaded folder doubles as the name of the reference.
reference = downloaded_path.name
author_full_names = [author.name for author in results[0].authors]
author_names = extract_last_names(author_full_names)

latex_file = find_main_latex_file(downloaded_path)
print(latex_file)

Divide the arXiv source document into parts for an Obsidian.md (sub)vault.

# Change this as desired.
# The name of the reference; this is also the name of the folder that
# contains the LaTeX file.
reference = reference
# Change this as desired, e.g.
# latex_file = Path(r'C:\Users\<user>') / reference / 'main.tex'
latex_file = latex_file

# Read the LaTeX source, then split it into its preamble and its parts.
latex_text = text_from_file(latex_file)
preamble, _ = divide_preamble(latex_text)
# NOTE(review): `latex_dir` is the directory that the source was downloaded
# into, whereas the LaTeX file itself lives under `downloaded_path`; confirm
# which of the two should be searched for style files.
preamble = replace_inclusion_of_style_file_with_code(preamble, latex_dir)
parts = divide_latex_text(latex_text, downloaded_path)
cust_comms = custom_commands(preamble)

# Replace the below as needed.
# The path to the Obsidian vault in which to set up the "subvault".
# For convenience, this is currently the folder that the arXiv source file
# got downloaded into, but you may change this to wherever your Obsidian.md
# vault is actually located.
vault = Path(downloaded_path)
# Replace the below as needed.
# The path, relative to the vault, of the directory in which to make the new
# folder containing the new notes.
location = Path('.')
# Replace the below as needed.
# The (family) names of the authors.
author_names = author_names

# Keyword options forwarded to `setup_reference_from_latex_parts`.
reference_setup_options = {
    # You may set this to `True` if you set up a `_references` folder
    # in your Obsidian.md vault.
    'create_reference_file_in_references_folder': False,
    # You may set this to `True` if you set up a `_templates` folder
    # in your Obsidian.md vault.
    'create_template_file_in_templates_folder': False,
    'adjust_common_latex_syntax_to_markdown': True,
    'repeat_replacing_custom_commands': -1,
    'copy_obsidian_configs': None,
}

setup_reference_from_latex_parts(
    parts, cust_comms, vault, location, reference, author_names,
    **reference_setup_options)

Load ML models

# Load the model that categorizes the type(s) of standard information notes.
repo_id = 'hyunjongkimmath/information_note_type'
if platform.system() == 'Windows':
    # Temporarily alias `PosixPath` to `WindowsPath` while loading the model.
    # See https://stackoverflow.com/questions/57286486/i-cant-load-my-model-because-i-cant-put-a-posixpath
    temp = pathlib.PosixPath
    pathlib.PosixPath = pathlib.WindowsPath
    try:
        information_note_type_model = from_pretrained_fastai(repo_id)
    finally:
        # Always undo the monkey-patch, even if loading the model fails, so
        # that `pathlib` is not left in a modified state.
        pathlib.PosixPath = temp
else:
    information_note_type_model = from_pretrained_fastai(repo_id)


# Load the model that finds definitions and notations introduced in standard
# information notes.
model = AutoModelForTokenClassification.from_pretrained('hyunjongkimmath/def_and_notat_token_classification_model')
tokenizer = AutoTokenizer.from_pretrained('hyunjongkimmath/def_and_notat_token_classification_model')
def_notat_classifier = pipeline('ner', model=model, tokenizer=tokenizer)

# Load the models that name definitions and notations.
model = AutoModelForSeq2SeqLM.from_pretrained('hyunjongkimmath/definition_naming_model')
tokenizer = AutoTokenizer.from_pretrained('hyunjongkimmath/definition_naming_model')
definition_naming_pipeline = pipeline('summarization', model=model, tokenizer=tokenizer)

model = AutoModelForSeq2SeqLM.from_pretrained('hyunjongkimmath/notation_naming_model')
tokenizer = AutoTokenizer.from_pretrained('hyunjongkimmath/notation_naming_model')
notation_naming_pipeline = pipeline('summarization', model=model, tokenizer=tokenizer)

# Load the model that summarizes what notations denote.
# `model` and `tokenizer` are reused for each pipeline above, so after this
# point they refer to the summarization model and its tokenizer.
model = AutoModelForSeq2SeqLM.from_pretrained('hyunjongkimmath/notation_summarizations_model')
tokenizer = AutoTokenizer.from_pretrained('hyunjongkimmath/notation_summarizations_model')
summarizer = pipeline('summarization', model=model, tokenizer=tokenizer)

Make ML predictions

Tagging note types

# `vault` and `reference` were defined in previous code; change them here if
# necessary, e.g.
# vault = Path(r'C:\Users\<user>\...')  # The path to the Obsidian vault
# reference = 'kim_park_ga1dcmmc'
index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)

# Abort on the first linked note that does not exist, reporting its name.
missing_note = next((note for note in notes if not note.exists()), None)
if missing_note is not None:
    raise Exception(missing_note.name)

print("Tagging notes")
automatically_add_note_type_tags(information_note_type_model, vault, notes)

Locating definitions and notations

# NOTE: this suppresses all warnings from this point onward, not only those
# emitted by the code below.
warnings.filterwarnings("ignore")

index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)

# Validate with an explicit exception rather than `assert`: assertions are
# removed when Python runs with `-O`, and the error should name the note.
for note in notes:
    if not note.exists():
        raise FileNotFoundError(f"note does not exist: {note.name}")

print("Finding notations")
# Only notes carrying at least one of these tags are processed.
def_and_notat_tags = (
    '_auto/_meta/definition',
    '_auto/_meta/notation',
    '_meta/definition',
    '_meta/notation',
)
note_mfs = [MarkdownFile.from_vault_note(note) for note in notes]
notation_notes = [
    note for note, mf in zip(notes, note_mfs)
    if any(mf.has_tag(tag) for tag in def_and_notat_tags)]
for note in notation_notes:
    auto_mark_def_and_notats(note, def_notat_classifier, excessive_space_threshold=2)
    # automatically_mark_notations(note, notation_identification_model, reference_name=reference)

Naming definitions and notations

index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)

# Best-effort pass: a failure on one note is reported and the loop moves on
# to the next note.
for note in notes:
    try:
        mf = MarkdownFile.from_vault_note(note)
        add_names_to_html_tags_in_info_note(
            note,
            def_pipeline=definition_naming_pipeline,
            notat_pipeline=notation_naming_pipeline,
            overwrite=False,
        )
    except Exception as error:
        print(f'{note.name} raised an exception', error, sep='\n')

Creating notation notes

index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)

for note in notes:
    try:
        new_notes = make_notation_notes_from_HTML_tags(
            note, vault, reference_name=reference)
    except Exception:
        # Report which note failed, then let the same exception propagate.
        print(note.name)
        raise
    # assert len(new_notes) == 0

Summarizing notations

index_note = VaultNote(vault, name=f'_index_{reference}')
notes = notes_linked_in_notes_linked_in_note(index_note, as_dict=False)

# Abort on the first linked note that does not exist, after printing its name.
missing_note = next((note for note in notes if not note.exists()), None)
if missing_note is not None:
    print(f"note does not exist: {missing_note.name}")
    raise Exception()

print("Summarizing notations")
# For each note, summarize every notation note linked in its "See also"
# section.
for note in notes:
    for notation_note in notation_notes_linked_in_see_also_section(note, vault):
        append_summary_to_notation_note(notation_note, vault, summarizer)