https://github.com/dylanhogg/minimal-python-project-template
from datetime import datetime

# Emit the current time as a compact, sortable stamp: YYYYMMDD-HHMMSS.
print(f"{datetime.now():%Y%m%d-%H%M%S}")
import ipdb; ipdb.set_trace() # https://github.com/inducer/pudb import pudb; pudb.set_trace() # https://github.com/gotcha/ipdb pytest -s my_test.py # turn off capture output in pytest to see pdb output pytest my_test.py --pdb --pdbcls=IPython.terminal.debugger:TerminalPdb # Alternative
Also see pudb
https://docs.astral.sh/uv/getting-started/installation/
# Download the uv install script from astral.sh and pipe it to sh.
$ curl -LsSf https://astral.sh/uv/install.sh | sh
# Typical follow-up: create a virtual environment, install the dev
# requirements, then run a script through uv.
uv venv
uv pip install -r requirements-dev.txt
uv run example.py
https://docs.astral.sh/uv/guides/scripts/#running-a-script-with-dependencies
https://docs.astral.sh/uv/guides/tools/#commands-with-plugins
https://til.simonwillison.net/llms/bert-ner
https://simonwillison.net/2025/Feb/7/pip-install-llm-smollm2
uv run --with numpy --with torch --with transformers python -c ' from transformers import AutoTokenizer, AutoModelForTokenClassification from transformers import pipeline import json model = AutoModelForTokenClassification.from_pretrained("dslim/distilbert-NER") tokenizer = AutoTokenizer.from_pretrained("dslim/distilbert-NER") nlp = pipeline("ner", model=model, tokenizer=tokenizer) text = "This is an example sentence about Dylan Hogg who lives in Australia and is an ML Engineer." print(json.dumps(nlp(text), indent=2, default=repr))' # Also using uvx to run installed commands: uvx --with mkdocs-material mkdocs --help
https://rich.readthedocs.io/en/stable/tables.html
from rich.console import Console
from rich.table import Table

# Table with a visible header row rendered in bold blue.
table = Table(show_header=True, header_style="bold blue")
table.add_column("Date", style="dim", width=12)
table.add_column("Title")
table.add_column("Production Budget", justify="right")
table.add_column("Box Office", justify="right")

# One row per film; cells may contain inline markup such as [red]...[/red]
# or [bold]...[/bold].
table.add_row(
    "Dec 20, 2019",
    "Star Wars: The Rise of Skywalker",
    "$275,000,000",
    "$375,126,118",
)
table.add_row(
    "May 25, 2018",
    "[red]Solo[/red]: A Star Wars Story",
    "$275,000,000",
    "$393,151,347",
)
table.add_row(
    "Dec 15, 2017",
    "Star Wars Ep. VIII: The Last Jedi",
    "$262,000,000",
    "[bold]$1,332,539,889[/bold]",
)

# Print the finished table to the terminal.
console = Console()
console.print(table)
https://en.wikipedia.org/w/api.php?action=query&list=search&format=json&srsearch=python
https://en.wikipedia.org/w/api.php?action=help&modules=main
todo
from transformers import AutoTokenizer

# A list of colors in RGB for representing the tokens
colors = [
    "102;194;165", "252;141;98", "141;160;203",
    "231;138;195", "166;216;84", "255;217;47",
]


def show_tokens(sentence: str, tokenizer_name: str) -> None:
    """Print the tokens of ``sentence``, each on a different background color.

    Also prints the tokenizer's vocabulary length before the tokens.

    Args:
        sentence: Text to tokenize.
        tokenizer_name: Identifier passed to ``AutoTokenizer.from_pretrained``.
    """
    # Load the tokenizer and tokenize the input
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    token_ids = tokenizer(sentence).input_ids

    # Extract vocabulary length
    print(f"Vocab length: {len(tokenizer)}")

    # Print a colored list of tokens, cycling through ``colors``
    for idx, t in enumerate(token_ids):
        # ESC[0;30;48;2;R;G;Bm = black text on a 24-bit RGB background;
        # ESC[0m resets the styling after the token.
        print(
            f"\x1b[0;30;48;2;{colors[idx % len(colors)]}m"
            + tokenizer.decode(t)
            + "\x1b[0m",
            end=" ",
        )


# Sample input so the demo call below runs on its own.
text = "This is an example sentence about Dylan Hogg who lives in Australia and is an ML Engineer."
show_tokens(text, "bert-base-cased")
todo
todo
pandas-pipe-method.ipynb
pandas-pipe-parallel-with-caching.ipynb