import re
# Unit designators mapped from their full spelling to their abbreviation.
# NOTE(review): the entries look like the USPS Publication 28 secondary unit
# designators -- confirm against the intended source before extending.
unit_designators_mapping = {
    "APARTMENT": "APT",
    "BASEMENT": "BSMT",
    "BUILDING": "BLDG",
    "DEPARTMENT": "DEPT",
    "FLOOR": "FL",
    "FRONT": "FRNT",
    "HANGER": "HNGR",
    "KEY": "KEY",
    "LOBBY": "LBBY",
    "LOT": "LOT",
    "LOWER": "LOWR",
    "OFFICE": "OFC",
    "PENTHOUSE": "PH",
    "PIER": "PIER",
    "REAR": "REAR",
    "ROOM": "RM",
    "SIDE": "SIDE",
    "SLIP": "SLIP",
    "SPACE": "SPC",
    "STOP": "STOP",
    "SUITE": "STE",
    "TRAILER": "TRLR",
    "UNIT": "UNIT",
    "UPPER": "UPPR",
    # Unlike the other entries, the key here is a symbol and the value is the
    # spelled-out word; both forms end up in the pattern below.
    "#": "NUMBER",
}

# every spelling to strip: full words (keys) and abbreviations (values)
all_designators = set(unit_designators_mapping) | set(unit_designators_mapping.values())

# split into word-only vs symbol-containing.
# fullmatch (rather than ^...$) so a designator with a trailing newline is not
# mistaken for a plain word.
_word_only = re.compile(r"\w+")

# Set iteration order varies between interpreter runs, so sort explicitly to
# keep the generated pattern reproducible.  Longest-first also guarantees that
# a designator which is a prefix of another can never shadow it in the
# un-anchored symbol alternation.
word_designators = sorted(
    (d for d in all_designators if _word_only.fullmatch(d)),
    key=lambda d: (-len(d), d),
)
symbol_designators = sorted(
    all_designators.difference(word_designators),
    key=lambda d: (-len(d), d),
)

# build two sub-patterns: one wrapped in \b...\b so word designators only match
# as whole words ("LOT" must not match inside "LOTUS"), one raw for symbols,
# which have no word boundary to anchor on ("#300")
parts = []
if word_designators:
    parts.append(r"\b(?:" + "|".join(re.escape(d) for d in word_designators) + r")\b")
if symbol_designators:
    parts.append("|".join(re.escape(d) for d in symbol_designators))

# final pattern matches either case; meant to be used with re.IGNORECASE
unit_identifiers = r"(?:" + "|".join(parts) + r")"
def normalize_unit_number(unit_info: str) -> str:
    """Strip unit designators from *unit_info* and tidy the leftover whitespace.

    Every match of the module-level ``unit_identifiers`` pattern is removed,
    ignoring case.  What remains is trimmed and any run of whitespace inside
    it is reduced to a single space.

    Args:
        unit_info: Raw unit text such as ``"Apartment 12B"`` or ``"# 300"``.

    Returns:
        The text without designators, e.g. ``"12B"`` or ``"300"``.  An empty
        string is returned when nothing but designators/whitespace was given.
    """
    without_designators = re.sub(
        unit_identifiers, "", unit_info, flags=re.IGNORECASE
    )
    # Removing a designator from the middle of the text ("Bldg 4 Apt 12")
    # leaves a multi-space gap that a plain strip() would keep; split/join
    # trims both ends and collapses those gaps in one step.
    return " ".join(without_designators.split())
# example usage: print the normalized form of a handful of sample inputs
if __name__ == "__main__":
    # The trailing comment on each sample is the line the demo should print.
    samples = (
        "Apartment 12B",  # "12B"
        "#300",  # "300"
        "# 300",  # "300"
        "number 300",  # "300"
        "Unit 12A",  # "12A"
        "Suite 100",  # "100"
        "Apt 5",  # "5"
        "BSMT 1",  # "1"
        "FL 3",  # "3"
        "PH 2",  # "2"
        "TRLR 4",  # "4"
        "123",  # "123" (no change)
        " ",  # "" (empty string)
    )
    for sample in samples:
        print(normalize_unit_number(sample))