10 files changed, 691 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..76605c9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# Private Data
+data/FFIS-CU200-2021-Q4.xlsx
diff --git a/README.md b/README.md
index 8498ca8..e135132 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,19 @@
 # toriis-analysis
 Repository for scripts to evaluate public institutional investment data.
+
+# Running existing notebook
+
+1) 
+	+ Fork + clone repo
+	+ Run `setup/setup.sh` to set up the local conda environment
+		- you may need to install the correct version of miniconda
+	+ Start a local notebook suerver (`juypter notebook`)
+
+# Contributing
+
+You are welcome to contribute data and analysis results. Please make sure that you contribute **cleared notebooks** so version control capabilities work well. To clear a notebook, use `Kernel -> Researt & Clear Output)` 
+
+1) **CLI:**
+	+ Create a new branch (`$ git checkout -b <branch_name>`)
+	+ Commit the new notebook/data (`$ git add <new file>; git commit`)
+	+ Push and generate a pull request
diff --git a/data/23-138.Records.xlsx b/data/23-138.Records.xlsx
new file mode 100644
index 0000000..05aaa92
--- /dev/null
+++ b/data/23-138.Records.xlsx
diff --git a/setup/checks/check_for_conda.sh b/setup/checks/check_for_conda.sh
new file mode 100644
index 0000000..b17bc1a
--- /dev/null
+++ b/setup/checks/check_for_conda.sh
@@ -0,0 +1,11 @@
+#taken from mobilitynet-analysis-scripts/setup/checks/check_for_conda.sh
+
+CURR_CONDA_VER=`conda --version | cut -d " " -f 2`
+EXP_CONDA_VER=23.1.0
+
+if [ $CURR_CONDA_VER == $EXP_CONDA_VER ]; then
+    echo "For conda, found $CURR_CONDA_VER, expected $EXP_CONDA_VER, all is good!"
+else
+    echo "For conda, found $CURR_CONDA_VER, expected $EXP_CONDA_VER, run 'bash setup/setup_conda.sh $EXP_CONDA_VER <platform>' to get the correct version"
+    echo "Or install manually after downloading from https://repo.anaconda.com/miniconda/"
+fi
diff --git a/setup/environment.yml b/setup/environment.yml
new file mode 100644
index 0000000..c72a998
--- /dev/null
+++ b/setup/environment.yml
@@ -0,0 +1,126 @@
+name: toriis
+channels:
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - anyio=3.5.0=py310h06a4308_0
+  - argon2-cffi=21.3.0=pyhd3eb1b0_0
+  - argon2-cffi-bindings=21.2.0=py310h7f8727e_0
+  - asttokens=2.0.5=pyhd3eb1b0_0
+  - attrs=22.1.0=py310h06a4308_0
+  - babel=2.11.0=py310h06a4308_0
+  - backcall=0.2.0=pyhd3eb1b0_0
+  - beautifulsoup4=4.11.1=py310h06a4308_0
+  - blas=1.0=mkl
+  - bleach=4.1.0=pyhd3eb1b0_0
+  - bottleneck=1.3.5=py310ha9d4c09_0
+  - brotlipy=0.7.0=py310h7f8727e_1002
+  - bzip2=1.0.8=h7b6447c_0
+  - ca-certificates=2023.01.10=h06a4308_0
+  - certifi=2022.12.7=py310h06a4308_0
+  - cffi=1.15.1=py310h5eee18b_3
+  - charset-normalizer=2.0.4=pyhd3eb1b0_0
+  - comm=0.1.2=py310h06a4308_0
+  - cryptography=38.0.4=py310h9ce1e76_0
+  - debugpy=1.5.1=py310h295c915_0
+  - decorator=5.1.1=pyhd3eb1b0_0
+  - defusedxml=0.7.1=pyhd3eb1b0_0
+  - entrypoints=0.4=py310h06a4308_0
+  - executing=0.8.3=pyhd3eb1b0_0
+  - flit-core=3.6.0=pyhd3eb1b0_0
+  - icu=58.2=he6710b0_3
+  - idna=3.4=py310h06a4308_0
+  - intel-openmp=2021.4.0=h06a4308_3561
+  - ipykernel=6.19.2=py310h2f386ee_0
+  - ipython=8.9.0=py310h06a4308_0
+  - ipython_genutils=0.2.0=pyhd3eb1b0_1
+  - jedi=0.18.1=py310h06a4308_1
+  - jinja2=3.1.2=py310h06a4308_0
+  - json5=0.9.6=pyhd3eb1b0_0
+  - jsonschema=4.16.0=py310h06a4308_0
+  - jupyter_client=7.4.9=py310h06a4308_0
+  - jupyter_core=5.1.1=py310h06a4308_0
+  - jupyter_server=1.23.4=py310h06a4308_0
+  - jupyterlab=3.5.3=py310h06a4308_0
+  - jupyterlab_pygments=0.1.2=py_0
+  - jupyterlab_server=2.16.5=py310h06a4308_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - libffi=3.4.2=h6a678d5_6
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libsodium=1.0.18=h7b6447c_0
+  - libstdcxx-ng=11.2.0=h1234567_1
+  - libuuid=1.41.5=h5eee18b_0
+  - libxml2=2.9.14=h74e7548_0
+  - libxslt=1.1.35=h4e12654_0
+  - lxml=4.9.1=py310h1edc446_0
+  - markupsafe=2.1.1=py310h7f8727e_0
+  - matplotlib-inline=0.1.6=py310h06a4308_0
+  - mistune=0.8.4=py310h7f8727e_1000
+  - mkl=2021.4.0=h06a4308_640
+  - mkl-service=2.4.0=py310h7f8727e_0
+  - mkl_fft=1.3.1=py310hd6ae3a3_0
+  - mkl_random=1.2.2=py310h00e6091_0
+  - nbclassic=0.4.8=py310h06a4308_0
+  - nbclient=0.5.13=py310h06a4308_0
+  - nbconvert=6.5.4=py310h06a4308_0
+  - nbformat=5.7.0=py310h06a4308_0
+  - ncurses=6.4=h6a678d5_0
+  - nest-asyncio=1.5.6=py310h06a4308_0
+  - notebook=6.5.2=py310h06a4308_0
+  - notebook-shim=0.2.2=py310h06a4308_0
+  - numexpr=2.8.4=py310h8879344_0
+  - numpy=1.23.5=py310hd5efca6_0
+  - numpy-base=1.23.5=py310h8e6c178_0
+  - openssl=1.1.1t=h7f8727e_0
+  - packaging=22.0=py310h06a4308_0
+  - pandas=1.5.2=py310h1128e8f_0
+  - pandocfilters=1.5.0=pyhd3eb1b0_0
+  - parso=0.8.3=pyhd3eb1b0_0
+  - pexpect=4.8.0=pyhd3eb1b0_3
+  - pickleshare=0.7.5=pyhd3eb1b0_1003
+  - pip=22.3.1=py310h06a4308_0
+  - platformdirs=2.5.2=py310h06a4308_0
+  - prometheus_client=0.14.1=py310h06a4308_0
+  - prompt-toolkit=3.0.36=py310h06a4308_0
+  - psutil=5.9.0=py310h5eee18b_0
+  - ptyprocess=0.7.0=pyhd3eb1b0_2
+  - pure_eval=0.2.2=pyhd3eb1b0_0
+  - pycparser=2.21=pyhd3eb1b0_0
+  - pygments=2.11.2=pyhd3eb1b0_0
+  - pyopenssl=22.0.0=pyhd3eb1b0_0
+  - pyrsistent=0.18.0=py310h7f8727e_0
+  - pysocks=1.7.1=py310h06a4308_0
+  - python=3.10.9=h7a1cb2a_0
+  - python-dateutil=2.8.2=pyhd3eb1b0_0
+  - python-fastjsonschema=2.16.2=py310h06a4308_0
+  - pytz=2022.7=py310h06a4308_0
+  - pyzmq=23.2.0=py310h6a678d5_0
+  - readline=8.2=h5eee18b_0
+  - requests=2.28.1=py310h06a4308_0
+  - send2trash=1.8.0=pyhd3eb1b0_1
+  - setuptools=65.6.3=py310h06a4308_0
+  - six=1.16.0=pyhd3eb1b0_1
+  - sniffio=1.2.0=py310h06a4308_1
+  - soupsieve=2.3.2.post1=py310h06a4308_0
+  - sqlite=3.40.1=h5082296_0
+  - stack_data=0.2.0=pyhd3eb1b0_0
+  - terminado=0.17.1=py310h06a4308_0
+  - tinycss2=1.2.1=py310h06a4308_0
+  - tk=8.6.12=h1ccaba5_0
+  - tomli=2.0.1=py310h06a4308_0
+  - tornado=6.2=py310h5eee18b_0
+  - traitlets=5.7.1=py310h06a4308_0
+  - typing-extensions=4.4.0=py310h06a4308_0
+  - typing_extensions=4.4.0=py310h06a4308_0
+  - tzdata=2022g=h04d1e81_0
+  - urllib3=1.26.14=py310h06a4308_0
+  - wcwidth=0.2.5=pyhd3eb1b0_0
+  - webencodings=0.5.1=py310h06a4308_1
+  - websocket-client=0.58.0=py310h06a4308_4
+  - wheel=0.37.1=pyhd3eb1b0_0
+  - xz=5.2.10=h5eee18b_1
+  - zeromq=4.3.4=h2531618_0
+  - zlib=1.2.13=h5eee18b_0
+prefix: /home/gabrielkosmacher/miniconda3/envs/toriis
diff --git a/setup/setup.sh b/setup/setup.sh
new file mode 100644
index 0000000..d2631f9
--- /dev/null
+++ b/setup/setup.sh
@@ -0,0 +1,14 @@
+#taken from mobilitynet-analysis-scripts/setup/setup.sh
+# If the conda binary is not found, specify the full path to it
+# you can find it by searching for "conda" under the miniconda3 directory
+# typical paths are:
+# - on linux: /home/<user>/miniconda3/bin/conda
+# - on OSX: /Users/<user>/miniconda3/bin/conda
+# - on Windows: C:/Users/<user>/Miniconda3/Scripts/conda
+
+set -e
+
+source setup/checks/check_for_conda.sh
+
+conda env update --name toriis --file setup/environment.yml
+conda activate toriis
diff --git a/setup/setup_conda.sh b/setup/setup_conda.sh
new file mode 100644
index 0000000..e7d4e3e
--- /dev/null
+++ b/setup/setup_conda.sh
@@ -0,0 +1,22 @@
+# taken from mobilitynet-analysis-scripts/setup/setup_conda.sh
+EXP_CONDA_VER=$1
+PLATFORM=$2
+echo "Installing for version $EXP_CONDA_VER and platform $PLATFORM"
+
+if [[ -z $EXP_CONDA_VER || -z $PLATFORM ]]; then
+    echo "Usage: setup_conda.sh <version> <platform>"
+    echo "   Platform options are Linux-x86_64, MacOSX-x86_64"
+    echo "   For Windows, manually download and install https://repo.anaconda.com/miniconda/Miniconda3-$EXP_CONDA_VER-Windows-x86_64.exe"
+else
+    INSTALL_PREFIX=$HOME/miniconda-$EXP_CONDA_VER
+    SOURCE_SCRIPT="$HOME/miniconda-$EXP_CONDA_VER/etc/profile.d/conda.sh"
+
+    curl -o miniconda.sh -L https://repo.continuum.io/miniconda/Miniconda3-$EXP_CONDA_VER-$PLATFORM.sh;
+    bash miniconda.sh -b -p $INSTALL_PREFIX
+    source $SOURCE_SCRIPT
+    hash -r
+    conda config --set always_yes yes --set changeps1 no
+    # Useful for debugging any issues with conda
+    conda info -a
+    echo "Successfully installed at $INSTALL_PREFIX. Please run 'source $SOURCE_SCRIPT' in every terminal where you want to use conda"
+fi
diff --git a/setup/teardown.sh b/setup/teardown.sh
new file mode 100644
index 0000000..308fde8
--- /dev/null
+++ b/setup/teardown.sh
@@ -0,0 +1,3 @@
+# taken from mobilitynet-analysis-scripts/setup/teardown.sh
+conda activate base
+conda env remove --name toriis
diff --git a/setup/teardown_conda.sh b/setup/teardown_conda.sh
new file mode 100644
index 0000000..2fde7a5
--- /dev/null
+++ b/setup/teardown_conda.sh
@@ -0,0 +1,9 @@
+# taken from mobilitynet-analysis-scripts/setup/teardown_conda.sh
+EXP_CONDA_VER=$1
+
+if [ -z $EXP_CONDA_VER ]; then
+    echo "Usage: teardown_conda.sh <version>"
+else
+    INSTALL_PREFIX=$HOME/miniconda-$EXP_CONDA_VER
+    rm -rf $INSTALL_PREFIX
+fi
diff --git a/ui_security_inventory_23_parsing.ipynb b/ui_security_inventory_23_parsing.ipynb
new file mode 100644
index 0000000..297203f
--- /dev/null
+++ b/ui_security_inventory_23_parsing.ipynb
@@ -0,0 +1,484 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8ce6b023",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import yfinance as yf\n",
+    "import requests\n",
+    "import concurrent.futures\n",
+    "import json"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8304939a",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "file_path_23 = \"./data/23-138.Records.xlsx\"\n",
+    " # skip header rows so column names align, drop all NaN rows\n",
+    "df = pd.read_excel(file_path_23, parse_dates=True, skiprows=6).dropna(how='all')"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "538233a8",
+   "metadata": {},
+   "source": [
+    "#### Setting up the Operating Pool DataFrame "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "96dee5bc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bank_i = df[df['Account or Security'].str.contains(\"9-200100\", na=False)].index\n",
+    "op_i = df[df['Account or Security'].str.contains(\"Operating Funds Pool\", na=False)].index\n",
+    "op_df = df.loc[bank_i[0]:op_i[1]-1]\n",
+    "op_df.insert(6, 'Bank', pd.NA)\n",
+    "op_df.insert(7, 'Asset Type', pd.NA)\n",
+    "op_df.insert(8, 'Company', pd.NA)\n",
+    "op_df.insert(9, 'Industry', pd.NA)\n",
+    "op_df.insert(10, 'Private Placement', False)\n",
+    "op_df.insert(11, 'Ticker', pd.NA)\n",
+    "op_df.insert(11, 'Info', object)\n",
+    "# op_df = op_df.insert(7, 'Bank', pd.NA)\n",
+    "op_df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "cc1f037f",
+   "metadata": {},
+   "source": [
+    "#### Add the Bank and Asset Type "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "29c95d09",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "bank_name = pd.NA\n",
+    "asset_type = pd.NA\n",
+    "for i in op_df.index:\n",
+    "    if np.isnan(op_df.loc[i][\"Quantity\"]):\n",
+    "        if \"9-200100\" in df.loc[i][\"Account or Security\"]:\n",
+    "            bank_name = df.loc[i][\"Account or Security\"]\n",
+    "        else:\n",
+    "            asset_type = df.loc[i][\"Account or Security\"]\n",
+    "    op_df.at[i,'Bank'] = bank_name\n",
+    "    op_df.at[i,'Asset Type'] = asset_type\n",
+    "op_df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3ea40a3e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "all_asset_types = set([op_df.loc[i]['Asset Type'] for i in op_df.index]);all_asset_types"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "274d0b39",
+   "metadata": {},
+   "source": [
+    "#### First, we just check the corperate bonds "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "fe124c92",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cb_df = op_df[op_df['Asset Type'].str.contains(\"Corporate Bonds\", na=False)]\n",
+    "print(\"Corperate Bond Totals\")\n",
+    "print(\"Cost Value\\t\",'${:,.2f}'.format(cb_df.sum(numeric_only=True)[\"Cost Value\"]))\n",
+    "print(\"Market Value\\t\",'${:,.2f}'.format(cb_df.sum(numeric_only=True)[\"Market Value\"]))"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "d4aba4d7",
+   "metadata": {},
+   "source": [
+    "Yup, you read that right"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "9879b032",
+   "metadata": {},
+   "source": [
+    "##### get company name\n",
+    "+ Parse our 'PVTPL', which is an abreviation for privatly placed https://www.investopedia.com/terms/p/privateplacement.asp\n",
+    "+ Remove everything after and including the tokens ```['%']```\n",
+    "+ Remove everything after `[\" CAP\", \" INC\", \" FDG\", \" CORP\", \" CO\", \" LLC\", \" CR\"]`\n",
+    "+ Add the company names to a set\n",
+    "+ Map different semantic names to the same syntax for the same company"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d78a9733",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "company_name_dict = {\n",
+    "    \"AMERICAN EXPRESS\" : \"AMERICAN EXPRESS CO\",\n",
+    "    'AIG GLOBAL' : \"AMERICAN INTL GROUP  INC\",\n",
+    "    \"ANHEUSER-BUSCH\" : \"ANHEUSER-BUSCH CO\",\n",
+    "    \"APTIV\" : \"APTIV CO\",\n",
+    "    \"ASTRAZENECA\" : \"ASTRAZENECA PLC\",\n",
+    "    \"AUSTRALIA & NEW\" : \"AUSTRALIA & NEW ZEALAND BKG GR\",\n",
+    "    \"BAE SYS\" : \"BAE SYS PLC\",\n",
+    "    \"BANCO SANTANDER\" : \"BANCO SANTANDER SA\",\n",
+    "    \"BANK MONTREAL\" : \"BANK OF MONTREAL\",\n",
+    "    \"BANK OF MONTREAL\" : \"BANK OF MONTREAL\",\n",
+    "    \"BK MONTREAL\" : \"BANK OF MONTREAL\",\n",
+    "    \"BANK NOVA SCOTIA\" : \"BANK OF NOVA SCOTIA\",\n",
+    "    \"BANK OF NOVA SCOTIA\" : \"BANK OF NOVA SCOTIA\",\n",
+    "    \"BANK AMER\" : \"BANK OF AMERICA CO\",\n",
+    "    \"BAXTER INT\" : \"BAXTER INTERNATIONAL INC\",\n",
+    "    \"BAYER US FIN\" : \"BAYER US FINANCE LLC\",\n",
+    "    \"BB&T\" : \"BB&T CO\",\n",
+    "    \"BLACKSTONE\" : \"BLACKSTONE\",\n",
+    "    \"BMW\" : \"BMW\",\n",
+    "    \"BNP PARIBAS\" : \"BNP PARIBAS\",\n",
+    "    \"BRIGHTHOUSE\" : \"BRIGHTHOUSE\",\n",
+    "    \"BRISTOL MYERS SQUIBB\" : \"BRISTOL MYERS SQUIBB CO\",\n",
+    "    \"BRISTOL-MYERS SQUIBB\" : \"BRISTOL MYERS SQUIBB CO\",\n",
+    "    \"CANADIAN IMPERIAL BK\" : \"CANADIAN IMPERIAL BK CO\",\n",
+    "    \"CAPITAL ONE\" : \"CAPITAL ONE FINL CO\",\n",
+    "    \"CATERPILLAR FINL\" : \"CATERPILLAR FINL\",\n",
+    "    \"CENTERPOINT ENERGY\" : \"CENTERPOINT ENERGY INC\",\n",
+    "    \"CHEVRON U S A\" : \"CHEVRON  CO\",\n",
+    "    \"CREDIT AGRICOLE\" : \"CREDIT AGRICOLE\",\n",
+    "    \"CREDIT SUISSE\" : \"CREDIT SUISSE GROUP AG\",\n",
+    "    \"CROWN CASTLE INTL\" : \"CROWN CASTLE INTL\",\n",
+    "    \"DAIMLER\" : \"DAIMLER\",\n",
+    "    \"DELTA AIR LINES\" : \"DELTA AIR LINES\",\n",
+    "    \"DTE E\" : \"DTE ELEC\",\n",
+    "    \"DUKE ENERGY\" : \"DUKE ENERGY CO\",\n",
+    "    \"DOWDUPONT  INC\" : \"DUPONT DE NEMOURS  INC\",\n",
+    "    \"ENTERGY\" : \"ENTERGY CO\",\n",
+    "    \"EQUITABLE FINL LIFE\" : \"EQUITABLE FINL LIFE GLOBAL FDG\",\n",
+    "    \"ESC CB LEHMAN BROS\" : \"ESC LEHMAN BROTH HLD  INC\",\n",
+    "    \"FIFTH THIRD BANCORP\" : \"FIFTH THIRD BANCORP\",\n",
+    "    \"FLORIDA P\" : \"FLORIDA POWER & LIGHT CO\",\n",
+    "    \"GENERAL MTRS\" : \"GENERAL MOTORS\",\n",
+    "    \"GENERAL MOTORS\" : \"GENERAL MOTORS\",\n",
+    "    \"HEWLETT PACKARD\" : \"HEWLETT PACKARD ENTERPRISE CO\",\n",
+    "    \"HP  INC\" : \"HEWLETT PACKARD ENTERPRISE CO\",\n",
+    "    \"HUNTINGTON\" : \"HUNTINGTON NATL BK MD\",\n",
+    "    \"JACKSON FINANCIAL  INC\" : \"JACKSON NATIONAL LIFE GL\",\n",
+    "    \"JPM CHASE\" : \"JPMORGAN CHASE &  CO\",\n",
+    "    \"KINDER MORGAN\" : \"KINDER MORGAN INC\",\n",
+    "    \"LLOYDS BKG\" : \"LLOYDS BANKING GROUP PLC FORME\",\n",
+    "    \"MACQUARIE\" : \"MACQUARIE BK LTD\",\n",
+    "    \"MIZUHO\" : \"MIZUHO CO\",\n",
+    "    \"MONDELEZ INT\" : \"MONDELEZ INTERNATIONAL INC\",\n",
+    "    \"MORGAN STANLEY\" : \"MORGAN STANLEY\",\n",
+    "    \"NATIONAL AUSTRALIA B\" : \"NATIONAL AUSTRALIA BANK\",\n",
+    "    \"NATIONWIDE BLDG SOC\" : \"NATIONWIDE BLDG SOCIETY\",\n",
+    "    \"NATIONAL BANK OF CANADA\" : \"NATIONAL BANK OF CANADA\",\n",
+    "    \"NATL BK CDA\" : \"NATIONAL BANK OF CANADA\",\n",
+    "    \"NATWEST M\" : \"NATWEST MARKETS PLC\",\n",
+    "    \"NEXTERA ENERGY\" : \"NEXTERA ENERGY CAP\",\n",
+    "    \"NORDEA BANK\" : \"NORDEA BANK\",\n",
+    "    \"NORTHWESTERN\" : \"NORTHWESTERN MUT\",\n",
+    "    \"NXP B V\" : \"NXP B V\",\n",
+    "    \"PHILLIPS 66\" : \"PHILLIPS 66\",\n",
+    "    \"PRINCIPAL LIFE GLOBAL\" : \"PRINCIPAL LIFE GLOBAL FDG\",\n",
+    "    \"PROTECTIVE LIFE G\" : \"PROTECTIVE LIFE GLOBAL\",\n",
+    "    \"PUBLIC SVC\" : \"PUBLIC SERVICE ELECTRIC & GAS\",\n",
+    "    \"RABOBANK NEDERLAND\" : \"RABOBANK NEDERLAND\",\n",
+    "    \"ROCHE H\" : \"ROCHE HOLDINGS INC\",\n",
+    "    \"ROPER \" : \"ROPER TECHNOLOGIES  INC\",\n",
+    "    \"ROYAL BANK OF CANADA\" : \"ROYAL BANK OF CANADA\",\n",
+    "    \"ROYAL BK CDA\" : \"ROYAL BANK OF CANADA\",\n",
+    "    \"SCHLUMBERGER\" : \"SCHLUMBERGER\", \n",
+    "    \"SIEMENS FINANCIERINGSMAA\" : \"SIEMENS FINANCIERINGSMAATSCHAP\",\n",
+    "    \"SIMON PPTY GROUP\" : \"SIMON PPTY GROUP\",\n",
+    "    \"STATE STR\" : \"STATE STREET CO\",\n",
+    "    \"SUMITOMO MITSUI\" : \"SUMITOMO MITSUI BANKING\",\n",
+    "    \"SWEDBANK AB\" : \"SWEDBANK AB\",\n",
+    "    \"TORONTO DOMINION B\" : \"TORONTO DOMINION BANK\",\n",
+    "    \"TOYOTA M\" : \"TOYOTA MOTOR\",\n",
+    "    \"TRANS-CDA PIPELINES\" : \"TRANS-CDA PIPELINES\",\n",
+    "    \"TRUIST \" : \"TRUIST BANK\",\n",
+    "    \"TSMC \" : \"TSMC GLOBAL LTD\",\n",
+    "    \"U S B\" : \"U S BANCORP\",\n",
+    "    \"UBS \" : \"UBS AG LONDON\",\n",
+    "    \"VENTAS REALTY\" : \"VENTAS REALTY LP\",\n",
+    "    \"VOLKSWAGEN GROUP\" : \"VOLKSWAGEN GROUP\",\n",
+    "    \"WESTPAC B\" : \"WESTPAC BANKING\",\n",
+    "}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "659c0ba7",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "company_names = set()\n",
+    "for i in cb_df.index:\n",
+    "    # Set the Company to be cleaned\n",
+    "    cb_df.at[i,'Company'] = cb_df.at[i,'Account or Security']\n",
+    "    if not np.isnan(cb_df.at[i,'Quantity']):\n",
+    "        # clean private placement\n",
+    "        for prefix in [\"PVTPL\", \"PVPTL\", \"PVYPL\", \"PVT PL\", \"PVPTL\"]:\n",
+    "            if prefix in cb_df.loc[i][\"Company\"]:\n",
+    "                cb_df.at[i,'Private Placement'] = True\n",
+    "                cb_df.at[i,'Company'] = cb_df.at[i,'Company'][6:].strip()\n",
+    "        for end in [\" CAP\", \" INC\", \" FDG\", \" CORP\", \" CO\", \" LLC\", \" CR\", \" SR\", \" A/S\", \" LP\", \" ASA\", \" LTD\", ]:\n",
+    "            if end in cb_df.at[i, \"Company\"]:\n",
+    "                cb_df.at[i, \"Company\"] = cb_df.at[i, 'Company'].split(end)[0].strip()+\" \"+end\n",
+    "        for token in ['%']:\n",
+    "            if token in cb_df.at[i, \"Company\"]:\n",
+    "                # get everythng before the token, then get eveything before the last space\n",
+    "                cb_df.at[i, \"Company\"] = cb_df.at[i, \"Company\"].split(token)[0].rsplit(' ', 1)[0].strip()\n",
+    "        for key, value in company_name_dict.items():\n",
+    "            if key in cb_df.at[i, \"Company\"]:\n",
+    "                cb_df.at[i, \"Company\"] = value\n",
+    "        company_names.add(cb_df.at[i, \"Company\"])\n",
+    "    else:\n",
+    "        cb_df.drop(i, axis=0)\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e7e06dfe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cb_df"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "8b78d9cd",
+   "metadata": {},
+   "source": [
+    "##### get ticker"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ba37103f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# taken from https://gist.github.com/bruhbruhroblox/dd9d981c8c37983f61e423a45085e063\n",
+    "def getTicker(company_name):\n",
+    "    yfinance = \"https://query2.finance.yahoo.com/v1/finance/search\"\n",
+    "    user_agent = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.0.0 Safari/537.36'\n",
+    "    params = {\"q\": company_name, \"quotes_count\": 1, \"country\": \"United States\"}\n",
+    "\n",
+    "    res = requests.get(url=yfinance, params=params, headers={'User-Agent': user_agent})\n",
+    "    data = res.json()\n",
+    "\n",
+    "    company_code = data['quotes'][0]['symbol']\n",
+    "    return company_code"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "bc68a7ab",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "def get_ticker(name):\n",
+    "    try:\n",
+    "        # try to get the ticker\n",
+    "        ticker = getTicker(name)\n",
+    "    except:\n",
+    "        try:\n",
+    "            # shorten the name and try again\n",
+    "            short_name = name.split(' ')[0]\n",
+    "            ticker = getTicker(short_name)\n",
+    "        except:\n",
+    "            # no ticker could be found, probably a private company, check by hand to make sure\n",
+    "            ticker = 'NO_TICKER_FOUND'\n",
+    "    return (name, ticker)\n",
+    "\n",
+    "company_name_to_ticker = dict()\n",
+    "with concurrent.futures.ThreadPoolExecutor() as executor:\n",
+    "    futures = [executor.submit(get_ticker, name) for name in company_names]\n",
+    "    for future in concurrent.futures.as_completed(futures):\n",
+    "        name, ticker = future.result()\n",
+    "        company_name_to_ticker[name] = ticker"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "e3009168",
+   "metadata": {},
+   "source": [
+    "##### match company name to ticker in DF"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "e8f64024",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "for i in cb_df.index:\n",
+    "    try:\n",
+    "        cb_df.at[i,'Ticker'] = company_name_to_ticker[cb_df.at[i,'Company']]\n",
+    "    except:\n",
+    "        assert cb_df.at[i,'Company'] == 'Corporate Bonds', f\"Expected Cororate Bonds, got {cb_df.at[i,'Company']}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "04e49491",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cb_df.head()"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "2342f360",
+   "metadata": {},
+   "source": [
+    "## Get info from ticker "
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6fe3d0df",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def get_info_from_ticker(ticker):\n",
+    "    # Search for the company on Yahoo Finance\n",
+    "    search_results = yf.Tickers(ticker)\n",
+    "    return search_results.tickers[ticker].info"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "9ed7c317",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "%%time\n",
+    "def get_info(name):\n",
+    "    try:\n",
+    "        ticker = company_name_to_ticker[name]\n",
+    "        info = get_info_from_ticker(ticker)\n",
+    "    except:\n",
+    "        info = 'No Info Found'\n",
+    "    return (name, info)\n",
+    "\n",
+    "## use parallelization to speed up this process\n",
+    "company_info_dict = dict()\n",
+    "with concurrent.futures.ThreadPoolExecutor() as executor:\n",
+    "    futures = [executor.submit(get_info, name) for name in company_names]\n",
+    "    for future in concurrent.futures.as_completed(futures):\n",
+    "        name, info = future.result()\n",
+    "        company_info_dict[name] = info"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "180e9785",
+   "metadata": {},
+   "source": [
+    "##### Link Info to Company, saved as a json dump in the dataframe"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "570e70b3",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "for i in cb_df.index:\n",
+    "    if cb_df.at[i,'Company'] == 'Corporate Bonds':\n",
+    "        continue\n",
+    "    if company_info_dict[cb_df.at[i,'Company']] is None:\n",
+    "        continue\n",
+    "    info_dict = dict(company_info_dict[cb_df.at[i,'Company']])\n",
+    "    json_str = json.dumps(my_dict)\n",
+    "    cb_df.at[i,'Info'] = json_str\n",
+    "\n",
+    "#         assert cb_df.at[i,'Company'] == 'Corporate Bonds', f\"Expected Cororate Bonds, got {cb_df.at[i,'Company']}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3bd1606d",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "cb_df.head()"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3 (ipykernel)",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.6"
+  },
+  "vscode": {
+   "interpreter": {
+    "hash": "916dbcbb3f70747c44a77c7bcd40155683ae19c65e1c03b4aa3499c5328201f1"
+   }
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}