Added info to df; functionality is up to date

The notebook is in good shape and ready for a PR before dev night, but I'd still like to clean things up before then.
author: kennykos <“gkosmacher01@gmail.com”> 2023-02-15 14:27:15 -0600
committer: kennykos <“gkosmacher01@gmail.com”> 2023-02-15 14:27:15 -0600
commit: 49cbd4bbe57cb81c63c564343b8f85186ea58b21 (patch)
tree: fe573fc7cccfd4385dcf2a0a74f1cd12d79eca1f /ui_security_inventory_23_parsing.ipynb
parent: 8d42c3651a60339085e9bd94d282022052a16dcf (diff)
1 files changed, 77 insertions, 32 deletions
diff --git a/ui_security_inventory_23_parsing.ipynb b/ui_security_inventory_23_parsing.ipynb
index aef55ec..297203f 100644
--- a/ui_security_inventory_23_parsing.ipynb
+++ b/ui_security_inventory_23_parsing.ipynb
@@ -10,7 +10,9 @@
     "import numpy as np\n",
     "import pandas as pd\n",
     "import yfinance as yf\n",
-    "import requests"
+    "import requests\n",
+    "import concurrent.futures\n",
+    "import json"
    ]
   },
   {
@@ -49,6 +51,7 @@
     "op_df.insert(9, 'Industry', pd.NA)\n",
     "op_df.insert(10, 'Private Placement', False)\n",
     "op_df.insert(11, 'Ticker', pd.NA)\n",
+    "op_df.insert(11, 'Info', pd.NA)  # empty placeholder, like the other new columns\n",
     "# op_df = op_df.insert(7, 'Bank', pd.NA)\n",
     "op_df.head()"
    ]
@@ -305,13 +308,12 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "f2e232f5",
+   "id": "bc68a7ab",
    "metadata": {},
    "outputs": [],
    "source": [
     "%%time\n",
-    "company_name_to_ticker = dict()\n",
-    "for name in company_names:\n",
+    "def get_ticker(name):\n",
     "    try:\n",
     "        # try to get the ticker\n",
     "        ticker = getTicker(name)\n",
@@ -321,9 +323,16 @@
     "            short_name = name.split(' ')[0]\n",
     "            ticker = getTicker(short_name)\n",
     "        except:\n",
-    "            # no ticker could be found, probably a private company, check to make sure\n",
+    "            # no ticker could be found, probably a private company, check by hand to make sure\n",
     "            ticker = 'NO_TICKER_FOUND'\n",
-    "    company_name_to_ticker[name] = ticker"
+    "    return (name, ticker)\n",
+    "\n",
+    "company_name_to_ticker = dict()\n",
+    "with concurrent.futures.ThreadPoolExecutor() as executor:\n",
+    "    futures = [executor.submit(get_ticker, name) for name in company_names]\n",
+    "    for future in concurrent.futures.as_completed(futures):\n",
+    "        name, ticker = future.result()\n",
+    "        company_name_to_ticker[name] = ticker"
    ]
   },
   {
@@ -337,76 +346,112 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "d80dca43",
+   "id": "e8f64024",
    "metadata": {},
    "outputs": [],
    "source": [
-    "company_name_to_ticker"
+    "for i in cb_df.index:\n",
+    "    try:\n",
+    "        cb_df.at[i,'Ticker'] = company_name_to_ticker[cb_df.at[i,'Company']]\n",
+    "    except KeyError:  # only a company name missing from the mapping is expected here\n",
+    "        assert cb_df.at[i,'Company'] == 'Corporate Bonds', f\"Expected Corporate Bonds, got {cb_df.at[i,'Company']}\""
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "921c344d",
+   "id": "04e49491",
    "metadata": {},
    "outputs": [],
    "source": [
-    "company_names"
+    "cb_df.head()"
    ]
   },
   {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "7abeab84",
+   "cell_type": "markdown",
+   "id": "2342f360",
    "metadata": {},
-   "outputs": [],
    "source": [
-    "company_name_to_ticker[cb_df.at[104,'Company']]"
+    "## Get info from ticker "
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "e8f64024",
+   "id": "6fe3d0df",
    "metadata": {},
    "outputs": [],
    "source": [
-    "for i in cb_df.index:\n",
-    "    try:\n",
-    "        cb_df.at[i,'Ticker'] = company_name_to_ticker[cb_df.at[i,'Company']]\n",
-    "    except:\n",
-    "        assert cb_df.at[i,'Company'] == 'Corporate Bonds'"
+    "def get_info_from_ticker(ticker):\n",
+    "    # Query the single symbol directly: yf.Tickers upper-cases and splits its\n",
+    "    # input, so tickers[ticker] can raise KeyError for the raw string\n",
+    "    return yf.Ticker(ticker).info"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "04e49491",
-   "metadata": {},
+   "id": "9ed7c317",
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [],
    "source": [
-    "cb_df.head()"
+    "%%time\n",
+    "def get_info(name):\n",
+    "    try:\n",
+    "        ticker = company_name_to_ticker[name]\n",
+    "        info = get_info_from_ticker(ticker)\n",
+    "    except Exception:  # best effort: any lookup failure yields the sentinel string\n",
+    "        info = 'No Info Found'\n",
+    "    return (name, info)\n",
+    "\n",
+    "## use parallelization to speed up this process\n",
+    "company_info_dict = dict()\n",
+    "with concurrent.futures.ThreadPoolExecutor() as executor:\n",
+    "    futures = [executor.submit(get_info, name) for name in company_names]\n",
+    "    for future in concurrent.futures.as_completed(futures):\n",
+    "        name, info = future.result()\n",
+    "        company_info_dict[name] = info"
    ]
   },
   {
    "cell_type": "markdown",
-   "id": "2342f360",
+   "id": "180e9785",
    "metadata": {},
    "source": [
-    "## Get info from ticker "
+    "##### Link Info to Company, saved as a json dump in the dataframe"
    ]
   },
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "6fe3d0df",
+   "id": "570e70b3",
+   "metadata": {
+    "scrolled": true
+   },
+   "outputs": [],
+   "source": [
+    "for i in cb_df.index:\n",
+    "    if cb_df.at[i,'Company'] == 'Corporate Bonds':\n",
+    "        continue\n",
+    "    if isinstance(company_info_dict.get(cb_df.at[i,'Company']), (str, type(None))):\n",
+    "        continue  # nothing usable: missing entry, None, or the 'No Info Found' string\n",
+    "    info_dict = dict(company_info_dict[cb_df.at[i,'Company']])\n",
+    "    json_str = json.dumps(info_dict)\n",
+    "    cb_df.at[i,'Info'] = json_str\n",
+    "\n",
+    "#         assert cb_df.at[i,'Company'] == 'Corporate Bonds', f\"Expected Cororate Bonds, got {cb_df.at[i,'Company']}\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "3bd1606d",
    "metadata": {},
    "outputs": [],
    "source": [
-    "def get_info_from_ticker(ticker):\n",
-    "    # Search for the company on Yahoo Finance\n",
-    "    search_results = yf.Tickers(company_name)\n",
-    "    return search_results.tickers['T'].info"
+    "cb_df.head()"
    ]
   }
  ],
@@ -426,7 +471,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.10.9"
+   "version": "3.10.6"
   },
   "vscode": {
    "interpreter": {
author	kennykos <“gkosmacher01@gmail.com”>	2023-02-15 14:27:15 -0600
committer	kennykos <“gkosmacher01@gmail.com”>	2023-02-15 14:27:15 -0600
commit	49cbd4bbe57cb81c63c564343b8f85186ea58b21 (patch)
tree	fe573fc7cccfd4385dcf2a0a74f1cd12d79eca1f /ui_security_inventory_23_parsing.ipynb
parent	8d42c3651a60339085e9bd94d282022052a16dcf (diff)