summaryrefslogtreecommitdiff
path: root/ui_security_inventory_23_parsing.ipynb
diff options
context:
space:
mode:
authorkennykos <“gkosmacher01@gmail.com”>2023-02-15 14:27:15 -0600
committerkennykos <“gkosmacher01@gmail.com”>2023-02-15 14:27:15 -0600
commit49cbd4bbe57cb81c63c564343b8f85186ea58b21 (patch)
treefe573fc7cccfd4385dcf2a0a74f1cd12d79eca1f /ui_security_inventory_23_parsing.ipynb
parent8d42c3651a60339085e9bd94d282022052a16dcf (diff)
Added info to df; functionality is up to date
Notebook is good and ready for a PR before dev night, but I'd still like to clean things up before then
Diffstat (limited to 'ui_security_inventory_23_parsing.ipynb')
-rw-r--r--ui_security_inventory_23_parsing.ipynb109
1 files changed, 77 insertions, 32 deletions
diff --git a/ui_security_inventory_23_parsing.ipynb b/ui_security_inventory_23_parsing.ipynb
index aef55ec..297203f 100644
--- a/ui_security_inventory_23_parsing.ipynb
+++ b/ui_security_inventory_23_parsing.ipynb
@@ -10,7 +10,9 @@
"import numpy as np\n",
"import pandas as pd\n",
"import yfinance as yf\n",
- "import requests"
+ "import requests\n",
+ "import concurrent.futures\n",
+ "import json"
]
},
{
@@ -49,6 +51,7 @@
"op_df.insert(9, 'Industry', pd.NA)\n",
"op_df.insert(10, 'Private Placement', False)\n",
"op_df.insert(11, 'Ticker', pd.NA)\n",
+ "op_df.insert(11, 'Info', pd.NA)  # placeholder until the JSON info string is filled in\n",
"# op_df = op_df.insert(7, 'Bank', pd.NA)\n",
"op_df.head()"
]
@@ -305,13 +308,12 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "f2e232f5",
+ "id": "bc68a7ab",
"metadata": {},
"outputs": [],
"source": [
"%%time\n",
- "company_name_to_ticker = dict()\n",
- "for name in company_names:\n",
+ "def get_ticker(name):\n",
" try:\n",
" # try to get the ticker\n",
" ticker = getTicker(name)\n",
@@ -321,9 +323,16 @@
" short_name = name.split(' ')[0]\n",
" ticker = getTicker(short_name)\n",
" except:\n",
- " # no ticker could be found, probably a private company, check to make sure\n",
+ " # no ticker could be found, probably a private company, check by hand to make sure\n",
" ticker = 'NO_TICKER_FOUND'\n",
- " company_name_to_ticker[name] = ticker"
+ " return (name, ticker)\n",
+ "\n",
+ "company_name_to_ticker = {}  # company name -> ticker, filled as lookups finish\n",
+ "with concurrent.futures.ThreadPoolExecutor() as pool:\n",
+ "    pending = [pool.submit(get_ticker, company) for company in company_names]\n",
+ "    for finished in concurrent.futures.as_completed(pending):\n",
+ "        name, ticker = finished.result()  # get_ticker returns (name, ticker)\n",
+ "        company_name_to_ticker[name] = ticker"
]
},
{
@@ -337,76 +346,112 @@
{
"cell_type": "code",
"execution_count": null,
- "id": "d80dca43",
+ "id": "e8f64024",
"metadata": {},
"outputs": [],
"source": [
- "company_name_to_ticker"
+ "for i in cb_df.index:\n",
+ "    try:\n",
+ "        cb_df.at[i,'Ticker'] = company_name_to_ticker[cb_df.at[i,'Company']]\n",
+ "    except KeyError:  # only a missing company name is expected here (section header rows)\n",
+ "        assert cb_df.at[i,'Company'] == 'Corporate Bonds', f\"Expected Corporate Bonds, got {cb_df.at[i,'Company']}\""
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "921c344d",
+ "id": "04e49491",
"metadata": {},
"outputs": [],
"source": [
- "company_names"
+ "cb_df.head()"
]
},
{
- "cell_type": "code",
- "execution_count": null,
- "id": "7abeab84",
+ "cell_type": "markdown",
+ "id": "2342f360",
"metadata": {},
- "outputs": [],
"source": [
- "company_name_to_ticker[cb_df.at[104,'Company']]"
+ "## Get info from ticker "
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "e8f64024",
+ "id": "6fe3d0df",
"metadata": {},
"outputs": [],
"source": [
- "for i in cb_df.index:\n",
- " try:\n",
- " cb_df.at[i,'Ticker'] = company_name_to_ticker[cb_df.at[i,'Company']]\n",
- " except:\n",
- " assert cb_df.at[i,'Company'] == 'Corporate Bonds'"
+ "def get_info_from_ticker(ticker):\n",
+ "    \"\"\"Return the Yahoo Finance `.info` record for a single ticker symbol.\"\"\"\n",
+ "    # One symbol only, so use yf.Ticker; avoids indexing yf.Tickers(...).tickers by the raw string\n",
+ "    return yf.Ticker(ticker).info"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "04e49491",
- "metadata": {},
+ "id": "9ed7c317",
+ "metadata": {
+ "scrolled": true
+ },
"outputs": [],
"source": [
- "cb_df.head()"
+ "%%time\n",
+ "def get_info(name):\n",
+ "    \"\"\"Return (name, info); info is the string 'No Info Found' when the lookup fails.\"\"\"\n",
+ "    try:\n",
+ "        info = get_info_from_ticker(company_name_to_ticker[name])\n",
+ "    except Exception:  # best-effort lookup; unlike a bare except this lets KeyboardInterrupt through\n",
+ "        info = 'No Info Found'\n",
+ "    return (name, info)\n",
+ "\n",
+ "## use parallelization to speed up this process\n",
+ "company_info_dict = {}  # company name -> info (or the 'No Info Found' sentinel)\n",
+ "with concurrent.futures.ThreadPoolExecutor() as pool:\n",
+ "    pending = [pool.submit(get_info, company) for company in company_names]\n",
+ "    for finished in concurrent.futures.as_completed(pending):\n",
+ "        name, info = finished.result()  # get_info returns (name, info)\n",
+ "        company_info_dict[name] = info"
]
},
{
"cell_type": "markdown",
- "id": "2342f360",
+ "id": "180e9785",
"metadata": {},
"source": [
- "## Get info from ticker "
+ "##### Link Info to Company, saved as a json dump in the dataframe"
]
},
{
"cell_type": "code",
"execution_count": null,
- "id": "6fe3d0df",
+ "id": "570e70b3",
+ "metadata": {
+ "scrolled": true
+ },
+ "outputs": [],
+ "source": [
+ "for i in cb_df.index:\n",
+ "    if cb_df.at[i,'Company'] == 'Corporate Bonds':\n",
+ "        continue\n",
+ "    info = company_info_dict.get(cb_df.at[i,'Company'])\n",
+ "    # get_info stores the string 'No Info Found' on failure; skip that and missing entries\n",
+ "    if info is None or isinstance(info, str):\n",
+ "        continue\n",
+ "    cb_df.at[i,'Info'] = json.dumps(dict(info))\n",
+ "\n",
+ "# assert cb_df.at[i,'Company'] == 'Corporate Bonds', f\"Expected Corporate Bonds, got {cb_df.at[i,'Company']}\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3bd1606d",
"metadata": {},
"outputs": [],
"source": [
- "def get_info_from_ticker(ticker):\n",
- " # Search for the company on Yahoo Finance\n",
- " search_results = yf.Tickers(company_name)\n",
- " return search_results.tickers['T'].info"
+ "cb_df.head()"
]
}
],
@@ -426,7 +471,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.9"
+ "version": "3.10.6"
},
"vscode": {
"interpreter": {