diff options
| -rw-r--r-- | ui_security_inventory_23_parsing.ipynb | 109 |
1 files changed, 77 insertions, 32 deletions
diff --git a/ui_security_inventory_23_parsing.ipynb b/ui_security_inventory_23_parsing.ipynb index aef55ec..297203f 100644 --- a/ui_security_inventory_23_parsing.ipynb +++ b/ui_security_inventory_23_parsing.ipynb @@ -10,7 +10,9 @@ "import numpy as np\n", "import pandas as pd\n", "import yfinance as yf\n", - "import requests" + "import requests\n", + "import concurrent.futures\n", + "import json" ] }, { @@ -49,6 +51,7 @@ "op_df.insert(9, 'Industry', pd.NA)\n", "op_df.insert(10, 'Private Placement', False)\n", "op_df.insert(11, 'Ticker', pd.NA)\n", + "op_df.insert(11, 'Info', object)\n", "# op_df = op_df.insert(7, 'Bank', pd.NA)\n", "op_df.head()" ] @@ -305,13 +308,12 @@ { "cell_type": "code", "execution_count": null, - "id": "f2e232f5", + "id": "bc68a7ab", "metadata": {}, "outputs": [], "source": [ "%%time\n", - "company_name_to_ticker = dict()\n", - "for name in company_names:\n", + "def get_ticker(name):\n", " try:\n", " # try to get the ticker\n", " ticker = getTicker(name)\n", @@ -321,9 +323,16 @@ " short_name = name.split(' ')[0]\n", " ticker = getTicker(short_name)\n", " except:\n", - " # no ticker could be found, probably a private company, check to make sure\n", + " # no ticker could be found, probably a private company, check by hand to make sure\n", " ticker = 'NO_TICKER_FOUND'\n", - " company_name_to_ticker[name] = ticker" + " return (name, ticker)\n", + "\n", + "company_name_to_ticker = dict()\n", + "with concurrent.futures.ThreadPoolExecutor() as executor:\n", + " futures = [executor.submit(get_ticker, name) for name in company_names]\n", + " for future in concurrent.futures.as_completed(futures):\n", + " name, ticker = future.result()\n", + " company_name_to_ticker[name] = ticker" ] }, { @@ -337,76 +346,112 @@ { "cell_type": "code", "execution_count": null, - "id": "d80dca43", + "id": "e8f64024", "metadata": {}, "outputs": [], "source": [ - "company_name_to_ticker" + "for i in cb_df.index:\n", + " try:\n", + " cb_df.at[i,'Ticker'] = company_name_to_ticker[cb_df.at[i,'Company']]\n", + " except:\n", + " assert cb_df.at[i,'Company'] == 'Corporate Bonds', f\"Expected Cororate Bonds, got {cb_df.at[i,'Company']}\"" ] }, { "cell_type": "code", "execution_count": null, - "id": "921c344d", + "id": "04e49491", "metadata": {}, "outputs": [], "source": [ - "company_names" + "cb_df.head()" ] }, { - "cell_type": "code", - "execution_count": null, - "id": "7abeab84", + "cell_type": "markdown", + "id": "2342f360", "metadata": {}, - "outputs": [], "source": [ - "company_name_to_ticker[cb_df.at[104,'Company']]" + "## Get info from ticker " ] }, { "cell_type": "code", "execution_count": null, - "id": "e8f64024", + "id": "6fe3d0df", "metadata": {}, "outputs": [], "source": [ - "for i in cb_df.index:\n", - " try:\n", - " cb_df.at[i,'Ticker'] = company_name_to_ticker[cb_df.at[i,'Company']]\n", - " except:\n", - " assert cb_df.at[i,'Company'] == 'Corporate Bonds'" + "def get_info_from_ticker(ticker):\n", + " # Search for the company on Yahoo Finance\n", + " search_results = yf.Tickers(ticker)\n", + " return search_results.tickers[ticker].info" ] }, { "cell_type": "code", "execution_count": null, - "id": "04e49491", - "metadata": {}, + "id": "9ed7c317", + "metadata": { + "scrolled": true + }, "outputs": [], "source": [ - "cb_df.head()" + "%%time\n", + "def get_info(name):\n", + " try:\n", + " ticker = company_name_to_ticker[name]\n", + " info = get_info_from_ticker(ticker)\n", + " except:\n", + " info = 'No Info Found'\n", + " return (name, info)\n", + "\n", + "## use parallelization to speed up this process\n", + "company_info_dict = dict()\n", + "with concurrent.futures.ThreadPoolExecutor() as executor:\n", + " futures = [executor.submit(get_info, name) for name in company_names]\n", + " for future in concurrent.futures.as_completed(futures):\n", + " name, info = future.result()\n", + " company_info_dict[name] = info" ] }, { "cell_type": "markdown", - "id": "2342f360", + "id": "180e9785", "metadata": {}, "source": [ - "## Get info from ticker " + "##### Link Info to Company, saved as a json dump in the dataframe" ] }, { "cell_type": "code", "execution_count": null, - "id": "6fe3d0df", + "id": "570e70b3", + "metadata": { + "scrolled": true + }, + "outputs": [], + "source": [ + "for i in cb_df.index:\n", + " if cb_df.at[i,'Company'] == 'Corporate Bonds':\n", + " continue\n", + " if company_info_dict[cb_df.at[i,'Company']] is None:\n", + " continue\n", + " info_dict = dict(company_info_dict[cb_df.at[i,'Company']])\n", + " json_str = json.dumps(my_dict)\n", + " cb_df.at[i,'Info'] = json_str\n", + "\n", + "# assert cb_df.at[i,'Company'] == 'Corporate Bonds', f\"Expected Cororate Bonds, got {cb_df.at[i,'Company']}\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3bd1606d", "metadata": {}, "outputs": [], "source": [ - "def get_info_from_ticker(ticker):\n", - " # Search for the company on Yahoo Finance\n", - " search_results = yf.Tickers(company_name)\n", - " return search_results.tickers['T'].info" + "cb_df.head()" ] } ], @@ -426,7 +471,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.10.6" }, "vscode": { "interpreter": { |
