Блог им. SergeySolod
# -*- coding: utf-16 -*- import os import pandas as pd import xml.etree.ElementTree as et import lxml.html as lh import numpy as np #%% path_curencies_rates = os.path.join('..//', 'Market_Data')# folder where data is kept # RUB against USD and EUR rub_USD_2020 = 'USD_RUB_exchange_rate_20200101-20201231.xlsx' rub_EUR_2020 = 'EUR_RUB_exchange_rate_20200101-20201231.xlsx' # dataframe where indicies are dates of 2020 and columns are rates RUB_USD and RUB_EUR # all dates of 2020 dates = pd.date_range(start='1/1/2020', end='12/31/2020') rub_currencies_rates = pd.DataFrame(index=dates, columns=['EUR', 'USD']) # USD df_rub_USD_CBRF = pd.read_excel(os.path.join(path_curencies_rates, rub_USD_2020), index_col=1) df_rub_USD_CBRF = df_rub_USD_CBRF[['curs']] # EUR df_rub_EUR_CBRF = pd.read_excel(os.path.join(path_curencies_rates, rub_EUR_2020), index_col=1) df_rub_EUR_CBRF = df_rub_EUR_CBRF[['curs']] rub_currencies_rates.USD = df_rub_USD_CBRF.curs rub_currencies_rates.EUR = df_rub_EUR_CBRF.curs # fill empty dates rub_currencies_rates.EUR = pd.DataFrame.ffill(rub_currencies_rates.EUR) rub_currencies_rates.USD = pd.DataFrame.ffill(rub_currencies_rates.USD) <br /><br />#%% annual_activity_statement = 'Activity-Annual_2020_2020.htm' table_name = 'Dividends' # table that contains dividends only report_full = lh.parse(annual_activity_statement) parent = report_full.xpath(".//div[contains(text(), '{}')]".format(table_name))[0].getnext() element = parent.getchildren()[0].getchildren()[0] html = et.tostring(element) div_df = pd.read_html(html)[0] div_df.Amount = pd.to_numeric(div_df.Amount, errors='coerce') div_df.Date = pd.to_datetime(div_df.Date, errors='coerce').dt.date div_df = div_df.dropna(subset=['Date']) div_df = div_df[div_df.Amount.notnull()] # If nominated in EUR, taxation was NOT applyed div_df['Currency'] = np.where(div_df.Description.str.contains('EUR'), 'EUR', 'USD') div_df['Taxes_paid'] = np.where(div_df.Description.str.contains('EUR'), 0, (div_df.Amount*0.1).round(decimals=2)) eur_rub_dict = rub_currencies_rates.EUR.to_dict() usd_rub_dict = rub_currencies_rates.USD.to_dict() # eur_rub_dict div_EUR = div_df[div_df.Currency=='EUR'] div_EUR['rate_RUB'] = div_EUR['Date'].map(eur_rub_dict) div_USD = div_df[div_df.Currency=='USD'] div_USD['rate_RUB'] = 0 div_USD['rate_RUB'] = div_USD['Date'].map(usd_rub_dict)#rub_currencies_rates.USD div_total = pd.concat([div_EUR, div_USD], axis=0) div_total['RUS_tax'] = div_total.Amount * 0.13 div_total['Taxes_TO_pay'] = ((div_total.RUS_tax - div_total.Taxes_paid) * div_total.rate_RUB).round(decimals=2) div_total.Taxes_TO_pay.sum().round(decimals=1) div_total.Description = div_total.Description.str.split(' Cash').str[0] for i, d in div_total.Date.iteritems(): ds = d.strftime('%d.%m.%Y') div_total.at[i, 'Date'] = ds #%% # To fill in field `'ОКСМ'` of Rus Tax Form ISIN will be used, # it consists of two alphabetic characters, which are the ISO 3166-1 alpha-2 code for the issuing country. # DataBase is `'country_ISO_codes.csv'` country_codes = pd.read_csv('country_ISO_codes.csv') country_codes = dict(zip(country_codes['alpha-2'], country_codes['country-code'])) def assign_country_code(name, country_codes): ''' Returns a country numeric ICO code. Two first symbols of ISIN represent Country ISO-3166 Alpha Code. Parameters: name : string, field Description from IBKR report; country_codes : dictionary, keys are ISO Alpha Codes, values are corresponding ISO Num Codes. ''' if '(' and ')' in name: start = name.find('(') + 1 end = name.find(')') isin = name[start : end] country_Alpha_code = isin[0:2] try: country_Num_code = country_codes[country_Alpha_code] except Exception as e: country_Num_code = name return country_Num_code return name div_total['Country_Num_Code'] = div_total['Description'].apply(assign_country_code, country_codes=country_codes) #%% # get all attributes of Rus Tax Form as a list path_to_form = 'Tax_form_2020_draft_20210419.xml' tree = et.parse(path_to_form) root = tree.getroot() for el in root.iter('ДоходИстИно'): for child in el[0:1]: attributes_Rus_Tax_Form = list(child.attrib.keys()) # DataFrame Rus Tax Form with indices from div_total df_rus_tax_form = pd.DataFrame(index=div_total.index, columns=attributes_Rus_Tax_Form) # fill all fields in with data from corresponing columns # do some calculations as well df_rus_tax_form['ОКСМ'] = div_total.Country_Num_Code df_rus_tax_form['НаимИстДох'] = div_total.Description df_rus_tax_form['КодВалют'] = np.where(div_total.Currency=='EUR', '978', '840') df_rus_tax_form['КодВидДох'] = '22' # check it out df_rus_tax_form['КурсВалютДох'] = div_total.rate_RUB df_rus_tax_form['ДатаДох'] = div_total.Date df_rus_tax_form['ДатаУплНал'] = div_total.Date df_rus_tax_form['ДоходИноВал'] = div_total.Amount df_rus_tax_form['ДоходИноРуб'] = (df_rus_tax_form['ДоходИноВал'] * df_rus_tax_form['КурсВалютДох']).round(decimals=2) df_rus_tax_form['КурсВалютНал'] = div_total.rate_RUB df_rus_tax_form['НалУплИноВал'] = div_total.Taxes_paid df_rus_tax_form['НалУплИноРуб'] = (df_rus_tax_form['НалУплИноВал'] * df_rus_tax_form['КурсВалютДох']).round(decimals=2) df_rus_tax_form['НалЗачРФОбщ'] = (df_rus_tax_form['ДоходИноРуб'] * 0.13).astype(int) df_rus_tax_form['НалогЗачРФОбщ'] = df_rus_tax_form['НалУплИноРуб'].astype(int) # all data in a xml-object should be str form_to_export = df_rus_tax_form.applymap(str) # create a root income_abroad = et.Element('ДоходИстИно') tree = et.ElementTree(income_abroad) # add children with a relevant tag # each child has attributes that is a dict representing a row from DataFrame for i, r in form_to_export.iterrows(): d = r.to_dict() et.SubElement(income_abroad, tag='РасчДохНалИно', attrib=d) tree.write('rus_tax_form_experiment.xml', encoding='WINDOWS-1251')Файлы
USD_RUB_exchange_rate_20200101-20201231.xlsx EUR_RUB_exchange_rate_20200101-20201231.xlsxможно скачать с сайта ЦБ.