Блог им. SergeySolod

Interactive Brokers, дивиденды: готовим данные для налоговой декларации 3-НДФЛ

Осталось всего несколько дней, чтобы подать налоговую декларацию.
Для тех счастливчиков, кто получал дивиденды через Interactive Brokers и немного знаком с Python, выкладываю несколько строк кода, которые приводят данные из отчёта Activity-Annual этого брокера к виду, (почти) готовому для переноса в российскую декларацию.
# -*- coding: utf-8 -*-
import io
import os
import xml.etree.ElementTree as et

import lxml.html as lh
import numpy as np
import pandas as pd
#%%
path_curencies_rates = os.path.join('..//', 'Market_Data')  # folder where data is kept

# Workbooks with RUB rates against USD and EUR for 2020
rub_USD_2020 = 'USD_RUB_exchange_rate_20200101-20201231.xlsx'
rub_EUR_2020 = 'EUR_RUB_exchange_rate_20200101-20201231.xlsx'

# every calendar day of 2020; used as the index of the combined rate table
dates = pd.date_range(start='1/1/2020', end='12/31/2020')

# The second sheet column becomes the index and only column `curs` is kept.
# NOTE(review): presumably the index column holds the quote date and `curs`
# the rate -- confirm against the downloaded workbooks.
# USD
df_rub_USD_CBRF = pd.read_excel(os.path.join(path_curencies_rates, rub_USD_2020), index_col=1)
df_rub_USD_CBRF = df_rub_USD_CBRF[['curs']]
# EUR
df_rub_EUR_CBRF = pd.read_excel(os.path.join(path_curencies_rates, rub_EUR_2020), index_col=1)
df_rub_EUR_CBRF = df_rub_EUR_CBRF[['curs']]

# dataframe where indices are dates of 2020 and columns are rates RUB_EUR and RUB_USD;
# passing `index=dates` aligns both series to the full calendar, leaving NaN
# on days that are absent from the workbooks
rub_currencies_rates = pd.DataFrame(
    {'EUR': df_rub_EUR_CBRF['curs'], 'USD': df_rub_USD_CBRF['curs']},
    index=dates,
)

# fill empty dates with the latest preceding rate
# (days before the first available quote stay NaN)
rub_currencies_rates = rub_currencies_rates.ffill()
#%%
annual_activity_statement = 'Activity-Annual_2020_2020.htm'
table_name = 'Dividends'  # table that contains dividends only
report_full = lh.parse(annual_activity_statement)
# the node of interest follows the <div> whose text contains the table name
parent = report_full.xpath(".//div[contains(text(), '{}')]".format(table_name))[0].getnext()
element = parent.getchildren()[0].getchildren()[0]
# `element` is an lxml node, so serialize it with lxml itself and hand pandas
# a file-like object (passing literal HTML to read_html is deprecated)
html = lh.tostring(element, encoding='unicode')
div_df = pd.read_html(io.StringIO(html))[0]
div_df['Amount'] = pd.to_numeric(div_df['Amount'], errors='coerce')
# keep dates as normalized Timestamps so they match the DatetimeIndex of the rate table
div_df['Date'] = pd.to_datetime(div_df['Date'], errors='coerce').dt.normalize()
# rows without a parsable date or amount are dropped;
# .copy() so that the column assignments below do not write into a slice
div_df = div_df.dropna(subset=['Date', 'Amount']).copy()

# If nominated in EUR, taxation was NOT applied
# NOTE(review): a plain substring match also fires on tickers containing "EUR" -- confirm
is_eur = div_df['Description'].str.contains('EUR')
div_df['Currency'] = np.where(is_eur, 'EUR', 'USD')

# NOTE(review): assumes 10% was withheld at source on every non-EUR dividend -- confirm
div_df['Taxes_paid'] = np.where(is_eur, 0, (div_df['Amount'] * 0.1).round(decimals=2))

# date -> RUB rate lookups, one per currency
eur_rub_dict = rub_currencies_rates.EUR.to_dict()
usd_rub_dict = rub_currencies_rates.USD.to_dict()

div_EUR = div_df[div_df['Currency'] == 'EUR'].copy()
div_EUR['rate_RUB'] = div_EUR['Date'].map(eur_rub_dict)

div_USD = div_df[div_df['Currency'] == 'USD'].copy()
div_USD['rate_RUB'] = div_USD['Date'].map(usd_rub_dict)

# EUR rows first, then USD rows
div_total = pd.concat([div_EUR, div_USD], axis=0)

# 13% of the dividend, less the tax already withheld, converted to RUB
div_total['RUS_tax'] = div_total['Amount'] * 0.13
div_total['Taxes_TO_pay'] = (
    (div_total['RUS_tax'] - div_total['Taxes_paid']) * div_total['rate_RUB']
).round(decimals=2)

# show the total explicitly: a bare expression prints nothing when run as a script
print('Total tax to pay, RUB:', round(div_total['Taxes_TO_pay'].sum(), 1))

# keep only the part of the description that precedes ' Cash'
div_total['Description'] = div_total['Description'].str.split(' Cash').str[0]

# dates as DD.MM.YYYY strings
div_total['Date'] = div_total['Date'].dt.strftime('%d.%m.%Y')

#%%
# To fill in field `'ОКСМ'` of Rus Tax Form ISIN will be used,
# it starts with two alphabetic characters, which are the ISO 3166-1 alpha-2 code for the issuing country.
# DataBase is `'country_ISO_codes.csv'`
# keep_default_na=False: otherwise pandas parses the alpha-2 code "NA" (Namibia) as NaN.
# dtype=str and zfill(3): ISO 3166-1 numeric codes are three digits, so leading
# zeros must survive ("036", not 36).
country_codes = pd.read_csv('country_ISO_codes.csv', dtype=str, keep_default_na=False)
country_codes = {
    alpha_2: numeric.zfill(3)
    for alpha_2, numeric in zip(country_codes['alpha-2'], country_codes['country-code'])
    if alpha_2 and numeric  # skip rows where either code is blank
}

def assign_country_code(name, country_codes):
    """Return the numeric ISO country code for an IBKR dividend description.

    The text between the first '(' and the next ')' is treated as the ISIN;
    its first two characters are the ISO 3166-1 alpha-2 country code, which
    is looked up in ``country_codes``.

    Parameters:
        name : string, field Description from IBKR report;
        country_codes : dictionary, keys are ISO Alpha Codes, values are
            corresponding ISO Num Codes.

    Returns:
        The value stored in ``country_codes`` for the country prefix, or
        ``name`` unchanged when it is not a string, has no '(...)' pair,
        or the prefix is not a known key.
    """
    if not isinstance(name, str):
        # e.g. NaN from a missing description: nothing to parse
        return name

    opening = name.find('(')
    if opening == -1:
        return name
    # search for the closing bracket only after the opening one
    closing = name.find(')', opening + 1)
    if closing == -1:
        return name

    isin = name[opening + 1:closing]
    country_Alpha_code = isin[0:2]
    try:
        return country_codes[country_Alpha_code]
    except KeyError:
        # unknown prefix: hand the description back so the row is easy to spot
        return name

# resolve every description to its country code via the ISIN prefix
div_total['Country_Num_Code'] = div_total['Description'].map(
    lambda description: assign_country_code(description, country_codes=country_codes)
)

#%%
# get all attributes of Rus Tax Form as a list
path_to_form = 'Tax_form_2020_draft_20210419.xml'
tree = et.parse(path_to_form)
root = tree.getroot()
# attribute names are taken from the first child of a 'ДоходИстИно' node
# (if several such nodes exist, the last one that has children wins)
attributes_Rus_Tax_Form = None
for el in root.iter('ДоходИстИно'):
    if len(el):
        attributes_Rus_Tax_Form = list(el[0].attrib.keys())
if attributes_Rus_Tax_Form is None:
    # fail here with a clear message instead of a NameError further down
    raise ValueError(
        "No 'ДоходИстИно' element with child records found in {}".format(path_to_form)
    )

# DataFrame Rus Tax Form with indices from div_total
df_rus_tax_form = pd.DataFrame(index=div_total.index, columns=attributes_Rus_Tax_Form)
# fill all fields in with data from corresponding columns
# do some calculations as well
df_rus_tax_form['ОКСМ'] = div_total.Country_Num_Code
df_rus_tax_form['НаимИстДох'] = div_total.Description
df_rus_tax_form['КодВалют'] = np.where(div_total.Currency == 'EUR', '978', '840')
df_rus_tax_form['КодВидДох'] = '22'  # check it out
df_rus_tax_form['КурсВалютДох'] = div_total.rate_RUB
df_rus_tax_form['ДатаДох'] = div_total.Date
df_rus_tax_form['ДатаУплНал'] = div_total.Date
df_rus_tax_form['ДоходИноВал'] = div_total.Amount
df_rus_tax_form['ДоходИноРуб'] = (
    df_rus_tax_form['ДоходИноВал'] * df_rus_tax_form['КурсВалютДох']
).round(decimals=2)
df_rus_tax_form['КурсВалютНал'] = div_total.rate_RUB
df_rus_tax_form['НалУплИноВал'] = div_total.Taxes_paid
df_rus_tax_form['НалУплИноРуб'] = (
    df_rus_tax_form['НалУплИноВал'] * df_rus_tax_form['КурсВалютДох']
).round(decimals=2)
# astype(int) truncates towards zero (it does not round)
df_rus_tax_form['НалЗачРФОбщ'] = (df_rus_tax_form['ДоходИноРуб'] * 0.13).astype(int)
df_rus_tax_form['НалогЗачРФОбщ'] = df_rus_tax_form['НалУплИноРуб'].astype(int)

# all data in a xml-object should be str
# (column-wise Series.map replaces the deprecated DataFrame.applymap)
# NOTE(review): template attributes that were not filled above are exported as 'nan'
form_to_export = df_rus_tax_form.apply(lambda column: column.map(str))

# create a root
income_abroad = et.Element('ДоходИстИно')
tree = et.ElementTree(income_abroad)

# add children with a relevant tag
# each child has attributes that is a dict representing a row from DataFrame
for _, row in form_to_export.iterrows():
    # tag and attrib are passed positionally, which every ElementTree implementation accepts
    et.SubElement(income_abroad, 'РасчДохНалИно', row.to_dict())

tree.write('rus_tax_form_experiment.xml', encoding='WINDOWS-1251')


Файлы 
USD_RUB_exchange_rate_20200101-20201231.xlsx
EUR_RUB_exchange_rate_20200101-20201231.xlsx
можно скачать с сайта ЦБ.
Tax_form_2020_draft_20210419.xml — моя 3-НДФЛ, сохраненная из налогового личного кабинета. 
country_ISO_codes.csv — коды стран, файлом могу поделиться.

За критику и правки буду благодарен.
★1
3 комментария
Курсы можно динамически подгружать через API сайта ЦБ
avatar
Спасибо.
avatar
спасибо!
avatar

теги блога Sergey Solod

....все тэги



UPDONW