Here are examples of the Python API sqlalchemy.types.String taken from open source projects. By voting up you can indicate which examples are most useful and appropriate.
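Before the project examples, here is a minimal, self-contained sketch of how sqlalchemy.types.String is typically declared on a column. The table and column names are made up for illustration.

# Minimal usage sketch (hypothetical table/column names).
from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine

metadata = MetaData()
# String(50) renders as VARCHAR(50) on most backends; String() with no
# length is accepted where the backend allows an unbounded VARCHAR.
users = Table(
    "users",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("name", String(50), nullable=False),
)

engine = create_engine("sqlite://")
metadata.create_all(engine)  # emits: CREATE TABLE users (id INTEGER ..., name VARCHAR(50) NOT NULL, ...)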
140 Examples
Source : test_reflection.py
with MIT License
from analyzeDFIR
def test_varchar_reflection(self):
    typ = self._type_round_trip(sql_types.String(52))[0]
    assert isinstance(typ, sql_types.String)
    eq_(typ.length, 52)
@testing.requires.table_reflection
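The test above uses the suite's _type_round_trip helper; outside the test suite, the same check can be done with ordinary table reflection. A rough sketch, assuming a database that already contains a table "t" with a VARCHAR(52) column "x":

# Hedged sketch: reflect an existing table and inspect the String length (SQLAlchemy 1.4+ style).
from sqlalchemy import MetaData, Table, create_engine, types

engine = create_engine("sqlite:///example.db")  # assumed existing database
meta = MetaData()
t = Table("t", meta, autoload_with=engine)      # reflect the table definition
col_type = t.c.x.type
assert isinstance(col_type, types.String)
print(col_type.length)  # e.g. 52 for a VARCHAR(52) column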
Source : ConceptClass.py
with GNU General Public License v3.0
from bioinformatics-ua
def getDataTypesForSQL():
    return {
        'concept_class_id': sa.types.String,
        'concept_class_name': sa.types.String,
        'concept_class_concept_id': sa.types.BigInteger
    }
Source : Domain.py
with GNU General Public License v3.0
from bioinformatics-ua
def getDataTypesForSQL():
    return {
        'domain_id': sa.types.String,
        'domain_name': sa.types.String,
        'domain_concept_id': sa.types.BigInteger
    }
Source : Relationship.py
with GNU General Public License v3.0
from bioinformatics-ua
def getDataTypesForSQL():
    return {
        'relationship_id': sa.types.String,
        'relationship_name': sa.types.String,
        'is_hierarchical': sa.types.String,
        'defines_ancestry': sa.types.String,
        'reverse_relationship_id': sa.types.String,
        'relationship_concept_id': sa.types.BigInteger
    }
Source : Vocabulary.py
with GNU General Public License v3.0
from bioinformatics-ua
def getDataTypesForSQL():
    return {
        'vocabulary_id': sa.types.String,
        'vocabulary_name': sa.types.String,
        'vocabulary_reference': sa.types.String,
        'vocabulary_version': sa.types.String,
        'vocabulary_concept_id': sa.types.BigInteger
    }
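The dictionaries above map column names to SQLAlchemy types, which is the same shape pandas expects for the dtype argument of DataFrame.to_sql; presumably that is how this project consumes them. A hedged sketch of that pattern with made-up data:

# Sketch only: passing a name -> SQLAlchemy type mapping as pandas' dtype argument.
import pandas as pd
import sqlalchemy as sa

engine = sa.create_engine("sqlite://")
df = pd.DataFrame({
    "vocabulary_id": ["SNOMED"],
    "vocabulary_name": ["SNOMED CT"],
    "vocabulary_concept_id": [44819097],
})
dtype = {
    "vocabulary_id": sa.types.String,
    "vocabulary_name": sa.types.String,
    "vocabulary_concept_id": sa.types.BigInteger,
}
# pandas accepts SQLAlchemy type classes or instances here.
df.to_sql("vocabulary", engine, if_exists="replace", index=False, dtype=dtype)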
Source : reversion_rights_factor.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def reversion_rights_factors_2_vnpy():
    table_name = 'wind_future_adj_factor'
    dtype = {
        'trade_date': Date,
        'instrument_id_main': String(20),
        'adj_factor_main': DOUBLE,
        'instrument_id_secondary': String(20),
        'adj_factor_secondary': DOUBLE,
        'instrument_type': String(20),
        'method': String(20),
    }
    engine_vnpy = engine_dic[config.DB_SCHEMA_VNPY]
    backup_to_db(table_name=table_name, new_engine=engine_vnpy, dtype=dtype)
if __name__ == "__main__":
Source : test_sqlite.py
with Apache License 2.0
from gethue
def test_on_conflict_clause_unique_constraint_from_column(self):
    meta = MetaData()
    t = Table(
        "n",
        meta,
        Column(
            "x", String(30), unique=True, sqlite_on_conflict_unique="FAIL"
        ),
    )
    self.assert_compile(
        CreateTable(t),
        "CREATE TABLE n (x VARCHAR(30), UNIQUE (x) ON CONFLICT FAIL)",
        dialect=sqlite.dialect(),
    )
def test_on_conflict_clause_unique_constraint(self):
Source : test_sqlite.py
with Apache License 2.0
from gethue
def test_on_conflict_clause_unique_constraint(self):
    meta = MetaData()
    t = Table(
        "n",
        meta,
        Column("id", Integer),
        Column("x", String(30)),
        UniqueConstraint("id", "x", sqlite_on_conflict="FAIL"),
    )
    self.assert_compile(
        CreateTable(t),
        "CREATE TABLE n (id INTEGER, x VARCHAR(30), "
        "UNIQUE (id, x) ON CONFLICT FAIL)",
        dialect=sqlite.dialect(),
    )
def test_on_conflict_clause_primary_key(self):
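Both tests assert on the compiled DDL string. The same output can be inspected directly by compiling CreateTable against the SQLite dialect; a small sketch, no database connection required:

# Sketch: render the ON CONFLICT DDL outside the test harness.
from sqlalchemy import Column, Integer, MetaData, String, Table, UniqueConstraint
from sqlalchemy.dialects import sqlite
from sqlalchemy.schema import CreateTable

meta = MetaData()
t = Table(
    "n",
    meta,
    Column("id", Integer),
    Column("x", String(30)),
    UniqueConstraint("id", "x", sqlite_on_conflict="FAIL"),
)
print(CreateTable(t).compile(dialect=sqlite.dialect()))
# CREATE TABLE n (id INTEGER, x VARCHAR(30), UNIQUE (id, x) ON CONFLICT FAIL)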
Source : test_types.py
with Apache License 2.0
from gethue
def test_convert_unicode_text_type(self):
    with testing.expect_deprecated(
        "The String.convert_unicode parameter is deprecated"
    ):
        eq_(types.String(convert_unicode=True).python_type, util.text_type)
class TypeAffinityTest(fixtures.TestBase):
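The convert_unicode flag exercised above is deprecated in modern SQLAlchemy: Python 3 strings are already Unicode, so plain String, or Unicode when you want to signal intent to the backend, is the usual replacement. A brief sketch with made-up column names:

# Sketch: modern replacement for String(convert_unicode=True).
from sqlalchemy import Column, Integer, MetaData, String, Table, Unicode

metadata = MetaData()
messages = Table(
    "messages",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("body", String(200)),    # str in, str out on Python 3
    Column("title", Unicode(200)),  # same behaviour, documents Unicode intent
)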
Source : test_sqlite.py
with MIT License
from sqlalchemy
def test_no_convert_unicode(self):
    """test no utf-8 encoding occurs"""
    dialect = sqlite.dialect()
    for t in (
        String(),
        sqltypes.CHAR(),
        sqltypes.Unicode(),
        sqltypes.UnicodeText(),
        String(),
        sqltypes.CHAR(),
        sqltypes.Unicode(),
        sqltypes.UnicodeText(),
    ):
        bindproc = t.dialect_impl(dialect).bind_processor(dialect)
        assert not bindproc or isinstance(bindproc("some string"), str)
class JSONTest(fixtures.TestBase):
Source : test_sqlite.py
with MIT License
from sqlalchemy
def test_function_default(self):
    t = Table(
        "t",
        self.metadata,
        Column("id", Integer, primary_key=True),
        Column("x", String(), server_default=func.lower("UPPERCASE")),
    )
    t.create(testing.db)
    with testing.db.begin() as conn:
        conn.execute(t.insert())
        conn.execute(t.insert().values(x="foobar"))
        eq_(
            conn.execute(select(t.c.x).order_by(t.c.id)).fetchall(),
            [("uppercase",), ("foobar",)],
        )
@testing.provide_metadata
Source : test_lambdas.py
with MIT License
from sqlalchemy
def user_address_fixture(self, metadata):
    users = Table(
        "users",
        metadata,
        Column("id", Integer, primary_key=True),
        Column("name", String(50)),
    )
    addresses = Table(
        "addresses",
        metadata,
        Column("id", Integer),
        Column("user_id", ForeignKey("users.id")),
        Column("email", String(50)),
    )
    return users, addresses
@testing.metadata_fixture()
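This fixture builds two tables whose name and email columns are String(50) and comes from SQLAlchemy's lambda-statement tests. A hedged sketch of how such tables are typically queried with lambda_stmt (the table definition repeats the fixture above; the function and parameter names are made up):

# Hedged sketch: querying a String(50) column through SQLAlchemy 1.4+ lambda statements.
from sqlalchemy import Column, Integer, MetaData, String, Table, lambda_stmt, select

metadata = MetaData()
users = Table(
    "users",
    metadata,
    Column("id", Integer, primary_key=True),
    Column("name", String(50)),
)

def select_user_by_name(connection, name):
    # lambda bodies are cached; "name" is pulled from the closure as a bound parameter
    stmt = lambda_stmt(lambda: select(users))
    stmt += lambda s: s.where(users.c.name == name)
    return connection.execute(stmt)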
Source : sqltypes.py
with MIT License
from tiangolo
def load_dialect_impl(self, dialect: Dialect) -> "types.TypeEngine[Any]":
    impl = cast(types.String, self.impl)
    if impl.length is None and dialect.name == "mysql":
        return dialect.type_descriptor(types.String(self.mysql_default_length))  # type: ignore
    return super().load_dialect_impl(dialect)
# Reference form SQLAlchemy docs: https://docs.sqlalchemy.org/en/14/core/custom_types.html#backend-agnostic-guid-type
# with small modifications
class GUID(TypeDecorator): # type: ignore
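The snippet above is the load_dialect_impl hook of a TypeDecorator whose impl is types.String. A condensed, hedged sketch of what the surrounding class looks like; the class name and the value of mysql_default_length are assumptions for illustration:

# Hedged sketch of a TypeDecorator around types.String.
from typing import Any, cast

from sqlalchemy import types
from sqlalchemy.engine import Dialect
from sqlalchemy.types import TypeDecorator


class AutoString(TypeDecorator):
    impl = types.String
    cache_ok = True
    mysql_default_length = 255  # assumed default

    def load_dialect_impl(self, dialect: Dialect) -> "types.TypeEngine[Any]":
        impl = cast(types.String, self.impl)
        if impl.length is None and dialect.name == "mysql":
            # MySQL needs an explicit VARCHAR length, so substitute one.
            return dialect.type_descriptor(types.String(self.mysql_default_length))
        return super().load_dialect_impl(dialect)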
Source : 51b92375e5c4_initial_shipyard_base.py
with Apache License 2.0
from airshipit
def upgrade():
    """
    Create the initial tables needed by shipyard
    26 character IDs are ULIDs. See: https://github.com/mdipierro/ulid
    """
    op.create_table(
        'actions',
        # ULID key for the action
        sa.Column('id', types.String(26), primary_key=True),
        # The name of the action invoked
        sa.Column('name', types.String(50), nullable=False),
        # The parameters passed by the user to the action
        sa.Column('parameters', pg.JSONB, nullable=True),
        # The DAG/workflow name used in airflow, if applicable
        sa.Column('dag_id', sa.Text, nullable=True),
        # The DAG/workflow execution time string from airflow, if applicable
        sa.Column('dag_execution_date', sa.Text, nullable=True),
        # The invoking user
        sa.Column('user', sa.Text, nullable=False),
        # Timestamp of when an action was invoked
        sa.Column('datetime',
                  types.TIMESTAMP(timezone=True),
                  server_default=func.now()),
        # The user provided or shipyard generated context marker
        sa.Column('context_marker', types.String(36), nullable=False)
    )
    op.create_table(
        'preflight_validation_failures',
        # ID (ULID) of the preflight validation failure
        sa.Column('id', types.String(26), primary_key=True),
        # The ID of action this failure is associated with
        sa.Column('action_id', types.String(26), nullable=False),
        # The common language name of the validation that failed
        sa.Column('validation_name', sa.Text, nullable=True),
        # The text indicating details of the failure
        sa.Column('details', sa.Text, nullable=True),
    )
    op.create_table(
        'action_command_audit',
        # ID (ULID) of the audit
        sa.Column('id', types.String(26), primary_key=True),
        # The ID of the action for this audit record
        sa.Column('action_id', types.String(26), nullable=False),
        # The text indicating command invoked
        sa.Column('command', sa.Text, nullable=False),
        # The user that invoked the command
        sa.Column('user', sa.Text, nullable=False),
        # Timestamp of when the command was invoked
        sa.Column('datetime',
                  types.TIMESTAMP(timezone=True),
                  server_default=func.now()),
    )
    op.create_table(
        'api_locks',
        # ID (ULID) of the lock
        sa.Column('id', types.String(26), primary_key=True),
        # The category/type of the lock
        sa.Column('lock_type', types.String(20), nullable=False),
        # Timestamp of when the lock was acquired
        sa.Column('datetime',
                  types.TIMESTAMP(timezone=True),
                  server_default=func.now(),
                  nullable=False),
        # Expires
        sa.Column('expires', types.Integer, nullable=False, default=60),
        # A marker if the lock is released
        sa.Column('released', types.Boolean, nullable=False, default=False),
        sa.Column('user', types.String(64), nullable=False),
        sa.Column('reference_id', types.String(36), nullable=False),
    )
def downgrade():
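The listing cuts off before the downgrade body. In Alembic migrations of this shape the downgrade usually just drops the tables created above; a hedged sketch, not the project's actual code:

# Hedged sketch of a matching downgrade; the real migration's body is not shown above.
def downgrade():
    op.drop_table('api_locks')
    op.drop_table('action_command_audit')
    op.drop_table('preflight_validation_failures')
    op.drop_table('actions')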
Source : 7486ddec1979_create_notes_table.py
with Apache License 2.0
from airshipit
def upgrade():
    op.create_table(
        'notes',
        # ULID key for the note
        sa.Column('note_id', types.String(26), primary_key=True),
        # The supplied association id used for lookup
        sa.Column('assoc_id', types.String(128), nullable=False),
        # The supplied subject of the note (what is this note about?)
        sa.Column('subject', types.String(128), nullable=False),
        # The supplied type of the subject (what kind of thing is the subject?)
        sa.Column('sub_type', types.String(128), nullable=False),
        # The text value of the note
        sa.Column('note_val', sa.Text, nullable=False),
        # The numeric verbosity level of the note (1-5)
        sa.Column('verbosity', types.Integer, nullable=False),
        # An optional URL containing more info for the note
        sa.Column('link_url', sa.Text, nullable=True),
        # Boolean if the link requires an X-Auth-Token header
        sa.Column('is_auth_link', types.Boolean, nullable=False),
        # The creation timestamp for the note
        sa.Column('note_timestamp',
                  types.TIMESTAMP(timezone=True),
                  server_default=func.now()),
    )
def downgrade():
Source : coin.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_coin_info(chain_param=None, ):
"""插入基础信息数据到 cmc_coin_v1_info"""
table_name = "cmc_coin_v1_info"
logging.info("更新 %s 开始", table_name)
has_table = engine_md.has_table(table_name)
# url = 'https://api.coinmarketcap.com/v2/listings/'
# dtype = {
# 'id': String(60),
# 'name': String(60),
# 'symbol': String(20),
# 'website_slug': String(60),
# }
url = 'https://api.coinmarketcap.com/v1/ticker/?limit=0'
dtype = {
'id': String(60),
'name': String(60),
'symbol': String(20),
'rank': Integer,
'price_usd': DOUBLE,
'price_btc': DOUBLE,
'24h_volume_usd': DOUBLE,
'market_cap_usd': DOUBLE,
'available_supply': DOUBLE,
'total_supply': DOUBLE,
'max_supply': DOUBLE,
'percent_change_1h': DOUBLE,
'percent_change_24h': DOUBLE,
'percent_change_7d': DOUBLE,
'last_updated': DATETIME,
}
rsp = requests.get(url)
if rsp.status_code != 200:
raise ValueError('请求 listings 相应失败')
json = rsp.json()
data_df = pd.DataFrame(json)
data_df['last_updated'] = data_df['last_updated'].apply(
lambda x: None if x is None else datetime.datetime.fromtimestamp(float(x)))
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype)
logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
# build_primary_key([table_name])
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
ADD PRIMARY KEY (`id`)""".format(table_name=table_name)
with with_db_session(engine_md) as session:
session.execute(create_pk_str)
# 更新 code_mapping 表
# update_from_info_table(table_name)
def rename_by_dic(name, names):
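In the dtype above, String(60) maps to VARCHAR(60); the trailing ALTER TABLE then makes id a NOT NULL VARCHAR(60) so it can serve as the primary key. On MySQL a String must carry an explicit length before its DDL can even be rendered. A quick sketch of that constraint, using only the dialect module (no database connection):

# Sketch: String needs an explicit length to render DDL on MySQL.
from sqlalchemy import String
from sqlalchemy.dialects import mysql

print(String(60).compile(dialect=mysql.dialect()))  # VARCHAR(60)
# String().compile(dialect=mysql.dialect()) raises CompileError, roughly:
# "VARCHAR requires a length on dialect mysql"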
Source : coin.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_coin_daily(chain_param=None, id_set=None, begin_time=None):
"""插入历史数据到 cmc_coin_v1_daily 试用 v1 接口,该接口可能在2018年12月底到期"""
table_name = "cmc_coin_v1_daily"
info_table_name = "cmc_coin_v1_info"
logging.info("更新 %s 开始", table_name)
has_table = engine_md.has_table(table_name)
if has_table:
sql_str = """
SELECT id, symbol, date_frm, if(delist_date < end_date, delist_date, end_date) date_to
FROM
(
SELECT info.id, symbol, ifnull(trade_date,date('2013-04-28')) date_frm, null delist_date,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
FROM
{info_table_name} info
LEFT OUTER JOIN
(SELECT id, adddate(max(date),1) trade_date FROM {table_name} GROUP BY id) daily
ON info.id = daily.id
) tt
WHERE date_frm <= if(delist_date < end_date, delist_date, end_date)
ORDER BY id""".format(table_name=table_name, info_table_name=info_table_name)
else:
logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
sql_str = """
SELECT id, symbol, date_frm, if(delist_date < end_date, delist_date, end_date) date_to
FROM
(
SELECT id, symbol, null date_frm, null delist_date,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
FROM {info_table_name} info
) tt
ORDER BY id""".format(info_table_name=info_table_name)
with with_db_session(engine_md) as session:
# 获取每只股票需要获取日线数据的日期区间
table = session.execute(sql_str)
# 获取每只股票需要获取日线数据的日期区间
stock_date_dic = {
(coin_id, symbol): (date_from if begin_time is None else min([date_from, begin_time]), date_to)
for coin_id, symbol, date_from, date_to in table.fetchall() if
id_set is None or coin_id in id_set}
# 设置 dtype
dtype = {
'id': String(60),
'date': Date,
'open': DOUBLE,
'high': DOUBLE,
'low': DOUBLE,
'close': DOUBLE,
'volume': DOUBLE,
'market_cap': DOUBLE,
}
col_names = dtype.keys()
data_df_list = []
dic_count = len(stock_date_dic)
data_count = 0
# 获取接口数据
logger.info('%d coins will been import into %s', dic_count, table_name)
try:
for data_num, ((coin_id, symbol), (date_from, date_to)) in enumerate(stock_date_dic.items(), start=1):
logger.debug('%d/%d) %s[%s] [%s - %s]', data_num, dic_count, coin_id, symbol, date_from, date_to)
date_from_str = None
try:
if date_from is None:
scraper = CmcScraperV1(symbol, coin_id)
else:
date_from_str = date_2_str(str_2_date(date_from, DATE_FORMAT_STR), DATE_FORMAT_STR_CMC)
scraper = CmcScraperV1(symbol, coin_id, start_date=date_from_str)
data_df = scraper.get_dataframe()
except Exception as exp:
logger.exception("scraper('%s', '%s', start_date='%s')", symbol, coin_id, date_from_str)
continue
if data_df is None or data_df.shape[0] == 0:
logger.warning('%d/%d) %s has no data during %s %s', data_num, dic_count, coin_id, date_from, date_to)
continue
data_df.rename(columns={col_name: rename_by_dic(col_name, col_names) for col_name in data_df.columns},
inplace=True)
data_df.rename(columns={'market cap': 'market_cap'}, inplace=True)
data_df['market_cap'] = data_df['market_cap'].apply(lambda x: 0 if isinstance(x, str) else x)
data_df['volume'] = data_df['volume'].apply(lambda x: 0 if isinstance(x, str) else x)
logger.info('%d/%d) %d data of %s between %s and %s', data_num, dic_count, data_df.shape[0], coin_id,
data_df['date'].min(), data_df['date'].max())
data_df['id'] = coin_id
data_df_list.append(data_df)
data_count += data_df.shape[0]
# 仅供调试使用
if DEBUG and len(data_df_list) > 10:
break
if data_count > 10000:
data_df_all = pd.concat(data_df_list)
data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
logging.info("%s %d 条信息被更新", table_name, data_count)
data_df_list, data_count = [], 0
finally:
# 导入数据库 创建
if len(data_df_list) > 0:
data_df_all = pd.concat(data_df_list)
data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
# build_primary_key([table_name])
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
CHANGE COLUMN `date` `date` DATE NOT NULL AFTER `id`,
ADD PRIMARY KEY (`id`, `date`)""".format(table_name=table_name)
with with_db_session(engine_md) as session:
session.execute(create_pk_str)
@app.task
Source : coin.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_coin_latest(chain_param=None, ):
"""插入最新价格数据到 cmc_coin_pro_latest """
table_name = 'cmc_coin_pro_latest'
has_table = engine_md.has_table(table_name)
# 设置 dtype
dtype = {
'id': Integer,
'name': String(60),
'slug': String(60),
'symbol': String(20),
'date_added': DATETIME,
'last_updated': DATETIME,
'market_cap': DOUBLE,
'circulating_supply': DOUBLE,
'max_supply': DOUBLE,
'num_market_pairs': DOUBLE,
'percent_change_1h': DOUBLE,
'percent_change_24h': DOUBLE,
'percent_change_7d': DOUBLE,
'price': DOUBLE,
'total_supply': DOUBLE,
'volume_24h': DOUBLE,
'cmc_rank': DOUBLE,
}
header = {
'Content-Type': 'application/json',
'X-CMC_PRO_API_KEY': config.CMC_PRO_API_KEY
}
params = {
# 'CMC_PRO_API_KEY': config.CMC_PRO_API_KEY,
'limit': 5000,
'start': 1
}
# https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest?sort=market_cap&start=0&limit=10&cryptocurrency_type=tokens&convert=USD,BTC
url = "https://pro-api.coinmarketcap.com/v1/cryptocurrency/listings/latest"
rsp = requests.get(url=url, params=params, headers=header)
if rsp.status_code != 200:
logger.error('获取数据异常[%d] %s', rsp.status_code, rsp.content)
return
ret_dic = rsp.json()
data_list = ret_dic['data']
data_dic_list = []
for dic in data_list:
data_dic = {}
for key, val in dic.items():
if key == 'quote':
for sub_key, sub_val in val['USD'].items():
data_dic[sub_key] = sub_val
else:
data_dic[key] = val
data_dic_list.append(data_dic)
data_df = pd.DataFrame(data_dic_list)
# 数据整理
data_df['date_added'] = data_df['date_added'].apply(lambda x: str_2_datetime(x, DATETIME_FORMAT_STR))
data_df['last_updated'] = data_df['last_updated'].apply(lambda x: str_2_datetime(x, DATETIME_FORMAT_STR))
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype)
logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
# build_primary_key([table_name])
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `id` `id` VARCHAR(60) NOT NULL FIRST ,
CHANGE COLUMN `last_updated` `last_updated` DATETIME NOT NULL AFTER `id`,
ADD PRIMARY KEY (`id`, `last_updated`)""".format(table_name=table_name)
execute_sql(create_pk_str, engine_md)
@app.task
Source : edb.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_edb(chain_param=None):
"""
通过ifind接口获取并导入EDB数据
:param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
:return:
"""
table_name = 'ifind_edb'
has_table = engine_md.has_table(table_name)
indicators_dic = {
"M002043802": ("制造业PMI", "manufacturing_PMI", "2005-01-01", None),
"M002811185": ("财新PMI:综合产出指数", "Caixin_PMI_composite_output_index", "2005-01-01", None),
"M002811186": ("财新PMI:制造业", "Caixin_PMI_manufacturing", "2005-01-01",None),
"M002811190": ("财新PMI:服务业经营活动指数", "Caixin_PMI_service_business_activity_index", "2005-01-01", None),
"M002822183": ("GDP:累计同比", "GDP_cumulative_yoy", "1990-01-01", None),
"M002826938": ("GDP:同比", "GDP_yoy", "1990-01-01", None),
"M001620247": ("GDP:全国", "GDP_nationwide", "1990-01-01", None),
"M001620253": ("GDP:人均", "GDP_per_capita", "1990-01-01", None),
}
if has_table:
sql_str = """select id, adddate(max(time),1) from {table_name} group by id""".format(table_name=table_name)
with with_db_session(engine_md) as session:
indicators_date_dic = dict(session.execute(sql_str).fetchall())
else:
indicators_date_dic = {}
# 设置 dtype
dtype = {
'id': String(20),
'cn_name': String(100),
'en_name': String(100),
'time': Date,
'note':String(500),
'value': DOUBLE,
}
data_df_list = []
try:
for num, (indicators, (item_cn_name, item_en_name,start_date_str,item_note)) in enumerate(indicators_dic.items()):
begin_time = indicators_date_dic[indicators] if indicators in indicators_date_dic else start_date_str
end_time = date.today()
if str_2_date(begin_time) > end_time:
continue
begin_time, end_time = date_2_str(begin_time), date_2_str(end_time)
logger.info("获取 %s %s [%s - %s] 数据", indicators, item_cn_name, begin_time, end_time)
data_df = invoker.THS_EDBQuery(indicators=indicators,
begintime=begin_time,
endtime=end_time)
data_df['cn_name'] = item_cn_name
data_df['en_name'] = item_en_name
data_df['note'] = item_note
data_df_list.append(data_df)
if DEBUG and len(data_df_list) > 1:
break
finally:
data_df_all = pd.concat(data_df_list)
data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype=dtype)
logger.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `id` `id` VARCHAR(20) NOT NULL FIRST,
CHANGE COLUMN `time` `time` DATE NOT NULL AFTER `id`,
ADD PRIMARY KEY (`id`, `time`)""".format(table_name=table_name)
with with_db_session(engine_md) as session:
session.execute(create_pk_str)
logger.info('%s 表 `id`, `time` 主键设置完成', table_name)
def get_edb_item_date_range():
Source : future_info_daily.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_variety_info():
"""保存期货交易所品种信息,一次性数据导入,以后基本上不需要使用了"""
table_name = 'ifind_future_variety_info'
exchange_list = [
'上海期货交易所',
'大连商品交易所',
'郑州商品交易所',
'中国金融期货交易所',
'纽约商业期货交易所(NYMEX)',
'纽约商品交易所(COMEX)',
# '纽约期货交易所(NYBOT)', # 暂无数据
'芝加哥期货交易所(CBOT)',
# '洲际交易所(ICE)', # 暂无数据
'伦敦金属交易所(LME)',
'马来西亚衍生品交易所(BMD)',
'新加坡证券交易所(SGX)',
]
# 设置 dtype
dtype = {
'exchange': String(20),
'ID': String(20),
'SECURITY_NAME': String(20),
}
data_df_list = []
data_count = len(exchange_list)
try:
for num, exchange in enumerate(exchange_list):
logger.debug("%d/%d) %s 获取交易品种信息", num, data_count, exchange)
data_df = invoker.THS_DataPool('variety', exchange, 'variety:Y,id:Y')
data_df['exchange'] = exchange
data_df_list.append(data_df)
finally:
if len(data_df_list) > 0:
tot_data_df = pd.concat(data_df_list)
# tot_data_df.to_sql('ifind_variety_info', engine_md, index=False, if_exists='append', dtype=dtype)
tot_data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype=dtype)
else:
tot_data_count = 0
logger.info('保存交易所品种信息完成, %d条数据被保存', tot_data_count)
def get_exchange_latest_data():
Source : future_info_daily.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_future_info(chain_param=None):
"""
更新期货合约列表信息
:param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
:return:
"""
table_name = 'ifind_future_info'
has_table = engine_md.has_table(table_name)
logger.info("更新 %s [%s] 开始", table_name, has_table)
# 获取已存在合约列表
if has_table:
sql_str = f'SELECT ths_code, ths_start_trade_date_future FROM {table_name}'
with with_db_session(engine_md) as session:
table = session.execute(sql_str)
code_ipo_date_dic = dict(table.fetchall())
exchange_latest_ipo_date_dic = get_exchange_latest_data()
else:
code_ipo_date_dic = {}
exchange_latest_ipo_date_dic = {}
exchange_sectorid_dic_list = [
{'exch_eng': 'SHFE', 'exchange_name': '上海期货交易所',
'sectorid': '091001', 'date_establish': '1995-05-10'},
{'exch_eng': 'CFFEX', 'exchange_name': '中国金融期货交易所',
'sectorid': '091004', 'date_establish': '2013-09-10'},
{'exch_eng': 'DCE', 'exchange_name': '大连商品交易所',
'sectorid': '091002', 'date_establish': '1999-01-10'},
{'exch_eng': 'CZCE', 'exchange_name': '郑州商品交易所',
'sectorid': '091003', 'date_establish': '1999-01-10'},
]
# 字段列表及参数
indicator_param_list = [
('ths_future_short_name_future', '', String(50)),
('ths_future_code_future', '', String(20)),
('ths_sec_type_future', '', String(20)),
('ths_td_variety_future', '', String(20)),
('ths_td_unit_future', '', DOUBLE),
('ths_pricing_unit_future', '', String(20)),
('ths_mini_chg_price_future', '', DOUBLE),
('ths_chg_ratio_lmit_future', '', DOUBLE),
('ths_td_deposit_future', '', DOUBLE),
('ths_start_trade_date_future', '', Date),
('ths_last_td_date_future', '', Date),
('ths_last_delivery_date_future', '', Date),
('ths_delivery_month_future', '', String(10)),
('ths_listing_benchmark_price_future', '', DOUBLE),
('ths_initial_td_deposit_future', '', DOUBLE),
('ths_contract_month_explain_future', '', String(120)),
('ths_td_time_explain_future', '', String(120)),
('ths_last_td_date_explian_future', '', String(120)),
('ths_delivery_date_explain_future', '', String(120)),
('ths_exchange_short_name_future', '', String(50)),
('ths_contract_en_short_name_future', '', String(50)),
('ths_contract_en_name_future', '', String(50)),
]
json_indicator, json_param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
# 设置 dtype
dtype = {key: val for key, _, val in indicator_param_list}
dtype['ths_code'] = String(20)
dtype['exch_eng'] = String(20)
# 获取合约列表
code_set = set()
ndays_per_update = 90
# 获取历史期货合约列表信息
sector_count = len(exchange_sectorid_dic_list)
for num, exchange_sectorid_dic in enumerate(exchange_sectorid_dic_list, start=1):
exchange_name = exchange_sectorid_dic['exchange_name']
exch_eng = exchange_sectorid_dic['exch_eng']
sector_id = exchange_sectorid_dic['sectorid']
date_establish = exchange_sectorid_dic['date_establish']
# 计算获取合约列表的起始日期
date_since = str_2_date(exchange_latest_ipo_date_dic.setdefault(exch_eng, date_establish))
date_yestoday = date.today() - timedelta(days=1)
logger.info("%d/%d) %s[%s][%s] %s ~ %s",
num, sector_count, exchange_name, exch_eng, sector_id, date_since, date_yestoday)
while date_since <= date_yestoday:
date_since_str = date_2_str(date_since)
# #数据池-板块_板块成分-日期;同花顺代码;证券名称;当日行情端证券名称(仅股票节点有效)-iFinD数据接口
# 获取板块成分(期货商品的合约)
# THS_DP('block','2021-01-15;091002003','date:Y,thscode:Y,security_name:Y,security_name_in_time:Y')
try:
future_info_df = invoker.THS_DataPool('block', '%s;%s' % (date_since_str, sector_id),
'thscode:Y,security_name:Y')
except APIError as exp:
if exp.ret_dic['error_code'] in (-4001, -4210,):
future_info_df = None
else:
logger.exception("THS_DataPool %s 获取失败, '%s;%s'", exchange_name, date_since_str, sector_id)
break
# if future_info_df is None or future_info_df.shape[0] == 0:
# break
if future_info_df is not None and future_info_df.shape[0] > 0:
code_set |= set(future_info_df['THSCODE'])
if date_since >= date_yestoday:
break
else:
date_since += timedelta(days=ndays_per_update)
if date_since > date_yestoday:
date_since = date_yestoday
if DEBUG:
break
# 获取合约列表
code_list = [code for code in code_set
if code not in code_ipo_date_dic and
not code.endswith('.CFE') # 用不到股指数据,没有获取权限
]
# 获取合约基本信息
if len(code_list) > 0:
for code_list in split_chunk(code_list, 100):
try:
future_info_df = invoker.THS_BasicData(code_list, json_indicator, json_param)
except APIError as exp:
if exp.ret_dic['error_code'] in (-4001, -4210, -4102):
continue
else:
logger.exception("THS_BasicData %s 获取失败, '%s', '%s'", code_list, json_indicator, json_param)
break
if future_info_df is None or future_info_df.shape[0] == 0:
data_count = 0
logger.warning("更新 %s 结束 %d 条记录被更新", table_name, data_count)
else:
# 补充 exch_eng 字段
future_info_df['exch_eng'] = ''
for exchange_sectorid_dic in exchange_sectorid_dic_list:
future_info_df['exch_eng'][
future_info_df['ths_exchange_short_name_future'] == exchange_sectorid_dic['exchange_name']
] = exchange_sectorid_dic['exch_eng']
data_count = bunch_insert_on_duplicate_update(
future_info_df, table_name, engine_md, dtype,
primary_keys=['ths_code'], schema=config.DB_SCHEMA_MD)
logger.info("更新 %s 结束 %d 条记录被更新", table_name, data_count)
else:
logger.info("没有基础数据需要被更新")
def save_future_daily_df_list(data_df_list):
Source : future_info_daily.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def save_future_daily_df_list(data_df_list):
"""将期货历史数据保存的数据库"""
data_df_count = len(data_df_list)
if data_df_count > 0:
logger.info('merge data with %d df', data_df_count)
data_df = pd.concat(data_df_list)
data_count = data_df.shape[0]
data_df.to_sql('ifind_future_daily', engine_md, if_exists='append', index=False,
dtype={
'ths_code': String(20),
'time': Date,
'preClose': String(20),
'open': DOUBLE,
'high': DOUBLE,
'low': DOUBLE,
'close': DOUBLE,
'volume': DOUBLE,
'amount': DOUBLE,
'avgPrice': DOUBLE,
'change': DOUBLE,
'changeRatio': DOUBLE,
'preSettlement': DOUBLE,
'settlement': DOUBLE,
'change_settlement': DOUBLE,
'chg_settlement': DOUBLE,
'openInterest': DOUBLE,
'positionChange': DOUBLE,
'amplitude': DOUBLE,
})
logger.info("更新 wind_future_daily 结束 %d 条记录被更新", data_count)
else:
logger.info("更新 wind_future_daily 结束 0 条记录被更新")
@app.task
Source : future_info_daily.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_future_daily_his(chain_param=None, ths_code_set: set = None, begin_time=None):
"""
更新期货合约日级别行情信息
:param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
:param ths_code_set:
:param begin_time:
:return:
"""
table_name = 'ifind_future_daily'
info_table_name = 'ifind_future_info'
logger.info("更新 %s 开始", table_name)
has_table = engine_md.has_table(table_name)
indicator_param_list = [
('preClose', String(20)),
('open', DOUBLE),
('high', DOUBLE),
('low', DOUBLE),
('close', DOUBLE),
('volume', DOUBLE),
('amount', DOUBLE),
('avgPrice', DOUBLE),
('change', DOUBLE),
('changeRatio', DOUBLE),
('preSettlement', DOUBLE),
('settlement', DOUBLE),
('change_settlement', DOUBLE),
('chg_settlement', DOUBLE),
('openInterest', DOUBLE),
('positionChange', DOUBLE),
('amplitude', DOUBLE),
]
json_indicator = ','.join([key for key, _ in indicator_param_list])
if has_table:
# 16 点以后 下载当天收盘数据,16点以前只下载前一天的数据
# 对于 date_to 距离今年超过1年的数据不再下载:发现有部分历史过于久远的数据已经无法补全,
# 如:AL0202.SHF AL9902.SHF CU0202.SHF
# TODO: ths_ksjyr_future 字段需要替换为 ths_contract_listed_date_future 更加合理
sql_str = """SELECT ths_code, date_frm,
if(lasttrade_date < end_date, lasttrade_date, end_date) date_to
FROM
(
SELECT fi.ths_code, ifnull(trade_date_max_1, ths_start_trade_date_future) date_frm,
ths_last_td_date_future lasttrade_date,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
FROM {info_table_name} fi LEFT OUTER JOIN
(SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) wfd
ON fi.ths_code = wfd.ths_code
) tt
WHERE date_frm <= if(lasttrade_date < end_date, lasttrade_date, end_date)
-- AND subdate(curdate(), 360) < if(lasttrade_date < end_date, lasttrade_date, end_date)
ORDER BY ths_code""".format(table_name=table_name, info_table_name=info_table_name)
else:
sql_str = """SELECT ths_code, date_frm, if(lasttrade_date < end_date, lasttrade_date, end_date) date_to
FROM
(
SELECT fi.ths_code, ths_start_trade_date_future date_frm,
ths_last_td_date_future lasttrade_date,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
FROM {info_table_name} fi
) tt""".format(info_table_name=info_table_name)
logger.warning('%s 不存在,仅使用 %s 表进行计算日期范围', table_name, info_table_name)
with with_db_session(engine_md) as session:
# 获取每只股票需要获取日线数据的日期区间
table = session.execute(sql_str)
# 获取每只股票需要获取日线数据的日期区间
code_date_range_dic = {
ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
for ths_code, date_from, date_to in table.fetchall() if
ths_code_set is None or ths_code in ths_code_set}
if TRIAL:
date_from_min = date.today() - timedelta(days=(365 * 5))
# 试用账号只能获取近5年数据
code_date_range_dic = {
ths_code: (max([date_from, date_from_min]), date_to)
for ths_code, (date_from, date_to) in code_date_range_dic.items() if date_from_min <= date_to}
# 设置 dtype
dtype = {key: val for key, val in indicator_param_list}
dtype['ths_code'] = String(20)
dtype['time'] = Date
data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
try:
logger.info("%d future instrument will be handled", code_count)
for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
if begin_time is None:
continue
if begin_time > end_time:
logger.warning('%d/%d) %s [%s - %s] 日期范围无效,跳过', num, code_count, ths_code, begin_time, end_time)
continue
else:
logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
try:
data_df = invoker.THS_HistoryQuotes(
ths_code, json_indicator,
'', # 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
begin_time, end_time)
except APIError as exp:
from tasks.ifind import ERROR_CODE_MSG_DIC, NO_BREAK_ERROR_CODE
error_code = exp.ret_dic.setdefault('error_code', 0)
break_msg = "不影响后续任务" if error_code in NO_BREAK_ERROR_CODE else ""
if error_code in ERROR_CODE_MSG_DIC:
logger.error("%d/%d) THS_HistoryQuotes 获取失败 '%s', '%s', %s, %s error_code=%d, %s %s",
num, code_count, ths_code, json_indicator, begin_time, end_time,
error_code, ERROR_CODE_MSG_DIC[error_code],
break_msg
)
else:
logger.exception("%d/%d) THS_HistoryQuotes 获取失败 '%s', '%s', %s, %s error_code=%d, %s %s",
num, code_count, ths_code, json_indicator, begin_time, end_time,
error_code, ERROR_CODE_MSG_DIC[error_code],
break_msg
)
if error_code in NO_BREAK_ERROR_CODE:
continue
else:
break
if data_df is not None and data_df.shape[0] > 0:
data_count += data_df.shape[0]
data_df_list.append(data_df)
# 大于阀值有开始插入
if data_count >= 10000:
data_df_all = pd.concat(data_df_list)
# data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
tot_data_count += data_count
data_df_list, data_count = [], 0
logging.info("%s 新增数据 %d 条", table_name, data_count)
# 仅调试使用
if DEBUG and len(data_df_list) > 1:
break
finally:
if data_count > 0:
data_df_all = pd.concat(data_df_list)
# data_df_all.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
tot_data_count += data_count
logging.info("%s 新增数据 %d 条", table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
if __name__ == "__main__":
Source : future_min.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_future_min(chain_param=None, wind_code_set=None, begin_time=None, recent_n_years=2):
"""
更新期货合约分钟级别行情信息
请求语句类似于:
THS_HF('CU2105.SHF','open;high;low;close;volume;amount;change;changeRatio;sellVolume;buyVolume;openInterest',
'Fill:Original','2021-01-18 09:15:00','2021-01-18 15:15:00')
:param chain_param: 在celery 中將前面結果做爲參數傳給後面的任務
:param wind_code_set: 只道 ths_code 集合
:param begin_time: 最早的起始日期
:param recent_n_years: 忽略n年前的合约
:return:
"""
# global DEBUG
# DEBUG = True
table_name = "ifind_future_min"
logger.info("更新 %s 开始", table_name)
has_table = engine_md.has_table(table_name)
param_list = [
("open", DOUBLE),
("high", DOUBLE),
("low", DOUBLE),
("close", DOUBLE),
("volume", DOUBLE),
("amount", DOUBLE),
("change", DOUBLE),
("changeRatio", DOUBLE),
("sellVolume", DOUBLE),
("buyVolume", DOUBLE),
("openInterest", DOUBLE),
]
ifind_indicator_str = ";".join([key for key, _ in param_list])
if has_table:
sql_str = f"""
select ths_code, date_frm, if(lasttrade_date < end_date, lasttrade_date, end_date) date_to
FROM
(
select fi.ths_code,
ifnull(trade_date_max_1, addtime(ths_start_trade_date_future,'09:00:00')) date_frm,
addtime(ths_last_td_date_future,'15:00:00') lasttrade_date,
case
when hour(now())>=23 then DATE_FORMAT(now(),'%Y-%m-%d 23:00:00')
when hour(now())>=15 then DATE_FORMAT(now(),'%Y-%m-%d 15:00:00')
when hour(now())>=12 then DATE_FORMAT(now(),'%Y-%m-%d 12:00:00')
else DATE_FORMAT(now(),'%Y-%m-%d 03:00:00')
end end_date
from ifind_future_info fi
left outer join
(
select ths_code, addtime(max(trade_datetime),'00:00:01') trade_date_max_1
from {table_name} group by ths_code
) wfd
on fi.ths_code = wfd.ths_code
) tt
where date_frm <= if(lasttrade_date < end_date, lasttrade_date, end_date)
-- and subdate(curdate(), 360) < if(lasttrade_date < end_date, lasttrade_date, end_date)
order by date_to desc, date_frm"""
else:
sql_str = """
SELECT ths_code, date_frm,
if(lasttrade_date < end_date,lasttrade_date, end_date) date_to
FROM
(
SELECT info.ths_code,
addtime(ths_start_trade_date_future,'09:00:00') date_frm,
addtime(ths_last_td_date_future,'15:00:00') lasttrade_date,
case
when hour(now())>=23 then DATE_FORMAT(now(),'%Y-%m-%d 23:00:00')
when hour(now())>=15 then DATE_FORMAT(now(),'%Y-%m-%d 15:00:00')
when hour(now())>=12 then DATE_FORMAT(now(),'%Y-%m-%d 12:00:00')
else DATE_FORMAT(now(),'%Y-%m-%d 03:00:00')
end end_date
FROM ifind_future_info info
) tt
WHERE date_frm <= if(lasttrade_date < end_date, lasttrade_date, end_date)
ORDER BY date_to desc, date_frm"""
logger.warning('%s 不存在,仅使用 wind_future_info 表进行计算日期范围', table_name)
with with_db_session(engine_md) as session:
table = session.execute(sql_str)
# 获取date_from,date_to,将date_from,date_to做为value值
future_date_dic = {
ths_code: (
str_2_datetime(date_from) if begin_time is None else min([str_2_datetime(date_from), begin_time]),
str_2_datetime(date_to)
)
for ths_code, date_from, date_to in table.fetchall()
if wind_code_set is None or ths_code in wind_code_set
}
# 设置 dtype
dtype = {key: val for key, val in param_list}
dtype['ths_code'] = String(20)
dtype['instrument_id'] = String(20)
dtype['trade_date'] = Date
dtype['trade_datetime'] = DateTime
# 定义统一的插入函数
def insert_db(df: pd.DataFrame):
insert_data_count = bunch_insert_on_duplicate_update(
df, table_name, engine_md, dtype=dtype,
primary_keys=['ths_code', 'trade_datetime'], schema=config.DB_SCHEMA_MD
)
return insert_data_count
data_df_list = []
future_count = len(future_date_dic)
bulk_data_count, tot_data_count = 0, 0
# 忽略更早的历史合约
ignore_before = pd.to_datetime(
date.today() - timedelta(days=int(365 * recent_n_years))) if recent_n_years is not None else None
try:
logger.info("%d future instrument will be handled", future_count)
for num, (ths_code, (date_frm, date_to)) in enumerate(future_date_dic.items(), start=1):
# 暂时只处理 RU 期货合约信息
# if ths_code.find('RU') == -1:
# continue
if not (0 <= (date_to - date_frm).days < 800):
continue
if ignore_before is not None and pd.to_datetime(date_frm) < ignore_before:
# 忽略掉 n 年前的合约
continue
if isinstance(date_frm, datetime):
date_frm_str = date_frm.strftime(STR_FORMAT_DATETIME)
elif isinstance(date_frm, str):
date_frm_str = date_frm
else:
date_frm_str = date_frm.strftime(STR_FORMAT_DATE) + ' 09:00:00'
# 结束时间到次日的凌晨5点
if isinstance(date_frm, datetime):
date_to_str = date_to.strftime(STR_FORMAT_DATETIME)
elif isinstance(date_to, str):
date_to_str = date_to
else:
date_to += timedelta(days=1)
date_to_str = date_to.strftime(STR_FORMAT_DATE) + ' 03:00:00'
logger.info('%d/%d) get %s between %s and %s', num, future_count, ths_code, date_frm_str, date_to_str)
try:
data_df = invoker.THS_HighFrequenceSequence(
ths_code, ifind_indicator_str, 'Fill:Original', date_frm_str, date_to_str)
except APIError as exp:
from tasks.ifind import ERROR_CODE_MSG_DIC, NO_BREAK_ERROR_CODE
error_code = exp.ret_dic.setdefault('error_code', 0)
if error_code in ERROR_CODE_MSG_DIC:
logger.warning("%d/%d) %s 执行异常 error_code=%d, %s",
num, future_count, ths_code, error_code, ERROR_CODE_MSG_DIC[error_code])
else:
logger.exception("%d/%d) %s 执行异常 error_code=%d",
num, future_count, ths_code, error_code)
if error_code in NO_BREAK_ERROR_CODE:
continue
else:
break
if data_df is None:
logger.warning('%d/%d) %s has no data during %s %s',
num, future_count, ths_code, date_frm_str, date_to)
continue
logger.info('%d/%d) %d data of %s between %s and %s',
num, future_count, data_df.shape[0], ths_code, date_frm_str, date_to)
# data_df['ths_code'] = ths_code
data_df.rename(columns={
'time': 'trade_datetime',
'thscode': 'ths_code',
}, inplace=True)
data_df['trade_date'] = pd.to_datetime(data_df['trade_datetime']).apply(lambda x: x.date())
data_df.rename(columns={c: str.lower(c) for c in data_df.columns}, inplace=True)
data_df['instrument_id'] = ths_code.split('.')[0]
data_df_list.append(data_df)
bulk_data_count += data_df.shape[0]
# 仅仅调试时使用
if DEBUG and len(data_df_list) >= 1:
break
if bulk_data_count > 50000:
logger.info('merge data with %d df %d data', len(data_df_list), bulk_data_count)
data_df = pd.concat(data_df_list)
tot_data_count = insert_db(data_df)
logger.info("更新 %s,累计 %d 条记录被更新", table_name, tot_data_count)
data_df_list = []
bulk_data_count = 0
finally:
data_df_count = len(data_df_list)
if data_df_count > 0:
logger.info('merge data with %d df %d data', len(data_df_list), bulk_data_count)
data_df = pd.concat(data_df_list)
tot_data_count += insert_db(data_df)
logger.info("更新 %s 结束 累计 %d 条记录被更新", table_name, tot_data_count)
def _run_import_future_min():
Source : reversion_rights_md.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def generate_md_with_adj_factor(instrument_type: str,
method: typing.Optional[Method] = Method.division):
"""
将指定期货品种生产复权后价格。
主力合约名称 f"{instrument_type.upper()}9999"
次主力合约名称 f"{instrument_type.upper()}8888"
仅针对 OHLC 四个价格进行复权处理
"""
table_name = 'ifind_future_adj_factor'
sql_str = f"""SELECT trade_date, instrument_id_main, adj_factor_main,
instrument_id_secondary, adj_factor_secondary
FROM {table_name}
where instrument_type=%s and method=%s"""
adj_factor_df = pd.read_sql(sql_str, engine_md, params=[instrument_type, method.name])
adj_factor_df['trade_date_larger_than'] = adj_factor_df['trade_date'].shift(1)
adj_factor_df.set_index(['trade_date_larger_than', 'trade_date'], inplace=True)
instrument_id_set = set(adj_factor_df['instrument_id_main']) | set(adj_factor_df['instrument_id_secondary'])
in_clause_str = "'" + "', '".join(instrument_id_set) + "'"
# daily_table_name = 'ifind_future_daily'
sql_str = f"""SELECT ths_code Contract, `time` trade_date, `Open`, `High`, `Low`, `Close`, Volume, openInterest OI
FROM ifind_future_daily
where ths_code in ({in_clause_str})
"""
daily_df = pd.read_sql(sql_str, engine_md)
n_final = adj_factor_df.shape[0]
data_count = 0
for n, ((trade_date_larger_than, trade_date), adj_factor_s) in enumerate(adj_factor_df.iterrows(), start=1):
# 截止当日下午3点收盘,保守计算,延迟1个小时,到16点
if pd.isna(trade_date_larger_than):
start = None
else:
start = trade_date_larger_than + timedelta(days=1)
if n == n_final:
if start is None:
start = str_2_date('1990-01-01')
end = str_2_date('2090-12-31')
else:
end = start + timedelta(days=365)
else:
end = trade_date
if start is None:
start = end - timedelta(days=365)
# 对 主力、次主力合约复权
is_match = (
daily_df['trade_date'] >= pd.to_datetime(start)
) & (
daily_df['trade_date'] < pd.to_datetime(end)
)
instrument_id_main = adj_factor_s['instrument_id_main']
adj_factor_main = adj_factor_s['adj_factor_main']
main_df = daily_df[(daily_df['Contract'] == instrument_id_main) & is_match].copy()
instrument_id_secondary = adj_factor_s['instrument_id_secondary']
adj_factor_secondary = adj_factor_s['adj_factor_secondary']
sec_df = daily_df[(daily_df['Contract'] == instrument_id_secondary) & is_match].copy()
rename_dic = {_: f"{_}Next" for _ in sec_df.columns if _ != 'trade_date'}
sec_df.rename(columns=rename_dic, inplace=True)
main_sec_df = pd.merge(main_df, sec_df, on='trade_date')
main_sec_df['instrument_type'] = instrument_type
main_sec_df['adj_factor_main'] = adj_factor_main
main_sec_df['adj_factor_secondary'] = adj_factor_secondary
dtype = {
'trade_date': Date,
'Contract': String(20),
'ContractNext': String(20),
'instrument_type': String(20),
'Close': DOUBLE,
'CloseNext': DOUBLE,
'Volume': DOUBLE,
'VolumeNext': DOUBLE,
'OI': DOUBLE,
'OINext': DOUBLE,
'Open': DOUBLE,
'OpenNext': DOUBLE,
'High': DOUBLE,
'HighNext': DOUBLE,
'Low': DOUBLE,
'LowNext': DOUBLE,
'adj_factor_main': DOUBLE,
'adj_factor_secondary': DOUBLE
}
table_name = 'wind_future_continuous_no_adj'
bunch_insert_on_duplicate_update(
main_sec_df, table_name, engine_md, dtype=dtype,
primary_keys=['instrument_type', 'trade_date'], schema=config.DB_SCHEMA_MD
)
main_sec_df['Open'] *= adj_factor_main
main_sec_df['High'] *= adj_factor_main
main_sec_df['Low'] *= adj_factor_main
main_sec_df['Close'] *= adj_factor_main
main_sec_df['Volume'] *= adj_factor_main
main_sec_df['OI'] *= adj_factor_main
main_sec_df['OpenNext'] *= adj_factor_secondary
main_sec_df['HighNext'] *= adj_factor_secondary
main_sec_df['LowNext'] *= adj_factor_secondary
main_sec_df['CloseNext'] *= adj_factor_secondary
main_sec_df['VolumeNext'] *= adj_factor_secondary
main_sec_df['OINext'] *= adj_factor_secondary
table_name = 'wind_future_continuous_adj'
bunch_insert_on_duplicate_update(
main_sec_df, table_name, engine_md, dtype=dtype,
primary_keys=['instrument_type', 'trade_date'], schema=config.DB_SCHEMA_MD
)
data_count += main_sec_df.shape[0]
logger.info("%s [%s ~ %s] 包含 %d 条数据,复权保存完成",
instrument_type, start, end, main_sec_df.shape[0])
logger.info(f'{instrument_type.upper()} {data_count} 条记录 复权保存完成')
def _test_generate_md_with_adj_factor():
Source : index.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_index_info(chain_param=None, ths_code=None):
"""
导入 info 表
:param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
:param ths_code:
:param refresh:
:return:
"""
table_name = 'ifind_index_info'
has_table = engine_md.has_table(table_name)
logging.info("更新 ifind_index_info 开始")
if ths_code is None:
# 获取全市场股票代码及名称
date_end = date.today()
stock_code_set = set()
stock_code_set_sub = get_stock_code_set(date_end)
if stock_code_set_sub is not None:
stock_code_set |= stock_code_set_sub
ths_code = ','.join(stock_code_set)
indicator_param_list = [
('ths_index_short_name_index', '', String(20)),
('ths_index_code_index', '', String(10)),
('ths_index_category_index', '', String(20)),
('ths_index_base_period_index', '', Date),
('ths_index_base_point_index', '', DOUBLE),
('ths_publish_org_index', '', String(20)),
]
# indicator' = 'ths_index_short_name_index;ths_index_code_index;ths_thscode_index;ths_index_category_index;
# ths_index_base_period_index;ths_index_base_point_index;ths_publish_org_index',
# param = ';;;;;;'
indicator, param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
data_df = invoker.THS_BasicData(ths_code, indicator, param)
if data_df is None or data_df.shape[0] == 0:
logging.info("没有可用的 index info 可以更新")
return
dtype = {key: val for key, _, val in indicator_param_list}
dtype['ths_code'] = String(20)
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
# 更新 code_mapping 表
update_from_info_table(table_name)
@app.task
Source : index.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def add_data_2_ckdvp(json_indicator, json_param, ths_code_set: set = None, begin_time=None):
"""
将数据增量保存到 ifind_ckdvp_index 表,code key date value param 五个字段组合成的表 value 为 Varchar(80)
该表用于存放各种新增加字段的值
查询语句举例:
THS_DateSerial('600007.SH,600009.SH','ths_pe_ttm_stock','101','Days:Tradedays,Fill:Previous,Interval:D','2018-07-31','2018-07-31')
:param json_indicator:
:param json_param:
:param ths_code_set:
:param begin_time:
:return: 全部数据加载完成,返回True,否则False,例如数据加载中途流量不够而中断
"""
all_finished = False
table_name = 'ifind_ckdvp_index'
has_table = engine_md.has_table(table_name)
if has_table:
sql_str = """
select ths_code, date_frm, if(NULL < end_date, NULL, end_date) date_to
FROM
(
select info.ths_code, ifnull(trade_date_max_1, ths_index_base_period_index) date_frm, NULL,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
from
ifind_index_info info
left outer join
(select ths_code, adddate(max(time),1) trade_date_max_1 from {table_name}
where {table_name}.key='{0}' and param='{1}' group by ths_code
) daily
on info.ths_code = daily.ths_code
) tt
where date_frm <= if(NULL < end_date, NULL, end_date)
order by ths_code""".format(json_indicator, json_param, table_name=table_name)
else:
logger.warning('%s 不存在,仅使用 ifind_index_info 表进行计算日期范围', table_name)
sql_str = """
SELECT ths_code, date_frm,
if(NULL < end_date, NULL, end_date) date_to
FROM
(
SELECT info.ths_code, ths_index_base_period_index date_frm, NULL,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
FROM ifind_index_info info
) tt
WHERE date_frm <= if(NULL < end_date, NULL, end_date)
ORDER BY ths_code"""
# 计算每只股票需要获取日线数据的日期区间
with with_db_session(engine_md) as session:
# 获取每只股票需要获取日线数据的日期区间
table = session.execute(sql_str)
code_date_range_dic = {
ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
for ths_code, date_from, date_to in table.fetchall() if
ths_code_set is None or ths_code in ths_code_set}
# 设置 dtype
dtype = {
'ths_code': String(20),
'key': String(80),
'time': Date,
'value': String(80),
'param': String(80),
}
data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
try:
for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
data_df = invoker.THS_DateSerial(
ths_code,
json_indicator,
json_param,
'Days:Tradedays,Fill:Previous,Interval:D',
begin_time, end_time
)
if data_df is not None and data_df.shape[0] > 0:
data_df['key'] = json_indicator
data_df['param'] = json_param
data_df.rename(columns={json_indicator: 'value'}, inplace=True)
data_count += data_df.shape[0]
data_df_list.append(data_df)
# 大于阀值有开始插入
if data_count >= 10000:
data_df_all = pd.concat(data_df_list)
# tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
tot_data_count += data_count
data_df_list, data_count = [], 0
# 仅调试使用
if DEBUG and len(data_df_list) > 4:
break
all_finished = True
finally:
if data_count > 0:
data_df_all = pd.concat(data_df_list)
# tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
tot_data_count += data_count
if not has_table:
alter_table_2_myisam(engine_md, [table_name])
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `ths_code` `ths_code` VARCHAR(20) NOT NULL ,
CHANGE COLUMN `time` `time` DATE NOT NULL ,
CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
ADD PRIMARY KEY (`ths_code`, `time`, `key`, `param`)""".format(table_name=table_name)
with with_db_session(engine_md) as session:
session.execute(create_pk_str)
logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
return all_finished
if __name__ == "__main__":
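The ckdvp table above holds code/key/date/value/param rows, with the VARCHAR columns later promoted to a composite primary key via a raw ALTER TABLE. Declared up front with SQLAlchemy it would look roughly like this; the table name and column sizes are copied from the dtype above, everything else is illustrative:

# Hedged sketch: declaring the same key-value layout up front instead of ALTERing later.
from sqlalchemy import Column, Date, MetaData, String, Table

metadata = MetaData()
ckdvp_index = Table(
    "ifind_ckdvp_index",
    metadata,
    Column("ths_code", String(20), primary_key=True),
    Column("key", String(80), primary_key=True),
    Column("time", Date, primary_key=True),
    Column("param", String(80), primary_key=True),
    Column("value", String(80)),
)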
Source : private_fund.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_private_fund_info(chain_param=None, ths_code=None, refresh=False):
"""
更新基础信息表
:param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
:param ths_code:
:param refresh:
:return:
"""
table_name = 'ifind_private_fund_info'
has_table = engine_md.has_table(table_name)
logging.info("更新 %s 开始", table_name)
if ths_code is None:
# 获取全市场私募基金代码及名称
date_end = date.today()
private_fund_set = set()
if not refresh and has_table:
sql_str = "select ths_code, ths_maturity_date_sp from {table_name}".format(table_name=table_name)
with with_db_session(engine_md) as session:
code_in_db_dict = dict(session.execute(sql_str).fetchall())
code_in_db_set = set(code_in_db_dict.keys())
else:
code_in_db_dict, code_in_db_set = {}, set()
# 查找新增基金
code_set_exists = get_private_fund_set(date_end)
if code_set_exists is not None:
if not refresh and has_table:
code_set_exists -= code_in_db_set
private_fund_set |= code_set_exists
# 查找已清盘基金
code_set_clear = get_private_fund_set(date_end, field='051010005')
if code_set_clear is not None:
if not refresh and has_table:
code_set_clear -= set([key for key, val in code_in_db_dict.items() if val is not None])
private_fund_set |= code_set_clear
ths_code = list(private_fund_set)
if DEBUG:
ths_code = ths_code[:10]
indicator_param_list = [
('ths_product_short_name_sp', '', String(80)),
('ths_product_full_name_sp', '', String(80)),
('ths_trust_category_sp', '', String(40)),
('ths_is_structured_product_sp', '', String(10)),
('ths_threshold_amt_sp', '', Integer),
('ths_low_add_amt_sp', '', Integer),
('ths_fore_max_issue_scale_sp', '', String(40)),
('ths_actual_issue_scale_sp', '', String(40)),
('ths_invest_manager_current_sp', '', String(60)),
('ths_mendator_sp', '', String(20)),
('ths_recommend_sd_sp', '', Date),
('ths_introduction_ed_sp', '', Date),
('ths_established_date_sp', '', Date),
('ths_maturity_date_sp', '', Date),
('ths_found_years_sp', '', Date),
('ths_duration_y_sp', '', Integer),
('ths_remain_duration_d_sp', '', Integer),
('ths_float_manage_rate_sp', '', DOUBLE),
('ths_mandate_fee_rate_sp', '', DOUBLE),
('ths_subscription_rate_explain_sp', '', String(300)),
('ths_redemp_rate_explain_sp', '', String(300)),
('ths_opening_period_explain_sp', '', String(300)),
('ths_close_period_explain_sp', '', String(300)),
('ths_trustee_sp', '', String(100)),
('ths_secbroker_sp', '', String(40))
]
# jsonIndicator='THS_BasicData('SM000008.XT','ths_product_short_name_sp;ths_product_full_name_sp;ths_trust_category_sp;ths_is_structured_product_sp;ths_threshold_amt_sp;ths_low_add_amt_sp;ths_fore_max_issue_scale_sp;ths_actual_issue_scale_sp;ths_invest_manager_current_sp;ths_invest_advisor_sp;ths_mendator_sp;ths_recommend_sd_sp;ths_introduction_ed_sp;ths_established_date_sp;ths_maturity_date_sp;ths_found_years_sp;ths_duration_y_sp;ths_remain_duration_d_sp;ths_float_manage_rate_sp;ths_mandate_fee_rate_sp;ths_subscription_rate_explain_sp;ths_redemp_rate_explain_sp;ths_opening_period_explain_sp;ths_close_period_explain_sp;ths_trustee_sp;ths_secbroker_sp'
# jsonparam=';;;;;;;;;'
indicator, param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
data_df = invoker.THS_BasicData(ths_code, indicator, param, max_code_num=8000)
if data_df is None or data_df.shape[0] == 0:
logging.info("没有可用的数据可以更新")
return
dtype = {key: val for key, _, val in indicator_param_list}
dtype['ths_code'] = String(20)
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
if not has_table:
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
# 更新 code_mapping 表
update_from_info_table(table_name)
@app.task
Source : private_fund.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_private_fund_daily(chain_param=None, ths_code_set: set = None, begin_time=None):
"""
导入 daily 数据
:param chain_param: 该参数仅用于 task.chain 串行操作时,上下传递参数使用
:param ths_code_set:
:param begin_time:
:return:
"""
table_name = 'ifind_private_fund_daily'
indicator_param_list = [
('netAssetValue', '', DOUBLE),
('adjustedNAV', '', DOUBLE),
('accumulatedNAV', '', DOUBLE),
('premium', '', DOUBLE),
('premiumRatio', '', DOUBLE),
('estimatedPosition', '', DOUBLE)
]
# jsonIndicator='netAssetValue,adjustedNAV,accumulatedNAV,premium,premiumRatio,estimatedPosition'
# jsonparam=';;;;'
json_indicator, json_param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
has_table = engine_md.has_table(table_name)
if has_table:
sql_str = """SELECT ths_code, date_frm, if(ths_maturity_date_sp < end_date, ths_maturity_date_sp, end_date) date_to
FROM
(
SELECT info.ths_code, ifnull(trade_date_max_1, ths_established_date_sp) date_frm, ths_maturity_date_sp,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
FROM
ifind_private_fund_info info
LEFT OUTER JOIN
(SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
ON info.ths_code = daily.ths_code
) tt
WHERE date_frm <= if(ths_maturity_date_sp < end_date, ths_maturity_date_sp, end_date)
ORDER BY ths_code""".format(table_name=table_name)
else:
logger.warning('ifind_private_fund_daily 不存在,仅使用 ifind_private_fund_info 表进行计算日期范围')
sql_str = """SELECT ths_code, date_frm, if(ths_maturity_date_sp < end_date, ths_maturity_date_sp, end_date) date_to
FROM
(
SELECT info.ths_code, ths_established_date_sp date_frm, ths_maturity_date_sp,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
FROM ifind_private_fund_info info
) tt
WHERE date_frm <= if(ths_maturity_date_sp < end_date, ths_maturity_date_sp, end_date)
ORDER BY ths_code"""
with with_db_session(engine_md) as session:
# 计算每只股票需要获取日线数据的日期区间
table = session.execute(sql_str)
code_date_range_dic = {
ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
for ths_code, date_from, date_to in table.fetchall() if
ths_code_set is None or ths_code in ths_code_set}
if TRIAL:
date_from_min = date.today() - timedelta(days=(365 * 5))
# trial accounts can only fetch the most recent 5 years of data
code_date_range_dic = {
ths_code: (max([date_from, date_from_min]), date_to)
for ths_code, (date_from, date_to) in code_date_range_dic.items() if date_from_min <= date_to}
# set up dtype
dtype = {key: val for key, _, val in indicator_param_list}
dtype['ths_code'] = String(20)
dtype['time'] = Date
data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
try:
for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
data_df = invoker.THS_HistoryQuotes(
ths_code,
json_indicator,
json_param,
begin_time, end_time
)
if data_df is not None and data_df.shape[0] > 0:
data_count += data_df.shape[0]
data_df_list.append(data_df)
# insert once the buffer exceeds the threshold
if data_count >= 10000:
tot_data_df = pd.concat(data_df_list)
# tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
tot_data_count += data_count
data_df_list, data_count = [], 0
if DEBUG and len(data_df_list) > 1:
break
finally:
if data_count > 0:
tot_data_df = pd.concat(data_df_list)
# tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
tot_data_count += data_count
logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
if not has_table:
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
if __name__ == "__main__":
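The daily importer above buffers per-code DataFrames and only writes once more than 10,000 rows have accumulated, with a finally block flushing whatever is left. A generic sketch of that buffered-insert pattern, where fetch_daily and flush_to_db are hypothetical stand-ins for invoker.THS_HistoryQuotes and bunch_insert_on_duplicate_update:
import pandas as pd

def buffered_import(code_date_range_dic, fetch_daily, flush_to_db, threshold=10000):
    """Fetch per-code frames and flush them to the DB in batches of at least `threshold` rows."""
    data_df_list, data_count, tot_data_count = [], 0, 0
    try:
        for ths_code, (begin_time, end_time) in code_date_range_dic.items():
            data_df = fetch_daily(ths_code, begin_time, end_time)
            if data_df is None or data_df.shape[0] == 0:
                continue
            data_count += data_df.shape[0]
            data_df_list.append(data_df)
            if data_count >= threshold:  # flush once the buffer is large enough
                tot_data_count += flush_to_db(pd.concat(data_df_list))
                data_df_list, data_count = [], 0
    finally:
        if data_df_list:  # flush the remainder even if an exception interrupted the loop
            tot_data_count += flush_to_db(pd.concat(data_df_list))
    return tot_data_count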
0
Source : pub_fund.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_pub_fund_info(chain_param=None, ths_code=None, refresh=False):
"""
Import the info table
:param chain_param: used only to pass parameters along the chain when running tasks serially via task.chain
:param ths_code:
:param refresh:
:return:
"""
table_name = 'ifind_pub_fund_info'
has_table = engine_md.has_table(table_name)
logging.info("更新 %s 开始", table_name)
if ths_code is None:
# fetch the codes and names of all listed public funds
if refresh:
date_fetch = datetime.strptime('1991-02-01', STR_FORMAT_DATE).date()
else:
date_fetch = date.today()
date_end = date.today()
pub_fund_code_set = set()
# fetch all funds directly, including established and expired ones, so the loop is no longer needed
# while date_fetch < date_end:
# pub_fund_code_set_sub = get_pub_fund_code_set(date_fetch)
# if pub_fund_code_set_sub is not None:
# pub_fund_code_set |= pub_fund_code_set_sub
# date_fetch += timedelta(days=365)
pub_fund_code_set_sub = get_pub_fund_code_set(date_end)
if pub_fund_code_set_sub is not None:
pub_fund_code_set |= pub_fund_code_set_sub
ths_code = list(pub_fund_code_set)
if DEBUG:
# this variable is only used during debugging
ths_code_old = ths_code
ths_code = ths_code[:40]
indicator_param_list = [
('ths_fund_short_name_fund', '', String(40)),
('ths_fund_code_fund', '', String(40)),
('ths_fund_thscode_fund', '', String(40)),
('ths_fund_full_name_fund', '', String(80)),
('ths_invest_objective_fund', '', String(500)),
('ths_invest_socpe_fund', '', Text),
('ths_perf_comparative_benchmark_fund', '', Text),
('ths_fund_listed_exchange_fund', '', String(40)),
('ths_fund_td_currency_fund', '', String(60)),
('ths_coupon_value_fund', '', String(40)),
('ths_fund_manager_current_fund', '', String(40)),
('ths_fund_manager_his_fund', '', String(400)),
('ths_fund_supervisor_fund', '', String(40)),
('ths_fund_mandator_fund', '', String(20)),
('ths_fund_sponsor_related_org_fund', '', String(40)),
('ths_fund_type_fund', '', String(10)),
('ths_fund_invest_type_fund', '', String(10)),
('ths_invest_type_first_classi_fund', '', String(40)),
('ths_invest_type_second_classi_fund', '', String(40)),
('ths_galaxy_classi_fund', '', String(300)),
('ths_hts_classi_fund', '', String(100)),
('ths_invest_style_fund', '', String(100)),
('ths_fund_duration_fund', '', String(40)),
('ths_fund_establishment_date_fund', '', Date),
('ths_fund_expiry_date_fund', '', Date),
('ths_redemp_sd_fund', '', String(40)),
('ths_mandate_sd_fund', '', String(40)),
('ths_manage_fee_rate_fund', '', String(40)),
('ths_mandate_fee_rate_fund', '', String(40)),
('ths_sales_service_fee_fund', '', String(40)),
('ths_high_pur_fee_rate_fund', '', String(20)),
('ths_high_redemp_fee_rate_fund', '', String(40)),
('ths_lof_listed_date_fund', '', Date),
('ths_lof_listed_td_share_fund', '', String(40)),
('ths_pm_fund_code_fund', '', String(40)),
('ths_par_short_name_fund', '', String(40)),
('ths_online_cash_sell_code_fund', '', String(40)),
('ths_online_cash_pur_sd_fund', '', String(40)),
('ths_online_cash_pur_ed_fund', '', String(40)),
('ths_online_cash_buy_share_ul_fund', '', String(40)),
('ths_online_cash_buy_share_dl_fund', '', String(40)),
('ths_offline_cash_pur_sd_fund', '', String(40)),
('ths_offline_cash_pur_ed_fund', '', String(40)),
('ths_offline_stock_pur_sd_fund', '', String(40)),
('ths_offline_stock_pur_ed_fund', '', String(40)),
('ths_offline_stock_pur_vol_dl_fund', '', String(40)),
('ths_fund_shares_convert_date_fund', '', String(40)),
('ths_fund_shares_convert_ratio_fund', '', String(40)),
('ths_issue_date_fund', '', Date),
('ths_issue_object_fund', '', String(100)),
('ths_issue_method_fund', '', String(80)),
('ths_fund_reg_and_registrant_fund', '', String(40)),
('ths_fund_main_underwrite_fund', '', String(40)),
('ths_fund_issue_coordinator_fund', '', String(500)),
('ths_fund_sales_agent_fund', '', Text),
('ths_fund_listing_recommended_fund', '', String(40))
]
# jsonIndicator='ths_fund_short_name_fund;ths_fund_code_fund;ths_fund_thscode_fund;ths_fund_full_name_fund;ths_invest_objective_fund;ths_invest_socpe_fund;ths_perf_comparative_benchmark_fund;ths_fund_listed_exchange_fund;ths_fund_td_currency_fund;ths_coupon_value_fund;ths_fund_manager_current_fund;ths_fund_manager_his_fund;ths_fund_supervisor_fund;ths_fund_mandator_fund;ths_fund_sponsor_related_org_fund;ths_fund_type_fund;ths_fund_invest_type_fund;ths_invest_type_first_classi_fund;ths_invest_type_second_classi_fund;ths_galaxy_classi_fund;ths_hts_classi_fund;ths_invest_style_fund;ths_fund_duration_fund;ths_fund_establishment_date_fund;ths_fund_expiry_date_fund;ths_redemp_sd_fund;ths_mandate_sd_fund;ths_mandate_ed_fund;ths_manage_fee_rate_fund;ths_mandate_fee_rate_fund;ths_sales_service_fee_fund;ths_high_pur_fee_rate_fund;ths_high_redemp_fee_rate_fund;ths_lof_listed_date_fund;ths_lof_listed_td_share_fund;ths_pm_fund_code_fund;ths_par_short_name_fund;ths_online_cash_sell_code_fund;ths_online_cash_pur_sd_fund;ths_online_cash_pur_ed_fund;ths_online_cash_buy_share_ul_fund;ths_online_cash_buy_share_dl_fund;ths_offline_cash_pur_sd_fund;ths_offline_cash_pur_ed_fund;ths_offline_cash_pur_share_dl_fund;ths_offline_stock_pur_sd_fund;ths_offline_stock_pur_ed_fund;ths_offline_stock_pur_vol_dl_fund;ths_fund_shares_convert_date_fund;ths_fund_shares_convert_ratio_fund;ths_issue_date_fund;ths_issue_object_fund;ths_issue_method_fund;ths_fund_reg_and_registrant_fund;ths_fund_main_underwrite_fund;ths_fund_issue_coordinator_fund;ths_fund_sales_agent_fund;ths_fund_listing_recommended_fund'
# jsonparam=';;;;;;;;;;;'
indicator, param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
data_df = invoker.THS_BasicData(ths_code, indicator, param, max_code_num=3000)
if data_df is None or data_df.shape[0] == 0:
logging.info("没有可用的 pub_fund info 可以更新")
return
# delete historical data, then update
# table_name_list = engine_md.table_names()
# if table_name in table_name_list:
# with with_db_session(engine_md) as session:
# session.execute(
# "DELETE FROM {table_name} WHERE ths_code IN (".format(table_name=table_name) + ','.join(
# [':code%d' % n for n in range(len(pub_fund_code_set))]
# ) + ")",
# params={'code%d' % n: val for n, val in enumerate(pub_fund_code_set)})
# session.commit()
dtype = {key: val for key, _, val in indicator_param_list}
dtype['ths_code'] = String(20)
# data_count = data_df.shape[0]
# data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
# update the code_mapping table
update_from_info_table(table_name)
@app.task
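unzip_join is a project helper that is not shown on this page; judging from its call sites it splits the (indicator, param) pairs into two strings joined by the separator, which is what THS_BasicData expects. A hedged sketch of such a helper, offered purely as an assumption:
def unzip_join(pairs, sep=';'):
    # assumed behaviour: unzip the (indicator, param) pairs and join each column with `sep`
    indicators, params = zip(*pairs) if pairs else ((), ())
    return sep.join(indicators), sep.join(params)

# usage mirroring the call above, with a hypothetical subset of indicators
indicator, param = unzip_join([('ths_fund_short_name_fund', ''), ('ths_fund_code_fund', '')])
# indicator == 'ths_fund_short_name_fund;ths_fund_code_fund', param == ';'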
0
Source : pub_fund.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_pub_fund_daily(chain_param=None, ths_code_set: set = None, begin_time=None):
"""
Save historical data to ifind_pub_fund_daily via the history interface
:param chain_param: used only to pass parameters along the chain when running tasks serially via task.chain
:param ths_code_set:
:param begin_time: defaults to None; if not None, the update start date for all data will be no later than this date
:return:
"""
table_name = 'ifind_pub_fund_daily'
has_table = engine_md.has_table(table_name)
if begin_time is not None and type(begin_time) == date:
begin_time = str_2_date(begin_time)
indicator_param_list = [
('netAssetValue', '', DOUBLE),
('adjustedNAV', '', DOUBLE),
('accumulatedNAV', '', DOUBLE)
]
# THS_HistoryQuotes('600006.SH,600010.SH',
# 'preClose,open,high,low,close,avgPrice,changeRatio,volume,amount,turnoverRatio,transactionAmount,totalShares,totalCapital,floatSharesOfAShares,floatSharesOfBShares,floatCapitalOfAShares,floatCapitalOfBShares,pe_ttm,pe,pb,ps,pcf',
# 'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
# '2018-06-30','2018-07-30')
json_indicator, _ = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
if has_table:
sql_str = """SELECT ths_code, date_frm, if(ths_fund_expiry_date_fund < end_date, ths_fund_expiry_date_fund, end_date) date_to
FROM
(
SELECT info.ths_code, ifnull(trade_date_max_1, ths_lof_listed_date_fund) date_frm, ths_fund_expiry_date_fund,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
FROM
ifind_pub_fund_info info
LEFT OUTER JOIN
(SELECT ths_code, adddate(max(time),1) trade_date_max_1 FROM {table_name} GROUP BY ths_code) daily
ON info.ths_code = daily.ths_code
) tt
WHERE date_frm <= if(ths_fund_expiry_date_fund < end_date, ths_fund_expiry_date_fund, end_date)
ORDER BY ths_code""".format(table_name=table_name)
else:
logger.warning('%s does not exist; using only the ifind_pub_fund_info table to compute date ranges', table_name)
sql_str = """SELECT ths_code, date_frm, if(ths_fund_expiry_date_fund < end_date, ths_fund_expiry_date_fund, end_date) date_to
FROM
(
SELECT info.ths_code, ths_lof_listed_date_fund date_frm, ths_fund_expiry_date_fund,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
FROM ifind_pub_fund_info info
) tt
WHERE date_frm <= if(ths_fund_expiry_date_fund < end_date, ths_fund_expiry_date_fund, end_date)
ORDER BY ths_code"""
# compute the date range of daily data to be fetched for each code
with with_db_session(engine_md) as session:
# fetch the date range of daily data needed for each code
table = session.execute(sql_str)
# compute the date range of daily data to be fetched for each code
code_date_range_dic = {
ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
for ths_code, date_from, date_to in table.fetchall() if
ths_code_set is None or ths_code in ths_code_set}
if TRIAL:
date_from_min = date.today() - timedelta(days=(365 * 5))
# trial accounts can only fetch the most recent 5 years of data
code_date_range_dic = {
ths_code: (max([date_from, date_from_min]), date_to)
for ths_code, (date_from, date_to) in code_date_range_dic.items() if date_from_min <= date_to}
# set up dtype
dtype = {key: val for key, _, val in indicator_param_list}
dtype['ths_code'] = String(20)
dtype['time'] = Date
data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
try:
for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
data_df = invoker.THS_HistoryQuotes(
ths_code,
json_indicator,
'Interval:D,CPS:1,baseDate:1900-01-01,Currency:YSHB,fill:Previous',
begin_time, end_time
)
if data_df is not None and data_df.shape[0] > 0:
data_count += data_df.shape[0]
data_df.rename(columns={col: col.lower() for col in data_df.columns}, inplace=True)
data_df_list.append(data_df)
if DEBUG and len(data_df_list) > 1:
break
# insert once the buffer exceeds the threshold
if data_count >= 10000:
tot_data_df = pd.concat(data_df_list)
data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
tot_data_count += data_count
data_df_list, data_count = [], 0
finally:
if len(data_df_list) > 0:
tot_data_df = pd.concat(data_df_list)
data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
tot_data_count += data_count
logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
if __name__ == "__main__":
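Under a trial account only the most recent five years of quotes are available, so the date ranges computed from the info table are clamped before anything is requested. The clamp in isolation, with hypothetical codes and ranges:
from datetime import date, timedelta

code_date_range_dic = {
    'OF000001': (date(2010, 1, 1), date(2024, 1, 1)),  # hypothetical fund codes and ranges
    'OF000002': (date(2001, 1, 1), date(2005, 1, 1)),
}
date_from_min = date.today() - timedelta(days=365 * 5)
code_date_range_dic = {
    ths_code: (max([date_from, date_from_min]), date_to)
    for ths_code, (date_from, date_to) in code_date_range_dic.items()
    if date_from_min <= date_to
}
# only ranges reaching into the five-year window survive, with their start raised to the cutoff
print(code_date_range_dic)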
0
Source : stock.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_stock_info(chain_param=None, ths_code=None, refresh=False):
"""
:param chain_param: used only to pass parameters along the chain when running tasks serially via task.chain
:param ths_code:
:param refresh:
:return:
"""
table_name = 'ifind_stock_info'
has_table = engine_md.has_table(table_name)
logging.info("更新 wind_stock_info 开始")
if ths_code is None:
# fetch the codes and names of all listed stocks
if refresh:
date_fetch = datetime.strptime('1991-02-01', STR_FORMAT_DATE).date()
else:
date_fetch = date.today()
date_end = date.today()
stock_code_set = set()
while date_fetch < date_end:
stock_code_set_sub = get_stock_code_set(date_fetch)
if stock_code_set_sub is not None:
stock_code_set |= stock_code_set_sub
date_fetch += timedelta(days=365)
stock_code_set_sub = get_stock_code_set(date_end)
if stock_code_set_sub is not None:
stock_code_set |= stock_code_set_sub
ths_code = ','.join(stock_code_set)
indicator_param_list = [
('ths_stock_short_name_stock', '', String(10)),
('ths_stock_code_stock', '', String(10)),
('ths_stock_varieties_stock', '', String(10)),
('ths_ipo_date_stock', '', Date),
('ths_listing_exchange_stock', '', String(10)),
('ths_delist_date_stock', '', Date),
('ths_corp_cn_name_stock', '', String(40)),
('ths_corp_name_en_stock', '', String(100)),
('ths_established_date_stock', '', Date),
]
# jsonIndicator='ths_stock_short_name_stock;ths_stock_code_stock;ths_thscode_stock;ths_stock_varieties_stock;ths_ipo_date_stock;ths_listing_exchange_stock;ths_delist_date_stock;ths_corp_cn_name_stock;ths_corp_name_en_stock;ths_established_date_stock'
# jsonparam=';;;;;;;;;'
indicator, param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
data_df = invoker.THS_BasicData(ths_code, indicator, param)
if data_df is None or data_df.shape[0] == 0:
logging.info("没有可用的 stock info 可以更新")
return
# delete historical data, then update
# with with_db_session(engine_md) as session:
# session.execute(
# "DELETE FROM {table_name} WHERE ths_code IN (".format(table_name=table_name) + ','.join(
# [':code%d' % n for n in range(len(stock_code_set))]
# ) + ")",
# params={'code%d' % n: val for n, val in enumerate(stock_code_set)})
# session.commit()
dtype = {key: val for key, _, val in indicator_param_list}
dtype['ths_code'] = String(20)
# data_count = data_df.shape[0]
# data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
# update the code_mapping table
update_from_info_table(table_name)
@app.task
0
Source : stock.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def add_data_2_ckdvp(json_indicator, json_param, ths_code_set: set = None, begin_time=None):
"""
Incrementally save data into the ifind_ckdvp_stock table, which combines the five fields code, key, date, value and param; value is a Varchar(80)
The table is used to store the values of newly added indicators
Example query:
THS_DateSerial('600007.SH,600009.SH','ths_pe_ttm_stock','101','Days:Tradedays,Fill:Previous,Interval:D','2018-07-31','2018-07-31')
:param json_indicator:
:param json_param:
:param ths_code_set:
:param begin_time:
:return: True once all data has been loaded; otherwise False, e.g. when loading is interrupted part-way because the data quota runs out
"""
all_finished = False
table_name = 'ifind_ckdvp_stock'
has_table = engine_md.has_table(table_name)
if has_table:
sql_str = """
select ths_code, date_frm, if(ths_delist_date_stock < end_date, ths_delist_date_stock, end_date) date_to
FROM
(
select info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_stock) date_frm, ths_delist_date_stock,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
from
ifind_stock_info info
left outer join
(select ths_code, adddate(max(time),1) trade_date_max_1 from {table_name}
where {table_name}.key='{0}' and param='{1}' group by ths_code
) daily
on info.ths_code = daily.ths_code
) tt
where date_frm <= if(ths_delist_date_stock < end_date, ths_delist_date_stock, end_date)
order by ths_code""".format(json_indicator, json_param, table_name=table_name)
else:
logger.warning('%s does not exist; using only the ifind_stock_info table to compute date ranges', table_name)
sql_str = """
SELECT ths_code, date_frm,
if(ths_delist_date_stock < end_date, ths_delist_date_stock, end_date) date_to
FROM
(
SELECT info.ths_code, ths_ipo_date_stock date_frm, ths_delist_date_stock,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
FROM ifind_stock_info info
) tt
WHERE date_frm <= if(ths_delist_date_stock < end_date, ths_delist_date_stock, end_date)
ORDER BY ths_code"""
# compute the date range of daily data to be fetched for each code
with with_db_session(engine_md) as session:
# fetch the date range of daily data needed for each code
table = session.execute(sql_str)
code_date_range_dic = {
ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
for ths_code, date_from, date_to in table.fetchall() if
ths_code_set is None or ths_code in ths_code_set}
# set up dtype
dtype = {
'ths_code': String(20),
'key': String(80),
'time': Date,
'value': String(80),
'param': String(80),
}
data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
try:
for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
data_df = invoker.THS_DateSerial(
ths_code,
json_indicator,
json_param,
'Days:Tradedays,Fill:Previous,Interval:D',
begin_time, end_time
)
if data_df is not None and data_df.shape[0] > 0:
data_df['key'] = json_indicator
data_df['param'] = json_param
data_df.rename(columns={json_indicator: 'value'}, inplace=True)
data_count += data_df.shape[0]
data_df_list.append(data_df)
# insert once the buffer exceeds the threshold
if data_count >= 10000:
data_df_all = pd.concat(data_df_list)
# tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
tot_data_count += data_count
data_df_list, data_count = [], 0
# for debugging only
if DEBUG and len(data_df_list) > 1:
break
all_finished = True
finally:
if data_count > 0:
data_df_all = pd.concat(data_df_list)
# tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
tot_data_count += data_count
if not has_table:
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `ths_code` `ths_code` VARCHAR(20) NOT NULL ,
CHANGE COLUMN `time` `time` DATE NOT NULL ,
CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
ADD PRIMARY KEY (`ths_code`, `time`, `key`, `param`)""".format(table_name=table_name)
with with_db_session(engine_md) as session:
session.execute(create_pk_str)
logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
return all_finished
def add_data_2_ckdvp_aginst_report_date(json_indicator, json_param, ths_code_set: set = None, begin_time=None):
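The ckdvp table stores arbitrary indicators as code/key/time/value/param rows, with value kept as a Varchar(80). The code above lets the insert helper create the table implicitly and then adds the primary key with ALTER TABLE, so the explicit declaration below is only an illustrative sketch of the implied schema:
from sqlalchemy import Column, Date, MetaData, Table
from sqlalchemy.types import String

metadata = MetaData()
ckdvp_stock = Table(
    'ifind_ckdvp_stock', metadata,
    Column('ths_code', String(20), primary_key=True),
    Column('key', String(80), primary_key=True),
    Column('time', Date, primary_key=True),
    Column('param', String(80), primary_key=True),
    Column('value', String(80)),
)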
0
Source : stock.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def add_data_2_ckdvp_aginst_report_date(json_indicator, json_param, ths_code_set: set = None, begin_time=None):
"""
Not finished yet
Incrementally save data into the ifind_ckdvp_stock table, which combines the five fields code, key, date, value and param; value is a Varchar(80)
The table is used to store the values of newly added indicators
Example query:
THS_DateSerial('600007.SH,600009.SH','ths_pe_ttm_stock','101','Days:Tradedays,Fill:Previous,Interval:D','2018-07-31','2018-07-31')
:param json_indicator:
:param json_param:
:param ths_code_set:
:param begin_time:
:return: True once all data has been loaded; otherwise False, e.g. when loading is interrupted part-way because the data quota runs out
"""
all_finished = False
table_name = 'ifind_ckdvp_stock'
# report_date_table_name = 'ifind_stock_report_date'
has_table = engine_md.has_table(table_name)
if has_table:
sql_str = """
select ths_code, date_frm, if(ths_delist_date_stock < end_date, ths_delist_date_stock, end_date) date_to
FROM
(
select info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_stock) date_frm, ths_delist_date_stock,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
from
ifind_stock_info info
left outer join
(select ths_code, adddate(max(time),1) trade_date_max_1 from {table_name}
where {table_name}.key='{0}' and param='{1}' group by ths_code
) daily
on info.ths_code = daily.ths_code
) tt
where date_frm <= if(ths_delist_date_stock < end_date, ths_delist_date_stock, end_date)
order by ths_code""".format(json_indicator, json_param, table_name=table_name)
else:
logger.warning('%s does not exist; using only the ifind_stock_info table to compute date ranges', table_name)
sql_str = """
SELECT ths_code, date_frm,
if(ths_delist_date_stock < end_date, ths_delist_date_stock, end_date) date_to
FROM
(
SELECT info.ths_code, ths_ipo_date_stock date_frm, ths_delist_date_stock,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
FROM ifind_stock_info info
) tt
WHERE date_frm <= if(ths_delist_date_stock < end_date, ths_delist_date_stock, end_date)
ORDER BY ths_code"""
report_date_sql_str = """SELECT DISTINCT ths_code, ths_periodic_report_fore_dd_stock
FROM ifind_stock_report_date WHERE ths_periodic_report_fore_dd_stock IS NOT NULL"""
# compute the date range of daily data to be fetched for each code
with with_db_session(engine_md) as session:
# fetch the date range of daily data needed for each code
table = session.execute(sql_str)
code_date_range_dic = {
ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
for ths_code, date_from, date_to in table.fetchall() if
ths_code_set is None or ths_code in ths_code_set}
code_report_date_dic = {}
for ths_code, report_date in session.execute(report_date_sql_str).fetchall():
code_report_date_dic.setdefault(ths_code, []).append(report_date)
for report_date_list in code_report_date_dic.values():
report_date_list.sort()
# set up dtype
dtype = {
'ths_code': String(20),
'key': String(80),
'time': Date,
'value': String(80),
'param': String(80),
}
data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
try:
for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
if ths_code in code_report_date_dic:
report_date_list = code_report_date_dic[ths_code]
# TODO: not finished
else:
logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
data_df = invoker.THS_DateSerial(
ths_code,
json_indicator,
json_param,
'Days:Tradedays,Fill:Previous,Interval:D',
begin_time, end_time
)
if data_df is not None and data_df.shape[0] > 0:
data_df['key'] = json_indicator
data_df['param'] = json_param
data_df.rename(columns={json_indicator: 'value'}, inplace=True)
data_count += data_df.shape[0]
data_df_list.append(data_df)
# insert once the buffer exceeds the threshold
if data_count >= 10000:
data_df_all = pd.concat(data_df_list)
# tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
tot_data_count += data_count
data_df_list, data_count = [], 0
# for debugging only
if DEBUG and len(data_df_list) > 1:
break
all_finished = True
finally:
if data_count > 0:
data_df_all = pd.concat(data_df_list)
# tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
tot_data_count += data_count
if not has_table:
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `ths_code` `ths_code` VARCHAR(20) NOT NULL ,
CHANGE COLUMN `time` `time` DATE NOT NULL ,
CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
ADD PRIMARY KEY (`ths_code`, `time`, `key`, `param`)""".format(table_name=table_name)
with with_db_session(engine_md) as session:
session.execute(create_pk_str)
logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
return all_finished
@app.task
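When the table did not exist before the run, the raw ALTER TABLE above makes the key columns NOT NULL and adds the composite primary key. A standalone sketch of executing that statement, assuming a MySQL engine (the CHANGE COLUMN syntax is MySQL-specific) and a plain engine.begin() block in place of the project's with_db_session wrapper:
from sqlalchemy import create_engine, text

table_name = 'ifind_ckdvp_stock'
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `ths_code` `ths_code` VARCHAR(20) NOT NULL ,
CHANGE COLUMN `time` `time` DATE NOT NULL ,
CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
ADD PRIMARY KEY (`ths_code`, `time`, `key`, `param`)""".format(table_name=table_name)

engine = create_engine('mysql+pymysql://user:password@localhost/md')  # hypothetical DSN
with engine.begin() as conn:
    conn.execute(text(create_pk_str))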
0
Source : stock.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def add_data_fin_2_ckdvp(json_indicator, json_param, ths_code_set: set = None, begin_time=None):
"""
Incrementally save data into the ifind_stock_ckdvp table, which combines the five fields code, key, date, value and param; value is a Varchar(80)
The table is used to store the values of newly added indicators
Example query:
THS_DateSerial('600007.SH,600009.SH','ths_pe_ttm_stock','101','Days:Tradedays,Fill:Previous,Interval:D','2018-07-31','2018-07-31')
:param json_indicator:
:param json_param:
:param ths_code_set:
:param begin_time:
:return: True once all data has been loaded; otherwise False, e.g. when loading is interrupted part-way because the data quota runs out
"""
all_finished = False
table_name = 'ifind_stock_ckdvp'
has_table = engine_md.has_table(table_name)
if has_table:
sql_str = """
select ths_code, date_frm, if(ths_delist_date_stock < end_date, ths_delist_date_stock, end_date) date_to
FROM
(
select info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_stock) date_frm, ths_delist_date_stock,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
from
ifind_stock_info info
left outer join
(select ths_code, adddate(max(time),1) trade_date_max_1 from {table_name}
where {table_name}.key='{0}' and param='{1}' group by ths_code
) daily
on info.ths_code = daily.ths_code
) tt
where date_frm <= if(ths_delist_date_stock < end_date, ths_delist_date_stock, end_date)
order by ths_code""".format(json_indicator, json_param, table_name=table_name)
else:
logger.warning('%s does not exist; using only the ifind_stock_info table to compute date ranges', table_name)
sql_str = """
SELECT ths_code, date_frm,
if(ths_delist_date_stock < end_date, ths_delist_date_stock, end_date) date_to
FROM
(
SELECT info.ths_code, ths_ipo_date_stock date_frm, ths_delist_date_stock,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
FROM ifind_stock_info info
) tt
WHERE date_frm <= if(ths_delist_date_stock < end_date, ths_delist_date_stock, end_date)
ORDER BY ths_code"""
# compute the date range of daily data to be fetched for each code
with with_db_session(engine_md) as session:
# fetch the date range of daily data needed for each code
table = session.execute(sql_str)
code_date_range_dic = {
ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
for ths_code, date_from, date_to in table.fetchall() if
ths_code_set is None or ths_code in ths_code_set}
# set up dtype
dtype = {
'ths_code': String(20),
'key': String(80),
'time': Date,
'value': String(80),
'param': String(80),
}
data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
try:
for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
data_df = invoker.THS_DateSerial(
ths_code,
json_indicator,
json_param,
'Days:Tradedays,Fill:Previous,Interval:D',
begin_time, end_time
)
if data_df is not None and data_df.shape[0] > 0:
data_df['key'] = json_indicator
data_df['param'] = json_param
data_df.rename(columns={json_indicator: 'value'}, inplace=True)
data_count += data_df.shape[0]
data_df_list.append(data_df)
# insert once the buffer exceeds the threshold
if data_count >= 10000:
data_df_all = pd.concat(data_df_list)
# tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
tot_data_count += data_count
data_df_list, data_count = [], 0
# for debugging only
if DEBUG and len(data_df_list) > 1:
break
all_finished = True
finally:
if data_count > 0:
data_df_all = pd.concat(data_df_list)
# tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(data_df_all, table_name, engine_md, dtype)
tot_data_count += data_count
if not has_table:
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `ths_code` `ths_code` VARCHAR(20) NOT NULL ,
CHANGE COLUMN `time` `time` DATE NOT NULL ,
CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
ADD PRIMARY KEY (`ths_code`, `time`, `key`, `param`)""".format(table_name=table_name)
with with_db_session(engine_md) as session:
session.execute(create_pk_str)
logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
return all_finished
if __name__ == "__main__":
0
Source : stock_hk.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_stock_hk_info(chain_param=None, ths_code=None, refresh=False):
"""
Import the info table
:param chain_param: used only to pass parameters along the chain when running tasks serially via task.chain
:param ths_code:
:param refresh:
:return:
"""
table_name = 'ifind_stock_hk_info'
logging.info("更新 %s 开始", table_name)
if ths_code is None:
# fetch the codes and names of all listed HK stocks
if refresh:
date_fetch = datetime.strptime('1991-02-01', STR_FORMAT_DATE).date()
else:
date_fetch = date.today()
date_end = date.today()
stock_hk_code_set = set()
while date_fetch < date_end:
stock_hk_code_set_sub = get_stock_hk_code_set(date_fetch)
if stock_hk_code_set_sub is not None:
stock_hk_code_set |= stock_hk_code_set_sub
date_fetch += timedelta(days=365)
stock_hk_code_set_sub = get_stock_hk_code_set(date_end)
if stock_hk_code_set_sub is not None:
stock_hk_code_set |= stock_hk_code_set_sub
if DEBUG:
stock_hk_code_set = list(stock_hk_code_set)[:10]
ths_code = ','.join(stock_hk_code_set)
indicator_param_list = [
('ths_stock_short_name_hks', '', String(40)),
('ths_stock_code_hks', '', String(20)),
('ths_isin_code_hks', '', String(40)),
('ths_corp_ashare_short_name_hks', '', String(10)),
('ths_corp_ashare_code_hks', '', String(60)),
('ths_stock_varieties_hks', '', String(40)),
('ths_ipo_date_hks', '', Date),
('ths_listed_exchange_hks', '', String(60)),
('ths_stop_listing_date_hks', '', Date),
('ths_corp_cn_name_hks', '', String(120)),
('ths_corp_name_en_hks', '', String(120)),
('ths_established_date_hks', '', Date),
('ths_accounting_date_hks', '', String(20)),
('ths_general_manager_hks', '', String(40)),
('ths_secretary_hks', '', String(40)),
('ths_operating_scope_hks', '', Text),
('ths_mo_product_name_hks', '', String(200)),
('ths_district_hks', '', String(60)),
('ths_reg_address_hks', '', String(200)),
('ths_office_address_hks', '', String(200)),
('ths_corp_tel_hks', '', String(200)),
('ths_corp_fax_hks', '', String(200)),
('ths_corp_website_hks', '', String(200)),
('ths_auditor_hks', '', String(60)),
('ths_legal_counsel_hks', '', String(300)),
('ths_hs_industry_hks', '', String(40)),
]
# jsonIndicator='ths_stock_short_name_hks;ths_stock_code_hks;ths_thscode_hks;ths_isin_code_hks;ths_corp_ashare_short_name_hks;ths_corp_ashare_code_hks;ths_stock_varieties_hks;ths_ipo_date_hks;ths_listed_exchange_hks;ths_stop_listing_date_hks;ths_corp_cn_name_hks;ths_corp_name_en_hks;ths_established_date_hks;ths_accounting_date_hks;ths_general_manager_hks;ths_secretary_hks;ths_operating_scope_hks;ths_mo_product_name_hks;ths_district_hks;ths_reg_address_hks;ths_office_address_hks;ths_corp_tel_hks;ths_corp_fax_hks;ths_corp_website_hks;ths_auditor_hks;ths_legal_counsel_hks;ths_hs_industry_hks'
# jsonparam=';;;;;;;;;;;'
indicator, param = unzip_join([(key, val) for key, val, _ in indicator_param_list], sep=';')
param += '100'
data_df = invoker.THS_BasicData(ths_code, indicator, param)
if data_df is None or data_df.shape[0] == 0:
logging.info("没有可用的 stock_hk info 可以更新")
return
# delete historical data, then update
has_table = engine_md.has_table(table_name)
if has_table:
with with_db_session(engine_md) as session:
session.execute(
"DELETE FROM {table_name} WHERE ths_code IN (".format(table_name=table_name) + ','.join(
[':code%d' % n for n in range(len(stock_hk_code_set))]
) + ")",
params={'code%d' % n: val for n, val in enumerate(stock_hk_code_set)})
session.commit()
dtype = {key: val for key, _, val in indicator_param_list}
dtype['ths_code'] = String(20)
# data_count = data_df.shape[0]
# data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
# update the code_mapping table
update_from_info_table(table_name)
@app.task
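Before re-importing the HK stock info, the existing rows for the fetched codes are deleted with one bound parameter per code. The same parameter-building trick as a self-contained sketch against a throwaway SQLite table (the engine, table contents and codes here are hypothetical stand-ins):
from sqlalchemy import create_engine, text

engine = create_engine('sqlite://')
with engine.begin() as conn:
    conn.execute(text("CREATE TABLE ifind_stock_hk_info (ths_code VARCHAR(20))"))
    conn.execute(text("INSERT INTO ifind_stock_hk_info VALUES ('0001.HK'), ('0002.HK')"))

stock_hk_code_set = {'0001.HK', '0002.HK'}  # hypothetical codes
placeholders = ','.join(':code%d' % n for n in range(len(stock_hk_code_set)))
params = {'code%d' % n: val for n, val in enumerate(stock_hk_code_set)}
with engine.begin() as conn:
    conn.execute(
        text("DELETE FROM ifind_stock_hk_info WHERE ths_code IN (%s)" % placeholders),
        params,
    )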
0
Source : stock_hk.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def add_data_2_ckdvp(json_indicator, json_param, ths_code_set: set = None, begin_time=None):
"""
Incrementally save data into the ifind_ckdvp_stock_hk table, which combines the five fields code, key, date, value and param; value is a Varchar(80)
The table is used to store the values of newly added indicators
Example query:
THS_DateSerial('600007.SH,600009.SH','ths_pe_ttm_stock','101','Days:Tradedays,Fill:Previous,Interval:D','2018-07-31','2018-07-31')
:param json_indicator:
:param json_param:
:param ths_code_set:
:param begin_time:
:return: True once all data has been loaded; otherwise False, e.g. when loading is interrupted part-way because the data quota runs out
"""
all_finished = False
table_name = 'ifind_ckdvp_stock_hk'
has_table = engine_md.has_table(table_name)
if has_table:
sql_str = """
select ths_code, date_frm, if(ths_stop_listing_date_hks < end_date, ths_stop_listing_date_hks, end_date) date_to
FROM
(
select info.ths_code, ifnull(trade_date_max_1, ths_ipo_date_hks) date_frm, ths_stop_listing_date_hks,
if(hour(now()) < 19, subdate(curdate(),1), curdate()) end_date
from
ifind_stock_hk_info info
left outer join
(select ths_code, adddate(max(time),1) trade_date_max_1 from {table_name}
where {table_name}.key='{json_indicator}' and param='{json_param}' group by ths_code
) daily
on info.ths_code = daily.ths_code
) tt
where date_frm <= if(ths_stop_listing_date_hks < end_date, ths_stop_listing_date_hks, end_date)
order by ths_code""".format(json_indicator=json_indicator, json_param=json_param, table_name=table_name)
else:
logger.warning('%s does not exist; using only the ifind_stock_hk_info table to compute date ranges', table_name)
sql_str = """
SELECT ths_code, date_frm,
if(ths_stop_listing_date_hks < end_date, ths_stop_listing_date_hks, end_date) date_to
FROM
(
SELECT info.ths_code, ths_ipo_date_hks date_frm, ths_stop_listing_date_hks,
if(hour(now()) < 19, subdate(curdate(),1), curdate()) end_date
FROM ifind_stock_hk_info info
) tt
WHERE date_frm <= if(ths_stop_listing_date_hks < end_date, ths_stop_listing_date_hks, end_date)
ORDER BY ths_code"""
# compute the date range of daily data to be fetched for each code
with with_db_session(engine_md) as session:
# fetch the date range of daily data needed for each code
table = session.execute(sql_str)
# compute the date range of daily data to be fetched for each code
code_date_range_dic = {
ths_code: (date_from if begin_time is None else min([date_from, begin_time]), date_to)
for ths_code, date_from, date_to in table.fetchall() if
ths_code_set is None or ths_code in ths_code_set}
# set up dtype
dtype = {
'ths_code': String(20),
'key': String(80),
'time': Date,
'value': String(80),
'param': String(80),
}
data_df_list, data_count, tot_data_count, code_count = [], 0, 0, len(code_date_range_dic)
try:
for num, (ths_code, (begin_time, end_time)) in enumerate(code_date_range_dic.items(), start=1):
logger.debug('%d/%d) %s [%s - %s]', num, code_count, ths_code, begin_time, end_time)
data_df = invoker.THS_DateSerial(
ths_code,
json_indicator,
json_param,
'Days:Tradedays,Fill:Previous,Interval:D',
begin_time, end_time
)
if data_df is not None and data_df.shape[0] > 0:
data_df['key'] = json_indicator
data_df['param'] = json_param
data_df.rename(columns={json_indicator: 'value'}, inplace=True)
data_count += data_df.shape[0]
data_df_list.append(data_df)
# insert once the buffer exceeds the threshold
if data_count >= 10000:
tot_data_df = pd.concat(data_df_list)
# tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
tot_data_count += data_count
data_df_list, data_count = [], 0
# for debugging only
if DEBUG and len(data_df_list) > 1:
break
all_finished = True
finally:
if data_count > 0:
tot_data_df = pd.concat(data_df_list)
# tot_data_df.to_sql(table_name, engine_md, if_exists='append', index=False, dtype=dtype)
data_count = bunch_insert_on_duplicate_update(tot_data_df, table_name, engine_md, dtype)
tot_data_count += data_count
if not has_table and engine_md.has_table(table_name):
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `ths_code` `ths_code` VARCHAR(20) NOT NULL ,
CHANGE COLUMN `time` `time` DATE NOT NULL ,
CHANGE COLUMN `key` `key` VARCHAR(80) NOT NULL ,
CHANGE COLUMN `param` `param` VARCHAR(80) NOT NULL ,
ADD PRIMARY KEY (`ths_code`, `time`, `key`, `param`)""".format(table_name=table_name)
with with_db_session(engine_md) as session:
session.execute(create_pk_str)
logging.info("更新 %s 完成 新增数据 %d 条", table_name, tot_data_count)
return all_finished
@app.task
0
Source : trade_date.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_trade_date(chain_param=None):
"""
Incrementally import trade-date data into the database table wind_trade_date; by default the next 300 days of trade dates are imported
2018-01-17: added HK trade dates; for now, to limit the impact on existing code, a separate wind_trade_date_hk table is created
Merging the two tables may be considered later
:param chain_param: used only to pass parameters along the chain when running tasks serially via task.chain
:return:
"""
table_name = 'ifind_trade_date'
has_table = engine_md.has_table(table_name)
exch_code_trade_date_dic = {}
with with_db_session(engine_md) as session:
try:
table = session.execute('SELECT exch_code,max(trade_date) FROM {table_name} GROUP BY exch_code'.format(
table_name=table_name
))
exch_code_trade_date_dic = {exch_code: trade_date for exch_code, trade_date in table.fetchall()}
except Exception as exp:
logger.exception("交易日获取异常")
exchange_code_dict = {
"HKEX": "香港",
"NYMEX": "纽约证券交易所",
"SZSE": "深圳",
"CBOT": "芝加哥商品交易所",
"NASDAQ": "纳斯达克",
"AMEX": "美国证券交易所",
"ICE": "洲际交易所",
"BMD": "马来西亚衍生品交易所"
}
exchange_code_list = list(exchange_code_dict.keys())
for exchange_code in exchange_code_list:
if exchange_code in exch_code_trade_date_dic:
trade_date_max = exch_code_trade_date_dic[exchange_code]
start_date_str = (trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE)
else:
start_date_str = '1980-01-01'
end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE)
trade_date_df = invoker.THS_DateQuery(
exchange_code, 'dateType:0,period:D,dateFormat:0', start_date_str, end_date_str)
if trade_date_df is None or trade_date_df.shape[0] == 0:
logger.warning('%s[%s] [%s - %s] no trade dates found',
exchange_code_dict[exchange_code], exchange_code, start_date_str, end_date_str)
continue
data_count = trade_date_df.shape[0]
logger.info("%s[%s] %d 条交易日数据将被导入 %s",
exchange_code_dict[exchange_code], exchange_code, data_count, table_name)
# with with_db_session(engine_md) as session:
# session.execute("INSERT INTO ifind_trade_date (trade_date,exch_code) VALUE (:trade_date,:exch_code)",
# params=[{'trade_date': trade_date, 'exch_code': exchange_code} for trade_date in
# trade_date_df['time']])
trade_date_df['exch_code'] = exchange_code
# trade_date_df.rename(columns={'time': 'trade_date'}, inplace=True)
# trade_date_df.to_sql('ifind_trade_date', engine_md, if_exists='append', index=False, dtype={
# 'exch_code': String(10),
# 'trade_date': Date,
# })
data_count = bunch_insert_on_duplicate_update(trade_date_df, table_name, engine_md, dtype={
'exch_code': String(10),
'time': Date,
})
logger.info('%s[%s] %d trade-date rows imported into %s',
exchange_code_dict[exchange_code], exchange_code, data_count, table_name)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
# build_primary_key([table_name])
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `exch_code` `exch_code` VARCHAR(10) NOT NULL FIRST,
CHANGE COLUMN `time` `time` DATE NOT NULL AFTER `exch_code`,
ADD PRIMARY KEY (`exch_code`, `time`)""".format(table_name=table_name)
with with_db_session(engine_md) as session:
session.execute(create_pk_str)
logger.info('Primary key (`exch_code`, `time`) created on table %s', table_name)
if __name__ == "__main__":
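The trade-date importer resumes one day after the latest stored trade date per exchange and falls back to 1980-01-01 for an exchange it has never seen. The incremental start-date logic in isolation, with a hypothetical query result and assuming STR_FORMAT_DATE is the usual '%Y-%m-%d':
from datetime import date, timedelta

STR_FORMAT_DATE = '%Y-%m-%d'  # assumed to match the project constant
exch_code_trade_date_dic = {'SZSE': date(2024, 1, 10)}  # hypothetical max(trade_date) per exchange

for exchange_code in ('SZSE', 'HKEX'):
    if exchange_code in exch_code_trade_date_dic:
        trade_date_max = exch_code_trade_date_dic[exchange_code]
        start_date_str = (trade_date_max + timedelta(days=1)).strftime(STR_FORMAT_DATE)
    else:
        start_date_str = '1980-01-01'
    end_date_str = (date.today() + timedelta(days=310)).strftime(STR_FORMAT_DATE)
    print(exchange_code, start_date_str, end_date_str)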
0
Source : stock_info.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_jq_stock_info(chain_param=None, refresh=False):
""" 获取全市场股票代码及名称
"""
table_name = TABLE_NAME
logging.info("更新 %s 开始", table_name)
# has_table = engine_md.has_table(table_name)
param_list = [
('jq_code', String(20)),
('display_name', String(20)),
('name', String(20)),
('start_date', Date),
('end_date', Date),
]
# set up dtype
dtype = {key: val for key, val in param_list}
# data extraction
# types: list: used to filter the security types; allowed elements:
# 'stock', 'fund', 'index', 'futures', 'etf', 'lof', 'fja', 'fjb'. When types is empty, all stocks are returned, excluding funds, indexes and futures
# date: a date string or a [datetime.datetime]/[datetime.date] object,
# used to fetch securities still listed on that date. Defaults to None, meaning info for all dates is returned
stock_info_all_df = get_all_securities()
stock_info_all_df.index.rename('jq_code', inplace=True)
stock_info_all_df.reset_index(inplace=True)
logging.info('%s stock records will be imported', stock_info_all_df.shape[0])
data_count = bunch_insert_on_duplicate_update(
stock_info_all_df, table_name, engine_md, dtype=dtype,
myisam_if_create_table=True, primary_keys=['jq_code'], schema=config.DB_SCHEMA_MD)
logging.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
# 更新 map 表
update_from_info_table(table_name)
if __name__ == "__main__":
0
Source : __init__.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_info_table(type_name, insert_db=True) -> pd.DataFrame:
"""
Call get_all_securities to fetch info for the given type
type: 'stock', 'fund', 'index', 'futures', 'etf', 'lof', 'fja', 'fjb'. When types is empty, all stocks are returned, excluding funds, indexes and futures
:param type_name:
:return:
"""
table_name = f'jq_{type_name}_info'
logger.info("更新 %s 开始", table_name)
# has_table = engine_md.has_table(table_name)
param_list = [
('jq_code', String(20)),
('display_name', String(20)),
('name', String(20)),
('start_date', Date),
('end_date', Date),
]
# set up dtype
dtype = {key: val for key, val in param_list}
# data extraction
# types: list: used to filter the security types; allowed elements:
# 'stock', 'fund', 'index', 'futures', 'etf', 'lof', 'fja', 'fjb'. When types is empty, all stocks are returned, excluding funds, indexes and futures
# date: a date string or a [datetime.datetime]/[datetime.date] object,
# used to fetch securities still listed on that date. Defaults to None, meaning info for all dates is returned
stock_info_all_df = get_all_securities(types=type_name)
stock_info_all_df.index.rename('jq_code', inplace=True)
stock_info_all_df.reset_index(inplace=True)
if insert_db:
logger.info('%s records will be imported', stock_info_all_df.shape[0])
data_count = bunch_insert_p(stock_info_all_df, table_name=table_name, dtype=dtype, primary_keys=['jq_code'])
logger.info("更新 %s 完成 存量数据 %d 条", table_name, data_count)
return stock_info_all_df
0
Source : future.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def merge_future_info():
"""
Merge wind and ifind data into the table of the corresponding name
:return:
"""
table_name = 'future_info'
logging.info("更新 %s 开始", table_name)
has_table = engine_md.has_table(table_name)
ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
# ifind_model = TABLE_MODEL_DIC[ifind_table_name]
# wind_model = TABLE_MODEL_DIC[wind_table_name]
# with with_db_session(engine_md) as session:
# session.query(ifind_model, wind_model).filter(ifind_model.c.ths_code == wind_model.c.wind_code)
ifind_sql_str = "select * from {table_name}".format(table_name=ifind_table_name)
wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
ifind_df = pd.read_sql(ifind_sql_str, engine_md) # , index_col='ths_code'
wind_df = pd.read_sql(wind_sql_str, engine_md) # , index_col='wind_code'
joined_df = pd.merge(ifind_df, wind_df, how='outer',
left_on='ths_code', right_on='wind_code', indicator='indicator_column')
col_merge_dic = {
'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
'sec_name': (String(50), prefer_left, {'left_key': 'ths_future_short_name_future', 'right_key': 'sec_name'}),
'dl_month': (String(20), prefer_left, {'left_key': 'ths_delivery_month_future', 'right_key': 'dlmonth'}),
'delivery_date': (
Date, prefer_left, {'left_key': 'ths_last_delivery_date_future', 'right_key': 'lastdelivery_date'}),
'prince': (DOUBLE, prefer_left, {'left_key': 'ths_mini_chg_price_future', 'right_key': 'mfprice'}),
'ex_eng': (String(20), get_value, {'key': 'ths_exchange_short_name_future'}),
'sc_code': (String(20), prefer_left, {'left_key': 'ths_td_variety_future', 'right_key': 'sccode'}),
'p_unit': (String(20), prefer_left, {'left_key': 'ths_pricing_unit_future', 'right_key': 'punit'}),
'l_prince': (DOUBLE, prefer_left, {'left_key': 'ths_listing_benchmark_price_future', 'right_key': 'lprice'}),
'margin': (DOUBLE, prefer_left, {'left_key': 'ths_initial_td_deposit_future', 'right_key': 'margin'}),
'ratio_lmit': (DOUBLE, get_value, {'key': 'ths_chg_ratio_lmit_future'}),
'enshort_name': (String(100), get_value, {'key': 'ths_contract_en_short_name_future'}),
'en_name': (String(100), get_value, {'key': 'ths_contract_en_name_future'}),
'future_code': (String(20), prefer_left, {'left_key': 'ths_future_code_future', 'right_key': 'trade_code'}),
'last_date': (Date, prefer_left, {'left_key': 'ths_last_td_date_future', 'right_key': 'lasttrade_date'}),
'con_month': (String(60), get_value, {'key': 'ths_contract_month_explain_future'}),
'de_date': (String(60), get_value, {'key': 'ths_delivery_date_explain_future'}),
'td_date': (String(60), get_value, {'key': 'ths_last_td_date_explian_future'}),
'benchmark_': (DOUBLE, get_value, {'key': 'ths_listing_benchmark_price_future'}),
'sec_type': (String(20), get_value, {'key': 'ths_sec_type_future'}),
'td_time': (String(60), get_value, {'key': 'ths_td_time_explain_future'}),
'td_unit': (String(20), get_value, {'key': 'ths_td_unit_future'}),
'td_variety': (String(20), get_value, {'key': 'ths_td_variety_future'}),
'sec_englishname': (String(50), get_value, {'key': 'sec_englishname'}),
'exch_eng': (String(50), get_value, {'key': 'exch_eng'}),
'changelt': (DOUBLE, get_value, {'key': 'changelt'}),
'contractmultiplier': (String(100), get_value, {'key': 'contractmultiplier'}),
'ftmargins': (String(100), get_value, {'key': 'ftmargins'}),
'thours': (String(200), get_value, {'key': 'thours'}),
'ipo_date': (Date, prefer_left, {'left_key': 'ths_start_trade_date_future', 'right_key': 'ipo_date'}),
}
col_merge_rule_dic = {
key: (val[1], val[2]) for key, val in col_merge_dic.items()
}
dtype = {
key: val[0] for key, val in col_merge_dic.items()
}
data_df = merge_data(joined_df, col_merge_rule_dic)
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
logger.info('%s: %d records inserted or updated', table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
# build_primary_key([table_name])
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `unique_code` `unique_code` VARCHAR(20) NOT NULL ,
ADD PRIMARY KEY (`unique_code`)""".format(table_name=table_name)
with with_db_session(engine_md) as session:
session.execute(create_pk_str)
return data_df
@app.task
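The merge step keeps one (SQLAlchemy type, merge function, kwargs) triple per output column and then splits that dict into a dtype map and a merge-rule map. A simplified sketch of the pattern; prefer_left and get_value below are toy stand-ins for the project's merge helpers, and the columns are a hypothetical subset of the real mapping:
import pandas as pd
from sqlalchemy.types import Date, String

def prefer_left(row, left_key, right_key):
    # toy stand-in: take the ifind value unless it is missing, then fall back to the wind value
    left = row[left_key]
    return left if pd.notna(left) else row[right_key]

def get_value(row, key):
    return row[key]

col_merge_dic = {
    'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
    'ipo_date': (Date, prefer_left, {'left_key': 'ths_start_trade_date_future', 'right_key': 'ipo_date'}),
    'exch_eng': (String(50), get_value, {'key': 'exch_eng'}),
}
dtype = {key: val[0] for key, val in col_merge_dic.items()}
col_merge_rule_dic = {key: (val[1], val[2]) for key, val in col_merge_dic.items()}

joined_df = pd.DataFrame([{  # hypothetical merged row
    'ths_code': 'CU2403.SHF', 'wind_code': None,
    'ths_start_trade_date_future': '2023-03-16', 'ipo_date': None,
    'exch_eng': 'SHFE',
}])
data_df = pd.DataFrame({
    col: joined_df.apply(lambda row, f=func, kw=kwargs: f(row, **kw), axis=1)
    for col, (func, kwargs) in col_merge_rule_dic.items()
})
print(data_df)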
0
Source : future.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def merge_future_daily(date_from=None):
"""
Merge wind and ifind data into the table of the corresponding name
:param date_from:
:return:
"""
table_name = 'future_daily'
logging.info("更新 %s 开始", table_name)
has_table = engine_md.has_table(table_name)
if date_from is None and has_table:
sql_str = "select adddate(max(trade_date),1) from {table_name}".format(table_name=table_name)
with with_db_session(engine_md) as session:
date_from = date_2_str(session.execute(sql_str).scalar())
ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
if date_from is None:
ifind_sql_str = "select * from {table_name}".format(table_name=ifind_table_name)
wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
ifind_df = pd.read_sql(ifind_sql_str, engine_md) # , index_col='ths_code'
wind_df = pd.read_sql(wind_sql_str, engine_md) # , index_col='wind_code'
else:
ifind_sql_str = "select * from {table_name} where time >= %s".format(table_name=ifind_table_name)
wind_sql_str = "select * from {table_name} where trade_date >= %s".format(table_name=wind_table_name)
ifind_df = pd.read_sql(ifind_sql_str, engine_md, params=[date_from]) # , index_col='ths_code'
wind_df = pd.read_sql(wind_sql_str, engine_md, params=[date_from]) # , index_col='wind_code'
# change (ifind_df, wind_df) into joined_df
joined_df = pd.merge(ifind_df, wind_df, how='outer',
left_on=['ths_code', 'time'], right_on=['wind_code', 'trade_date'],
indicator='indicator_column')
# data, columns processing
col_merge_dic = {
'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
'trade_date': (Date, prefer_left, {'left_key': 'time', 'right_key': 'trade_date'}),
'open': (DOUBLE, mean_value, {
'left_key': 'open_x', 'right_key': 'open_y',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'high': (DOUBLE, mean_value, {
'left_key': 'high_x', 'right_key': 'high_y',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'low': (DOUBLE, mean_value, {
'left_key': 'low_x', 'right_key': 'low_y',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
# TODO: for reasons unknown, some close values returned by the wind interface are inaccurate
'close': (DOUBLE, prefer_left, {
'left_key': 'close_x', 'right_key': 'close_y',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'volume': (DOUBLE, mean_value, {
'left_key': 'volume_x', 'right_key': 'volume_y',
'warning_accuracy': 10, 'primary_keys': ('ths_code', 'time')}),
'amount': (DOUBLE, mean2_value, {
'left_key': 'amount', 'right_key': 'amt',
'warning_accuracy': 1, 'primary_keys': ('ths_code', 'time')}),
'settle': (DOUBLE, mean_value, {
'left_key': 'settlement', 'right_key': 'settle',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'position': (DOUBLE, mean_value, {
'left_key': 'openInterest', 'right_key': 'position',
'warning_accuracy': 1, 'primary_keys': ('ths_code', 'time')}),
'maxupordown': (Integer, get_value, {'key': 'maxupordown'}),
'st_stock': (DOUBLE, get_value, {'key': 'st_stock'}),
'instrument_id': (String(20), get_value, {'key': 'instrument_id'}),
'positionchange': (DOUBLE, get_value, {'key': 'positionChange'}),
'preclose': (String(20), get_value, {'key': 'preClose'}),
'presettlement': (DOUBLE, get_value, {'key': 'preSettlement'}),
'amplitude': (DOUBLE, get_value, {'key': 'amplitude'}),
'avgprice': (DOUBLE, get_value, {'key': 'avgPrice'}),
'change': (DOUBLE, get_value, {'key': 'change'}),
'change_settlement': (DOUBLE, get_value, {'key': 'change_settlement'}),
'chg_settlement': (DOUBLE, get_value, {'key': 'chg_settlement'}),
}
col_merge_rule_dic = {
key: (val[1], val[2]) for key, val in col_merge_dic.items()
}
dtype = {
key: val[0] for key, val in col_merge_dic.items()
}
data_df = merge_data(joined_df, col_merge_rule_dic)
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
logger.info('%s: %d records inserted or updated', table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
return data_df
def max_up_or_down(data_s: pd.Series, ths_key, wind_key, primary_keys=None, **kwargs):
0
Source : index.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def merge_index_info():
"""
Merge wind and ifind data into the table of the corresponding name
:return:
"""
table_name = 'index_info'
logging.info("更新 %s 开始", table_name)
has_table = engine_md.has_table(table_name)
ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
# ifind_model = TABLE_MODEL_DIC[ifind_table_name]
# wind_model = TABLE_MODEL_DIC[wind_table_name]
# with with_db_session(engine_md) as session:
# session.query(ifind_model, wind_model).filter(ifind_model.c.ths_code == wind_model.c.wind_code)
ifind_sql_str = "select * from {table_name}".format(table_name=ifind_table_name)
wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
ifind_df = pd.read_sql(ifind_sql_str, engine_md) # , index_col='ths_code'
wind_df = pd.read_sql(wind_sql_str, engine_md) # , index_col='wind_code'
joined_df = pd.merge(ifind_df, wind_df, how='outer',
left_on='ths_code', right_on='wind_code', indicator='indicator_column')
col_merge_dic = {
'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
'sec_name': (String(20), prefer_left, {'left_key': 'ths_index_short_name_index', 'right_key': 'sec_name'}),
'crm_issuer': (String(20), prefer_left, {'left_key': 'ths_publish_org_index', 'right_key': 'crm_issuer'}),
'base_date': (
Date, prefer_left, {'left_key': 'ths_index_base_period_index', 'right_key': 'basedate'}),
'basevalue': (DOUBLE, prefer_left, {'left_key': 'ths_index_base_point_index', 'right_key': 'basevalue'}),
'country': (String(20), get_value, {'key': 'country'}),
'launchdate': (Date, get_value, {'key': 'launchdate'}),
'index_code': (String(20), get_value, {'key': 'ths_index_code_index'}),
'index_category': (String(10), get_value, {'key': 'ths_index_category_index'}),
}
col_merge_rule_dic = {
key: (val[1], val[2]) for key, val in col_merge_dic.items()
}
dtype = {
key: val[0] for key, val in col_merge_dic.items()
}
data_df = merge_data(joined_df, col_merge_rule_dic)
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
logger.info('%s: %d records inserted or updated', table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
# build_primary_key([table_name])
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `unique_code` `unique_code` VARCHAR(20) NOT NULL ,
ADD PRIMARY KEY (`unique_code`)""".format(table_name=table_name)
with with_db_session(engine_md) as session:
session.execute(create_pk_str)
return data_df
@app.task
0
Source : index.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def merge_index_daily(date_from=None):
"""
Merge wind and ifind data into the table of the corresponding name
:param date_from:
:return:
"""
table_name = 'index_daily'
logging.info("更新 %s 开始", table_name)
has_table = engine_md.has_table(table_name)
if date_from is None and has_table:
sql_str = "select adddate(max(trade_date),1) from {table_name}".format(table_name=table_name)
with with_db_session(engine_md) as session:
date_from = date_2_str(session.execute(sql_str).scalar())
ifind_table_name_his = 'ifind_{table_name}_his'.format(table_name=table_name)
ifind_table_name_ds = 'ifind_{table_name}_ds'.format(table_name=table_name)
wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
if date_from is None:
ifind_his_sql_str = "select * from {table_name}".format(table_name=ifind_table_name_his)
ifind_ds_sql_str = "select * from {table_name}".format(table_name=ifind_table_name_ds)
wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md) # , index_col='ths_code'
ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md) # , index_col='ths_code'
wind_df = pd.read_sql(wind_sql_str, engine_md) # , index_col='wind_code'
else:
ifind_his_sql_str = "select * from {table_name} where time >= %s".format(table_name=ifind_table_name_his)
ifind_ds_sql_str = "select * from {table_name} where time >= %s".format(table_name=ifind_table_name_ds)
wind_sql_str = "select * from {table_name} where trade_date >= %s".format(table_name=wind_table_name)
ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md, params=[date_from]) # , index_col='ths_code'
ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md, params=[date_from]) # , index_col='ths_code'
wind_df = pd.read_sql(wind_sql_str, engine_md, params=[date_from]) # , index_col='wind_code'
# change (ifind_df, wind_df) into joined_df
ifind_df = pd.merge(ifind_his_df, ifind_ds_df, how='outer',
on=['ths_code', 'time'])
joined_df = pd.merge(ifind_df, wind_df, how='outer',
left_on=['ths_code', 'time'], right_on=['wind_code', 'trade_date'],
indicator='indicator_column')
# data, columns processing
col_merge_dic = {
'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
'trade_date': (Date, prefer_left, {'left_key': 'time', 'right_key': 'trade_date'}),
'open': (DOUBLE, mean_value, {
'left_key': 'open_x', 'right_key': 'open_y',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'high': (DOUBLE, mean_value, {
'left_key': 'high_x', 'right_key': 'high_y',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'low': (DOUBLE, mean_value, {
'left_key': 'low_x', 'right_key': 'low_y',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'close': (DOUBLE, prefer_left, {
'left_key': 'close_x', 'right_key': 'close_y',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'volume': (DOUBLE, mean3_value, {
'left_key': 'volume_x', 'right_key': 'volume_y',
'warning_accuracy': 10, 'primary_keys': ('ths_code', 'time')}),
'amount': (DOUBLE, mean_value, {
'left_key': 'amount', 'right_key': 'amt',
'warning_accuracy': 100, 'primary_keys': ('ths_code', 'time')}),
'free_turn': (DOUBLE, mean2_value, {
'left_key': 'turnoverRatio', 'right_key': 'free_turn',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'avgPrice': (DOUBLE, get_value, {'key': 'avgPrice'}),
'changeRatio': (DOUBLE, get_value, {'key': 'changeRatio'}),
'floatCapitalOfAShares': (DOUBLE, get_value, {'key': 'floatCapitalOfAShares'}),
'floatCapitalOfBShares': (DOUBLE, get_value, {'key': 'floatCapitalOfBShares'}),
'floatSharesOfAShares': (DOUBLE, get_value, {'key': 'floatSharesOfAShares'}),
'floatSharesOfBShares': (DOUBLE, get_value, {'key': 'floatSharesOfBShares'}),
'pb': (DOUBLE, get_value, {'key': 'pb'}),
'pcf': (DOUBLE, get_value, {'key': 'pcf'}),
'pe': (DOUBLE, get_value, {'key': 'pe'}),
'pe_ttm': (DOUBLE, get_value, {'key': 'pe_ttm'}),
'preClose': (DOUBLE, get_value, {'key': 'preClose'}),
'ps': (DOUBLE, get_value, {'key': 'ps'}),
'totalCapital': (DOUBLE, get_value, {'key': 'totalCapital'}),
'totalShares': (DOUBLE, get_value, {'key': 'totalShares'}),
'transactionAmount': (DOUBLE, get_value, {'key': 'transactionAmount'}),
'current_index': (DOUBLE, get_value, {'key': 'ths_current_mv_index'}),
'dividend_rate_index': (DOUBLE, get_value, {'key': 'ths_dividend_rate_index'}),
'buy_amt_index': (DOUBLE, get_value, {'key': 'ths_financing_buy_amt_int_index'}),
'payment_amt_index': (DOUBLE, get_value, {'key': 'ths_financing_payment_amt_int_index'}),
'ths_flaot_mv_ratio_index': (DOUBLE, get_value, {'key': 'ths_flaot_mv_ratio_index'}),
'ths_float_ashare_index': (DOUBLE, get_value, {'key': 'ths_float_ashare_index'}),
'ths_float_ashare_mv_index': (DOUBLE, get_value, {'key': 'ths_float_ashare_mv_index'}),
'ths_float_ashare_to_total_shares_index': (
DOUBLE, get_value, {'key': 'ths_float_ashare_to_total_shares_index'}),
'ths_float_bshare_index': (DOUBLE, get_value, {'key': 'ths_float_bshare_index'}),
'ths_float_bshare_to_total_shares_index': (
DOUBLE, get_value, {'key': 'ths_float_bshare_to_total_shares_index'}),
'ths_float_hshare_to_total_shares_index': (
DOUBLE, get_value, {'key': 'ths_float_hshare_to_total_shares_index'}),
'ths_limited_ashare_index': (DOUBLE, get_value, {'key': 'ths_limited_ashare_index'}),
'ths_margin_trading_amtb_index': (DOUBLE, get_value, {'key': 'ths_margin_trading_amtb_index'}),
'ths_margin_trading_balance_index': (DOUBLE, get_value, {'key': 'ths_margin_trading_balance_index'}),
'ths_margin_trading_repay_amt_index': (DOUBLE, get_value, {'key': 'ths_margin_trading_repay_amt_index'}),
'ths_margin_trading_sell_amt_index': (DOUBLE, get_value, {'key': 'ths_margin_trading_sell_amt_index'}),
'ths_float_hshare_index': (DOUBLE, get_value, {'key': 'ths_float_hshare_index'}),
'ths_limited_bshare_index': (DOUBLE, get_value, {'key': 'ths_limited_bshare_index'}),
'ths_market_value_index': (DOUBLE, get_value, {'key': 'ths_market_value_index'}),
'ths_mt_payment_vol_int_index': (DOUBLE, get_value, {'key': 'ths_mt_payment_vol_int_index'}),
'ths_mt_sell_vol_int_index': (DOUBLE, get_value, {'key': 'ths_mt_sell_vol_int_index'}),
'ths_mv_csrc_alg_index': (DOUBLE, get_value, {'key': 'ths_mv_csrc_alg_index'}),
'ths_pb_index': (DOUBLE, get_value, {'key': 'ths_pb_index'}),
'ths_pcf_index': (DOUBLE, get_value, {'key': 'ths_pcf_index'}),
'ths_pe_index': (DOUBLE, get_value, {'key': 'ths_pe_index'}),
'ths_ps_index': (DOUBLE, get_value, {'key': 'ths_ps_index'}),
'ths_short_selling_amtb_index': (DOUBLE, get_value, {'key': 'ths_short_selling_amtb_index'}),
'ths_short_selling_payment_vol_index': (DOUBLE, get_value, {'key': 'ths_short_selling_payment_vol_index'}),
'ths_short_selling_sell_vol_index': (DOUBLE, get_value, {'key': 'ths_short_selling_sell_vol_index'}),
'ths_short_selling_vol_balance_index': (DOUBLE, get_value, {'key': 'ths_short_selling_vol_balance_index'}),
'ths_state_owned_lp_shares_index': (DOUBLE, get_value, {'key': 'ths_state_owned_lp_shares_index'}),
'ths_state_owned_shares_index': (DOUBLE, get_value, {'key': 'ths_state_owned_shares_index'}),
'ths_total_domestic_lp_shares_index': (DOUBLE, get_value, {'key': 'ths_total_domestic_lp_shares_index'}),
'ths_total_float_shares_index': (DOUBLE, get_value, {'key': 'ths_total_float_shares_index'}),
'ths_total_float_shares_ratio_index': (DOUBLE, get_value, {'key': 'ths_total_float_shares_ratio_index'}),
'ths_total_limited_ashare_ratio_index': (DOUBLE, get_value, {'key': 'ths_total_limited_ashare_ratio_index'}),
'ths_total_shares_index': (DOUBLE, get_value, {'key': 'ths_total_shares_index'}),
'ths_unfloat_shares_index': (DOUBLE, get_value, {'key': 'ths_unfloat_shares_index'}),
'ths_annual_volatility_index': (DOUBLE, get_value, {'key': 'ths_annual_volatility_index'}),
}
col_merge_rule_dic = {
key: (val[1], val[2]) for key, val in col_merge_dic.items()
}
dtype = {
key: val[0] for key, val in col_merge_dic.items()
}
data_df = merge_data(joined_df, col_merge_rule_dic)
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
return data_df
def is_not_nan_or_none(x):
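prefer_left, mean_value and merge_data in these examples are project helpers whose code is not included in this listing. As a hedged illustration only, a prefer_left-style rule can be approximated in pandas by falling back from the left column to the right one:
import pandas as pd

def prefer_left_sketch(joined_df: pd.DataFrame, left_key: str, right_key: str) -> pd.Series:
    # take the left column and fill gaps from the right column; an approximation of the
    # merge rule's apparent intent, not the project's actual implementation
    return joined_df[left_key].combine_first(joined_df[right_key])

joined_df = pd.DataFrame({"ths_code": ["600000.SH", None], "wind_code": ["600000.SH", "000001.SZ"]})
print(prefer_left_sketch(joined_df, "ths_code", "wind_code").tolist())
# ['600000.SH', '000001.SZ']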
0
Source : stock.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def merge_stock_info():
"""
Merge wind and ifind data into the table of the corresponding name
:return:
"""
table_name = 'stock_info'
logging.info("更新 %s 开始", table_name)
has_table = engine_md.has_table(table_name)
ifind_table_name = 'ifind_{table_name}'.format(table_name=table_name)
wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
# ifind_model = TABLE_MODEL_DIC[ifind_table_name]
# wind_model = TABLE_MODEL_DIC[wind_table_name]
# with with_db_session(engine_md) as session:
# session.query(ifind_model, wind_model).filter(ifind_model.c.ths_code == wind_model.c.wind_code)
ifind_sql_str = "select * from {table_name}".format(table_name=ifind_table_name)
wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
ifind_df = pd.read_sql(ifind_sql_str, engine_md) # , index_col='ths_code'
wind_df = pd.read_sql(wind_sql_str, engine_md) # , index_col='wind_code'
joined_df = pd.merge(ifind_df, wind_df, how='outer',
left_on='ths_code', right_on='wind_code', indicator='indicator_column')
col_merge_dic = {
'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
'sec_name': (String(20), prefer_left, {'left_key': 'ths_stock_short_name_stock', 'right_key': 'sec_name'}),
'cn_name': (String(100), get_value, {'key': 'ths_corp_cn_name_stock'}),
'en_name': (String(100), get_value, {'key': 'ths_corp_name_en_stock'}),
'delist_date': (Date, prefer_left, {'left_key': 'ths_delist_date_stock', 'right_key': 'delist_date'}),
'ipo_date': (Date, prefer_left, {'left_key': 'ths_ipo_date_stock', 'right_key': 'ipo_date'}),
'pre_name': (Text, prefer_left, {'left_key': 'ths_corp_name_en_stock', 'right_key': 'prename'}),
'established_date': (Date, get_value, {'key': 'ths_established_date_stock'}),
'exch_city': (String(20), get_value, {'key': 'exch_city'}),
'exch_cn': (String(20), get_value, {'key': 'ths_listing_exchange_stock'}),
'exch_eng': (String(20), get_value, {'key': 'exch_eng'}),
'stock_code': (String(20), prefer_left, {'left_key': 'ths_stock_code_stock', 'right_key': 'trade_code'}),
'mkt': (String(20), get_value, {'key': 'mkt'}),
}
col_merge_rule_dic = {
key: (val[1], val[2]) for key, val in col_merge_dic.items()
}
dtype = {
key: val[0] for key, val in col_merge_dic.items()
}
data_df = merge_data(joined_df, col_merge_rule_dic)
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
return data_df
@app.task
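The col_merge_dic triples in these functions are always split the same way: elements 1 and 2 feed the merge rules, while element 0 becomes the dtype handed to the insert helper, so a String(20) entry ends up as a VARCHAR(20) column. A small, hedged reproduction of that split with stand-in rule names:
from sqlalchemy.types import Date, String

col_merge_dic = {
    "unique_code": (String(20), "prefer_left", {"left_key": "ths_code", "right_key": "wind_code"}),
    "ipo_date": (Date, "prefer_left", {"left_key": "ths_ipo_date_stock", "right_key": "ipo_date"}),
}
col_merge_rule_dic = {key: (val[1], val[2]) for key, val in col_merge_dic.items()}
dtype = {key: val[0] for key, val in col_merge_dic.items()}
# dtype == {"unique_code": String(20), "ipo_date": Date}; pandas' to_sql (or the project's
# bunch_insert_on_duplicate_update helper) uses this map when it has to create the target table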
0
Source : stock.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def merge_stock_daily(date_from=None):
"""
Merge wind and ifind data into the table of the corresponding name
:param date_from:
:return:
"""
table_name = 'stock_daily'
logging.info("更新 %s 开始", table_name)
has_table = engine_md.has_table(table_name)
if date_from is None and has_table:
sql_str = "select adddate(max(trade_date),1) from {table_name}".format(table_name=table_name)
with with_db_session(engine_md) as session:
date_from = date_2_str(session.execute(sql_str).scalar())
ifind_table_ds_name = 'ifind_{table_name}_ds'.format(table_name=table_name)
ifind_table_his_name = 'ifind_{table_name}_his'.format(table_name=table_name)
wind_table_name = 'wind_{table_name}'.format(table_name=table_name)
if date_from is None:
ifind_his_sql_str = "select * from {table_name}".format(table_name=ifind_table_his_name)
ifind_ds_sql_str = "select * from {table_name}".format(table_name=ifind_table_ds_name)
wind_sql_str = "select * from {table_name}".format(table_name=wind_table_name)
ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md) # , index_col='ths_code'
ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md) # , index_col='ths_code'
wind_df = pd.read_sql(wind_sql_str, engine_md) # , index_col='wind_code'
else:
ifind_his_sql_str = "select * from {table_name} where time >= %s".format(table_name=ifind_table_his_name)
ifind_ds_sql_str = "select * from {table_name} where time >= %s".format(table_name=ifind_table_ds_name)
wind_sql_str = "select * from {table_name} where trade_date >= %s".format(table_name=wind_table_name)
ifind_his_df = pd.read_sql(ifind_his_sql_str, engine_md, params=[date_from]) # , index_col='ths_code'
ifind_ds_df = pd.read_sql(ifind_ds_sql_str, engine_md, params=[date_from]) # , index_col='ths_code'
wind_df = pd.read_sql(wind_sql_str, engine_md, params=[date_from]) # , index_col='wind_code'
ifind_df = pd.merge(ifind_his_df, ifind_ds_df, how='outer',
on=['ths_code', 'time'])
joined_df = pd.merge(ifind_df, wind_df, how='outer',
left_on=['ths_code', 'time'], right_on=['wind_code', 'trade_date'],
indicator='indicator_column')
col_merge_dic = {
'unique_code': (String(20), prefer_left, {'left_key': 'ths_code', 'right_key': 'wind_code'}),
'trade_date': (Date, prefer_left, {'left_key': 'time', 'right_key': 'trade_date'}),
'open': (DOUBLE, mean_value, {
'left_key': 'open_x', 'right_key': 'open_y',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'high': (DOUBLE, mean_value, {
'left_key': 'high_x', 'right_key': 'high_y',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'low': (DOUBLE, mean_value, {
'left_key': 'low_x', 'right_key': 'low_y',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
# TODO: for reasons unknown, some close values returned by the wind interface are inaccurate
'close': (DOUBLE, prefer_left, {
'left_key': 'close_x', 'right_key': 'close_y',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'volume': (DOUBLE, mean_value, {
'left_key': 'volume_x', 'right_key': 'volume_y',
'warning_accuracy': 1, 'primary_keys': ('ths_code', 'time')}),
'amount': (DOUBLE, mean_value, {
'left_key': 'amount', 'right_key': 'amt',
'warning_accuracy': 1, 'primary_keys': ('ths_code', 'time')}),
# Total share capital: the THS (iFinD) totalShares field is keyed to the change date, while wind total_shares is keyed to the announcement date,
# so when they conflict the wind value should take precedence
'total_shares': (DOUBLE, prefer_right, {
'left_key': 'totalShares', 'right_key': 'total_shares'}),
# 'susp_days': (Integer, '***', {
# 'left_key': 'ths_up_and_down_status_stock', 'right_key': 'susp_days', 'other_key': 'trade_status',
# 'primary_keys': ('ths_code', 'time')}),
'max_up_or_down': (Integer, max_up_or_down, {
'ths_key': 'ths_up_and_down_status_stock', 'wind_key': 'maxupordown',
'primary_keys': ('ths_code', 'time')}),
'total_capital': (DOUBLE, get_value, {'key': 'totalCapital'}),
'float_capital': (DOUBLE, get_value, {'key': 'floatCapitalOfAShares'}),
'pct_chg': (DOUBLE, mean_value, {
'left_key': 'changeRatio', 'right_key': 'pct_chg',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'float_a_shares': (DOUBLE, get_value, {'key': 'floatSharesOfAShares'}), # 对应wind float_a_shares
'free_float_shares': (DOUBLE, get_value, {'key': 'free_float_shares'}), # 对应 ths ths_free_float_shares_stock
# PE_TTM: THS ths_pe_ttm_stock uses the financial reporting date as its base date, while wind pe_ttm is based on the report period,
# so when they differ the wind value should take precedence
'pe_ttm': (DOUBLE, prefer_right, {
'left_key': 'ths_pe_ttm_stock', 'right_key': 'pe_ttm',
'warning_accuracy': 0.01, 'primary_keys': ('ths_code', 'time')}),
'pe': (DOUBLE, get_value, {'key': 'pe'}),
'pb': (DOUBLE, get_value, {'key': 'pb'}),
'ps': (DOUBLE, get_value, {'key': 'ps'}),
'pcf': (DOUBLE, get_value, {'key': 'pcf'}),
}
col_merge_rule_dic = {
key: (val[1], val[2]) for key, val in col_merge_dic.items()
}
dtype = {
key: val[0] for key, val in col_merge_dic.items()
}
data_df = merge_data(joined_df, col_merge_rule_dic)
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype)
logger.info('%s 新增或更新记录 %d 条', table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
return data_df
def max_up_or_down(data_s: pd.Series, ths_key, wind_key, primary_keys=None, **kwargs):
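The date_from handling above is the incremental-load pattern shared by all the *_daily functions: take MAX(trade_date) + 1 day from the target table and pass it as a bind parameter to pandas.read_sql. A condensed, hedged sketch with an illustrative engine URL:
import pandas as pd
from sqlalchemy import create_engine

engine_md = create_engine("mysql+pymysql://user:password@localhost/md_demo")  # hypothetical DSN

table_name = "stock_daily"
date_from = None
if engine_md.has_table(table_name):  # SQLAlchemy 1.x style, as in the snippet above
    date_from = pd.read_sql(
        "select adddate(max(trade_date), 1) from {}".format(table_name), engine_md).iloc[0, 0]
if date_from is None:
    wind_df = pd.read_sql("select * from wind_stock_daily", engine_md)
else:
    wind_df = pd.read_sql(
        "select * from wind_stock_daily where trade_date >= %s", engine_md, params=[date_from])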
0
Source : web_crawler.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def start_web_crawler(date_from=None):
chrome_opt = webdriver.ChromeOptions()
prefs = {"profile.managed_default_content_settings.images": 2}
chrome_opt.add_experimental_option("prefs", prefs)
browser = webdriver.Chrome(executable_path=config.DRIVER_PATH, chrome_options=chrome_opt)
browser.get('https://gc.mysteel.com/huizong/index.html')
time.sleep(1)
# 点击登录
elem = browser.find_element_by_css_selector("[class = 'loginBtn btn']")
elem.click()
# 输入用户名
elem = browser.find_element_by_class_name("userName")
elem.send_keys(config.MYSTEEL_USERNAME)
# 输入密码
elem = browser.find_element_by_css_selector("[class = 'pasd pasdPsd hide']")
elem.send_keys(config.MYSTEEL_PASSWORD)
# 提交表单
elem = browser.find_element_by_class_name("loginBtnBig")
elem.click()
time.sleep(1)
# 点击进入下一个链接
elem = browser.find_element_by_xpath("/html/body/div[7]/div[1]/div[2]/ul[1]/li[1]/a")
elem.click()
browser.switch_to.window(browser.window_handles[1])
time.sleep(1)
# 获取数据发布日期
date_str = browser.find_element_by_id("publishtime").text
date_time = datetime.datetime.strptime(date_str.split(" ")[0], '%Y-%m-%d')
# 将表格数据储存在一个dataframe中
array = []
table = browser.find_element_by_xpath("/html/body/div[8]/div/div[3]/table/tbody")
trs = table.find_elements_by_tag_name("tr")
for tr in trs:
tds = tr.find_elements_by_tag_name("td")
row_array = []
for td in tds:
row_array.append(td.text)
array.append(row_array)
df = pd.DataFrame(array)
df1 = df.head(6).T.tail(df.shape[1]-1)
df1['datetime'] = date_time
df1.columns = ['city', 'price', 'growth_rate', 'producer', 'wighting_mode', '30ds_price_avg', 'publish_date']
df2 = df.head(1)
df2 = df2.append(df.tail(5))
df2 = df2.T.tail(df.shape[1]-1)
df2['datetime'] = date_time
df2.columns = ['city', 'price', 'growth_rate', 'producer', 'wighting_mode', '30ds_price_avg', 'publish_date']
df1['price'] = df1['price'].map(lambda x: Decimal(x))
df1['growth_rate'] = df1['growth_rate'].map(lambda x: Decimal(x))
df1['30ds_price_avg'] = df1['30ds_price_avg'].map(lambda x: Decimal(x))
df2['price'] = df2['price'].map(lambda x: Decimal(x))
df2['growth_rate'] = df2['growth_rate'].map(lambda x: Decimal(x))
df2['30ds_price_avg'] = df2['30ds_price_avg'].map(lambda x: Decimal(x))
table_name1 = "mysteel_hrb400_12mm"
table_name2 = "mysteel_hrb400_20mm"
param_list = [
('city', String(50)),
('price', DOUBLE),
('growth_rate', DOUBLE),
('producer', String(50)),
('wighting_mode', String(50)),
('30ds_price_avg', DOUBLE),
('publish_date', Date,)
]
# 设置 dtype
dtype = {key: val for key, val in param_list}
create_pk_str = """ALTER TABLE %s ADD PRIMARY KEY (`city`, `publish_date`)"""
logger.info("更新 %s 开始", table_name1)
has_table1 = engine_model.has_table(table_name1)
bunch_insert_on_duplicate_update(df1, table_name1, engine_model, dtype=dtype)
if not has_table1:
engine_model.execute(create_pk_str % table_name1)
logger.info("更新 %s 开始", table_name2)
has_table2 = engine_model.has_table(table_name2)
bunch_insert_on_duplicate_update(df2, table_name2, engine_model, dtype=dtype)
if not has_table2:
engine_model.execute(create_pk_str % table_name2)
browser.close()
if __name__ == "__main__":
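For the table scraping above, one alternative worth noting: once Selenium has rendered the page, pandas.read_html can parse every table in the page source in a single call. This is a hedged sketch, not the project's approach — it requires lxml to be installed, and which table index holds the prices is an assumption:
import pandas as pd
from selenium import webdriver

browser = webdriver.Chrome()  # driver options as in the snippet above
browser.get('https://gc.mysteel.com/huizong/index.html')
tables = pd.read_html(browser.page_source)  # list of DataFrames, one per table element
df = tables[0]  # assumed index of the price table
browser.close()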
0
Source : stock_fin.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_stock_fin():
"""
Download financial statement data via the pytdx interface
:return:
"""
table_name = 'pytdx_stock_fin'
has_table = engine_md.has_table(table_name)
report_date_latest = None
if has_table:
with with_db_session(engine_md) as session:
sql_str = "select max(report_date) from {table_name}".format(table_name=table_name)
report_date_latest = session.execute(sql_str).scalar()
# For the meaning of the financial indicator names, see issue #163 https://github.com/QUANTAXIS/QUANTAXIS/blob/master/QUANTAXIS/QAData/financial_mean.py
financial_dict = {
# 1.每股指标
'001基本每股收益': 'EPS',
'002扣除非经常性损益每股收益': 'deductEPS',
'003每股未分配利润': 'undistributedProfitPerShare',
'004每股净资产': 'netAssetsPerShare',
'005每股资本公积金': 'capitalReservePerShare',
'006净资产收益率': 'ROE',
'007每股经营现金流量': 'operatingCashFlowPerShare',
# 2. 资产负债表 BALANCE SHEET
# 2.1 资产
# 2.1.1 流动资产
'008货币资金': 'moneyFunds',
'009交易性金融资产': 'tradingFinancialAssets',
'010应收票据': 'billsReceivables',
'011应收账款': 'accountsReceivables',
'012预付款项': 'prepayments',
'013其他应收款': 'otherReceivables',
'014应收关联公司款': 'interCompanyReceivables',
'015应收利息': 'interestReceivables',
'016应收股利': 'dividendsReceivables',
'017存货': 'inventory',
'018其中:消耗性生物资产': 'expendableBiologicalAssets',
'019一年内到期的非流动资产': 'noncurrentAssetsDueWithinOneYear',
'020其他流动资产': 'otherLiquidAssets',
'021流动资产合计': 'totalLiquidAssets',
# 2.1.2 非流动资产
'022可供出售金融资产': 'availableForSaleSecurities',
'023持有至到期投资': 'heldToMaturityInvestments',
'024长期应收款': 'longTermReceivables',
'025长期股权投资': 'longTermEquityInvestment',
'026投资性房地产': 'investmentRealEstate',
'027固定资产': 'fixedAssets',
'028在建工程': 'constructionInProgress',
'029工程物资': 'engineerMaterial',
'030固定资产清理': 'fixedAssetsCleanUp',
'031生产性生物资产': 'productiveBiologicalAssets',
'032油气资产': 'oilAndGasAssets',
'033无形资产': 'intangibleAssets',
'034开发支出': 'developmentExpenditure',
'035商誉': 'goodwill',
'036长期待摊费用': 'longTermDeferredExpenses',
'037递延所得税资产': 'deferredIncomeTaxAssets',
'038其他非流动资产': 'otherNonCurrentAssets',
'039非流动资产合计': 'totalNonCurrentAssets',
'040资产总计': 'totalAssets',
# 2.2 负债
# 2.2.1 流动负债
'041短期借款': 'shortTermLoan',
'042交易性金融负债': 'tradingFinancialLiabilities',
'043应付票据': 'billsPayable',
'044应付账款': 'accountsPayable',
'045预收款项': 'advancedReceivable',
'046应付职工薪酬': 'employeesPayable',
'047应交税费': 'taxPayable',
'048应付利息': 'interestPayable',
'049应付股利': 'dividendPayable',
'050其他应付款': 'otherPayable',
'051应付关联公司款': 'interCompanyPayable',
'052一年内到期的非流动负债': 'noncurrentLiabilitiesDueWithinOneYear',
'053其他流动负债': 'otherCurrentLiabilities',
'054流动负债合计': 'totalCurrentLiabilities',
# 2.2.2 非流动负债
'055长期借款': 'longTermLoans',
'056应付债券': 'bondsPayable',
'057长期应付款': 'longTermPayable',
'058专项应付款': 'specialPayable',
'059预计负债': 'estimatedLiabilities',
'060递延所得税负债': 'defferredIncomeTaxLiabilities',
'061其他非流动负债': 'otherNonCurrentLiabilities',
'062非流动负债合计': 'totalNonCurrentLiabilities',
'063负债合计': 'totalLiabilities',
# 2.3 所有者权益
'064实收资本(或股本)': 'totalShare',
'065资本公积': 'capitalReserve',
'066盈余公积': 'surplusReserve',
'067减:库存股': 'treasuryStock',
'068未分配利润': 'undistributedProfits',
'069少数股东权益': 'minorityEquity',
'070外币报表折算价差': 'foreignCurrencyReportTranslationSpread',
'071非正常经营项目收益调整': 'abnormalBusinessProjectEarningsAdjustment',
'072所有者权益(或股东权益)合计': 'totalOwnersEquity',
'073负债和所有者(或股东权益)合计': 'totalLiabilitiesAndOwnersEquity',
# 3. 利润表
'074其中:营业收入': 'operatingRevenue',
'075其中:营业成本': 'operatingCosts',
'076营业税金及附加': 'taxAndSurcharges',
'077销售费用': 'salesCosts',
'078管理费用': 'managementCosts',
'079堪探费用': 'explorationCosts',
'080财务费用': 'financialCosts',
'081资产减值损失': 'assestsDevaluation',
'082加:公允价值变动净收益': 'profitAndLossFromFairValueChanges',
'083投资收益': 'investmentIncome',
'084其中:对联营企业和合营企业的投资收益': 'investmentIncomeFromAffiliatedBusinessAndCooperativeEnterprise',
'085影响营业利润的其他科目': 'otherSubjectsAffectingOperatingProfit',
'086三、营业利润': 'operatingProfit',
'087加:补贴收入': 'subsidyIncome',
'088营业外收入': 'nonOperatingIncome',
'089减:营业外支出': 'nonOperatingExpenses',
'090其中:非流动资产处置净损失': 'netLossFromDisposalOfNonCurrentAssets',
'091加:影响利润总额的其他科目': 'otherSubjectsAffectTotalProfit',
'092四、利润总额': 'totalProfit',
'093减:所得税': 'incomeTax',
'094加:影响净利润的其他科目': 'otherSubjectsAffectNetProfit',
'095五、净利润': 'netProfit',
'096归属于母公司所有者的净利润': 'netProfitsBelongToParentCompanyOwner',
'097少数股东损益': 'minorityProfitAndLoss',
# 4. 现金流量表
# 4.1 经营活动 Operating
'098销售商品、提供劳务收到的现金': 'cashFromGoodsSalesorOrRenderingOfServices',
'099收到的税费返还': 'refundOfTaxAndFeeReceived',
'100收到其他与经营活动有关的现金': 'otherCashRelatedBusinessActivitiesReceived',
'101经营活动现金流入小计': 'cashInflowsFromOperatingActivities',
'102购买商品、接受劳务支付的现金': 'buyingGoodsReceivingCashPaidForLabor',
'103支付给职工以及为职工支付的现金': 'paymentToEmployeesAndCashPaidForEmployees',
'104支付的各项税费': 'paymentsOfVariousTaxes',
'105支付其他与经营活动有关的现金': 'paymentOfOtherCashRelatedToBusinessActivities',
'106经营活动现金流出小计': 'cashOutflowsFromOperatingActivities',
'107经营活动产生的现金流量净额': 'netCashFlowsFromOperatingActivities',
# 4.2 投资活动 Investment
'108收回投资收到的现金': 'cashReceivedFromInvestmentReceived',
'109取得投资收益收到的现金': 'cashReceivedFromInvestmentIncome',
'110处置固定资产、无形资产和其他长期资产收回的现金净额': 'disposalOfNetCashForRecoveryOfFixedAssets',
'111处置子公司及其他营业单位收到的现金净额': 'disposalOfNetCashReceivedFromSubsidiariesAndOtherBusinessUnits',
'112收到其他与投资活动有关的现金': 'otherCashReceivedRelatingToInvestingActivities',
'113投资活动现金流入小计': 'cashinFlowsFromInvestmentActivities',
'114购建固定资产、无形资产和其他长期资产支付的现金': 'cashForThePurchaseConstructionPaymentOfFixedAssets',
'115投资支付的现金': 'cashInvestment',
'116取得子公司及其他营业单位支付的现金净额': 'acquisitionOfNetCashPaidBySubsidiariesAndOtherBusinessUnits',
'117支付其他与投资活动有关的现金': 'otherCashPaidRelatingToInvestingActivities',
'118投资活动现金流出小计': 'cashOutflowsFromInvestmentActivities',
'119投资活动产生的现金流量净额': 'netCashFlowsFromInvestingActivities',
# 4.3 筹资活动 Financing
'120吸收投资收到的现金': 'cashReceivedFromInvestors',
'121取得借款收到的现金': 'cashFromBorrowings',
'122收到其他与筹资活动有关的现金': 'otherCashReceivedRelatingToFinancingActivities',
'123筹资活动现金流入小计': 'cashInflowsFromFinancingActivities',
'124偿还债务支付的现金': 'cashPaymentsOfAmountBorrowed',
'125分配股利、利润或偿付利息支付的现金': 'cashPaymentsForDistrbutionOfDividendsOrProfits',
'126支付其他与筹资活动有关的现金': 'otherCashPaymentRelatingToFinancingActivities',
'127筹资活动现金流出小计': 'cashOutflowsFromFinancingActivities',
'128筹资活动产生的现金流量净额': 'netCashFlowsFromFinancingActivities',
# 4.4 汇率变动
'129四、汇率变动对现金的影响': 'effectOfForeignExchangRateChangesOnCash',
'130四(2)、其他原因对现金的影响': 'effectOfOtherReasonOnCash',
# 4.5 现金及现金等价物净增加
'131五、现金及现金等价物净增加额': 'netIncreaseInCashAndCashEquivalents',
'132期初现金及现金等价物余额': 'initialCashAndCashEquivalentsBalance',
# 4.6 期末现金及现金等价物余额
'133期末现金及现金等价物余额': 'theFinalCashAndCashEquivalentsBalance',
# 4.x 补充项目 Supplementary Schedule:
# 现金流量附表项目 Indirect Method
# 4.x.1 将净利润调节为经营活动现金流量 Convert net profit to cash flow from operating activities
'134净利润': 'netProfitFromOperatingActivities',
'135资产减值准备': 'provisionForAssetsLosses',
'136固定资产折旧、油气资产折耗、生产性生物资产折旧': 'depreciationForFixedAssets',
'137无形资产摊销': 'amortizationOfIntangibleAssets',
'138长期待摊费用摊销': 'amortizationOfLong_termDeferredExpenses',
'139处置固定资产、无形资产和其他长期资产的损失': 'lossOfDisposingFixedAssetsIntangibleAssetsAndOtherLongTermAssets',
'140固定资产报废损失': 'scrapLossOfFixedAssets',
'141公允价值变动损失': 'lossFromFairValueChange',
'142财务费用': 'financialExpenses',
'143投资损失': 'investmentLosses',
'144递延所得税资产减少': 'decreaseOfDeferredTaxAssets',
'145递延所得税负债增加': 'increaseOfDeferredTaxLiabilities',
'146存货的减少': 'decreaseOfInventory',
'147经营性应收项目的减少': 'decreaseOfOperationReceivables',
'148经营性应付项目的增加': 'increaseOfOperationPayables',
'149其他': 'others',
'150经营活动产生的现金流量净额2': 'netCashFromOperatingActivities2',
# 4.x.2 不涉及现金收支的投资和筹资活动 Investing and financing activities not involved in cash
'151债务转为资本': 'debtConvertedToCSapital',
'152一年内到期的可转换公司债券': 'convertibleBondMaturityWithinOneYear',
'153融资租入固定资产': 'leaseholdImprovements',
# 4.x.3 现金及现金等价物净增加情况 Net increase of cash and cash equivalents
'154现金的期末余额': 'cashEndingBal',
'155现金的期初余额': 'cashBeginingBal',
'156现金等价物的期末余额': 'cashEquivalentsEndingBal',
'157现金等价物的期初余额': 'cashEquivalentsBeginningBal',
'158现金及现金等价物净增加额': 'netIncreaseOfCashAndCashEquivalents',
# 5. 偿债能力分析
'159流动比率': 'currentRatio', # 流动资产/流动负债
'160速动比率': 'acidTestRatio', # (流动资产-存货)/流动负债
'161现金比率(%)': 'cashRatio', # (货币资金+有价证券)÷流动负债
'162利息保障倍数': 'interestCoverageRatio', # (利润总额+财务费用(仅指利息费用部份))/利息费用
'163非流动负债比率(%)': 'noncurrentLiabilitiesRatio',
'164流动负债比率(%)': 'currentLiabilitiesRatio',
'165现金到期债务比率(%)': 'cashDebtRatio', # 企业经营现金净流入/(本期到期长期负债+本期应付票据)
'166有形资产净值债务率(%)': 'debtToTangibleAssetsRatio',
'167权益乘数(%)': 'equityMultiplier', # 资产总额/股东权益总额
'168股东的权益/负债合计(%)': 'equityDebtRatio', # 权益负债率
'169有形资产/负债合计(%)': 'tangibleAssetDebtRatio', # 有形资产负债率
'170经营活动产生的现金流量净额/负债合计(%)': 'netCashFlowsFromOperatingActivitiesDebtRatio',
'171EBITDA/负债合计(%)': 'EBITDA_Liabilities',
# 6. 经营效率分析
# 销售收入÷平均应收账款=销售收入\(0.5 x(应收账款期初+期末))
'172应收帐款周转率': 'turnoverRatioOfReceivable',
'173存货周转率': 'turnoverRatioOfInventory',
# (存货周转天数+应收帐款周转天数-应付帐款周转天数+预付帐款周转天数-预收帐款周转天数)/365
'174运营资金周转率': 'turnoverRatioOfOperatingAssets',
'175总资产周转率': 'turnoverRatioOfTotalAssets',
'176固定资产周转率': 'turnoverRatioOfFixedAssets', # 企业销售收入与固定资产净值的比率
'177应收帐款周转天数': 'daysSalesOutstanding', # 企业从取得应收账款的权利到收回款项、转换为现金所需要的时间
'178存货周转天数': 'daysSalesOfInventory', # 企业从取得存货开始,至消耗、销售为止所经历的天数
'179流动资产周转率': 'turnoverRatioOfCurrentAssets', # 流动资产周转率(次)=主营业务收入/平均流动资产总额
'180流动资产周转天数': 'daysSalesofCurrentAssets',
'181总资产周转天数': 'daysSalesofTotalAssets',
'182股东权益周转率': 'equityTurnover', # 销售收入/平均股东权益
# 7. 发展能力分析
'183营业收入增长率(%)': 'operatingIncomeGrowth',
'184净利润增长率(%)': 'netProfitGrowthRate', # NPGR 利润总额-所得税
'185净资产增长率(%)': 'netAssetsGrowthRate',
'186固定资产增长率(%)': 'fixedAssetsGrowthRate',
'187总资产增长率(%)': 'totalAssetsGrowthRate',
'188投资收益增长率(%)': 'investmentIncomeGrowthRate',
'189营业利润增长率(%)': 'operatingProfitGrowthRate',
'190暂无': 'None1',
'191暂无': 'None2',
'192暂无': 'None3',
# 8. 获利能力分析
'193成本费用利润率(%)': 'rateOfReturnOnCost',
'194营业利润率': 'rateOfReturnOnOperatingProfit',
'195营业税金率': 'rateOfReturnOnBusinessTax',
'196营业成本率': 'rateOfReturnOnOperatingCost',
'197净资产收益率': 'rateOfReturnOnCommonStockholdersEquity',
'198投资收益率': 'rateOfReturnOnInvestmentIncome',
'199销售净利率(%)': 'rateOfReturnOnNetSalesProfit',
'200总资产报酬率': 'rateOfReturnOnTotalAssets',
'201净利润率': 'netProfitMargin',
'202销售毛利率(%)': 'rateOfReturnOnGrossProfitFromSales',
'203三费比重': 'threeFeeProportion',
'204管理费用率': 'ratioOfChargingExpense',
'205财务费用率': 'ratioOfFinancialExpense',
'206扣除非经常性损益后的净利润': 'netProfitAfterExtraordinaryGainsAndLosses',
'207息税前利润(EBIT)': 'EBIT',
'208息税折旧摊销前利润(EBITDA)': 'EBITDA',
'209EBITDA/营业总收入(%)': 'EBITDA_GrossRevenueRate',
# 9. 资本结构分析
'210资产负债率(%)': 'assetsLiabilitiesRatio',
'211流动资产比率': 'currentAssetsRatio', # 期末的流动资产除以所有者权益
'212货币资金比率': 'monetaryFundRatio',
'213存货比率': 'inventoryRatio',
'214固定资产比率': 'fixedAssetsRatio',
'215负债结构比': 'liabilitiesStructureRatio',
'216归属于母公司股东权益/全部投入资本(%)': 'shareholdersOwnershipOfAParentCompany_TotalCapital',
'217股东的权益/带息债务(%)': 'shareholdersInterest_InterestRateDebtRatio',
'218有形资产/净债务(%)': 'tangibleAssets_NetDebtRatio',
# 10. 现金流量分析
'219每股经营性现金流(元)': 'operatingCashFlowPerShareY',
'220营业收入现金含量(%)': 'cashOfOperatingIncome',
'221经营活动产生的现金流量净额/经营活动净收益(%)': 'netOperatingCashFlow_netOperationProfit',
'222销售商品提供劳务收到的现金/营业收入(%)': 'cashFromGoodsSales_OperatingRevenue',
'223经营活动产生的现金流量净额/营业收入': 'netOperatingCashFlow_OperatingRevenue',
'224资本支出/折旧和摊销': 'capitalExpenditure_DepreciationAndAmortization',
'225每股现金流量净额(元)': 'netCashFlowPerShare',
'226经营净现金比率(短期债务)': 'operatingCashFlow_ShortTermDebtRatio',
'227经营净现金比率(全部债务)': 'operatingCashFlow_LongTermDebtRatio',
'228经营活动现金净流量与净利润比率': 'cashFlowRateAndNetProfitRatioOfOperatingActivities',
'229全部资产现金回收率': 'cashRecoveryForAllAssets',
# 11. 单季度财务指标
'230营业收入': 'operatingRevenueSingle',
'231营业利润': 'operatingProfitSingle',
'232归属于母公司所有者的净利润': 'netProfitBelongingToTheOwnerOfTheParentCompanySingle',
'233扣除非经常性损益后的净利润': 'netProfitAfterExtraordinaryGainsAndLossesSingle',
'234经营活动产生的现金流量净额': 'netCashFlowsFromOperatingActivitiesSingle',
'235投资活动产生的现金流量净额': 'netCashFlowsFromInvestingActivitiesSingle',
'236筹资活动产生的现金流量净额': 'netCashFlowsFromFinancingActivitiesSingle',
'237现金及现金等价物净增加额': 'netIncreaseInCashAndCashEquivalentsSingle',
# 12.股本股东
'238总股本': 'totalCapital',
'239已上市流通A股': 'listedAShares',
'240已上市流通B股': 'listedBShares',
'241已上市流通H股': 'listedHShares',
'242股东人数(户)': 'numberOfShareholders',
'243第一大股东的持股数量': 'theNumberOfFirstMajorityShareholder',
'244十大流通股东持股数量合计(股)': 'totalNumberOfTopTenCirculationShareholders',
'245十大股东持股数量合计(股)': 'totalNumberOfTopTenMajorShareholders',
# 13.机构持股
'246机构总量(家)': 'institutionNumber',
'247机构持股总量(股)': 'institutionShareholding',
'248QFII机构数': 'QFIIInstitutionNumber',
'249QFII持股量': 'QFIIShareholding',
'250券商机构数': 'brokerNumber',
'251券商持股量': 'brokerShareholding',
'252保险机构数': 'securityNumber',
'253保险持股量': 'securityShareholding',
'254基金机构数': 'fundsNumber',
'255基金持股量': 'fundsShareholding',
'256社保机构数': 'socialSecurityNumber',
'257社保持股量': 'socialSecurityShareholding',
'258私募机构数': 'privateEquityNumber',
'259私募持股量': 'privateEquityShareholding',
'260财务公司机构数': 'financialCompanyNumber',
'261财务公司持股量': 'financialCompanyShareholding',
'262年金机构数': 'pensionInsuranceAgencyNumber',
'263年金持股量': 'pensionInsuranceAgencyShareholfing',
# 14.新增指标
# [注:季度报告中,若股东同时持有非流通A股性质的股份(如同时持有流通A股和流通B股),取的是包含同时持有非流通A股性质的流通股数]
'264十大流通股东中持有A股合计(股)': 'totalNumberOfTopTenCirculationShareholdersForA',
'265第一大流通股东持股量(股)': 'firstLargeCirculationShareholdersNumber',
# [注:1.自由流通股=已流通A股-十大流通股东5%以上的A股;2.季度报告中,若股东同时持有非流通A股性质的股份(如同时持有流通A股和流通H股),5%以上的持股取的是不包含同时持有非流通A股性质的流通股数,结果可能偏大; 3.指标按报告期展示,新股在上市日的下个报告期才有数据]
'266自由流通股(股)': 'freeCirculationStock',
'267受限流通A股(股)': 'limitedCirculationAShares',
'268一般风险准备(金融类)': 'generalRiskPreparation',
'269其他综合收益(利润表)': 'otherComprehensiveIncome',
'270综合收益总额(利润表)': 'totalComprehensiveIncome',
'271归属于母公司股东权益(资产负债表)': 'shareholdersOwnershipOfAParentCompany',
'272银行机构数(家)(机构持股)': 'bankInstutionNumber',
'273银行持股量(股)(机构持股)': 'bankInstutionShareholding',
'274一般法人机构数(家)(机构持股)': 'corporationNumber',
'275一般法人持股量(股)(机构持股)': 'corporationShareholding',
'276近一年净利润(元)': 'netProfitLastYear',
'277信托机构数(家)(机构持股)': 'trustInstitutionNumber',
'278信托持股量(股)(机构持股)': 'trustInstitutionShareholding',
'279特殊法人机构数(家)(机构持股)': 'specialCorporationNumber',
'280特殊法人持股量(股)(机构持股)': 'specialCorporationShareholding',
'281加权净资产收益率(每股指标)': 'weightedROE',
'282扣非每股收益(单季度财务指标)': 'nonEPSSingle',
}
# 整理字段
for key in financial_dict.keys():
val = financial_dict[key]
val = val.strip().replace('/', '_').replace('-', '_')[:64]
financial_dict[key] = val
# 设置列名称对应关系
_pattern = re.compile(r'\d{3}')
col_name_dic = {
'col%d' % int(_pattern.search(key).group()): val
for key, val in financial_dict.items() if _pattern.search(key) is not None
}
# 设置 dtype
dtype = {val: DOUBLE for key, val in financial_dict.items() if _pattern.search(key) is not None}
dtype['ts_code'] = String(10)
dtype['report_date'] = Date
# 下载财务数据
crawler = HistoryFinancialListCrawler()
list_data = crawler.fetch_and_parse()
# print(pd.DataFrame(data=list_data))
list_count = len(list_data)
logger.debug('%d 财务数据包可用', len(list_data))
datacrawler = HistoryFinancialCrawler()
pd.set_option('display.max_columns', None)
tot_data_count = 0
_pattern_file_date = re.compile(r'(?<=gpcw)\d{8}(?=.zip)')
try:
for num, file_info in enumerate(list_data, start=1):
filename = file_info['filename']
# check whether the current file's date is later than the latest report date already in the database
if report_date_latest is not None:
m = _pattern_file_date.search(filename)
if m is None:
logger.error('filename:%s 格式匹配失败 %s', filename, _pattern_file_date)
else:
report_date_cur = str_2_date(m.group(), '%Y%m%d')
if report_date_cur <= report_date_latest:
continue
logger.info('%d/%d) 开始下载 %s 数据', num, list_count, filename)
# result = datacrawler.fetch_and_parse(
# reporthook=demo_reporthook, filename=filename, path_to_download="/tmpfile.zip")
result = fetch_and_parse(datacrawler, reporthook=demo_reporthook, filename=filename)
if result is None:
continue
data_df = datacrawler.to_df(data=result)
data_df.rename(columns=col_name_dic, inplace=True)
data_df.reset_index(inplace=True)
data_df['ts_code'] = data_df['code'].apply(lambda x: x + ".SH" if x[0] == '6' else x + '.SZ')
data_df.drop(['code'], axis=1, inplace=True)
data_count = bunch_insert_on_duplicate_update(data_df, table_name, engine_md, dtype=dtype,
myisam_if_create_table=True)
tot_data_count += data_count
finally:
logging.info("更新 %s 结束 %d 条信息被更新", table_name, tot_data_count)
if not has_table and engine_md.has_table(table_name):
create_pk_str = """ALTER TABLE {table_name}
CHANGE COLUMN `ts_code` `ts_code` VARCHAR(20) NOT NULL FIRST,
CHANGE COLUMN `report_date` `report_date` DATE NOT NULL AFTER `ts_code`,
ADD PRIMARY KEY (`ts_code`, `report_date`)""".format(table_name=table_name)
execute_sql(create_pk_str, engine_md, commit=True)
logger.info('%s 建立主键 [code, report_date]', table_name)
if __name__ == "__main__":
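The col_name_dic above renames pytdx's positional columns ('col1', 'col2', ...) to the English names in financial_dict by pulling the three-digit prefix out of each Chinese key. A tiny, hedged illustration of the same mapping on a two-entry sample:
import re

financial_dict = {
    "001基本每股收益": "EPS",
    "002扣除非经常性损益每股收益": "deductEPS",
}
_pattern = re.compile(r"\d{3}")
col_name_dic = {
    "col%d" % int(_pattern.search(key).group()): val
    for key, val in financial_dict.items() if _pattern.search(key) is not None
}
print(col_name_dic)  # {'col1': 'EPS', 'col2': 'deductEPS'}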
0
Source : future.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_future_info(chain_param=None):
"""
Update the futures contract list
:param chain_param: in celery, the result of the previous task is passed as a parameter to the following task
:return:
"""
table_name = "rqdatac_future_info"
has_table = engine_md.has_table(table_name)
logger.info("更新 %s 开始", table_name)
# 获取已存在合约列表
if has_table:
sql_str = 'select max(listed_date) from {table_name}'.format(table_name=table_name)
with with_db_session(engine_md) as session:
listed_date = session.scalar(sql_str)
if listed_date is None:
has_table = False
ndays_per_update = 60
# 获取接口参数以及参数列表
col_name_param_list = [
("order_book_id", String(20)),
# 期货代码,期货的独特的标识符(郑商所期货合约数字部分进行了补齐。例如原有代码'ZC609'补齐之后变为'ZC1609')。主力连续合约 UnderlyingSymbol+88,例如'IF88' ;指数连续合约命名规则为 UnderlyingSymbol+99
("underlying_symbol", String(20)), # 合约标的名称,例如 IF1005 的合约标的名称为'IF'
("market_tplus", TINYINT), # 交易制度。'0'表示 T+0,'1'表示 T+1,往后顺推
("symbol", String(100)), # 期货的简称,例如'沪深 1005'
("margin_rate", DOUBLE), # 期货合约的最低保证金率
("maturity_date", Date), # 期货到期日。主力连续合约与指数连续合约都为'0000-00-00'
("type", String(20)), # 合约类型,'Future'
("trading_code", String(20)), #
("exchange", String(10)), # 交易所,'DCE' - 大连商品交易所, 'SHFE' - 上海期货交易所,'CFFEX' - 中国金融期货交易所, 'CZCE'- 郑州商品交易所
("product", String(20)), # 合约种类,'Commodity'-商品期货,'Index'-股指期货,'Government'-国债期货
("contract_multiplier", SMALLINT), # 合约乘数,例如沪深 300 股指期货的乘数为 300.0
("round_lot", TINYINT), # 期货全部为 1.0
("trading_hours", String(100)), # 合约交易时间
("listed_date", Date), # 期货的上市日期。主力连续合约与指数连续合约都为'0000-00-00'
("industry_name", String(50)),
("de_listed_date", Date), # 目测与 maturity_date 相同
("underlying_order_book_id", String(20)), # 合约标的代码,目前除股指期货(IH, IF, IC)之外的期货合约,这一字段全部为'null'
]
dtype = {key: val for key, val in col_name_param_list}
if not has_table:
instrument_info_df = rqdatac.all_instruments(type='Future', market='cn')
else:
date_yestoday = date.today() - timedelta(days=1)
ndays_per_update = 60
instrument_info_df = None
for param in get_date_iter(listed_date, date_yestoday, ndays_per_update):
if instrument_info_df is None:
instrument_info_df = rqdatac.all_instruments(type='Future', market='cn', date=param)
else:
instrument_info_df_tmp = rqdatac.all_instruments(type='Future', market='cn', date=param)
instrument_info_df = instrument_info_df.append(instrument_info_df_tmp)
instrument_info_df.drop_duplicates(inplace=True)
instrument_info_df.loc[instrument_info_df['underlying_order_book_id'] == 'null', 'underlying_order_book_id'] = None
instrument_info_df.rename(columns={c: str.lower(c) for c in instrument_info_df.columns}, inplace=True)
data_count = bunch_insert_on_duplicate_update(instrument_info_df, table_name, engine_md, dtype=dtype)
logging.info("更新 %s 结束 %d 条信息被更新", table_name, data_count)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
logger.info("更新 wind_future_info 结束 %d 条记录被更新", data_count)
# update_from_info_table(table_name)
def import_future_min(chain_param=None, order_book_id_set=None, begin_time=date(2000, 1, 4)):
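When the instrument list is fetched date by date as above, collecting the per-date frames and concatenating once is the more idiomatic pandas pattern (DataFrame.append returns a new frame and has been removed in pandas 2.x). A hedged sketch with the rqdatac call replaced by a placeholder:
import pandas as pd

def fetch_instruments(trade_date):
    # placeholder for rqdatac.all_instruments(type='Future', market='cn', date=trade_date)
    return pd.DataFrame({"order_book_id": ["RB2010"], "listed_date": [trade_date]})

dates = pd.date_range("2020-01-01", periods=3, freq="60D")
frames = [fetch_instruments(d.date()) for d in dates]
instrument_info_df = pd.concat(frames, ignore_index=True).drop_duplicates()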
0
Source : future.py
with GNU General Public License v3.0
from DataIntegrationAlliance
def import_future_min(chain_param=None, order_book_id_set=None, begin_time=date(2000, 1, 4)):
"""
Load minute-level commodity futures data
"""
table_name = "rqdatac_future_min"
logger.info("更新 %s 开始", table_name)
has_table = engine_md.has_table(table_name)
param_list = [
("open", DOUBLE),
("high", DOUBLE),
("low", DOUBLE),
("close", DOUBLE),
# ("limit_up", DOUBLE),
# ("limit_down", DOUBLE),
("total_turnover", DOUBLE), # 成交额
("volume", DOUBLE), # 成交量
# ("num_trades", DOUBLE), # 成交笔数 (仅限股票日线数据)
# ("settlement", DOUBLE), # 结算价 (仅限期货日线数据)
# ("prev_settlement", DOUBLE), # 昨日结算价(仅限期货日线数据)
("open_interest", DOUBLE), # 累计持仓量(期货专用)
('trading_date', Date,), # 交易日期(仅限期货分钟线数据),对应期货夜盘的情况
('dominant_id', DOUBLE), # 实际合约的 order_book_id,对应期货 888 系主力连续合约的情况
# ('strike_price', DOUBLE), # 行权价,仅限 ETF 期权日线数据
# ('contract_multiplier', DOUBLE), # 合约乘数,仅限 ETF 期权日线数据
# ('iopv', DOUBLE), # 场内基金实时估算净值
]
field_list = [_[0] for _ in param_list]
if not has_table:
# API traffic is limited, so when the table is first created fetch only a single contract's data to build it and avoid waste
sql_str = """
SELECT order_book_id, date_frm,
if(lasttrade_date < end_date,lasttrade_date, end_date) date_to
FROM
(
SELECT info.order_book_id,listed_date date_frm, maturity_date lasttrade_date,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date,
maturity_date
FROM rqdatac_future_info info
) tt
WHERE date_frm <= if(lasttrade_date < end_date, lasttrade_date, end_date)
ORDER BY maturity_date desc limit 1
"""
logger.warning('%s 不存在,仅使用 wind_future_info 表进行计算日期范围', table_name)
else:
sql_str = """select order_book_id, date_frm, if(lasttrade_date < end_date, lasttrade_date, end_date) date_to
FROM
(
select fi.order_book_id, ifnull(trade_date_max_1, listed_date) date_frm,
maturity_date lasttrade_date,
if(hour(now()) < 16, subdate(curdate(),1), curdate()) end_date
from rqdatac_future_info fi
left outer join
(select order_book_id, DATE(adddate(max(trade_date),1)) trade_date_max_1 from {table_name} group by order_book_id) wfd
on fi.order_book_id = wfd.order_book_id
) tt
where date_frm <= if(lasttrade_date < end_date, lasttrade_date, end_date)
-- and subdate(curdate(), 360) < if(lasttrade_date < end_date, lasttrade_date, end_date)
order by date_frm desc""".format(table_name=table_name)
with with_db_session(engine_md) as session:
table = session.execute(sql_str)
# 获取date_from,date_to,将date_from,date_to做为value值
future_date_dic = {
order_book_id: (
date_from if begin_time is None or date_from is None else max([date_from, begin_time]),
date_to)
for order_book_id, date_from, date_to in table.fetchall()
if order_book_id_set is None or order_book_id in order_book_id_set
}
# 设置 dtype
dtype = {key: val for key, val in param_list}
dtype['order_book_id'] = String(20)
# 定义统一的插入函数
def insert_db(df: pd.DataFrame):
nonlocal has_table
insert_data_count = bunch_insert_on_duplicate_update(df, table_name, engine_md, dtype=dtype)
if not has_table and engine_md.has_table(table_name):
alter_table_2_myisam(engine_md, [table_name])
build_primary_key([table_name])
has_table = True
return insert_data_count
data_df_list = []
data_len = len(future_date_dic)
bulk_data_count = 0
tot_data_count = 0
try:
logger.info("%d future instrument will be handled", data_len)
for num, (order_book_id, (date_frm, date_to)) in enumerate(future_date_dic.items(), start=1):
if date_frm is None or date_to is None or date_frm > date_to:
continue
date_frm_str = date_frm.strftime(STR_FORMAT_DATE)
date_to_str = date_to.strftime(STR_FORMAT_DATE)
logger.info('%d/%d) get %s between %s and %s', num, data_len, order_book_id, date_frm_str, date_to_str)
try:
# get_price(order_book_ids, start_date='2013-01-04', end_date='2014-01-04', frequency='1d', fields=None,
# adjust_type='pre', skip_suspended=False, market='cn', expect_df=False)
data_df = rqdatac.get_price(
order_book_id, start_date=date_frm_str, end_date=date_to_str, frequency='1m',
adjust_type='none', skip_suspended=False, market='cn') # fields=field_list,
except QuotaExceeded:
logger.exception("获取数据超量")
break
if data_df is None or data_df.shape[0] == 0:
logger.warning('%d/%d) %s has no data during %s %s', num, data_len, order_book_id, date_frm_str,
date_to_str)
continue
logger.info('%d/%d) %d data of %s between %s and %s', num, data_len, data_df.shape[0], order_book_id,
date_frm_str, date_to_str)
data_df['order_book_id'] = order_book_id
data_df.index.rename('trade_date', inplace=True)
data_df.reset_index(inplace=True)
data_df.rename(columns={c: str.lower(c) for c in data_df.columns}, inplace=True)
data_df_list.append(data_df)
data_df_count = data_df.shape[0]
bulk_data_count += data_df.shape[0]
data_df_list_count = len(data_df_list)
# 仅仅调试时使用
if DEBUG and data_df_list_count > 1:
break
if data_df_list_count > 1 and bulk_data_count > 100000:
# very large volumes can exhaust memory during the update, so insert in batches
logger.info('merge data with %d df %d data', data_df_list_count, bulk_data_count)
data_df = pd.concat(data_df_list)
tot_data_count += insert_db(data_df)
logger.info("更新 %s,累计 %d 条记录被更新", table_name, tot_data_count)
data_df_list = []
bulk_data_count = 0
elif data_df_list_count == 1 and data_df_count > 50000:
# this branch only avoids the cost of concatenating large frames: if a single df is already big, insert it directly
logger.info('data_df has %d data insert db directly', data_df_count)
tot_data_count += insert_db(data_df)
logger.info("更新 %s,累计 %d 条记录被更新", table_name, tot_data_count)
data_df_list = []
bulk_data_count = 0
finally:
data_df_list_count = len(data_df_list)
if data_df_list_count > 0:
logger.info('merge data with %d df %d data', data_df_list_count, bulk_data_count)
data_df = pd.concat(data_df_list)
tot_data_count += insert_db(data_df)
logger.info("更新 %s 结束,累计 %d 条记录被更新", table_name, tot_data_count)
def get_code_list_by_types(instrument_types: list, all_if_none=True) -> list:
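The bulk_data_count bookkeeping above flushes the buffered minute bars once they exceed a row threshold so that memory stays bounded. A stripped-down, hedged sketch of the same batching idea with a generic writer callback:
import pandas as pd

def flush(frames, writer):
    # concatenate the buffered frames and hand them to the writer; returns rows written
    if not frames:
        return 0
    return writer(pd.concat(frames, ignore_index=True))

def batched_insert(frame_iter, writer, max_buffered_rows=100000):
    frames, buffered, total = [], 0, 0
    for df in frame_iter:
        frames.append(df)
        buffered += df.shape[0]
        if buffered > max_buffered_rows:
            total += flush(frames, writer)
            frames, buffered = [], 0
    total += flush(frames, writer)  # final partial batch
    return total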